From 7b070007da3b4a4b666084c8afb99eac41286281 Mon Sep 17 00:00:00 2001 From: Luca Foppiano Date: Mon, 12 Aug 2024 10:38:41 +0200 Subject: [PATCH] avoid null pointer exceptions during training data generation with certain nasty documents --- .../grobid/core/engines/FullTextParser.java | 22 ++++++++++--------- 1 file changed, 12 insertions(+), 10 deletions(-) diff --git a/grobid-core/src/main/java/org/grobid/core/engines/FullTextParser.java b/grobid-core/src/main/java/org/grobid/core/engines/FullTextParser.java index 953d92f8b1..d2b1bf8661 100755 --- a/grobid-core/src/main/java/org/grobid/core/engines/FullTextParser.java +++ b/grobid-core/src/main/java/org/grobid/core/engines/FullTextParser.java @@ -1234,17 +1234,19 @@ public Document createTraining(File inputFile, "\t\t\t\t\n\t\t\t\t\t\n\n"); for (LabeledReferenceResult ref : references) { - if ( (ref.getReferenceText() != null) && (ref.getReferenceText().trim().length() > 0) ) { + if ( StringUtils.isNotBlank(ref.getReferenceText()) ) { BiblioItem bib = parsers.getCitationParser().processingString(ref.getReferenceText(), 0); - String authorSequence = bib.getAuthors(); - if ((authorSequence != null) && (authorSequence.trim().length() > 0) ) { - /*List inputs = new ArrayList(); - inputs.add(authorSequence);*/ - StringBuilder bufferName = parsers.getAuthorParser().trainingExtraction(authorSequence, false); - if ( (bufferName != null) && (bufferName.length()>0) ) { - writerName.write("\n\t\t\t\t\t\t"); - writerName.write(bufferName.toString()); - writerName.write("\n"); + if (bib != null) { + String authorSequence = bib.getAuthors(); + if (StringUtils.isNotBlank(authorSequence)) { + /*List inputs = new ArrayList(); + inputs.add(authorSequence);*/ + StringBuilder bufferName = parsers.getAuthorParser().trainingExtraction(authorSequence, false); + if ((bufferName != null) && (bufferName.length() > 0)) { + writerName.write("\n\t\t\t\t\t\t"); + writerName.write(bufferName.toString()); + writerName.write("\n"); + } } } }