diff --git a/src/main/java/org/jabref/logic/importer/fetcher/GrobidCitationFetcher.java b/src/main/java/org/jabref/logic/importer/fetcher/GrobidCitationFetcher.java index 7955e206fd7..7634a67cfd6 100644 --- a/src/main/java/org/jabref/logic/importer/fetcher/GrobidCitationFetcher.java +++ b/src/main/java/org/jabref/logic/importer/fetcher/GrobidCitationFetcher.java @@ -12,10 +12,8 @@ import org.jabref.logic.importer.ImportFormatPreferences; import org.jabref.logic.importer.ParseException; import org.jabref.logic.importer.SearchBasedFetcher; -import org.jabref.logic.importer.fileformat.BibtexParser; import org.jabref.logic.importer.util.GrobidService; import org.jabref.model.entry.BibEntry; -import org.jabref.model.util.DummyFileUpdateMonitor; import org.apache.lucene.queryparser.flexible.core.nodes.QueryNode; import org.slf4j.Logger; @@ -45,29 +43,20 @@ public GrobidCitationFetcher(ImportFormatPreferences importFormatPreferences) { * * @return A BibTeX string if extraction is successful */ - private Optional parseUsingGrobid(String plainText) throws RuntimeException { + private Optional parseUsingGrobid(String plainText) throws RuntimeException { try { - return Optional.of(grobidService.processCitation(plainText, GrobidService.ConsolidateCitations.WITH_METADATA)); + return grobidService.processCitation(plainText, importFormatPreferences, GrobidService.ConsolidateCitations.WITH_METADATA); } catch (SocketTimeoutException e) { String msg = "Connection timed out."; LOGGER.debug(msg, e); throw new RuntimeException(msg, e); - } catch (IOException e) { + } catch (IOException | ParseException e) { String msg = "Could not process citation. " + e.getMessage(); LOGGER.debug(msg, e); return Optional.empty(); } } - private Optional parseBibToBibEntry(String bibtexString) { - try { - return BibtexParser.singleFromString(bibtexString, - importFormatPreferences, new DummyFileUpdateMonitor()); - } catch (ParseException e) { - return Optional.empty(); - } - } - @Override public String getName() { return "GROBID"; @@ -82,8 +71,6 @@ public List performSearch(String searchQuery) throws FetcherException .filter(str -> !str.isBlank()) .map(this::parseUsingGrobid) .flatMap(Optional::stream) - .map(this::parseBibToBibEntry) - .flatMap(Optional::stream) .collect(Collectors.toList()); } catch (RuntimeException e) { throw new FetcherException(e.getMessage(), e.getCause()); diff --git a/src/main/java/org/jabref/logic/importer/util/GrobidService.java b/src/main/java/org/jabref/logic/importer/util/GrobidService.java index 3259a44869a..8d99a52f66c 100644 --- a/src/main/java/org/jabref/logic/importer/util/GrobidService.java +++ b/src/main/java/org/jabref/logic/importer/util/GrobidService.java @@ -4,6 +4,7 @@ import java.nio.file.Files; import java.nio.file.Path; import java.util.List; +import java.util.Optional; import org.jabref.logic.importer.ImportFormatPreferences; import org.jabref.logic.importer.ParseException; @@ -46,12 +47,12 @@ public GrobidService(String grobidServerURL) { } /** - * Calls the Grobid server for converting the citation into BibTeX + * Calls the Grobid server for converting the citation into a BibEntry * - * @return A plain BibTeX string (generated by the Grobid server) + * @return A BibEntry for the String * @throws IOException if an I/O excecption during the call ocurred or no BibTeX entry could be determiend */ - public String processCitation(String rawCitation, ConsolidateCitations consolidateCitations) throws IOException { + public Optional processCitation(String rawCitation, ImportFormatPreferences importFormatPreferences, ConsolidateCitations consolidateCitations) throws IOException, ParseException { Connection.Response response = Jsoup.connect(grobidServerURL + "/api/processCitation") .header("Accept", MediaTypes.APPLICATION_BIBTEX) .data("citations", rawCitation) @@ -66,7 +67,7 @@ public String processCitation(String rawCitation, ConsolidateCitations consolida throw new IOException("The GROBID server response does not contain anything."); } - return httpResponse; + return BibtexParser.singleFromString(httpResponse, importFormatPreferences, new DummyFileUpdateMonitor()); } public List processPDF(Path filePath, ImportFormatPreferences importFormatPreferences) throws IOException, ParseException { diff --git a/src/test/java/org/jabref/logic/importer/fetcher/GrobidCitationFetcherTest.java b/src/test/java/org/jabref/logic/importer/fetcher/GrobidCitationFetcherTest.java index a8b4f5267d4..6303152440f 100644 --- a/src/test/java/org/jabref/logic/importer/fetcher/GrobidCitationFetcherTest.java +++ b/src/test/java/org/jabref/logic/importer/fetcher/GrobidCitationFetcherTest.java @@ -8,6 +8,7 @@ import org.jabref.logic.importer.FetcherException; import org.jabref.logic.importer.ImportFormatPreferences; +import org.jabref.logic.importer.ParseException; import org.jabref.logic.importer.util.GrobidService; import org.jabref.model.entry.BibEntry; import org.jabref.model.entry.field.StandardField; @@ -116,9 +117,9 @@ public void grobidPerformSearchWithInvalidDataTest(String invalidInput) throws F } @Test - public void performSearchThrowsExceptionInCaseOfConnectionIssues() throws IOException { + public void performSearchThrowsExceptionInCaseOfConnectionIssues() throws IOException, ParseException { GrobidService grobidServiceMock = mock(GrobidService.class); - when(grobidServiceMock.processCitation(anyString(), any())).thenThrow(new SocketTimeoutException("Timeout")); + when(grobidServiceMock.processCitation(anyString(), any(), any())).thenThrow(new SocketTimeoutException("Timeout")); grobidCitationFetcher = new GrobidCitationFetcher(importFormatPreferences, grobidServiceMock); assertThrows(FetcherException.class, () -> { diff --git a/src/test/java/org/jabref/logic/importer/util/GrobidServiceTest.java b/src/test/java/org/jabref/logic/importer/util/GrobidServiceTest.java index b4a0f9e8f0d..7c7d4b1b0a8 100644 --- a/src/test/java/org/jabref/logic/importer/util/GrobidServiceTest.java +++ b/src/test/java/org/jabref/logic/importer/util/GrobidServiceTest.java @@ -12,6 +12,7 @@ import org.jabref.logic.importer.fileformat.PdfGrobidImporterTest; import org.jabref.model.entry.BibEntry; import org.jabref.model.entry.field.StandardField; +import org.jabref.model.entry.types.StandardEntryType; import org.jabref.testutils.category.FetcherTest; import org.junit.jupiter.api.BeforeAll; @@ -39,36 +40,36 @@ public static void setup() { } @Test - public void processValidCitationTest() throws IOException { - String response = grobidService.processCitation("Derwing, T. M., Rossiter, M. J., & Munro, " + + public void processValidCitationTest() throws IOException, ParseException { + BibEntry exampleBibEntry = new BibEntry(StandardEntryType.Article).withCitationKey("-1") + .withField(StandardField.AUTHOR, "Derwing, Tracey and Rossiter, Marian and Munro, Murray") + .withField(StandardField.TITLE, "Teaching Native Speakers to Listen to Foreign-accented Speech") + .withField(StandardField.JOURNAL, "Journal of Multilingual and Multicultural Development") + .withField(StandardField.DOI, "10.1080/01434630208666468") + .withField(StandardField.DATE, "2002-09") + .withField(StandardField.YEAR, "2002") + .withField(StandardField.MONTH, "9") + .withField(StandardField.PAGES, "245-259") + .withField(StandardField.VOLUME, "23") + .withField(StandardField.PUBLISHER, "Informa UK Limited") + .withField(StandardField.NUMBER, "4"); + Optional response = grobidService.processCitation("Derwing, T. M., Rossiter, M. J., & Munro, " + "M. J. (2002). Teaching native speakers to listen to foreign-accented speech. " + - "Journal of Multilingual and Multicultural Development, 23(4), 245-259.", GrobidService.ConsolidateCitations.WITH_METADATA); - String[] responseRows = response.split("\n"); - assertNotNull(response); - assertEquals('@', response.charAt(0)); - assertTrue(responseRows[1].contains("author") && responseRows[1].contains("Derwing, Tracey and Rossiter, Marian and Munro, Murray")); - assertTrue(responseRows[2].contains("title") && responseRows[2].contains("Teaching Native Speakers to Listen to Foreign-accented Speech")); - assertTrue(responseRows[3].contains("journal") && responseRows[3].contains("Journal of Multilingual and Multicultural")); - assertTrue(responseRows[4].contains("publisher") && responseRows[4].contains("Informa UK Limited")); - assertTrue(responseRows[5].contains("date") && responseRows[5].contains("2002-09")); - assertTrue(responseRows[6].contains("year") && responseRows[6].contains("2002")); - assertTrue(responseRows[7].contains("month") && responseRows[7].contains("9")); - assertTrue(responseRows[8].contains("pages") && responseRows[8].contains("245-259")); - assertTrue(responseRows[9].contains("volume") && responseRows[9].contains("23")); - assertTrue(responseRows[10].contains("number") && responseRows[10].contains("4")); - assertTrue(responseRows[11].contains("doi") && responseRows[11].contains("10.1080/01434630208666468")); + "Journal of Multilingual and Multicultural Development, 23(4), 245-259.", importFormatPreferences, GrobidService.ConsolidateCitations.WITH_METADATA); + assertTrue(response.isPresent()); + assertEquals(exampleBibEntry, response.get()); } @Test - public void processEmptyStringTest() throws IOException { - String response = grobidService.processCitation(" ", GrobidService.ConsolidateCitations.WITH_METADATA); + public void processEmptyStringTest() throws IOException, ParseException { + Optional response = grobidService.processCitation(" ", importFormatPreferences, GrobidService.ConsolidateCitations.WITH_METADATA); assertNotNull(response); - assertEquals("", response); + assertEquals(Optional.empty(), response); } @Test public void processInvalidCitationTest() { - assertThrows(IOException.class, () -> grobidService.processCitation("iiiiiiiiiiiiiiiiiiiiiiii", GrobidService.ConsolidateCitations.WITH_METADATA)); + assertThrows(IOException.class, () -> grobidService.processCitation("iiiiiiiiiiiiiiiiiiiiiiii", importFormatPreferences, GrobidService.ConsolidateCitations.WITH_METADATA)); } @Test