Skip to content

Commit

Permalink
Fulltext fetcher for IACR eprints (#9651)
Browse files Browse the repository at this point in the history
* Add fulltext fetcher for IACR eprints

* Add IACR full text fetcher to CHANGELOG

* Add tests and fixes for IACR full text fetcher

* Make checkstyle happy

* simplified test

* Update CHANGELOG.md

* Update CHANGELOG.md

---------

Co-authored-by: Siedlerchr <siedlerkiller@gmail.com>
Co-authored-by: Carl Christian Snethlage <50491877+calixtus@users.noreply.github.com>
  • Loading branch information
3 people authored Mar 6, 2023
1 parent bcd808b commit 85ab410
Show file tree
Hide file tree
Showing 4 changed files with 74 additions and 2 deletions.
5 changes: 4 additions & 1 deletion CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,10 @@ Note that this project **does not** adhere to [Semantic Versioning](http://semve
### Added

- We added a field showing the BibTeX/biblatex source for added and deleted entries in the "External Changes Resolver" dialog. [#9509](https://github.com/JabRef/jabref/issues/9509)
- Add "Attach file from URL" to right-click context menu which downloads file from URL and stores it with reference library.
- We added a full text fetcher for IACR eprints. [#9651](https://github.com/JabRef/jabref/pull/9651)
- We added "Attach file from URL" to right-click context menu to download and store a file with the reference library. [#9646](https://github.com/JabRef/jabref/issues/9646)
- We enabled updating an existing entry with data from InspireHEP. [#9351](https://github.com/JabRef/jabref/issues/9351)




Expand Down
1 change: 1 addition & 0 deletions src/main/java/org/jabref/logic/importer/WebFetchers.java
Original file line number Diff line number Diff line change
Expand Up @@ -196,6 +196,7 @@ public static Set<FulltextFetcher> getFullTextFetchers(ImportFormatPreferences i
fetchers.add(new ArXivFetcher(importFormatPreferences));
fetchers.add(new IEEE(importFormatPreferences, importerPreferences));
fetchers.add(new ApsFetcher());
fetchers.add(new IacrEprintFetcher(importFormatPreferences));

// Meta search
// fetchers.add(new JstorFetcher(importFormatPreferences));
Expand Down
Original file line number Diff line number Diff line change
@@ -1,11 +1,14 @@
package org.jabref.logic.importer.fetcher;

import java.io.IOException;
import java.net.URL;
import java.util.Objects;
import java.util.Optional;
import java.util.function.Predicate;
import java.util.regex.Pattern;

import org.jabref.logic.importer.FetcherException;
import org.jabref.logic.importer.FulltextFetcher;
import org.jabref.logic.importer.IdBasedFetcher;
import org.jabref.logic.importer.ImportFormatPreferences;
import org.jabref.logic.importer.ParseException;
Expand All @@ -17,7 +20,7 @@
import org.jabref.model.strings.StringUtil;
import org.jabref.model.util.DummyFileUpdateMonitor;

public class IacrEprintFetcher implements IdBasedFetcher {
public class IacrEprintFetcher implements FulltextFetcher, IdBasedFetcher {

public static final String NAME = "IACR eprints";

Expand All @@ -26,6 +29,7 @@ public class IacrEprintFetcher implements IdBasedFetcher {
// Accepts any string that CONTAINS four digits, a slash and three to five digits (e.g. "2017/1118").
// Pattern#asPredicate() tests with find(), not matches(), so surrounding text is tolerated.
private static final Predicate<String> IDENTIFIER_PREDICATE = Pattern.compile("\\d{4}/\\d{3,5}").asPredicate();
// NOTE(review): the three prefixes below currently hold the same base URL; they appear to be kept
// separate so each kind of request can be re-pointed independently - confirm before merging them.
private static final String CITATION_URL_PREFIX = "https://eprint.iacr.org/";
private static final String DESCRIPTION_URL_PREFIX = "https://eprint.iacr.org/";
// Base URL that findFullText prepends to the relative PDF path it extracts from the eprint page.
private static final String FULLTEXT_URL_PREFIX = "https://eprint.iacr.org/";
// Base URL of the version archive listing (usage is outside the visible part of this file).
private static final String VERSION_URL_PREFIX = "https://eprint.iacr.org/archive/versions/";

private final ImportFormatPreferences prefs;
Expand Down Expand Up @@ -130,4 +134,27 @@ private boolean isFromOrAfterYear2000(BibEntry entry) throws FetcherException {
public String getName() {
// The fetcher's display name is the shared NAME constant ("IACR eprints").
return NAME;
}

/**
 * Tries to derive the PDF location of an IACR eprint from the entry's URL field.
 * <p>
 * The page behind the URL field is loaded, the text between the download button marker
 * ({@code <a class="btn btn-sm btn-outline-dark"}) and {@code .pdf} is extracted, and the
 * relative path contained in it is appended to {@code FULLTEXT_URL_PREFIX}.
 *
 * @param entry the entry to look up; must not be {@code null}
 * @return the URL of the PDF, or an empty Optional when the entry has no URL field
 * @throws IOException if the assembled link is not a well-formed URL; presumably also when
 *                     loading the page fails (raised by a helper not shown here - confirm)
 * @throws FetcherException presumably when the button marker cannot be found in the loaded
 *                          page, e.g. for a non-IACR URL (raised by a helper not shown here)
 */
@Override
public Optional<URL> findFullText(BibEntry entry) throws IOException, FetcherException {
    Objects.requireNonNull(entry);

    Optional<String> entryUrl = entry.getField(StandardField.URL);
    if (!entryUrl.isPresent()) {
        // Without a URL there is no page to inspect.
        return Optional.empty();
    }

    String pageHtml = getHtml(entryUrl.get());
    String rawLinkFragment = getRequiredValueBetween("<a class=\"btn btn-sm btn-outline-dark\"", ".pdf", pageHtml);

    // The fragment still holds the rest of the opening tag up to and including href="/ in front
    // of the path. '.' does not match line terminators by default, so a line break preceding the
    // href attribute survives the replacement and is removed by trim() afterwards.
    String relativePdfPath = rawLinkFragment.replaceFirst(".*href=\"/", "").trim();

    return Optional.of(new URL(FULLTEXT_URL_PREFIX + relativePdfPath + ".pdf"));
}

/**
 * Reports the trust level attached to full texts found by this fetcher.
 *
 * @return always {@code TrustLevel.PREPRINT}
 */
@Override
public TrustLevel getTrustLevel() {
return TrustLevel.PREPRINT;
}
}
Original file line number Diff line number Diff line change
@@ -1,5 +1,7 @@
package org.jabref.logic.importer.fetcher;

import java.io.IOException;
import java.net.URL;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collection;
Expand Down Expand Up @@ -36,6 +38,7 @@ public class IacrEprintFetcherTest {

private IacrEprintFetcher fetcher;
private BibEntry abram2017;
private BibEntry abram2017noVersion;
private BibEntry beierle2016;
private BibEntry delgado2017;

Expand All @@ -55,6 +58,17 @@ public void setUp() {
.withField(StandardField.VERSION, "20171124:064527")
.withField(StandardField.YEAR, "2017");

abram2017noVersion = new BibEntry(StandardEntryType.Misc)
.withCitationKey("cryptoeprint:2017/1118")
.withField(StandardField.ABSTRACT, "dummy")
.withField(StandardField.AUTHOR, "Ittai Abraham and Dahlia Malkhi and Kartik Nayak and Ling Ren and Alexander Spiegelman")
.withField(StandardField.DATE, "2017-11-24")
.withField(StandardField.HOWPUBLISHED, "Cryptology ePrint Archive, Paper 2017/1118")
.withField(StandardField.NOTE, "\\url{https://eprint.iacr.org/2017/1118}")
.withField(StandardField.TITLE, "Solida: A Blockchain Protocol Based on Reconfigurable Byzantine Consensus")
.withField(StandardField.URL, "https://eprint.iacr.org/2017/1118")
.withField(StandardField.YEAR, "2017");

beierle2016 = new BibEntry(StandardEntryType.Misc)
.withCitationKey("cryptoeprint:2016/119")
.withField(StandardField.ABSTRACT, "dummy")
Expand Down Expand Up @@ -185,4 +199,31 @@ private static Stream<String> allNonWithdrawnIdsWithOldHtmlFormat() {
ids.removeAll(withdrawnIds);
return ids.stream();
}

/**
 * The abram2017 fixture is expected to resolve to a PDF below the /archive/ path.
 * NOTE(review): findFullText loads the entry's URL, so this presumably needs network access.
 */
@Test
public void getFulltextWithVersion() throws FetcherException, IOException {
    String expectedPdf = "https://eprint.iacr.org/archive/2017/1118/1511505927.pdf";

    Optional<String> actualPdf = fetcher.findFullText(abram2017).map(URL::toString);

    assertEquals(Optional.of(expectedPdf), actualPdf);
}

/**
 * The abram2017noVersion fixture (URL https://eprint.iacr.org/2017/1118, no VERSION field) is
 * expected to resolve to the plain, non-archived PDF link.
 * NOTE(review): findFullText loads the entry's URL, so this presumably needs network access.
 */
@Test
public void getFulltextWithoutVersion() throws FetcherException, IOException {
    String expectedPdf = "https://eprint.iacr.org/2017/1118.pdf";

    Optional<String> actualPdf = fetcher.findFullText(abram2017noVersion).map(URL::toString);

    assertEquals(Optional.of(expectedPdf), actualPdf);
}

/**
 * An entry without a URL field must yield an empty result.
 */
@Test
public void getFulltextWithoutUrl() throws FetcherException, IOException {
    // The fixture itself is modified (no copy is made); presumably setUp() rebuilds it per test.
    abram2017.clearField(StandardField.URL);

    assertEquals(Optional.empty(), fetcher.findFullText(abram2017));
}

/**
 * A URL field pointing to a page that is not an IACR eprint must make the lookup fail with a
 * FetcherException.
 * NOTE(review): findFullText loads the entry's URL, so this presumably needs network access.
 */
@Test
public void getFulltextWithNonIACRUrl() throws IOException {
    // The fixture itself is modified (no copy is made); presumably setUp() rebuilds it per test.
    abram2017.setField(StandardField.URL, "https://example.com");

    assertThrows(FetcherException.class, () -> fetcher.findFullText(abram2017));
}
}

0 comments on commit 85ab410

Please sign in to comment.