Skip to content

Commit

Permalink
Add shorten DOI field formatter (koppor#343) (#5276)
Browse files Browse the repository at this point in the history
  • Loading branch information
dawidowoc authored and tobiasdiez committed Sep 8, 2019
1 parent dde32bd commit 164843b
Show file tree
Hide file tree
Showing 10 changed files with 372 additions and 25 deletions.
2 changes: 2 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,8 @@ We refer to [GitHub issues](https://github.com/JabRef/jabref/issues) by using `#

### Changed

- We added a short DOI field formatter which shortens a DOI to a more human-readable form. [koppor#343](https://github.com/koppor/jabref/issues/343)

### Fixed

### Removed
Expand Down
5 changes: 3 additions & 2 deletions src/main/java/org/jabref/logic/formatter/Formatters.java
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@
import org.jabref.logic.formatter.bibtexfields.OrdinalsToSuperscriptFormatter;
import org.jabref.logic.formatter.bibtexfields.RegexFormatter;
import org.jabref.logic.formatter.bibtexfields.RemoveBracesFormatter;
import org.jabref.logic.formatter.bibtexfields.ShortenDOIFormatter;
import org.jabref.logic.formatter.bibtexfields.UnicodeToLatexFormatter;
import org.jabref.logic.formatter.bibtexfields.UnitsToLatexFormatter;
import org.jabref.logic.formatter.casechanger.CapitalizeFormatter;
Expand Down Expand Up @@ -67,7 +68,8 @@ public static List<Formatter> getOthers() {
new OrdinalsToSuperscriptFormatter(),
new RemoveBracesFormatter(),
new UnitsToLatexFormatter(),
new EscapeUnderscoresFormatter()
new EscapeUnderscoresFormatter(),
new ShortenDOIFormatter()
);
}

Expand Down Expand Up @@ -102,5 +104,4 @@ public static Optional<Formatter> getFormatterForModifier(String modifier) {
return getAll().stream().filter(f -> f.getKey().equals(modifier)).findAny();
}
}

}
Original file line number Diff line number Diff line change
@@ -0,0 +1,59 @@
package org.jabref.logic.formatter.bibtexfields;

import java.util.Objects;
import java.util.Optional;

import org.jabref.logic.importer.util.ShortDOIService;
import org.jabref.logic.importer.util.ShortDOIServiceException;
import org.jabref.logic.l10n.Localization;
import org.jabref.model.cleanup.Formatter;
import org.jabref.model.entry.identifier.DOI;

import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/**
 * Field formatter that replaces a DOI by the short DOI obtained from {@link ShortDOIService}.
 * <p>
 * Shortening a regular DOI requires a request to the remote short DOI service. Whenever no short DOI can be
 * obtained, the field value is returned unchanged.
 */
public class ShortenDOIFormatter extends Formatter {

    private static final Logger LOGGER = LoggerFactory.getLogger(ShortenDOIFormatter.class);

    @Override
    public String getName() {
        return Localization.lang("Shorten DOI");
    }

    @Override
    public String getKey() {
        return "short_doi";
    }

    /**
     * Shortens the DOI contained in the given field value.
     *
     * @param value the field value, expected to hold a DOI in any form accepted by {@link DOI#parse(String)}
     * @return the short DOI, or {@code value} unchanged if it cannot be parsed as a DOI, already is a short DOI,
     *         or the short DOI service reported an error
     * @throws NullPointerException if {@code value} is null
     */
    @Override
    public String format(String value) {
        Objects.requireNonNull(value);

        Optional<DOI> parsedDoi = DOI.parse(value);
        if (!parsedDoi.isPresent()) {
            return value;
        }

        DOI doi = parsedDoi.get();
        if (doi.isShortDoi()) {
            // Nothing left to shorten; avoid a pointless round trip to the remote service.
            return value;
        }

        try {
            return new ShortDOIService().getShortDOI(doi).getDOI();
        } catch (ShortDOIServiceException e) {
            // Best effort: keep the original value, but record which DOI could not be shortened.
            LOGGER.error("Cannot shorten DOI {}", doi.getDOI(), e);
            return value;
        }
    }

    @Override
    public String getDescription() {
        return Localization.lang("Shortens DOI to more human readable form.");
    }

    @Override
    public String getExampleInput() {
        return "10.1006/jmbi.1998.2354";
    }
}
63 changes: 63 additions & 0 deletions src/main/java/org/jabref/logic/importer/util/ShortDOIService.java
Original file line number Diff line number Diff line change
@@ -0,0 +1,63 @@
package org.jabref.logic.importer.util;

import java.io.IOException;
import java.io.InputStream;
import java.net.MalformedURLException;
import java.net.URI;
import java.net.URISyntaxException;
import java.net.URL;

import org.jabref.logic.importer.ParseException;
import org.jabref.logic.net.URLDownload;
import org.jabref.model.entry.identifier.DOI;

import org.apache.http.client.utils.URIBuilder;
import org.json.JSONException;
import org.json.JSONObject;

/**
 * Class for obtaining shortened DOI names.
 *
 * @see <a href="http://shortdoi.org">http://shortdoi.org</a>
 */
public class ShortDOIService {

    private static final String BASIC_URL = "http://shortdoi.org/";

    /**
     * Obtains the shortened DOI name for the given DOI.
     *
     * @param doi the DOI to shorten
     * @return the shortened DOI name
     * @throws ShortDOIServiceException if the request URL cannot be built, the request or the parsing of its
     *                                  response fails, or the response holds no valid "ShortDOI" entry
     */
    public DOI getShortDOI(DOI doi) throws ShortDOIServiceException {
        JSONObject responseJSON = makeRequest(doi);

        try {
            return new DOI(responseJSON.getString("ShortDOI"));
        } catch (JSONException | IllegalArgumentException e) {
            // Both are unchecked: a missing/non-string "ShortDOI" entry, or a value the DOI constructor rejects.
            // Callers only handle ShortDOIServiceException, so translate instead of letting them escape.
            throw new ShortDOIServiceException("Cannot get short DOI", e);
        }
    }

    /**
     * Queries the short DOI service for the given DOI and returns the parsed JSON response.
     */
    private JSONObject makeRequest(DOI doi) throws ShortDOIServiceException {
        URLDownload urlDownload = new URLDownload(buildRequestUrl(doi));

        // The JSON object is fully built before the method returns, so the stream can be closed right away.
        try (InputStream response = urlDownload.asInputStream()) {
            return JsonReader.toJsonObject(response);
        } catch (ParseException | IOException | JSONException e) {
            throw new ShortDOIServiceException("Cannot get short DOI", e);
        }
    }

    /**
     * Builds the request URL: the service base URL followed by the DOI as path, asking for a JSON response.
     */
    private URL buildRequestUrl(DOI doi) throws ShortDOIServiceException {
        try {
            URIBuilder uriBuilder = new URIBuilder(BASIC_URL);
            uriBuilder.setPath(uriBuilder.getPath() + doi.getDOI());
            uriBuilder.addParameter("format", "json");

            return uriBuilder.build().toURL();
        } catch (URISyntaxException | MalformedURLException e) {
            throw new ShortDOIServiceException("Cannot get short DOI", e);
        }
    }
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@
package org.jabref.logic.importer.util;

import org.jabref.JabRefException;

/**
 * Exception raised when a short DOI cannot be obtained from the short DOI service.
 * <p>
 * Every constructor hands its arguments unchanged to the matching {@link JabRefException} constructor.
 */
public class ShortDOIServiceException extends JabRefException {

    /**
     * @param cause the underlying failure
     */
    public ShortDOIServiceException(Throwable cause) {
        super(cause);
    }

    /**
     * @param message description of the failure
     */
    public ShortDOIServiceException(String message) {
        super(message);
    }

    /**
     * @param message description of the failure
     * @param cause   the underlying failure
     */
    public ShortDOIServiceException(String message, Throwable cause) {
        super(message, cause);
    }

    /**
     * @param message          description of the failure
     * @param localizedMessage localized description, passed on to {@link JabRefException}
     */
    public ShortDOIServiceException(String message, String localizedMessage) {
        super(message, localizedMessage);
    }

    /**
     * @param message          description of the failure
     * @param localizedMessage localized description, passed on to {@link JabRefException}
     * @param cause            the underlying failure
     */
    public ShortDOIServiceException(String message, String localizedMessage, Throwable cause) {
        super(message, localizedMessage, cause);
    }
}
95 changes: 72 additions & 23 deletions src/main/java/org/jabref/model/entry/identifier/DOI.java
Original file line number Diff line number Diff line change
Expand Up @@ -14,14 +14,15 @@
import org.slf4j.LoggerFactory;

/**
* Class for working with Digital object identifiers (DOIs)
* Class for working with Digital object identifiers (DOIs) and Short DOIs
*
* @see https://en.wikipedia.org/wiki/Digital_object_identifier
* @see http://shortdoi.org
*/
public class DOI implements Identifier {
private static final Logger LOGGER = LoggerFactory.getLogger(DOI.class);

// DOI resolver
// DOI/Short DOI resolver
private static final URI RESOLVER = URI.create("https://doi.org");
// Regex
// (see http://www.doi.org/doi_handbook/2_Numbering.html)
Expand All @@ -43,20 +44,46 @@ public class DOI implements Identifier {
+ "[/:]" // divider
+ "(?:[^\\s]+)" // suffix alphanumeric without space
+ ")"; // end group \1

// Regex (Short DOI)
// Exact form of a short DOI: optional "urn:" and "doi:" prefixes, then "10", a single divider character and
// one or more ASCII letters or digits (e.g. "10/abc123"). Group 1 captures the identifier without the prefixes.
// Used by SHORT_DOI_HTTP_EXP and EXACT_SHORT_DOI_PATT.
private static final String SHORT_DOI_EXP = ""
+ "(?:urn:)?" // optional urn
+ "(?:doi:)?" // optional doi
+ "(" // begin group \1
+ "10" // directory indicator
// NOTE(review): '%' presumably admits a percent-encoded divider ("%2F"); the "2F" would then be consumed by
// the suffix below and end up inside group 1 - confirm this is intended.
+ "[/:%]" // divider
+ "[a-zA-Z0-9]+" // suffix: one or more ASCII letters or digits
+ ")"; // end group \1
// Variant used by SHORT_DOI_PATT to locate a short DOI inside arbitrary text. Differences to SHORT_DOI_EXP:
// '%' is not accepted as divider, and the suffix continues up to the next whitespace character.
private static final String FIND_SHORT_DOI_EXP = ""
+ "(?:urn:)?" // optional urn
+ "(?:doi:)?" // optional doi
+ "(" // begin group \1
+ "10" // directory indicator
+ "[/:]" // divider
+ "[a-zA-Z0-9]+" // suffix start: one or more ASCII letters or digits
// The next token needs at least one further character, so the whole suffix must be at least two characters
// long; it accepts any non-whitespace character, not only letters and digits.
+ "(?:[^\\s]+)" // rest of the suffix: one or more non-whitespace characters
+ ")"; // end group \1

// Whole-string expressions for a DOI / short DOI given as HTTP(S) URL; the constructor checks them with
// String.matches before decoding the URL.
private static final String HTTP_EXP = "https?://[^\\s]+?" + DOI_EXP;
private static final String SHORT_DOI_HTTP_EXP = "https?://[^\\s]+?" + SHORT_DOI_EXP;
// Pattern
// EXACT_DOI_PATT is anchored at both ends ("^" ... "$"); DOI_PATT is unanchored.
private static final Pattern EXACT_DOI_PATT = Pattern.compile("^(?:https?://[^\\s]+?)?" + DOI_EXP + "$", Pattern.CASE_INSENSITIVE);
private static final Pattern DOI_PATT = Pattern.compile("(?:https?://[^\\s]+?)?" + FIND_DOI_EXP, Pattern.CASE_INSENSITIVE);
// Pattern (short DOI)
// NOTE(review): unlike EXACT_DOI_PATT, EXACT_SHORT_DOI_PATT has no trailing "$". The constructor applies it
// with Matcher.find(), so a value that merely starts with a short DOI is accepted and any trailing characters
// are dropped from group 1 - confirm this is intended.
private static final Pattern EXACT_SHORT_DOI_PATT = Pattern.compile("^(?:https?://[^\\s]+?)?" + SHORT_DOI_EXP, Pattern.CASE_INSENSITIVE);
private static final Pattern SHORT_DOI_PATT = Pattern.compile("(?:https?://[^\\s]+?)?" + FIND_SHORT_DOI_EXP, Pattern.CASE_INSENSITIVE);
// DOI
// The plain identifier (group 1 of the matching pattern), without URL or "urn:"/"doi:" prefix
private final String doi;
// Short DOI
// Set to true by the constructor when the value matched only the short DOI pattern; otherwise keeps the
// default value false
private boolean isShortDoi;

/**
* Creates a DOI from various schemes including URL, URN, and plain DOIs.
* Creates a DOI from various schemes including URL, URN, and plain DOIs/Short DOIs.
*
* @param doi the DOI string
* @throws NullPointerException if DOI is null
* @throws IllegalArgumentException if doi does not include a valid DOI
* @param doi the DOI/Short DOI string
* @return an instance of the DOI class
* @throws NullPointerException if DOI/Short DOI is null
* @throws IllegalArgumentException if doi does not include a valid DOI/Short DOI
*/
public DOI(String doi) {
Objects.requireNonNull(doi);
Expand All @@ -65,33 +92,40 @@ public DOI(String doi) {
String trimmedDoi = doi.trim();

// HTTP URL decoding
if (doi.matches(HTTP_EXP)) {
if (doi.matches(HTTP_EXP) || doi.matches(SHORT_DOI_HTTP_EXP)) {
try {
// decodes path segment
URI url = new URI(trimmedDoi);
trimmedDoi = url.getScheme() + "://" + url.getHost() + url.getPath();
} catch (URISyntaxException e) {
throw new IllegalArgumentException(doi + " is not a valid HTTP DOI.");
throw new IllegalArgumentException(doi + " is not a valid HTTP DOI/Short DOI.");
}
}

// Extract DOI
// Extract DOI/Short DOI
Matcher matcher = EXACT_DOI_PATT.matcher(trimmedDoi);
if (matcher.find()) {
// match only group \1
this.doi = matcher.group(1);
} else {
throw new IllegalArgumentException(trimmedDoi + " is not a valid DOI.");
// Short DOI
Matcher shortDoiMatcher = EXACT_SHORT_DOI_PATT.matcher(trimmedDoi);
if (shortDoiMatcher.find()) {
this.doi = shortDoiMatcher.group(1);
isShortDoi = true;
} else {
throw new IllegalArgumentException(trimmedDoi + " is not a valid DOI/Short DOI.");
}
}
}

/**
* Creates an Optional<DOI> from various schemes including URL, URN, and plain DOIs.
*
* Useful for suppressing the <c>IllegalArgumentException</c> of the Constructor
* and checking for Optional.isPresent() instead.
* Useful for suppressing the <c>IllegalArgumentException</c> of the Constructor and checking for
* Optional.isPresent() instead.
*
* @param doi the DOI string
* @param doi the DOI/Short DOI string
* @return an Optional containing the DOI or an empty Optional
*/
public static Optional<DOI> parse(String doi) {
Expand All @@ -105,19 +139,19 @@ public static Optional<DOI> parse(String doi) {
}

/**
* Determines whether a DOI is valid or not
* Determines whether a DOI/Short DOI is valid or not
*
* @param doi the DOI string
* @param doi the DOI/Short DOI string
* @return true if DOI is valid, false otherwise
*/
public static boolean isValid(String doi) {
return parse(doi).isPresent();
}

/**
* Tries to find a DOI inside the given text.
* Tries to find a DOI/Short DOI inside the given text.
*
* @param text the Text which might contain a DOI
* @param text the Text which might contain a DOI/Short DOI
* @return an Optional containing the DOI or an empty Optional
*/
public static Optional<DOI> findInText(String text) {
Expand All @@ -128,6 +162,12 @@ public static Optional<DOI> findInText(String text) {
// match only group \1
result = Optional.of(new DOI(matcher.group(1)));
}

matcher = SHORT_DOI_PATT.matcher(text);
if (matcher.find()) {
result = Optional.of(new DOI(matcher.group(1)));
}

return result;
}

Expand All @@ -139,18 +179,27 @@ public String toString() {
}

/**
* Return the plain DOI
* Return the plain DOI/Short DOI
*
* @return the plain DOI value.
* @return the plain DOI/Short DOI value.
*/
public String getDOI() {
return doi;
}

/**
* Return a URI presentation for the DOI
* Determines whether DOI is short DOI or not
*
* @return true if DOI is short DOI, false otherwise
*/
public boolean isShortDoi() {
return isShortDoi;
}

/**
* Return a URI presentation for the DOI/Short DOI
*
* @return an encoded URI representation of the DOI
* @return an encoded URI representation of the DOI/Short DOI
*/
@Override
public Optional<URI> getExternalURI() {
Expand All @@ -165,9 +214,9 @@ public Optional<URI> getExternalURI() {
}

/**
* Return an ASCII URL presentation for the DOI
* Return an ASCII URL presentation for the DOI/Short DOI
*
* @return an encoded URL representation of the DOI
* @return an encoded URL representation of the DOI/Short DOI
*/
public String getURIAsASCIIString() {
return getExternalURI().map(URI::toASCIIString).orElse("");
Expand Down
2 changes: 2 additions & 0 deletions src/main/resources/l10n/JabRef_en.properties
Original file line number Diff line number Diff line change
Expand Up @@ -1555,6 +1555,8 @@ Add\ enclosing\ braces=Add enclosing braces
Add\ braces\ encapsulating\ the\ complete\ field\ content.=Add braces encapsulating the complete field content.
Remove\ enclosing\ braces=Remove enclosing braces
Removes\ braces\ encapsulating\ the\ complete\ field\ content.=Removes braces encapsulating the complete field content.
Shorten\ DOI=Shorten DOI
Shortens\ DOI\ to\ more\ human\ readable\ form.=Shortens DOI to more human readable form.
Sentence\ case=Sentence case
Shortens\ lists\ of\ persons\ if\ there\ are\ more\ than\ 2\ persons\ to\ "et\ al.".=Shortens lists of persons if there are more than 2 persons to "et al.".
Title\ case=Title case
Expand Down
Loading

0 comments on commit 164843b

Please sign in to comment.