Skip to content

Commit

Permalink
DOI matching in duplicate check (#6897)
Browse files Browse the repository at this point in the history
Co-authored-by: Andrew Kuncevich <kuncevich_andrei@mail.ru>
  • Loading branch information
Siedlerchr and KunAndrew authored Sep 29, 2020
1 parent 87779ea commit 8a242c9
Show file tree
Hide file tree
Showing 6 changed files with 66 additions and 2 deletions.
4 changes: 3 additions & 1 deletion CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,9 @@ Note that this project **does not** adhere to [Semantic Versioning](http://semve
- We changed the title of the window "Manage field names and content" to have the same title as the corresponding menu item [#6895](https://github.com/JabRef/jabref/pull/6895)
- We renamed the menus "View -> Previous citation style" and "View -> Next citation style" into "View -> Previous preview style" and "View -> Next preview style" and renamed the "Preview" style to "Customized preview style". [#6899](https://github.com/JabRef/jabref/pull/6899)
- We changed the default preference option "Search and store files relative to library file location" to on, as this seems to be a more intuitive behaviour. [#6863](https://github.com/JabRef/jabref/issues/6863)
- Improved detection of "short" DOIs [6880](https://github.com/JabRef/jabref/issues/6880)
- We changed the title of the window "Manage field names and content" to have the same title as the corresponding menu item [#6895](https://github.com/JabRef/jabref/pull/6895)
- We improved the detection of "short" DOIs [#6880](https://github.com/JabRef/jabref/issues/6880)
- We improved the duplicate detection when identifiers like DOI or arXiv are semantically the same but differ only syntactically (e.g. with or without http(s):// prefix). [#6707](https://github.com/JabRef/jabref/issues/6707)

### Fixed

Expand Down
19 changes: 19 additions & 0 deletions src/main/java/org/jabref/logic/database/DuplicateCheck.java
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
import java.util.HashSet;
import java.util.Locale;
import java.util.Map;
import java.util.Objects;
import java.util.Optional;
import java.util.Set;
import java.util.stream.Collectors;
Expand All @@ -22,6 +23,8 @@
import org.jabref.model.entry.field.FieldProperty;
import org.jabref.model.entry.field.OrFields;
import org.jabref.model.entry.field.StandardField;
import org.jabref.model.entry.identifier.DOI;
import org.jabref.model.entry.identifier.ISBN;

import com.google.common.collect.Sets;
import org.slf4j.Logger;
Expand Down Expand Up @@ -57,6 +60,9 @@ public class DuplicateCheck {
DuplicateCheck.FIELD_WEIGHTS.put(StandardField.EDITOR, 2.5);
DuplicateCheck.FIELD_WEIGHTS.put(StandardField.TITLE, 3.);
DuplicateCheck.FIELD_WEIGHTS.put(StandardField.JOURNAL, 2.);
DuplicateCheck.FIELD_WEIGHTS.put(StandardField.NOTE, 0.1);
DuplicateCheck.FIELD_WEIGHTS.put(StandardField.COMMENT, 0.1);
DuplicateCheck.FIELD_WEIGHTS.put(StandardField.DOI, 3.);
}

private final BibEntryTypesManager entryTypesManager;
Expand Down Expand Up @@ -303,6 +309,19 @@ public boolean isDuplicate(final BibEntry one, final BibEntry two, final BibData
return true;
}

// check DOI
Optional<DOI> oneDOI = one.getDOI();
Optional<DOI> twoDOI = two.getDOI();
if (oneDOI.isPresent() && twoDOI.isPresent()) {
return Objects.equals(oneDOI, twoDOI);
}
// check ISBN
Optional<ISBN> oneISBN = one.getISBN();
Optional<ISBN> twoISBN = two.getISBN();
if (oneISBN.isPresent() && twoISBN.isPresent()) {
return Objects.equals(oneISBN, twoISBN);
}

if (haveDifferentEntryType(one, two) ||
haveDifferentEditions(one, two) ||
haveDifferentChaptersOrPagesOfTheSameBook(one, two)) {
Expand Down
5 changes: 5 additions & 0 deletions src/main/java/org/jabref/model/entry/BibEntry.java
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,7 @@
import org.jabref.model.entry.field.OrFields;
import org.jabref.model.entry.field.StandardField;
import org.jabref.model.entry.identifier.DOI;
import org.jabref.model.entry.identifier.ISBN;
import org.jabref.model.entry.types.EntryType;
import org.jabref.model.entry.types.IEEETranEntryType;
import org.jabref.model.entry.types.StandardEntryType;
Expand Down Expand Up @@ -481,6 +482,10 @@ public Optional<DOI> getDOI() {
return getField(StandardField.DOI).flatMap(DOI::parse);
}

/**
 * Reads the {@link StandardField#ISBN} field of this entry and passes its content to {@code ISBN.parse}.
 *
 * @return the parsed ISBN, or an empty Optional when either the field lookup or the parsing yields nothing
 */
public Optional<ISBN> getISBN() {
    return getField(StandardField.ISBN).flatMap(content -> ISBN.parse(content));
}

/**
* Return the LaTeX-free contents of the given field or its alias an an Optional
* <p>
Expand Down
1 change: 0 additions & 1 deletion src/main/java/org/jabref/model/entry/identifier/DOI.java
Original file line number Diff line number Diff line change
Expand Up @@ -279,5 +279,4 @@ public boolean equals(Object o) {
public int hashCode() {
    // Hash the lower-cased DOI so that values differing only in letter case hash identically.
    String caseFolded = doi.toLowerCase(Locale.ENGLISH);
    return Objects.hash(caseFolded);
}

}
18 changes: 18 additions & 0 deletions src/main/java/org/jabref/model/entry/identifier/ISBN.java
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@

import java.net.URI;
import java.net.URISyntaxException;
import java.util.Locale;
import java.util.Objects;
import java.util.Optional;
import java.util.regex.Matcher;
Expand Down Expand Up @@ -106,4 +107,21 @@ public Optional<URI> getExternalURI() {
return Optional.empty();
}
}

/**
 * Compares this ISBN with another object.
 *
 * @param o the object to compare against
 * @return {@code true} when {@code o} is this instance, or is of exactly the same class and its
 *         ISBN string matches this one ignoring letter case; {@code false} otherwise
 */
@Override
public boolean equals(Object o) {
    if (o == this) {
        return true;
    }
    if (o == null) {
        return false;
    }
    // Exact class match (not instanceof), so instances of different classes never compare equal.
    if (o.getClass() != getClass()) {
        return false;
    }
    ISBN that = (ISBN) o;
    return this.isbnString.equalsIgnoreCase(that.isbnString);
}

/**
 * Computes the hash from the lower-cased ISBN string, so that values differing only in letter case
 * share a hash code.
 *
 * @return the hash code of this ISBN
 */
@Override
public int hashCode() {
    String caseFolded = isbnString.toLowerCase(Locale.ENGLISH);
    return Objects.hash(caseFolded);
}
}
21 changes: 21 additions & 0 deletions src/test/java/org/jabref/logic/database/DuplicateCheckTest.java
Original file line number Diff line number Diff line change
Expand Up @@ -161,6 +161,27 @@ public void twoEntriesWithSameDoiButDifferentTypesAreDuplicates() {
assertTrue(duplicateChecker.isDuplicate(simpleArticle, duplicateWithDifferentType, BibDatabaseMode.BIBTEX));
}

@Test
public void twoEntriesWithDoiContainingUnderscoresAreNotEqual() {
    // An underscore in a DOI can indicate a totally different DOI
    final String doiWithoutUnderscore = "10.1016/j.is.2004.02.002";
    final String doiWithUnderscore = "10.1016/j.is.2004.02.0_02";

    simpleArticle.setField(StandardField.DOI, doiWithoutUnderscore);
    unrelatedArticle.setField(StandardField.DOI, doiWithUnderscore);
    unrelatedArticle.setType(StandardEntryType.InCollection);

    boolean flaggedAsDuplicate =
            duplicateChecker.isDuplicate(simpleArticle, unrelatedArticle, BibDatabaseMode.BIBTEX);
    assertFalse(flaggedAsDuplicate);
}

@Test
public void twoEntriesWithSameISBNButDifferentTypesAreDuplicates() {
    // Both entries carry the same ISBN; only the entry type of the second one is changed.
    final String sharedIsbn = "0-123456-47-9";

    simpleArticle.setField(StandardField.ISBN, sharedIsbn);
    unrelatedArticle.setField(StandardField.ISBN, sharedIsbn);
    unrelatedArticle.setType(StandardEntryType.InCollection);

    boolean flaggedAsDuplicate =
            duplicateChecker.isDuplicate(simpleArticle, unrelatedArticle, BibDatabaseMode.BIBTEX);
    assertTrue(flaggedAsDuplicate);
}

@Test
public void twoInbooksWithDifferentChaptersAreNotDuplicates() {
twoEntriesWithDifferentSpecificFieldsAreNotDuplicates(simpleInbook, StandardField.CHAPTER,
Expand Down

0 comments on commit 8a242c9

Please sign in to comment.