Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Ascii integrity checker #1718

Merged
merged 3 commits into from
Aug 11, 2016
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,7 @@ We refer to [GitHub issues](https://github.com/JabRef/jabref/issues) by using `#
- It is now possible to add your own lists of protected terms, see Options -> Manage protected terms
- Automatically generated group names are now converted from LaTeX to Unicode
- Unified dialogs for opening/saving files
- Added an integrity check that reports non-ASCII characters in fields when the database is in BibTeX mode

### Fixed
- Fixed [#1632](https://github.com/JabRef/jabref/issues/1632): User comments (@Comment) with or without brackets are now kept
Expand Down
24 changes: 22 additions & 2 deletions src/main/java/net/sf/jabref/logic/integrity/IntegrityCheck.java
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,8 @@
import net.sf.jabref.model.entry.InternalBibtexFields;
import net.sf.jabref.model.entry.ParsedFileField;

import com.google.common.base.CharMatcher;

public class IntegrityCheck {

private final BibDatabaseContext bibDatabaseContext;
Expand Down Expand Up @@ -50,9 +52,11 @@ private List<IntegrityMessage> checkBibtexEntry(BibEntry entry) {

result.addAll(new AuthorNameChecker().check(entry));

// BibTeX only checkers
if (!bibDatabaseContext.isBiblatexMode()) {
result.addAll(new TitleChecker().check(entry));
result.addAll(new PagesChecker().check(entry));
result.addAll(new ASCIICharacterChecker().check(entry));
} else {
result.addAll(new BiblatexPagesChecker().check(entry));
}
Expand Down Expand Up @@ -394,7 +398,6 @@ private static class BibStringChecker implements Checker {
// Detect # if it doesn't have a \ in front of it or if it starts the string
private static final Pattern UNESCAPED_HASH = Pattern.compile("(?<!\\\\)#|^#");


/**
* Checks, if there is an even number of unescaped #
*/
Expand Down Expand Up @@ -427,7 +430,6 @@ private static class HTMLCharacterChecker implements Checker {
// Detect any HTML encoded character
private static final Pattern HTML_CHARACTER_PATTERN = Pattern.compile("&[#\\p{Alnum}]+;");


/**
* Checks, if there are any HTML encoded characters in the fields
*/
Expand All @@ -445,4 +447,22 @@ public List<IntegrityMessage> check(BibEntry entry) {
}
}

private static class ASCIICharacterChecker implements Checker {

    /**
     * Reports every field of the entry whose value contains at least one character
     * outside the ASCII range, e.g., umlauts or other Unicode symbols.
     *
     * @param entry the entry whose fields are inspected
     * @return one message per offending field; empty if every value is ASCII-only
     */
    @Override
    public List<IntegrityMessage> check(BibEntry entry) {
        List<IntegrityMessage> messages = new ArrayList<>();
        for (Map.Entry<String, String> fieldEntry : entry.getFieldMap().entrySet()) {
            String fieldName = fieldEntry.getKey();
            String content = fieldEntry.getValue();
            // Nothing to report for values consisting solely of ASCII characters.
            if (CharMatcher.ascii().matchesAllOf(content)) {
                continue;
            }
            messages.add(new IntegrityMessage(Localization.lang("Non-ASCII encoded character found"), entry,
                    fieldName));
        }
        return messages;
    }
}

}
2 changes: 2 additions & 0 deletions src/main/resources/l10n/JabRef_da.properties
Original file line number Diff line number Diff line change
Expand Up @@ -1750,3 +1750,5 @@ Open_OpenOffice/LibreOffice_connection=

You_must_enter_at_least_one_field_name=


Non-ASCII_encoded_character_found=
2 changes: 2 additions & 0 deletions src/main/resources/l10n/JabRef_de.properties
Original file line number Diff line number Diff line change
Expand Up @@ -2460,3 +2460,5 @@ Open_OpenOffice/LibreOffice_connection=Öffne_OpenOffice/LibreOffice_Verbindung

You_must_enter_at_least_one_field_name=


Non-ASCII_encoded_character_found=
1 change: 1 addition & 0 deletions src/main/resources/l10n/JabRef_en.properties
Original file line number Diff line number Diff line change
Expand Up @@ -2288,3 +2288,4 @@ Style_file=Style_file

Open_OpenOffice/LibreOffice_connection=Open_OpenOffice/LibreOffice_connection
You_must_enter_at_least_one_field_name=You_must_enter_at_least_one_field_name
Non-ASCII_encoded_character_found=Non-ASCII_encoded_character_found
2 changes: 2 additions & 0 deletions src/main/resources/l10n/JabRef_es.properties
Original file line number Diff line number Diff line change
Expand Up @@ -1651,3 +1651,5 @@ Open_OpenOffice/LibreOffice_connection=

You_must_enter_at_least_one_field_name=


Non-ASCII_encoded_character_found=
2 changes: 2 additions & 0 deletions src/main/resources/l10n/JabRef_fa.properties
Original file line number Diff line number Diff line change
Expand Up @@ -2432,3 +2432,5 @@ Open_OpenOffice/LibreOffice_connection=

You_must_enter_at_least_one_field_name=


Non-ASCII_encoded_character_found=
2 changes: 2 additions & 0 deletions src/main/resources/l10n/JabRef_fr.properties
Original file line number Diff line number Diff line change
Expand Up @@ -1692,3 +1692,5 @@ Open_OpenOffice/LibreOffice_connection=

You_must_enter_at_least_one_field_name=


Non-ASCII_encoded_character_found=
2 changes: 2 additions & 0 deletions src/main/resources/l10n/JabRef_in.properties
Original file line number Diff line number Diff line change
Expand Up @@ -1667,3 +1667,5 @@ Open_OpenOffice/LibreOffice_connection=

You_must_enter_at_least_one_field_name=


Non-ASCII_encoded_character_found=
2 changes: 2 additions & 0 deletions src/main/resources/l10n/JabRef_it.properties
Original file line number Diff line number Diff line change
Expand Up @@ -1768,3 +1768,5 @@ Open_OpenOffice/LibreOffice_connection=

You_must_enter_at_least_one_field_name=


Non-ASCII_encoded_character_found=
2 changes: 2 additions & 0 deletions src/main/resources/l10n/JabRef_ja.properties
Original file line number Diff line number Diff line change
Expand Up @@ -2409,3 +2409,5 @@ Open_OpenOffice/LibreOffice_connection=

You_must_enter_at_least_one_field_name=


Non-ASCII_encoded_character_found=
2 changes: 2 additions & 0 deletions src/main/resources/l10n/JabRef_nl.properties
Original file line number Diff line number Diff line change
Expand Up @@ -2441,3 +2441,5 @@ Open_OpenOffice/LibreOffice_connection=

You_must_enter_at_least_one_field_name=


Non-ASCII_encoded_character_found=
2 changes: 2 additions & 0 deletions src/main/resources/l10n/JabRef_no.properties
Original file line number Diff line number Diff line change
Expand Up @@ -2833,3 +2833,5 @@ Open_OpenOffice/LibreOffice_connection=

You_must_enter_at_least_one_field_name=


Non-ASCII_encoded_character_found=
2 changes: 2 additions & 0 deletions src/main/resources/l10n/JabRef_pt_BR.properties
Original file line number Diff line number Diff line change
Expand Up @@ -1664,3 +1664,5 @@ Open_OpenOffice/LibreOffice_connection=

You_must_enter_at_least_one_field_name=


Non-ASCII_encoded_character_found=
2 changes: 2 additions & 0 deletions src/main/resources/l10n/JabRef_ru.properties
Original file line number Diff line number Diff line change
Expand Up @@ -2410,3 +2410,5 @@ Open_OpenOffice/LibreOffice_connection=

You_must_enter_at_least_one_field_name=


Non-ASCII_encoded_character_found=
2 changes: 2 additions & 0 deletions src/main/resources/l10n/JabRef_sv.properties
Original file line number Diff line number Diff line change
Expand Up @@ -1609,3 +1609,5 @@ Open_OpenOffice/LibreOffice_connection=

You_must_enter_at_least_one_field_name=


Non-ASCII_encoded_character_found=
2 changes: 2 additions & 0 deletions src/main/resources/l10n/JabRef_tr.properties
Original file line number Diff line number Diff line change
Expand Up @@ -1682,3 +1682,5 @@ Open_OpenOffice/LibreOffice_connection=

You_must_enter_at_least_one_field_name=


Non-ASCII_encoded_character_found=
2 changes: 2 additions & 0 deletions src/main/resources/l10n/JabRef_vi.properties
Original file line number Diff line number Diff line change
Expand Up @@ -2436,3 +2436,5 @@ Open_OpenOffice/LibreOffice_connection=

You_must_enter_at_least_one_field_name=


Non-ASCII_encoded_character_found=
2 changes: 2 additions & 0 deletions src/main/resources/l10n/JabRef_zh.properties
Original file line number Diff line number Diff line change
Expand Up @@ -1676,3 +1676,5 @@ Open_OpenOffice/LibreOffice_connection=

You_must_enter_at_least_one_field_name=


Non-ASCII_encoded_character_found=
Original file line number Diff line number Diff line change
Expand Up @@ -213,6 +213,13 @@ public void testISBNChecks() {
assertWrong(createContext("isbn", "978-0-306-40615-8"));
}

@Test
public void testASCIIChecks() {
    // A value made up of ASCII letters, digits and punctuation must pass.
    String plainAscii = "Only ascii characters!'@12";
    assertCorrect(createContext("title", plainAscii));

    // Umlauts lie outside the ASCII range and must be reported.
    String withUmlauts = "Umlauts are nöt ällowed";
    assertWrong(createContext("month", withUmlauts));

    // The same holds for any other Unicode symbol.
    String withUnicodeSymbol = "Some unicode ⊕";
    assertWrong(createContext("author", withUnicodeSymbol));
}

private BibDatabaseContext createContext(String field, String value, String type) {
BibEntry entry = new BibEntry();
entry.setField(field, value);
Expand Down