Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Changed encoding used by IntegrityCheck #8359

Merged
merged 7 commits into from
Dec 25, 2021
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@ Note that this project **does not** adhere to [Semantic Versioning](http://semve
### Fixed

- We fixed an issue where clicking on headings in the entry preview could lead to an exception. [#8292](https://github.com/JabRef/jabref/issues/8292)
- We fixed an issue where IntegrityCheck used the system's character encoding instead of the one set by the library or in preferences. [#8022](https://github.com/JabRef/jabref/issues/8022)
- We fixed an issue about empty metadata in library properties when called from the right click menu. [#8358](https://github.com/JabRef/jabref/issues/8358)

### Removed
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -42,6 +42,7 @@ public void execute() {
IntegrityCheck check = new IntegrityCheck(database,
Globals.prefs.getFilePreferences(),
Globals.prefs.getCitationKeyPatternPreferences(),
Globals.prefs.getGeneralPreferences().getDefaultEncoding(),
Globals.journalAbbreviationRepository,
Globals.prefs.getEntryEditorPreferences().shouldAllowIntegerEditionBibtex());

Expand Down
6 changes: 4 additions & 2 deletions src/main/java/org/jabref/logic/integrity/IntegrityCheck.java
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
package org.jabref.logic.integrity;

import java.nio.charset.Charset;
import java.util.ArrayList;
import java.util.List;

Expand All @@ -20,6 +21,7 @@ public class IntegrityCheck {
public IntegrityCheck(BibDatabaseContext bibDatabaseContext,
FilePreferences filePreferences,
CitationKeyPatternPreferences citationKeyPatternPreferences,
Charset defaultEncoding,
JournalAbbreviationRepository journalAbbreviationRepository,
boolean allowIntegerEdition) {
this.bibDatabaseContext = bibDatabaseContext;
Expand All @@ -41,8 +43,8 @@ public IntegrityCheck(BibDatabaseContext bibDatabaseContext,
if (bibDatabaseContext.isBiblatexMode()) {
entryCheckers.addAll(List.of(
new JournalInAbbreviationListChecker(StandardField.JOURNALTITLE, journalAbbreviationRepository),
new UTF8Checker())
);
new UTF8Checker(bibDatabaseContext.getMetaData().getEncoding().orElse(defaultEncoding))
));
} else {
entryCheckers.addAll(List.of(
new JournalInAbbreviationListChecker(StandardField.JOURNAL, journalAbbreviationRepository),
Expand Down
16 changes: 15 additions & 1 deletion src/main/java/org/jabref/logic/integrity/UTF8Checker.java
Original file line number Diff line number Diff line change
Expand Up @@ -14,16 +14,30 @@
import org.jabref.model.entry.field.Field;

public class UTF8Checker implements EntryChecker {
private final Charset charset;

/**
* Creates a UTF8Checker that, for every field of a checked entry,
* <ol>
* <li>encodes the field's String value into a byte array using the given charset</li>
* <li>attempts to decode that byte array to a character array using the UTF-8 Charset</li>
* </ol>
*
* @param charset the charset used to encode BibEntry field values into bytes before the UTF-8 check
*/
public UTF8Checker(Charset charset) {
this.charset = charset;
}

/**
* Detect any non UTF-8 encoded field
*
* @param entry the BibEntry of BibLatex.
* @return the warnings produced by the UTF-8 check for BibLatex.
*/
@Override
public List<IntegrityMessage> check(BibEntry entry) {
List<IntegrityMessage> results = new ArrayList<>();
Charset charset = Charset.forName(System.getProperty("file.encoding"));
for (Map.Entry<Field, String> field : entry.getFieldMap().entrySet()) {
boolean utfOnly = UTF8EncodingChecker(field.getValue().getBytes(charset));
if (!utfOnly) {
Expand Down
13 changes: 4 additions & 9 deletions src/test/java/org/jabref/logic/integrity/IntegrityCheckTest.java
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
package org.jabref.logic.integrity;

import java.io.IOException;
import java.nio.charset.StandardCharsets;
import java.nio.file.Files;
import java.nio.file.Path;
import java.util.Collections;
Expand Down Expand Up @@ -139,6 +140,7 @@ void testEntryIsUnchangedAfterChecks() {
new IntegrityCheck(context,
mock(FilePreferences.class),
createCitationKeyPatternPreferences(),
StandardCharsets.UTF_8,
JournalAbbreviationLoader.loadBuiltInRepository(), false)
.check();

Expand Down Expand Up @@ -172,6 +174,7 @@ private void assertWrong(BibDatabaseContext context) {
List<IntegrityMessage> messages = new IntegrityCheck(context,
mock(FilePreferences.class),
createCitationKeyPatternPreferences(),
StandardCharsets.UTF_8,
JournalAbbreviationLoader.loadBuiltInRepository(), false)
.check();
assertNotEquals(Collections.emptyList(), messages);
Expand All @@ -183,20 +186,12 @@ private void assertCorrect(BibDatabaseContext context) {
List<IntegrityMessage> messages = new IntegrityCheck(context,
filePreferencesMock,
createCitationKeyPatternPreferences(),
StandardCharsets.UTF_8,
JournalAbbreviationLoader.loadBuiltInRepository(), false
).check();
assertEquals(Collections.emptyList(), messages);
}

private void assertCorrect(BibDatabaseContext context, boolean allowIntegerEdition) {
List<IntegrityMessage> messages = new IntegrityCheck(context,
mock(FilePreferences.class),
createCitationKeyPatternPreferences(),
JournalAbbreviationLoader.loadBuiltInRepository(),
allowIntegerEdition).check();
assertEquals(Collections.emptyList(), messages);
}

private CitationKeyPatternPreferences createCitationKeyPatternPreferences() {
final GlobalCitationKeyPattern keyPattern = GlobalCitationKeyPattern.fromPattern("[auth][year]");
return new CitationKeyPatternPreferences(
Expand Down
10 changes: 4 additions & 6 deletions src/test/java/org/jabref/logic/integrity/UTF8CheckerTest.java
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
package org.jabref.logic.integrity;

import java.io.UnsupportedEncodingException;
import java.nio.charset.Charset;
import java.nio.charset.StandardCharsets;
import java.util.Collections;
import java.util.List;
Expand All @@ -24,28 +25,25 @@ public class UTF8CheckerTest {
*/
@Test
void fieldAcceptsUTF8() {
UTF8Checker checker = new UTF8Checker();
UTF8Checker checker = new UTF8Checker(StandardCharsets.UTF_8);
entry.setField(StandardField.TITLE, "Only ascii characters!'@12");
assertEquals(Collections.emptyList(), checker.check(entry));
}

/**
* fieldDoesNotAcceptUmlauts to check UTF8Checker's result set
* when the entry is encoded in Non-Utf-8 charset and the System
* when the entry is encoded in Non-Utf-8 charset and the Library
* environment is Non UTF-8.
* Finally we need to reset the environment charset.
* @throws UnsupportedEncodingException if the GBK charset is not supported when initializing the test String
* Demo: new String(StringDemo.getBytes(), "GBK");
*/
@Test
void fieldDoesNotAcceptUmlauts() throws UnsupportedEncodingException {
String defaultCharset = System.getProperty("file.encoding");
System.getProperties().put("file.encoding", "GBK");
UTF8Checker checker = new UTF8Checker();
UTF8Checker checker = new UTF8Checker(Charset.forName("GBK"));
String NonUTF8 = new String("你好,这条语句使用GBK字符集".getBytes(), "GBK");
entry.setField(StandardField.MONTH, NonUTF8);
assertEquals(List.of(new IntegrityMessage("Non-UTF-8 encoded field found", entry, StandardField.MONTH)), checker.check(entry));
System.getProperties().put("file.encoding", defaultCharset);
}

/**
Expand Down