Skip to content

Commit

Permalink
Revert "[pt] Update dictionary to v1.1.0 (#10609)"
Browse files (browse the repository at this point in the history)
This reverts commit e466962.
  • Loading branch information
p-goulart committed Jun 13, 2024
1 parent be211b8 commit 4f288eb
Show file tree
Hide file tree
Showing 12 changed files with 59 additions and 366 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -134,7 +134,6 @@ public List<Rule> getRelevantRules(ResourceBundle messages, UserConfig userConfi
//Specific to Portuguese:
new PostReformPortugueseCompoundRule(messages, this, userConfig),
new PortugueseColourHyphenationRule(messages, this, userConfig),
new PortugueseOrthographyReplaceRule(messages, this),
new PortugueseReplaceRule(messages, this),
new PortugueseBarbarismsRule(messages, "/pt/barbarisms.txt", this),
//new PortugueseArchaismsRule(messages, "/pt/archaisms-pt.txt"), // see https://github.com/languagetool-org/languagetool/issues/3095
Expand Down Expand Up @@ -289,24 +288,18 @@ public Map<String, Integer> getPriorityMap() {

@Override
protected int getPriorityForId(String id) {
// generic spelling rule
if (id.startsWith("MORFOLOGIK_RULE")) {
return -50;
}
// simple replace spelling rule
if (id.startsWith("PT_SIMPLE_REPLACE_ORTHOGRAPHY")) {
return -49;
}
// AI spelling rule
if (id.startsWith("AI_PT_GGEC_REPLACEMENT_ORTHOGRAPHY_SPELL")) {
return -48;
}
if (id.startsWith("PT_MULTITOKEN_SPELLING")) {
return -48;
return -49;
}
if (id.startsWith("AI_PT_GGEC_REPLACEMENT_OTHER")) {
return -4;
}
if (id.startsWith("PT_MULTITOKEN_SPELLING")) {
return -49;
}
// enclitic diacritics always take precedence over pronoun placement
if (id.startsWith("ACENTUAÇÃO_VOGAL_ÊNCLISE")) {
return -51;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,6 @@
*/
package org.languagetool.rules.pt;

import com.google.errorprone.annotations.concurrent.LazyInit;
import org.languagetool.*;
import org.languagetool.rules.RuleMatch;
import org.languagetool.rules.SuggestedReplacement;
Expand All @@ -39,8 +38,6 @@
public class MorfologikPortugueseSpellerRule extends MorfologikSpellerRule {

private final Language spellerLanguage;
// Used for suggestions to words tagged as _english_ignore_. This should be lazy-initialised or memoised somehow.
private static final MorfologikSpellerRule englishSpeller = loadEnglishSpeller();
private final String dictFilepath;
// Path, in pt/resources, where the list of words to be removed from the suggestion list is to be found.
private static final String doNotSuggestWordsFilepath = "/pt/do_not_suggest.txt";
Expand Down Expand Up @@ -237,17 +234,6 @@ public MorfologikPortugueseSpellerRule(ResourceBundle messages, Language languag
dialectAlternationMapping = getDialectAlternationMapping();
}

/**
 * Looks up the English language by its short code and returns its default spelling rule,
 * for use when producing suggestions for tokens tagged {@code _english_ignore_}.
 *
 * <p>This method is used to initialise a static field, so it must never let an exception
 * escape: doing so would make the whole class fail to initialise. For that reason the
 * default rule is only returned when it really is a {@link MorfologikSpellerRule};
 * a {@code null} or differently-typed rule yields {@code null} instead of a
 * {@link ClassCastException}.
 *
 * @return the English Morfologik speller, or {@code null} when English is not available
 *         or its default spelling rule is not a {@code MorfologikSpellerRule}
 */
@Nullable
private static MorfologikSpellerRule loadEnglishSpeller() {
  try {
    Language english = Languages.getLanguageForShortCode("en");
    Object defaultRule = english.getDefaultSpellingRule();
    if (defaultRule instanceof MorfologikSpellerRule) {
      return (MorfologikSpellerRule) defaultRule;
    }
    // English is present but does not expose a usable Morfologik speller.
    System.err.println("English has no Morfologik default spelling rule, so we can't use it for suggestions in the Portuguese speller.");
    return null;
  } catch (IllegalArgumentException e) {
    // Thrown by the language lookup when no language is registered for "en".
    System.err.println("English is not available, so we can't use it for suggestions in the Portuguese speller.");
    return null;
  }
}

@Override
protected List<SuggestedReplacement> filterNoSuggestWords(List<SuggestedReplacement> suggestedReplacements) {
return suggestedReplacements.stream().filter(
Expand Down Expand Up @@ -314,15 +300,7 @@ public List<RuleMatch> getRuleMatches(String word, int startPos, AnalyzedSentenc
List<RuleMatch> ruleMatchesSoFar, int idx,
AnalyzedTokenReadings[] tokens) throws IOException {
List<RuleMatch> ruleMatches = super.getRuleMatches(word, startPos, sentence, ruleMatchesSoFar, idx, tokens);
if (englishSpeller != null && tokens[idx].hasPosTag("_english_ignore_")) {
if (englishSpeller.isMisspelled(word)) {
List<String> englishSuggestions = englishSpeller.getSpellingSuggestions(word);
String msg = "Este parece ser um termo em inglês. Se for o caso, há um erro de ortografia.";
RuleMatch match = ruleMatches.get(0);
match.setSuggestedReplacements(englishSuggestions);
match.setMessage(msg);
return Collections.singletonList(match);
}
if (tokens[idx].hasPosTag("_english_ignore_")) {
return Collections.emptyList();
}
if (!ruleMatches.isEmpty()) {
Expand Down

This file was deleted.

Original file line number Diff line number Diff line change
Expand Up @@ -3530,13 +3530,6 @@
</pattern>
<disambig action="ignore_spelling"/>
</rule>

<!-- Matches a single token made of "rs" repeated 2 to 15 times (e.g. "rsrs", "rsrsrs";
     laughter, going by the rule id) and applies the ignore_spelling action to it. -->
<rule id="IGNORE_SPELLING_RS_LAUGHTER">
<pattern>
<token regexp="yes">(rs){2,15}</token>
</pattern>
<disambig action="ignore_spelling"/>
</rule>
</rulegroup>

<rulegroup name="Add interjection tag to common laughter onomatopoeia" id="INTERJ_LAUGHTER">
Expand Down Expand Up @@ -4026,55 +4019,23 @@
</token>
</marker>
</pattern>
<!-- <filter class="org.languagetool.rules.IsEnglishWordFilter" args="formPositions:2"/> -->
<filter class="org.languagetool.rules.IsEnglishWordFilter" args="formPositions:2"/>
<disambig action="add"><wd pos="_english_ignore_"/></disambig>
</rule>
<rule> <!-- #2 -->
<pattern>
<marker>
<token regexp="yes" postag="UNKNOWN">\p{L}+|'s
<token regexp="yes" postag="UNKNOWN">\p{L}+
<exception regexp="yes">&english_no;|&english_forward;</exception>
<exception postag="_english_ignore_"/>
</token>
</marker>
<token postag="_english_ignore_"/>
</pattern>
<!-- <filter class="org.languagetool.rules.IsEnglishWordFilter" args="formPositions:2"/> -->
<filter class="org.languagetool.rules.IsEnglishWordFilter" args="formPositions:1"/>
<disambig action="add"><wd pos="_english_ignore_"/></disambig>
</rule>
<rule> <!-- #3 -->
<!-- Unknown word FOLLOWED BY a common English word: both tokens get the _english_ignore_ tag.
     The first token must be letters only, tagged UNKNOWN, not excluded by the english_no /
     english_forward entities (defined elsewhere), and not already tagged _english_ignore_.
     The second token must match the english_common entity. -->
<pattern>
<marker>
<token regexp="yes" postag="UNKNOWN">\p{L}+
<exception regexp="yes">&english_no;|&english_forward;</exception>
<exception postag="_english_ignore_"/>
</token>
<token regexp="yes">&english_common;</token>
</marker>
</pattern>
<!-- The IsEnglishWordFilter below is commented out, so no filter runs for this rule. -->
<!-- <filter class="org.languagetool.rules.IsEnglishWordFilter" args="formPositions:1"/> -->
<!-- One <wd> per marked token, in order: each of the two tokens receives the tag. -->
<disambig action="add">
<wd pos="_english_ignore_"/>
<wd pos="_english_ignore_"/>
</disambig>
</rule>
<rule> <!-- #4 -->
<!-- Mirror image of the "unknown word + common English word" case: here a common English
     word (english_common entity) comes FIRST and is followed by an unknown word. The second
     token must be letters only, tagged UNKNOWN, not excluded by the english_no /
     english_forward entities (defined elsewhere), and not already tagged _english_ignore_.
     Both tokens get the _english_ignore_ tag. -->
<pattern>
<marker>
<token regexp="yes">&english_common;</token>
<token regexp="yes" postag="UNKNOWN">\p{L}+
<exception regexp="yes">&english_no;|&english_forward;</exception>
<exception postag="_english_ignore_"/>
</token>
</marker>
</pattern>
<!-- The IsEnglishWordFilter below is commented out, so no filter runs for this rule. -->
<!-- <filter class="org.languagetool.rules.IsEnglishWordFilter" args="formPositions:1"/> -->
<!-- One <wd> per marked token, in order: each of the two tokens receives the tag. -->
<disambig action="add">
<wd pos="_english_ignore_"/>
<wd pos="_english_ignore_"/>
</disambig>
</rule>
<rule> <!-- #5, unknown single-word parenthetical -->
<rule> <!-- #3, unknown single-word parenthetical -->
<!-- This is for when we have a bunch of English words and then a single word in parenthesis,
99% of the time it's also English or it's some kind of English acronym. -->
<pattern>
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,7 @@
during|going|such|many|then|year|became|later|well|including|area|both|make|name|
called|until|while|against|may|number|season|several|team|work|born|
early|family|now|based|life|released|since|began|century|each|end|
following|found|located|town|around|government|named|
following|found|located|town|around|day|government|named|
often|three|too|up|along|built|career|include|left|own|still|
took|held|last|members|much|than|within|another|company|death|
down|even|five|however|published|received|served|become|best|died|history|
Expand All @@ -56,7 +56,7 @@
success|successful|track|units|white|added|attack|below|brought|census|changed|
county|decided|ever|fact|generally|hand|households|instead|introduced|least|
mid|mother|outside|private|professional|provide|provided|saw|seven|soon|space|
study|systems?|teams|thus|women|september|allowed|battle|beginning|
study|systems|teams|thus|women|september|allowed|battle|beginning|
books|director|eight|female|find|ground|higher|industry|interest|
league|lived|market|night|owned|placed|previous|primary|reached|related|
reported|runs|strong|winning|august|january|july|march|always|approximately|
Expand Down Expand Up @@ -106,7 +106,7 @@
agreement|allowing|applied|attacks|ball|bank|birth|claim|compared|
conducted|connected|critical|dead|designated|develop|enemy|
entitled|estate|estimated|existing|expanded|expected|fellow|fighting|
floor|follow|forward|founder|fully?|goes|granted|hands|hour|
floor|follow|forward|founder|fully|goes|granted|hands|hour|
ice|leaves|literature|necessary|newly|occupied|occur|occurred|
oldest|organizations|organized|owner|paid|parish|powers|promoted|providing|
read|receive|reduced|refers|refused|respectively|say|settled|settlement|
Expand Down Expand Up @@ -175,7 +175,7 @@
advertising|animated|anyone|appeal|apply|appointment|artillery|
asks|assault|assembly|attributed|benefits|blocks|buy|championships|
chapter|circumstances|claiming|closer|colony|colors|continuous|corporate|
crowd|crown|customers?|dated|daughters|debuted|defeating|departure|
crowd|crown|customers|dated|daughters|debuted|defeating|departure|
destroy|detailed|drawing|drugs|electrical|eleven|else|encouraged|
essentially|everyone|evil|expensive|facing|fighter|generated|governments|
identical|infantry|integrated|journey|junction|latest|lawyer|
Expand All @@ -200,13 +200,7 @@
spending|spoke|stream|studying|submarine|sufficient|suggest|suitable|
supposed|surgery|survey|swimming|team's|anyway|anyhow|anywhere|
somewhere|somebody|somehow|whatever|whoever|whosever|whomever|
whichever|whenever|wherever|however|art|miss|mister|whats|theyre|
weve|theyve|shes|im|Im|youre|whos|wouldve|couldve|whens|wheres|hows|
dictionary|cards?|services?|kids?|groups?|skills?|news?|news?|
tomorrow|yesterday|(sun|mon|tues|wednes|thurs|fri|satur)?days?|
thanks?|welcome|black|yellow|orange|green|purple|gr[ae]y|blue|express|
coffee|academy|gold|golden|tech|clean|dirty|users?|sellers?|buyers?|
release|headlines?|points?|transfers?|foot|feet
whichever|whenever|wherever|however|art|miss|mister
">

<!ENTITY english_common "&english_wh_words;|&english_prepositions;|&english_adverbs;|&english_pronouns;|&english_determiners;|&english_common_verbs;|&english_conjunctions;|&english_word_list;|&english_suffixed_word;">
Original file line number Diff line number Diff line change
Expand Up @@ -12044,9 +12044,4 @@ touch screens NCFP000_
José Luiz NPMSS00_
Sofia Kovalevskaya NPFSS00_
Maya Plisetskaia NPFSS00_
Campos Elísios NPMP000

QR code NCMS000_
QR codes NCMP000_
code review NCCS000_
code reviews NCCP000_
Campos Elísios NPMP000
Loading

0 comments on commit 4f288eb

Please sign in to comment.