diff --git a/languagetool-language-modules/pt/src/main/java/org/languagetool/language/Portuguese.java b/languagetool-language-modules/pt/src/main/java/org/languagetool/language/Portuguese.java index b0106fefaf05..19dcdd48b72a 100644 --- a/languagetool-language-modules/pt/src/main/java/org/languagetool/language/Portuguese.java +++ b/languagetool-language-modules/pt/src/main/java/org/languagetool/language/Portuguese.java @@ -134,6 +134,7 @@ public List getRelevantRules(ResourceBundle messages, UserConfig userConfi //Specific to Portuguese: new PostReformPortugueseCompoundRule(messages, this, userConfig), new PortugueseColourHyphenationRule(messages, this, userConfig), + new PortugueseOrthographyReplaceRule(messages, this), new PortugueseReplaceRule(messages, this), new PortugueseBarbarismsRule(messages, "/pt/barbarisms.txt", this), //new PortugueseArchaismsRule(messages, "/pt/archaisms-pt.txt"), // see https://github.com/languagetool-org/languagetool/issues/3095 @@ -288,17 +289,23 @@ public Map getPriorityMap() { @Override protected int getPriorityForId(String id) { + // generic spelling rule if (id.startsWith("MORFOLOGIK_RULE")) { return -50; } - if (id.startsWith("AI_PT_GGEC_REPLACEMENT_ORTHOGRAPHY_SPELL")) { + // simple replace spelling rule + if (id.startsWith("PT_SIMPLE_REPLACE_ORTHOGRAPHY")) { return -49; } - if (id.startsWith("AI_PT_GGEC_REPLACEMENT_OTHER")) { - return -4; + // AI spelling rule + if (id.startsWith("AI_PT_GGEC_REPLACEMENT_ORTHOGRAPHY_SPELL")) { + return -48; } if (id.startsWith("PT_MULTITOKEN_SPELLING")) { - return -49; + return -48; + } + if (id.startsWith("AI_PT_GGEC_REPLACEMENT_OTHER")) { + return -4; } // enclitic diacritics always take precedence over pronoun placement if (id.startsWith("ACENTUAÇÃO_VOGAL_ÊNCLISE")) { diff --git a/languagetool-language-modules/pt/src/main/java/org/languagetool/rules/pt/PortugueseOrthographyReplaceRule.java 
b/languagetool-language-modules/pt/src/main/java/org/languagetool/rules/pt/PortugueseOrthographyReplaceRule.java new file mode 100644 index 000000000000..97d7ae533f1e --- /dev/null +++ b/languagetool-language-modules/pt/src/main/java/org/languagetool/rules/pt/PortugueseOrthographyReplaceRule.java @@ -0,0 +1,88 @@ +/* LanguageTool, a natural language style checker + * Copyright (C) 2005-2015 Daniel Naber (http://www.danielnaber.de) + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 + * USA + */ +package org.languagetool.rules.pt; + +import org.languagetool.Language; +import org.languagetool.rules.AbstractSimpleReplaceRule; +import org.languagetool.rules.Categories; +import org.languagetool.rules.ITSIssueType; +import org.languagetool.tools.Tools; + +import java.util.List; +import java.util.Locale; +import java.util.Map; +import java.util.ResourceBundle; + +import java.net.URL; + +/** + * Rule for simple and frequent one-to-one spelling fixes in Portuguese. 
+ *
+ * @author p-goulart
+ */
+public class PortugueseOrthographyReplaceRule extends AbstractSimpleReplaceRule {
+
+  public static final String PORTUGUESE_SIMPLE_REPLACE_ORTHOGRAPHY_RULE = "PT_SIMPLE_REPLACE_ORTHOGRAPHY";
+
+  private static final Map<String, List<String>> wrongWords = loadFromPath("/pt/replace_orthography.txt"); // read once, when the class is initialised
+  private static final Locale PT_LOCALE = new Locale("pt");
+
+  @Override
+  public Map<String, List<String>> getWrongWords() { // exposes the shared static table loaded above
+    return wrongWords;
+  }
+
+  public PortugueseOrthographyReplaceRule(ResourceBundle messages, Language language) {
+    super(messages, language);
+    super.setCategory(Categories.TYPOS.getCategory(messages)); // filed under the TYPOS category
+    setLocQualityIssueType(ITSIssueType.Misspelling);
+    useSubRuleSpecificIds(); // NOTE(review): presumably derives one sub-rule id per word-list entry - confirm in AbstractSimpleReplaceRule
+  }
+
+  @Override
+  public String getId() { // the id prefix that Portuguese.getPriorityForId matches on
+    return PORTUGUESE_SIMPLE_REPLACE_ORTHOGRAPHY_RULE;
+  }
+
+  @Override
+  public boolean isCaseSensitive() {
+    return false;
+  }
+
+  @Override
+  public Locale getLocale() {
+    return PT_LOCALE;
+  }
+
+  @Override
+  public String getDescription() {
+    return messages.getString("desc_spelling");
+  }
+
+  @Override
+  public String getShort() {
+    return messages.getString("desc_spelling_short");
+  }
+
+  @Override
+  public String getMessage(String token, List<String> replacements) { // same generic text for every entry; both arguments are unused
+    return messages.getString("spelling");
+  }
+
+}
diff --git a/languagetool-language-modules/pt/src/main/resources/org/languagetool/resource/pt/disambiguation.xml b/languagetool-language-modules/pt/src/main/resources/org/languagetool/resource/pt/disambiguation.xml
index c774a7dbf585..4c35a1922fa6 100644
--- a/languagetool-language-modules/pt/src/main/resources/org/languagetool/resource/pt/disambiguation.xml
+++ b/languagetool-language-modules/pt/src/main/resources/org/languagetool/resource/pt/disambiguation.xml
@@ -3530,6 +3530,13 @@ + + + + (rs){2,15} + + +
diff --git a/languagetool-language-modules/pt/src/main/resources/org/languagetool/resource/pt/entities/english.ent b/languagetool-language-modules/pt/src/main/resources/org/languagetool/resource/pt/entities/english.ent
index ee93c6fdaf23..8ec52b3e321a 100644 --- a/languagetool-language-modules/pt/src/main/resources/org/languagetool/resource/pt/entities/english.ent +++ b/languagetool-language-modules/pt/src/main/resources/org/languagetool/resource/pt/entities/english.ent @@ -1,5 +1,5 @@ - + diff --git a/languagetool-language-modules/pt/src/main/resources/org/languagetool/resource/pt/multiwords.txt b/languagetool-language-modules/pt/src/main/resources/org/languagetool/resource/pt/multiwords.txt index 092fe81bb85c..0903c247ff8d 100644 --- a/languagetool-language-modules/pt/src/main/resources/org/languagetool/resource/pt/multiwords.txt +++ b/languagetool-language-modules/pt/src/main/resources/org/languagetool/resource/pt/multiwords.txt @@ -12044,4 +12044,9 @@ touch screens NCFP000_ José Luiz NPMSS00_ Sofia Kovalevskaya NPFSS00_ Maya Plisetskaia NPFSS00_ -Campos Elísios NPMP000 \ No newline at end of file +Campos Elísios NPMP000 + +QR code NCMS000_ +QR codes NCMP000_ +code review NCCS000_ +code reviews NCCP000_ diff --git a/languagetool-language-modules/pt/src/main/resources/org/languagetool/rules/pt/grammar.xml b/languagetool-language-modules/pt/src/main/resources/org/languagetool/rules/pt/grammar.xml index 4966c8bb854c..db2387b33a41 100644 --- a/languagetool-language-modules/pt/src/main/resources/org/languagetool/rules/pt/grammar.xml +++ b/languagetool-language-modules/pt/src/main/resources/org/languagetool/rules/pt/grammar.xml @@ -43516,10 +43516,10 @@ Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA. 
- &particles_of; - &exceptions_multitoken_spell;|&exceptions_multitoken_spell2;|&exceptions_multitoken_sensitive; - &exceptions_multitoken_spell;|&exceptions_multitoken_spell2;|&exceptions_multitoken_sensitive; - &exceptions_multitoken_spell;|&particles_of;|&exceptions_multitoken_sensitive; + &particles_of; + &exceptions_multitoken_spell;|&exceptions_multitoken_spell2;|&exceptions_multitoken_sensitive; + &exceptions_multitoken_spell;|&exceptions_multitoken_spell2;|&exceptions_multitoken_sensitive; + &exceptions_multitoken_spell;|&particles_of;|&exceptions_multitoken_sensitive; Possível erro ortográfico. @@ -43538,7 +43538,7 @@ Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA. - &particles_of; + &particles_of; \p{Lu}.*&exceptions_multitoken_spell;|&exceptions_multitoken_spell2;|&exceptions_multitoken_sensitive; \p{Lu}.*&exceptions_multitoken_spell;|&exceptions_multitoken_spell2;|&exceptions_multitoken_sensitive; \p{Lu}.*&exceptions_multitoken_spell;|&particles_of;|&exceptions_multitoken_sensitive; @@ -43551,7 +43551,7 @@ Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA. \p{Lu}.*&particles_of; - &exceptions_multitoken_spell;|&exceptions_multitoken_spell2;|&exceptions_multitoken_sensitive; + &exceptions_multitoken_spell;|&exceptions_multitoken_spell2;|&exceptions_multitoken_sensitive; \p{Lu}.*&exceptions_multitoken_spell;|&exceptions_multitoken_spell2;|&exceptions_multitoken_sensitive; \p{Lu}.*&exceptions_multitoken_spell;|&particles_of;|&exceptions_multitoken_sensitive; @@ -43564,7 +43564,7 @@ Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA. 
\p{Lu}.*&particles_of; \p{Lu}.*&exceptions_multitoken_spell;|&exceptions_multitoken_spell2;|&exceptions_multitoken_sensitive; - &exceptions_multitoken_spell;|&exceptions_multitoken_spell2;|&exceptions_multitoken_sensitive; + &exceptions_multitoken_spell;|&exceptions_multitoken_spell2;|&exceptions_multitoken_sensitive; \p{Lu}.*&exceptions_multitoken_spell;|&particles_of;|&exceptions_multitoken_sensitive; @@ -43577,7 +43577,7 @@ Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA. \p{Lu}.*&particles_of; \p{Lu}.*&exceptions_multitoken_spell;|&exceptions_multitoken_spell2;|&exceptions_multitoken_sensitive; \p{Lu}.*&exceptions_multitoken_spell;|&exceptions_multitoken_spell2;|&exceptions_multitoken_sensitive; - &exceptions_multitoken_spell;|&particles_of;|&exceptions_multitoken_sensitive; + &exceptions_multitoken_spell;|&particles_of;|&exceptions_multitoken_sensitive; Possível erro ortográfico. @@ -43587,10 +43587,10 @@ Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA. - - - - + + + + \p{Lu}.+ @@ -43606,7 +43606,7 @@ Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA. - &exceptions_multitoken_spell;|&particles_of;|&exceptions_multitoken_sensitive; + &exceptions_multitoken_spell;|&particles_of;|&exceptions_multitoken_sensitive; \p{Lu}.*&exceptions_multitoken_spell;|&particles_of;|&exceptions_multitoken_spell2;|&exceptions_multitoken_sensitive; \p{Lu}.*&exceptions_multitoken_spell;|&particles_of;|&exceptions_multitoken_spell2;|&exceptions_multitoken_sensitive; @@ -43619,7 +43619,7 @@ Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA. 
\p{Lu}.*&exceptions_multitoken_spell;|&particles_of;|&exceptions_multitoken_spell2;|&exceptions_multitoken_sensitive; - &exceptions_multitoken_spell;|&particles_of;|&exceptions_multitoken_sensitive; + &exceptions_multitoken_spell;|&particles_of;|&exceptions_multitoken_sensitive; \p{Lu}.*&exceptions_multitoken_spell;|&particles_of;|&exceptions_multitoken_spell2;|&exceptions_multitoken_sensitive; @@ -43633,18 +43633,18 @@ Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA. - + José María Cuevas - + \p{Lu}.*&exceptions_multitoken_spell;|&particles_of;|&exceptions_multitoken_spell2;|&exceptions_multitoken_sensitive; \p{Lu}.*&exceptions_multitoken_spell;|&particles_of;|&exceptions_multitoken_spell2;|&exceptions_multitoken_sensitive; - &exceptions_multitoken_spell;|&particles_of;|&exceptions_multitoken_sensitive; + &exceptions_multitoken_spell;|&particles_of;|&exceptions_multitoken_sensitive; Possível erro ortográfico. @@ -43652,9 +43652,9 @@ Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA. - &exceptions_multitoken_spell;|&particles_of;|&exceptions_multitoken_sensitive; - &exceptions_multitoken_spell;|&particles_of;|&exceptions_multitoken_sensitive; - &exceptions_multitoken_spell;|&particles_of;|&exceptions_multitoken_sensitive; + &exceptions_multitoken_spell;|&particles_of;|&exceptions_multitoken_sensitive; + &exceptions_multitoken_spell;|&particles_of;|&exceptions_multitoken_sensitive; + &exceptions_multitoken_spell;|&particles_of;|&exceptions_multitoken_sensitive; Possível erro ortográfico. @@ -43669,7 +43669,7 @@ Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA. 
- &exceptions_multitoken_spell;|&particles_of;|&exceptions_multitoken_sensitive; + &exceptions_multitoken_spell;|&particles_of;|&exceptions_multitoken_sensitive; &particles_of;|e|& \p{Lu}.*&exceptions_multitoken_spell;|&particles_of;|&exceptions_multitoken_spell2;|&exceptions_multitoken_sensitive; @@ -43697,7 +43697,7 @@ Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA. \p{Lu}.*&exceptions_multitoken_spell;|&particles_of;|&exceptions_multitoken_spell2;|&exceptions_multitoken_sensitive; &particles_of;|e|& - &exceptions_multitoken_spell;|&particles_of;|&exceptions_multitoken_sensitive; + &exceptions_multitoken_spell;|&particles_of;|&exceptions_multitoken_sensitive; Possível erro ortográfico. @@ -43719,13 +43719,13 @@ Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA. \p{Lu}.* - + \p{Lu}.* \p{Lu}.* - &exceptions_multitoken_spell;|&particles_of;|&exceptions_multitoken_sensitive; - &exceptions_multitoken_spell;|&particles_of;|&exceptions_multitoken_sensitive; + &exceptions_multitoken_spell;|&particles_of;|&exceptions_multitoken_sensitive; + &exceptions_multitoken_spell;|&particles_of;|&exceptions_multitoken_sensitive; Possível erro ortográfico. @@ -43765,7 +43765,7 @@ Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA. \p{Lu}.*&exceptions_multitoken_spell;|&particles_of;|&exceptions_multitoken_sensitive; - &exceptions_multitoken_spell;|&particles_of;|&exceptions_multitoken_sensitive;[A-Z]{1,3}[A-Z][A-Z\d]? + &exceptions_multitoken_spell;|&particles_of;|&exceptions_multitoken_sensitive;[A-Z]{1,3}[A-Z][A-Z\d]? Possível erro ortográfico. @@ -43808,12 +43808,12 @@ Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA. 
\p{Lu}.* - + \p{Lu}.* \p{Lu}.* - \p{L}..*sir|don&exceptions_multitoken_spell;|&particles_of;|&exceptions_multitoken_spell2;|&exceptions_multitoken_sensitive; + \p{L}..*sir|don&exceptions_multitoken_spell;|&particles_of;|&exceptions_multitoken_spell2;|&exceptions_multitoken_sensitive; \p{Lu}\p{Ll}.*&exceptions_multitoken_spell;|&particles_of;|&exceptions_multitoken_spell2;|&exceptions_multitoken_sensitive; @@ -43845,7 +43845,7 @@ Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA. &exceptions_multitoken_spell;|&particles_of;|.*\d.* - - \p{L}..*&exceptions_multitoken_spell;|&particles_of;|.*\d.* + \p{L}..*&exceptions_multitoken_spell;|&particles_of;|.*\d.* Possível erro ortográfico. @@ -43855,9 +43855,9 @@ Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA. - \p{L}..*&prefixos;|&exceptions_multitoken_spell;|&particles_of;|.*\d.* + \p{L}..*&prefixos;|&exceptions_multitoken_spell;|&particles_of;|.*\d.* - - \p{L}..*&exceptions_multitoken_spell;|&particles_of;|.*\d.* + \p{L}..*&exceptions_multitoken_spell;|&particles_of;|.*\d.* Possível erro ortográfico. @@ -43870,7 +43870,7 @@ Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA. - \p{L}..* + \p{L}..* &prefixos;&exceptions_multitoken_spell;|&particles_of;|.*\d.* - diff --git a/languagetool-language-modules/pt/src/main/resources/org/languagetool/rules/pt/replace_orthography.txt b/languagetool-language-modules/pt/src/main/resources/org/languagetool/rules/pt/replace_orthography.txt new file mode 100644 index 000000000000..1679833f751c --- /dev/null +++ b/languagetool-language-modules/pt/src/main/resources/org/languagetool/rules/pt/replace_orthography.txt @@ -0,0 +1,27 @@ +# This file is used for the PT_SIMPLE_REPLACE_ORTHOGRAPHY rule. +# +# It must only be used for words that are: +# 1. spelt the same across *all* dialects of Portuguese; +# 2. relatively short and frequent (there's no numeric criterion here, just be reasonable); +# 3. 
the corrections listed here are the only conceivable ones (within reason). +# +# The other orthography rule targets foreign terms and should generally not be used for such cases. + +ja=já +sao=são +nao=não +hao=hão +vao=vão +voce=você + +acao=ação +pao=pão|pau +maos=mãos|maus +cao=cão|caô +grao=grão|grau +graca=graça +razao=razão +periodo=período +periodos=períodos +servico=serviço +servicos=serviços diff --git a/languagetool-language-modules/pt/src/main/resources/org/languagetool/rules/pt/style.xml b/languagetool-language-modules/pt/src/main/resources/org/languagetool/rules/pt/style.xml index d45f924e098f..afd458f3f193 100644 --- a/languagetool-language-modules/pt/src/main/resources/org/languagetool/rules/pt/style.xml +++ b/languagetool-language-modules/pt/src/main/resources/org/languagetool/rules/pt/style.xml @@ -19389,5 +19389,13 @@ USA https://pt.wikipedia.org/wiki/Kelvin O sol tem a temperatura de 15,7 milhões de graus kelvin. + + + + ok + + Prefira OK. + Está tudo ok com ela? + diff --git a/languagetool-language-modules/pt/src/test/java/org/languagetool/rules/pt/PortugueseOrthographyReplaceRuleTest.java b/languagetool-language-modules/pt/src/test/java/org/languagetool/rules/pt/PortugueseOrthographyReplaceRuleTest.java new file mode 100644 index 000000000000..4363f169396d --- /dev/null +++ b/languagetool-language-modules/pt/src/test/java/org/languagetool/rules/pt/PortugueseOrthographyReplaceRuleTest.java @@ -0,0 +1,72 @@ +/* LanguageTool, a natural language style checker + * Copyright (C) 2020 Jaume Ortolà (http://www.languagetool.org) + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. 
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301
+ * USA
+ */
+package org.languagetool.rules.pt;
+
+import org.junit.BeforeClass;
+import org.junit.Test;
+import org.languagetool.JLanguageTool;
+import org.languagetool.TestTools;
+import org.languagetool.language.Portuguese;
+import org.languagetool.rules.RuleMatch;
+
+import java.io.IOException;
+import java.util.List;
+
+import static org.junit.Assert.assertEquals;
+
+public class PortugueseOrthographyReplaceRuleTest {
+  private static PortugueseOrthographyReplaceRule rule; // rule under test, invoked directly rather than through lt.check()
+  private static JLanguageTool lt; // used only to analyse the input sentences
+
+  @BeforeClass
+  public static void setUp() throws Exception {
+    lt = new JLanguageTool(new Portuguese());
+    rule = new PortugueseOrthographyReplaceRule(TestTools.getMessages("pt"), lt.getLanguage());
+  }
+
+  @Test
+  public void testRule() throws IOException {
+    assertNoMatches("Já volto.");
+    assertSingleMatch("Ja volto.", "Já");
+
+    assertNoMatches("Gosto de você.");
+    assertSingleMatch("Gosto de voce.", "você");
+    assertNoMatches("Disse-me sotto voce."); // multi-token spelling of Italian expression
+  }
+
+  private void assertRuleId(RuleMatch match) { // JUnit assertion: a bare 'assert' is skipped when the JVM runs without -ea
+    org.junit.Assert.assertTrue(match.getRule().getId().startsWith(PortugueseOrthographyReplaceRule.PORTUGUESE_SIMPLE_REPLACE_ORTHOGRAPHY_RULE));
+  }
+
+  private void assertNoMatches(String sentence) throws IOException {
+    RuleMatch[] matches = rule.match(lt.getAnalyzedSentence(sentence));
+    assertEquals(0, matches.length);
+  }
+
+  private void assertSingleMatch(String sentence, String... suggestions) throws IOException { // suggestions are compared in order
+    RuleMatch[] matches = rule.match(lt.getAnalyzedSentence(sentence));
+    assertEquals(1, matches.length);
+    assertRuleId(matches[0]);
+    List<String> returnedSuggestions = matches[0].getSuggestedReplacements();
+    assertEquals(suggestions.length, returnedSuggestions.size());
+    for (int i = 0; i < suggestions.length; i++) {
+      assertEquals(suggestions[i], returnedSuggestions.get(i));
+    }
+  }
+}
diff --git a/pom.xml b/pom.xml
index d8cf10bee230..91e8c50e7f5a 100644
--- a/pom.xml
+++ b/pom.xml
@@ -223,7 +223,7 @@ 0.6 2.2 - 1.0.1 + 1.1.0 0.1 0.3 1.0.2