From 6975afcdecfecf296dc51e5cb0d9dc18f80dfeb2 Mon Sep 17 00:00:00 2001 From: Dimitri Papadopoulos <3234522+DimitriPapadopoulos@users.noreply.github.com> Date: Sun, 9 Jul 2023 14:35:53 +0200 Subject: [PATCH] Allow words missing from aspell dictionaries Allowed words are listed in files named using the following scheme: codespell_lib/tests/data/en-additionnal.wordlist codespell_lib/tests/data/en_GB-additionnal.wordlist codespell_lib/tests/data/en_US-additionnal.wordlist Prefixes "en", "en_GB" or "en_US" match one of the supported languages. We cannot add words with dashes such as "low-colour" or "high-colour" to these lists because aspell doesn't allow that. --- .../data/dictionary_en-GB_to_en-US.txt | 44 +++++++++++++++++++ .../tests/data/en_GB-additionnal.wordlist | 35 +++++++++++++++ .../tests/data/en_US-additionnal.wordlist | 26 +++++++++++ codespell_lib/tests/test_dictionary.py | 10 ++++- 4 files changed, 113 insertions(+), 2 deletions(-) create mode 100644 codespell_lib/tests/data/en_GB-additionnal.wordlist create mode 100644 codespell_lib/tests/data/en_US-additionnal.wordlist diff --git a/codespell_lib/data/dictionary_en-GB_to_en-US.txt b/codespell_lib/data/dictionary_en-GB_to_en-US.txt index 79aad073acd..8cfea891006 100644 --- a/codespell_lib/data/dictionary_en-GB_to_en-US.txt +++ b/codespell_lib/data/dictionary_en-GB_to_en-US.txt @@ -1,5 +1,10 @@ acknowledgement->acknowledgment acknowledgements->acknowledgments +aesthetic->esthetic +aesthetically->esthetically +aesthetician->esthetician +aestheticians->estheticians +aesthetics->esthetics aggrandise->aggrandize aggrandised->aggrandized aggrandisement->aggrandizement @@ -38,6 +43,7 @@ bastardising->bastardizing behaviour->behavior behavioural->behavioral behaviours->behaviors +biassed->biased cancelled->canceled cancelling->canceling capitalisation->capitalization @@ -68,6 +74,7 @@ characterising->characterizing cognisant->cognizant colour->color colouration->coloration +colourations->colorations coloured->colored 
colourful->colorful colourfully->colorfully @@ -84,6 +91,7 @@ crystallise->crystallize crystallised->crystallized crystallises->crystallizes crystallising->crystallizing +customisable->customizable customisation->customization customise->customize customised->customized @@ -102,6 +110,7 @@ demonised->demonized demonises->demonizes demonising->demonizing dialogue->dialog +dialogues->dialogs digitisation->digitization digitise->digitize digitised->digitized @@ -113,6 +122,8 @@ dishonourable->dishonorable dishonoured->dishonored dishonouring->dishonoring dishonours->dishonors +doughnut->donut +doughnuts->donuts economise->economize emphasise->emphasize emphasised->emphasized @@ -148,6 +159,9 @@ finalises->finalizes finalising->finalizing flavour->flavor flavours->flavors +focussed->focused +focusses->focuses +focussing->focusing formalisation->formalization formalise->formalize formalised->formalized @@ -165,9 +179,11 @@ grey->gray greyed->grayed greyish->grayish greys->grays +greyscale->grayscale haemorrhage->hemorrhage haemorrhaged->hemorrhaged haemorrhages->hemorrhages +haemorrhagic->hemorrhagic haemorrhaging->hemorrhaging honour->honor honoured->honored @@ -201,6 +217,8 @@ labelling->labeling labour->labor laboured->labored labours->labors +lambast->lambaste +lambasts->lambastes legalisation->legalization legalise->legalize legalised->legalized @@ -208,7 +226,9 @@ legalises->legalizes legalising->legalizing leukaemia->leukemia licence->license +licenced->licensed licences->licenses +licencing->licensing litre->liter litres->liters localise->localize @@ -216,7 +236,9 @@ localised->localized localises->localizes localising->localizing manoeuvre->maneuver +manoeuvred->maneuvered manoeuvres->maneuvers +manoeuvring->maneuvering marshalled->marshaled marshalling->marshaling maximisation->maximization @@ -264,6 +286,8 @@ normalised->normalized normalises->normalizes normalising->normalizing ochre->ocher +ochreous->ocherous +ochrey->ochery optimisation->optimization 
optimisations->optimizations optimise->optimize @@ -313,6 +337,11 @@ randomise->randomize randomised->randomized randomises->randomizes randomising->randomizing +rasterisation->rasterization +rasterise->rasterize +rasterised->rasterized +rasterises->rasterizes +rasterising->rasterizing rationalise->rationalize rationalised->rationalized rationalising->rationalizing @@ -325,6 +354,9 @@ recognise->recognize recognised->recognized recognises->recognizes recognising->recognizing +refocussed->refocused +refocusses->refocuses +refocussing->refocusing regularisation->regularization regularise->regularize regularised->regularized @@ -338,9 +370,11 @@ reorganised->reorganized reorganises->reorganizes reorganising->reorganizing rigour->rigor +sanitisation->sanitization sanitise->sanitize sanitised->sanitized sanitiser->sanitizer +sanitisers->sanitizers sanitises->sanitizes sanitising->sanitizing sceptical->skeptical @@ -386,6 +420,8 @@ symbolising->symbolizing synchronisation->synchronization synchronise->synchronize synchronised->synchronized +synchroniser->synchronizer +synchronisers->synchronizers synchronises->synchronizes synchronising->synchronizing totalled->totaled @@ -393,6 +429,7 @@ totalling->totaling unauthorised->unauthorized unfavourable->unfavorable unfavourably->unfavorably +uninitialised->uninitialized unorganised->unorganized unrecognisable->unrecognizable unrecognised->unrecognized @@ -402,6 +439,12 @@ utilise->utilize utilised->utilized utilises->utilizes utilising->utilizing +vectorisation->vectorization +vectorisations->vectorizations +vectorise->vectorize +vectorised->vectorized +vectorises->vectorizes +vectorising->vectorizing virtualisation->virtualization visualisation->visualization visualisations->visualizations @@ -410,3 +453,4 @@ visualised->visualized visualiser->visualizer visualises->visualizes visualising->visualizing +writeable->writable diff --git a/codespell_lib/tests/data/en_GB-additionnal.wordlist 
b/codespell_lib/tests/data/en_GB-additionnal.wordlist new file mode 100644 index 00000000000..ce88229aba0 --- /dev/null +++ b/codespell_lib/tests/data/en_GB-additionnal.wordlist @@ -0,0 +1,35 @@ +aesthetician +aestheticians +biassed +colourations +customisable +dialogues +focussed +focusses +focussing +greyscale +haemorrhagic +lambast +lambasts +licenced +licencing +ochreous +ochrey +rasterisation +rasterise +rasterised +rasterises +rasterising +refocussed +refocusses +refocussing +sanitisation +synchroniser +synchronisers +vectorisation +vectorisations +vectorise +vectorised +vectorises +vectorising +writeable diff --git a/codespell_lib/tests/data/en_US-additionnal.wordlist b/codespell_lib/tests/data/en_US-additionnal.wordlist new file mode 100644 index 00000000000..8d761ed229e --- /dev/null +++ b/codespell_lib/tests/data/en_US-additionnal.wordlist @@ -0,0 +1,26 @@ +colorations +customizable +dialogs +donut +esthetic +esthetically +esthetician +estheticians +esthetics +grayscale +ocherous +ochery +rasterization +rasterize +rasterized +rasterizes +rasterizing +sanitization +synchronizer +synchronizers +vectorization +vectorizations +vectorize +vectorized +vectorizes +vectorizing diff --git a/codespell_lib/tests/test_dictionary.py b/codespell_lib/tests/test_dictionary.py index a81643ec367..fbd3b5d7a6e 100644 --- a/codespell_lib/tests/test_dictionary.py +++ b/codespell_lib/tests/test_dictionary.py @@ -14,9 +14,14 @@ try: import aspell # type: ignore[import] + _test_data_dir = op.join(op.dirname(__file__), "..", "tests", "data") for lang in supported_languages: - spellers[lang] = aspell.Speller("lang", lang) -except Exception as exp: # probably ImportError, but maybe also language + _wordlist = op.join(_test_data_dir, f"{lang}-additionnal.wordlist") + if op.isfile(_wordlist): + spellers[lang] = aspell.Speller(("lang", lang), ("wordlists", _wordlist)) + else: + spellers[lang] = aspell.Speller("lang", lang) +except ImportError as exp: if os.getenv("REQUIRE_ASPELL", 
"false").lower() == "true": raise RuntimeError( "Cannot run complete tests without aspell when " @@ -259,6 +264,7 @@ def test_error_checking_in_aspell( ("dictionary.txt", "dictionary_rare.txt"), ("dictionary.txt", "dictionary_usage.txt"), ("dictionary_code.txt", "dictionary_rare.txt"), + ("dictionary_rare.txt", "dictionary_en-GB_to_en-US.txt"), ("dictionary_rare.txt", "dictionary_usage.txt"), }