From 8fd08b390f7e2dde2aa75c3cd15f74aa22f558b2 Mon Sep 17 00:00:00 2001
From: John Bauer
Date: Mon, 23 Dec 2024 22:32:12 -0800
Subject: [PATCH] Test that the contextual lemmatizer is correctly loaded and functions without crashing

---
 stanza/tests/pipeline/test_lemmatizer.py | 10 ++++++++++
 1 file changed, 10 insertions(+)

diff --git a/stanza/tests/pipeline/test_lemmatizer.py b/stanza/tests/pipeline/test_lemmatizer.py
index b4de03073b..0b24349043 100644
--- a/stanza/tests/pipeline/test_lemmatizer.py
+++ b/stanza/tests/pipeline/test_lemmatizer.py
@@ -133,3 +133,13 @@ def test_latin_caseless_lemmatizer():
     assert len(doc.sentences[0].words) == 3
     for word, expected in zip(doc.sentences[0].words, expected_lemmas):
         assert word.lemma == expected
+
+def test_contextual_lemmatizer():
+    nlp = stanza.Pipeline('en', processors='tokenize,pos,lemma', model_dir=TEST_MODELS_DIR, package={"lemma": "default_accurate"}, download_method="reuse_resources")
+    lemmatizer = nlp.processors['lemma']._trainer
+    # the accurate model should have a 's classifier
+    assert len(lemmatizer.contextual_lemmatizers) > 0
+    # ideally the doc would have 'have' as the lemma for the second
+    # word, but maybe it's not always accurate. actually, it works
+    # fine at the time of this test
+    doc = nlp("He's added a contextual lemmatizer")