From afbd9371ddafdd8b4cef18a3f7e729d75a265f28 Mon Sep 17 00:00:00 2001 From: Thomas Baier Date: Mon, 28 Nov 2022 10:43:43 +0100 Subject: [PATCH] #1 Use lemma instead of full token for entity --- src/cltl/nlp/spacy_nlp.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/cltl/nlp/spacy_nlp.py b/src/cltl/nlp/spacy_nlp.py index 7815a43..a4eec76 100644 --- a/src/cltl/nlp/spacy_nlp.py +++ b/src/cltl/nlp/spacy_nlp.py @@ -46,7 +46,7 @@ def _analyze_entities(self, doc): type = EntityType.OBJECT if type: - entities.append(Entity(token.text, type, (token.idx, token.idx + len(token.text)))) + entities.append(Entity(token.lemma_.lower(), type, (token.idx, token.idx + len(token.text)))) predicates[head_id][token.dep_] = token.lemma_