From 850e5888c924a7810b5d586bc305e5a14d0c7969 Mon Sep 17 00:00:00 2001 From: John Bauer Date: Wed, 13 Mar 2024 16:45:16 -0700 Subject: [PATCH] Add lemmas to a few of the MWTs that we combine for English. Lemmas for a few others are still TODO, such as the suite of contractions (n't, 'll, etc.) --- .../nlp/trees/ud/EnglishMWTCombiner.java | 18 ++++++++++++++++-- 1 file changed, 16 insertions(+), 2 deletions(-) diff --git a/src/edu/stanford/nlp/trees/ud/EnglishMWTCombiner.java b/src/edu/stanford/nlp/trees/ud/EnglishMWTCombiner.java index e9fc8f6edd..ef2bb2c3b6 100644 --- a/src/edu/stanford/nlp/trees/ud/EnglishMWTCombiner.java +++ b/src/edu/stanford/nlp/trees/ud/EnglishMWTCombiner.java @@ -15,6 +15,8 @@ public SemanticGraph combineMWTs(SemanticGraph sg) { // combine using the CombineMWT operation, using the default concatenation for the MWT text String mwt = String.join(newline, + // TODO: separate the contractions so we can adjust the lemmas? + // In some other way fix those lemmas? "", " ", " 1", @@ -30,9 +32,19 @@ public SemanticGraph combineMWTs(SemanticGraph sg) { " ", " ", " 3", - " Edit a node's MWT for wanna/gonna", - " " + XMLUtils.escapeXML("{word:/(?i)wan|gon/;after://}=first . {word:/(?i)na/}=second") + "", + " Edit a node's MWT for wanna", + " " + XMLUtils.escapeXML("{word:/(?i)wan/;after://}=first . {word:/(?i)na/}=second") + "", " CombineMWT -node first -node second", + " EditNode -node first -lemma want", + " EditNode -node second -lemma to", + " ", + " ", + " 3b", + " Edit a node's MWT for gonna", + " " + XMLUtils.escapeXML("{word:/(?i)gon/;after://}=first . {word:/(?i)na/}=second") + "", + " CombineMWT -node first -node second", + " EditNode -node first -lemma go", + " EditNode -node second -lemma to", " ", " ", " 4", @@ -45,6 +57,8 @@ public SemanticGraph combineMWTs(SemanticGraph sg) { " Edit a node's MWT for 'tis and 'twas", " " + XMLUtils.escapeXML("{word:/'[tT]/}=first . 
{word:/(?i)is|was/}=second") + "", " CombineMWT -node first -node second", + " EditNode -node first -lemma it", + " EditNode -node second -lemma be", " ", " ", " 6",