Skip to content

Commit

Permalink
Added missing parts of refactoring json for phrase_translate
Browse files Browse the repository at this point in the history
  • Loading branch information
fbanados committed Nov 19, 2024
1 parent 014a9ce commit bb88751
Show file tree
Hide file tree
Showing 5 changed files with 78 additions and 74 deletions.
25 changes: 25 additions & 0 deletions src/crkeng/resources/phrase_translate/noun_wordform_to_phrase.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@
[
["+N", null, 0],
["+A", null, 0],
["+I", null, 0],
["+D", null, 0],
# Number
["+Sg", "TagMap.COPY_TAG_NAME", 1],
["+Pl", "TagMap.COPY_TAG_NAME", 1],
["+Obv", "TagMap.COPY_TAG_NAME", 1],
["+Loc", "TagMap.COPY_TAG_NAME", 1],
["+Distr", "TagMap.COPY_TAG_NAME", 1],
# Diminutive
["+Dim", "TagMap.COPY_TAG_NAME", 2],
["+Der/Dim", "Dim+", 2],
# Possessives
["+Px1Sg", "TagMap.COPY_TAG_NAME", 3],
["+Px2Sg", "TagMap.COPY_TAG_NAME", 3],
["+Px3Sg", "TagMap.COPY_TAG_NAME", 3],
["+Px1Pl", "TagMap.COPY_TAG_NAME", 3],
["+Px2Pl", "TagMap.COPY_TAG_NAME", 3],
["+Px12Pl", "TagMap.COPY_TAG_NAME", 3], # Maybe needs to be recoded with 12 -> 21
["+Px3Pl", "TagMap.COPY_TAG_NAME", 3],
["+Px4Sg/Pl", "TagMap.COPY_TAG_NAME", 3],
["+PxX", "PxXPl+", 3]
]
46 changes: 46 additions & 0 deletions src/crkeng/resources/phrase_translate/verb_wordform_to_phrase.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,46 @@
[
["+V", null, 0],
["+TA", null, 0],
["+AI", null, 0],
["+II", null, 0],
["+TI", null, 0],
# Tense/Aspect
["PV/ki+", "Prt+", 1], # Preterite aka simple past
[["PV/ki+", "+Ind"], "Prt+", 1], # Preterite aka simple past
[["+Fut", "+Cond"], "Cond+", 1], # Future conditional
[["+Imp", "+Imm"], "Imm+", 1], # Immediate imperative
[["+Imp", "+Del"], "Del+", 1], # Delayed imperative
[["PV/wi+", "+Ind"], "Fut+", 1], # Future
["PV/wi+", "Fut+", 1], # Also accept PV/wi without indicative as future
[["PV/e+", "+Cnj"], null, 1], # conjunctive marker
# Note that these crk features are disjoint, but both are needed for the eng feature
[["PV/ka+", "+Ind"], "Def+", 1],
[["PV/ka+", "+Cnj"], "Inf+", 1],
[["PV/ta+", "+Cnj"], "Inf+", 1], # future definite
["+Ind", "Prs+", 1],
["TagMap.DEFAULT", "Prs+", 1], # default to present tense
# Person - Subject
["+1Sg", "TagMap.COPY_TAG_NAME", 2],
["+2Sg", "TagMap.COPY_TAG_NAME", 2],
["+3Sg", "TagMap.COPY_TAG_NAME", 2],
["+1Pl", "TagMap.COPY_TAG_NAME", 2],
["+12Pl", "21Pl+", 2],
["+2Pl", "TagMap.COPY_TAG_NAME", 2],
["+3Pl", "TagMap.COPY_TAG_NAME", 2],
["+4Sg/Pl", "TagMap.COPY_TAG_NAME", 2],
["+5Sg/Pl", "TagMap.COPY_TAG_NAME", 2],
["+X", "XPl+", 2],
# Person - Object
["+1SgO", "TagMap.COPY_TAG_NAME", 3],
["+2SgO", "TagMap.COPY_TAG_NAME", 3],
["+3SgO", "TagMap.COPY_TAG_NAME", 3],
["+1PlO", "TagMap.COPY_TAG_NAME", 3],
["+12PlO", "21PlO+", 3],
["+2PlO", "TagMap.COPY_TAG_NAME", 3],
["+3PlO", "TagMap.COPY_TAG_NAME", 3],
["+4Pl", "TagMap.COPY_TAG_NAME", 3],
["+4Sg", "TagMap.COPY_TAG_NAME", 3],
["+4Sg/PlO", "TagMap.COPY_TAG_NAME", 3],
["+5Sg/PlO", "TagMap.COPY_TAG_NAME", 3],
["+XO", "XPlO+", 3]
]
75 changes: 3 additions & 72 deletions src/morphodict/phrase_translate/source_tag_map.py
Original file line number Diff line number Diff line change
@@ -1,33 +1,7 @@
from morphodict.analysis.tag_map import TagMap

# TODO Migrate to json files

COPY_TAG_NAME = TagMap.COPY_TAG_NAME
from morphodict.phrase_translate.tag_maps import *

# Tag map applied to the tags of a noun wordform analysis to obtain the tags
# used when inflecting the target-language phrase.
# Each entry is (source tag, target tag, number):
#   - the word-class tags (+N, +A, +I, +D) map to None;
#   - COPY_TAG_NAME is used where no explicit target tag is spelled out
#     (presumably the source tag's own name is reused; confirm against TagMap);
#   - the trailing number (0-3) follows the sections marked below
#     (NOTE(review): presumably a precedence/slot index; confirm against TagMap).
noun_wordform_to_phrase = TagMap(
("+N", None, 0),
("+A", None, 0),
("+I", None, 0),
("+D", None, 0),
# Number
("+Sg", COPY_TAG_NAME, 1),
("+Pl", COPY_TAG_NAME, 1),
("+Obv", COPY_TAG_NAME, 1),
("+Loc", COPY_TAG_NAME, 1),
("+Distr", COPY_TAG_NAME, 1),
# Diminutive
("+Dim", COPY_TAG_NAME, 2),
("+Der/Dim", "Dim+", 2),
# Possessives
("+Px1Sg", COPY_TAG_NAME, 3),
("+Px2Sg", COPY_TAG_NAME, 3),
("+Px3Sg", COPY_TAG_NAME, 3),
("+Px1Pl", COPY_TAG_NAME, 3),
("+Px2Pl", COPY_TAG_NAME, 3),
("+Px12Pl", COPY_TAG_NAME, 3), # Maybe needs to be recoded with 12 -> 21
("+Px3Pl", COPY_TAG_NAME, 3),
("+Px4Sg/Pl", COPY_TAG_NAME, 3),
("+PxX", "PxXPl+", 3),
# Entries built from the noun_wordform_to_phrase.json resource.
# NOTE(review): that resource lists the same entries as the literal tuples
# above; confirm that only one of the two sources is meant to remain.
*tagmap_json_to_tuples(read_phrase_translate_json("noun_wordform_to_phrase.json")),
)

# Normally having a preverb tag excludes a wordform from auto-translation; this
Expand All @@ -38,48 +12,5 @@

# Cree tense/aspects:
# Tag map applied to the tags of a verb wordform analysis to obtain the tags
# used when inflecting the target-language phrase.
# Each entry is (source, target tag, number):
#   - source is a single tag, a tuple of tags such as ("PV/ki+", "+Ind"), or
#     TagMap.DEFAULT (presumably every tag of a tuple must be present for the
#     entry to apply; confirm against TagMap);
#   - target is an explicit tag, None, or COPY_TAG_NAME;
#   - the trailing number (0-3) follows the sections marked below
#     (NOTE(review): presumably a precedence/slot index; confirm against TagMap).
verb_wordform_to_phrase = TagMap(
("+V", None, 0),
("+TA", None, 0),
("+AI", None, 0),
("+II", None, 0),
("+TI", None, 0),
# Tense/Aspect
("PV/ki+", "Prt+", 1), # Preterite aka simple past
(("PV/ki+", "+Ind"), "Prt+", 1), # Preterite aka simple past
(("+Fut", "+Cond"), "Cond+", 1), # Future conditional
(("+Imp", "+Imm"), "Imm+", 1), # Immediate imperative
(("+Imp", "+Del"), "Del+", 1), # Delayed imperative
(("PV/wi+", "+Ind"), "Fut+", 1), # Future
("PV/wi+", "Fut+", 1), # Also accept PV/wi without indicative as future
(("PV/e+", "+Cnj"), None, 1), # conjunctive marker
# Note that these crk features are disjoint, but both are needed for the eng feature
(("PV/ka+", "+Ind"), "Def+", 1),
(("PV/ka+", "+Cnj"), "Inf+", 1),
(("PV/ta+", "+Cnj"), "Inf+", 1), # future definite
("+Ind", "Prs+", 1),
(TagMap.DEFAULT, "Prs+", 1), # default to present tense
# Person - Subject
("+1Sg", COPY_TAG_NAME, 2),
("+2Sg", COPY_TAG_NAME, 2),
("+3Sg", COPY_TAG_NAME, 2),
("+1Pl", COPY_TAG_NAME, 2),
("+12Pl", "21Pl+", 2),
("+2Pl", COPY_TAG_NAME, 2),
("+3Pl", COPY_TAG_NAME, 2),
("+4Sg/Pl", COPY_TAG_NAME, 2),
("+5Sg/Pl", COPY_TAG_NAME, 2),
("+X", "XPl+", 2),
# Person - Object
("+1SgO", COPY_TAG_NAME, 3),
("+2SgO", COPY_TAG_NAME, 3),
("+3SgO", COPY_TAG_NAME, 3),
("+1PlO", COPY_TAG_NAME, 3),
("+12PlO", "21PlO+", 3),
("+2PlO", COPY_TAG_NAME, 3),
("+3PlO", COPY_TAG_NAME, 3),
("+4Pl", COPY_TAG_NAME, 3),
("+4Sg", COPY_TAG_NAME, 3),
("+4Sg/PlO", COPY_TAG_NAME, 3),
("+5Sg/PlO", COPY_TAG_NAME, 3),
("+XO", "XPlO+", 3),
# Entries built from the verb_wordform_to_phrase.json resource.
# NOTE(review): that resource lists the same entries as the literal tuples
# above; confirm that only one of the two sources is meant to remain.
*tagmap_json_to_tuples(read_phrase_translate_json("verb_wordform_to_phrase.json")),
)
2 changes: 2 additions & 0 deletions src/morphodict/phrase_translate/tag_maps.py
Original file line number Diff line number Diff line change
Expand Up @@ -52,6 +52,8 @@ def element_process(x):
return tuple(x)
if x == "TagMap.DEFAULT":
return TagMap.DEFAULT
if x == "TagMap.COPY_TAG_NAME":
return TagMap.COPY_TAG_NAME
return x

return tuple(element_process(x) for x in l)
Expand Down
4 changes: 2 additions & 2 deletions src/morphodict/phrase_translate/to_target/__init__.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
import logging

from django.conf import settings
from morphodict.analysis import RichAnalysis
from morphodict.phrase_translate.source_tag_map import (
noun_wordform_to_phrase,
Expand All @@ -17,7 +17,7 @@
def inflect_target_language_phrase(analysis, lemma_definition):
if isinstance(analysis, tuple):
analysis = RichAnalysis(analysis)
cree_wordform_tag_list = analysis.prefix_tags + analysis.suffix_tags
cree_wordform_tag_list = analysis.prefix_tags + analysis.suffix_tags + settings.DEFAULT_TARGET_LANGUAGE_PHRASE_TAGS

if "+N" in cree_wordform_tag_list:
tags_for_phrase = noun_wordform_to_phrase.map_tags(cree_wordform_tag_list)
Expand Down

0 comments on commit bb88751

Please sign in to comment.