Skip to content

Commit

Permalink
Updated FSTs to correctly have multicharacter symbols for tags
Browse files Browse the repository at this point in the history
  • Loading branch information
fbanados committed Nov 19, 2024
1 parent 08dfc20 commit e0ebd1b
Show file tree
Hide file tree
Showing 4 changed files with 103 additions and 17 deletions.
110 changes: 98 additions & 12 deletions src/stoeng/resources/dictionary/stoeng_test_db.importjson
Original file line number Diff line number Diff line change
@@ -1,22 +1,40 @@
[
{
"head": "agdaa",
"analysis": [
[],
"agdaa",
[
"+3Sg",
"+ContentInterr"
]
],
"linguistInfo": {
"inflectional_category": "wa-ya-Reg",
"pos": "V",
"stem": "a_gdaa"
"stem": "a_gdaa",
"wordclass" : "VI"
},
"paradigm": "VI",
"senses": [
{
"definition": "for S to run",
"definition": "for s/he to run",
"sources": ["McA&K&C"]
}
],
"slug": "agdaa"
},
{
"head": "apa",
"analysis": [
[],
"apa",
[
"+3Sg",
"+3SgO",
"+ContentInterr"
]
],
"linguistInfo": {
"inflectional_category": "poly-wa-ya",
"pos": "V",
Expand All @@ -25,14 +43,22 @@
"paradigm": "VT",
"senses": [
{
"definition": "for S to hit O",
"definition": "for s/he to hit s.o.",
"sources": ["McA&K&C"]
}
],
"slug": "apa"
},
{
"head": "cîga",
"analysis": [
[],
"cîga",
[
"+3Sg",
"+ContentInterr"
]
],
"linguistInfo": {
"inflectional_category": "wa-ya-Reg",
"pos": "V",
Expand All @@ -41,14 +67,22 @@
"paradigm": "VI",
"senses": [
{
"definition": "for S to want him/her/it",
"definition": "for s/he to want s.o.",
"sources": ["McA&K&C"]
}
],
"slug": "cîga"
},
{
"head": "hnagda",
"analysis": [
[],
"hnagda",
[
"+3Sg",
"+ContentInterr"
]
],
"linguistInfo": {
"inflectional_category": "wa-ya-Reg",
"pos": "V",
Expand All @@ -57,14 +91,22 @@
"paradigm": "VI",
"senses": [
{
"definition": "for S to drink their own",
"definition": "for s/he to drink their own",
"sources": ["McA&K&C"]
}
],
"slug": "hnagda"
},
{
"head": "hnuda",
"analysis": [
[],
"hnuda",
[
"+3Sg",
"+ContentInterr"
]
],
"linguistInfo": {
"inflectional_category": "wa-ya-Reg",
"pos": "V",
Expand All @@ -73,14 +115,22 @@
"paradigm": "VI",
"senses": [
{
"definition": "for S to eat their own O",
"definition": "for s/he to eat their own s.o.",
"sources": ["McA&K&C"]
}
],
"slug": "hnuda"
},
{
"head": "ishdima",
"analysis": [
[],
"ishdima",
[
"+3Sg",
"+ContentInterr"
]
],
"linguistInfo": {
"inflectional_category": "m-n-Stems",
"pos": "V",
Expand All @@ -89,14 +139,23 @@
"paradigm": "VI",
"senses": [
{
"definition": "for S to be asleep",
"definition": "for s/he to be asleep",
"sources": ["McA&K&C"]
}
],
"slug": "ishdima"
},
{
"head": "nata",
"analysis": [
[],
"nata",
[
"+3Sg",
"+3SgO",
"+ContentInterr"
]
],
"linguistInfo": {
"inflectional_category": "poly-wa-ya",
"pos": "V",
Expand All @@ -105,14 +164,23 @@
"paradigm": "VT",
"senses": [
{
"definition": "for S to kick O",
"definition": "for s/he to kick s.o.",
"sources": ["McA&K&C"]
}
],
"slug": "nata"
},
{
"head": "uge",
"analysis": [
[],
"uge",
[
"+3Sg",
"+3SgO",
"+ContentInterr"
]
],
"linguistInfo": {
"inflectional_category": "poly-wa-ya",
"pos": "V",
Expand All @@ -121,14 +189,22 @@
"paradigm": "VT",
"senses": [
{
"definition": "for S to help O",
"definition": "for s/he to help s.o.",
"sources": ["McA&K&C"]
}
],
"slug": "uge"
},
{
"head": "yagda",
"analysis": [
[],
"yagda",
[
"+3Sg",
"+ContentInterr"
]
],
"linguistInfo": {
"inflectional_category": "m-n-Stems",
"pos": "V",
Expand All @@ -137,23 +213,33 @@
"paradigm": "VI",
"senses": [
{
"definition": "for S to drink it",
"definition": "for s/he to drink it",
"sources": ["McA&K&C"]
}
],
"slug": "yagda"
},
{
"head": "yúda",
"analysis": [
[],
"yúda",
[
"+2Sg",
"+Imp",
"+Imm"
]
],
"linguistInfo": {
"inflectional_category": "m-n-Stems",
"pos": "V",
"stem": "0_y2uda"
"stem": "0_y2uda",
"wordclass": "VI"
},
"paradigm": "VI",
"senses": [
{
"definition": "for S to eat it",
"definition": "s/he eats s.t.",
"sources": ["McA&K&C"]
}
],
Expand Down
4 changes: 2 additions & 2 deletions src/stoeng/resources/fst/analyser-gt-dict-norm.hfstol
Git LFS file not shown
4 changes: 2 additions & 2 deletions src/stoeng/resources/fst/generator-gt-dict-norm.hfstol
Git LFS file not shown
2 changes: 1 addition & 1 deletion src/stoeng/site/settings.py
Original file line number Diff line number Diff line change
Expand Up @@ -49,7 +49,7 @@

# Without this, importjson by default will not attempt to add inflected definitions using phrase translation.
MORPHODICT_SUPPORTS_AUTO_DEFINITIONS = True
MORPHODICT_ENABLE_FST_LEMMA_SUPPORT = True

DEFAULT_TARGET_LANGUAGE_PHRASE_TAGS = ("+V",)

MORPHODICT_ORTHOGRAPHY = {
Expand Down

0 comments on commit e0ebd1b

Please sign in to comment.