-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathlemmatizer.py
43 lines (31 loc) · 945 Bytes
/
lemmatizer.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
"""
"""
from zoegas import reader
from cltk.lemmatize import backoff
from eddas import reader as eddas_reader
# Demo section: exercises the Poetic Edda reader, the CLTK identity
# lemmatizer and the Zoëga dictionary reader, printing results to stdout.

# Reader over the "Völuspá" text of the Poetic Edda.
voeluspaa = eddas_reader.PoeticEddaLemmatizationReader("Völuspá")
# NOTE(review): the return value is discarded. The call is kept in case it
# has a side effect that later calls rely on -- confirm, otherwise drop it.
voeluspaa.tagged_words()

# Feed the words of the text to the identity lemmatizer and show the output.
lemmatizer = backoff.IdentityLemmatizer()
print(lemmatizer.lemmatize(voeluspaa.words()))

# Dictionary object built from the name exposed by the zoegas reader module.
dictionary = reader.Dictionary(reader.dictionary_name)
# NOTE(review): `entries` is not used anywhere in the visible code; it is
# kept as a module-level name in case other code reads it.
entries = dictionary.get_entries()

# Headwords whose descriptions are printed, in this exact order.
headwords = ("segja", "ár", "sá", "akr", "telja", "mæla", "vaka")
for headword in headwords:
    # NOTE(review): assumes find() returns an object with a `description`
    # attribute for every headword listed; the behaviour for a missing
    # headword is not visible here -- confirm against the reader module.
    entry = dictionary.find(headword)
    print(entry.description)
class ZoegasLemmatizer:
    """Lemmatizer built around entries of Zoëga's dictionary.

    Its stated purpose is to return all the inflected forms from an entry.
    Only the constructor is defined in this class; it takes no arguments
    and stores no state.
    """

    def __init__(self) -> None:
        """Create the lemmatizer without setting up any state."""