forked from norvig/pytudes
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathparse.py
52 lines (46 loc) · 1.45 KB
/
parse.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
grammar = {
'Noun': ['stench', 'wumpus'],
'Verb': ['is', 'smell'],
'Adjective': ['dead', 'smelly'],
'Adverb': ['left', 'back'],
'Pronoun': ['me', 'you'],
'Name': ['John', 'Mary'],
'Article': ['the', 'a'],
'Preposition': ['to', 'in'],
'Conjunction': ['and', 'or'],
'Digit': ['0', '1'],
'S': [['NP', 'VP'], ['S', 'Comjunction', 'S']],
'NP': ['Pronoun', 'Noun', ['Article', 'Noun'], ['Digit', 'Digit'],
['NP', 'PP'], ['NP', 'RelClause']],
'VP': ['Verb', ['VP', 'NP'], ['VP', 'Adjective'], ['VP', 'PP'],
['VP', 'Adverb']],
'PP': [['Preposition', 'NP']],
'RelClause': [['that', 'VP']]
}
def parse(forest, grammar):
if len(forest) == 1 and category(forest[0]) == 'S':
return forest[0]
for i in range(len(forest)):
for lhs in grammar.keys():
for rhs in grammar[lhs]:
rhs = mklist(rhs)
n = len(rhs)
subsequence = forest[i:i+n]
if match(subsequence, rhs):
print subsequence, lhs, '=>', rhs
forest2 = forest[:]
forest2[i:i+n] = [(lhs, subsequence)]
result = parse(forest2, grammar)
if result != None:
return result
return None
def mklist(x):
if type(x) == type([]): return x
else: return [x]
def match(forest, rhs):
for i in range(len(rhs)):
if category(forest[i]) != rhs[i] and forest[i] != rhs[i]: return 0
return 1
def category(forest):
if type(forest) == type(()): return forest[0]
else: return 'word'