reader.py
"""The reader is responsible for converting a source code string into an abstract syntax tree.
This involves lexing, parsing and expansions.
"""
import re
from collections import defaultdict

class Symbol:
    """A symbol is a special token, representing a variable name."""

    def __init__(self, name):
        self.name = name

    def __repr__(self):
        return str(self.name)


def is_symbol(token):
    """Return True if the token is a Symbol."""
    return isinstance(token, Symbol)


def is_list(token):
    """Return True if the token is a (possibly nested) list of tokens."""
    return type(token) in [list, tuple]


def convert_bool(value):
    """Convert the literals '#t' / '#f' to Python booleans."""
    return value == '#t'

# Order matters: floats must be tried before ints, and Symbol is the
# catch-all for anything that is not a parenthesis, quote or string.
TOKEN_TYPES = (
    (convert_bool, re.compile(r'(#[tf])')),
    (float, re.compile(r'((0|[1-9]+[0-9]*)\.[0-9]+)')),
    (int, re.compile(r'(0|[1-9]+[0-9]*)')),
    (str, re.compile(r'"([^"]*)"')),
    (Symbol, re.compile(r'([^\(\)\'"\s]+)'))
)


def _find_atom(line, tokens):
    """Consume a structural atom: a comment, '(', ')' or a quote mark."""
    if line.startswith(';'):
        # Comments run to the end of the line, so nothing is left to scan.
        return ''
    for atom in ['(', ')', "'"]:
        if line.startswith(atom):
            tokens.append(atom)
            return line[len(atom):]
    return None


def _find_token(line, tokens):
    """Match the next literal or symbol against TOKEN_TYPES and consume it."""
    for cast, pattern in TOKEN_TYPES:
        r = pattern.match(line)
        if not r:
            continue
        tokens.append(cast(r.group(1)))
        return line[len(r.group(0)):]
    return None


def _tokenize(line, tokens):
    """Recursively strip tokens from the front of line, appending them to tokens."""
    line = line.lstrip()
    if len(line) == 0:
        return
    r = _find_atom(line, tokens)
    if r is not None:
        _tokenize(r, tokens)
        return
    r = _find_token(line, tokens)
    if r is not None:
        _tokenize(r, tokens)
        return
    raise Exception("Failed tokenizing: %s" % line)


def tokenize(line):
    """Tokenize a single line of source, returning a flat token list."""
    tokens = []
    _tokenize(line, tokens)
    return tokens
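
# For illustration (not part of the original module), tokenizing a small
# expression is expected to yield a flat list of atoms, Symbols and literals:
#
#     tokenize("(define x 42)")   # => ['(', define, x, 42, ')']
#
# where `define` and `x` print as bare names because they are Symbol instances.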


def tokenize_file(fname):
    """Tokenize every line of a file, reporting the line of any lexer error."""
    tokens = []
    with open(fname) as f:
        for line_num, line in enumerate(f.read().splitlines(), start=1):
            try:
                _tokenize(line, tokens)
            except Exception:
                raise Exception("Lexer error on line %d: \n%s" % (line_num, line))
    return tokens


def get_ast(tokens):
    """
    Transform a flat token list into a tree.

    Returns a tuple of the top-level expression list and the final nesting
    level; a non-zero level means the parentheses were unbalanced.
    """
    lists = defaultdict(list)
    i = 0
    level = 0
    while i < len(tokens):
        if '(' == tokens[i]:
            level += 1
        elif ')' == tokens[i]:
            lists[level - 1].append(lists[level])
            del lists[level]
            level -= 1
        else:
            lists[level].append(tokens[i])
        i += 1
    return (lists[0], level)
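
# Illustrative example (not in the original source): for the tokens produced by
# tokenize("(+ 1 (- 2 3))"), get_ast is expected to return
#
#     ([[+, 1, [-, 2, 3]]], 0)
#
# i.e. a single nested list plus a final nesting level of 0 (balanced parens).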


def expand_quotes(expr):
    """
    Converts '(1 2 3) to (quote (1 2 3))
    """
    if not is_list(expr):
        return expr
    new_expr = []
    n = len(expr)
    i = 0
    while i < n:
        if "'" == expr[i]:
            new_expr.append([Symbol('quote'), expand_quotes(expr[i + 1])])
            i += 2
        else:
            new_expr.append(expand_quotes(expr[i]))
            i += 1
    return new_expr
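
# Illustrative example (not in the original source): given the parsed form of
# '(1 2 3), i.e. ["'", [1, 2, 3]], expand_quotes is expected to return
# [[quote, [1, 2, 3]]], where quote is a Symbol.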


def expand_define(expr):
    """
    Converts (define (fn x) ...) to (define fn (lambda (x) ...))
    """
    if not is_list(expr):
        return expr
    new_expr = []
    n = len(expr)
    i = 0
    while i < n:
        if is_symbol(expr[i]) and expr[i].name == 'define' and is_list(expr[i + 1]):
            symbol, args = expr[i + 1][0], expr[i + 1][1:]
            body = expand_define(expr[i + 2:])
            new_expr += [expr[i], symbol, [Symbol('lambda'), args] + body]
            break
        else:
            new_expr.append(expand_define(expr[i]))
            i += 1
    return new_expr
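
# Illustrative example (not in the original source): the parsed form of
# (define (add a b) (+ a b)) is [define, [add, a, b], [+, a, b]]; expand_define
# is expected to rewrite it as [define, add, [lambda, [a, b], [+, a, b]]],
# i.e. (define add (lambda (a b) (+ a b))).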


def expr_to_str(expr):
    """Render a parsed expression back into source form."""
    if is_list(expr):
        return '(' + ' '.join(expr_to_str(token) for token in expr) + ')'
    if bool == type(expr):
        return '#t' if expr else '#f'
    return str(expr)
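

# A minimal usage sketch (not part of the original module): it tokenizes a
# string instead of a file, builds the AST, applies both expansions and prints
# the expanded expressions back as source text.
if __name__ == '__main__':
    source = "(define (square x) (* x x)) (square '3)"
    tokens = tokenize(source)
    ast, depth = get_ast(tokens)
    assert depth == 0, "unbalanced parentheses"
    ast = expand_define(expand_quotes(ast))
    for expr in ast:
        print(expr_to_str(expr))
    # Expected output:
    #   (define square (lambda (x) (* x x)))
    #   (square (quote 3))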