-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathlexer.a68
166 lines (126 loc) · 3.31 KB
/
lexer.a68
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
# Token kinds. The lexer in this file passes one of these as the first component of each TOKEN it builds. #
INT assign token    = 1;  # operator characters and the word VOID #
INT comment token   = 2;  # hash-delimited comments #
INT string token    = 3;  # text between double quotes or between dollar signs #
INT number token    = 4;  # digit sequences, and the words TRUE and FALSE #
INT brackets token  = 5;  # brackets, parentheses, semicolon and comma #
INT important token = 6;  # not used by the lexer in this file #
INT keyword token   = 7;  # words found in the lexer's key words list #
INT unknown token   = 8;  # anything that matches no other rule #
INT function token  = 9;  # names found in the lexer's sys funcs list #
# Splits source into TOKENs and returns them as a row, in the order read. #
# TOKEN, str includes, str arr includes, only uppercase, rtrim, alphabet and #
# digits are declared outside this procedure. #
#   source   - text to tokenise; it is indexed starting at 1. #
#   callback - called once per token, right after the first character of the #
#              token has been read, with the index of the next unread #
#              character; the BOOL argument is skipped. #
# An empty source yields an empty row. #
PROC lexer = (STRING source, PROC (INT, BOOL) VOID callback) [] TOKEN: (
    # Index of the next character to read, and the character read last #
    INT counter := 1;
    CHAR current := " ";

    # Defaults #
    [] STRING key words = (
        "FOR", "FROM", "BY", "PAR", "GO", "TO", "WHILE", "IF", "THEN", "ELIF", "ELSE", "FI", "DO", "OD", "BEGIN", "END", "OF",
        "PROC", "OP", "REF", "INT", "BOOL", "STRING", "CHAR", "BITS", "AND", "OR", "MODE", "STRUCT", "FILE", "PR"
    );
    [] STRING sys funcs = (
        "print", "printf", "put", "putf", "read", "read char", "get", "getf", "whole", "system", "on logical file end",
        "open", "close", "argc", "argv", "new line", "newline", "include", "UPB"
    );

    # Functions #

    # Reads the character at counter into current and advances counter by one. #
    # NOTE(review): there is no bounds check here; callers must keep counter within source. #
    PROC next = REF CHAR: (
        current := source[counter];
        counter +:= 1;
        current
    );

    # Un-reads one character: counter ends up one lower than before and #
    # current holds the character that precedes the one just un-read. #
    PROC back = REF CHAR: (
        counter -:= 2;
        next;
        current
    );

    # Collects characters while they belong to alphabet + digits and counter #
    # is below UPB source, then un-reads the character that ended the run. #
    PROC get word = STRING: (
        STRING word := "";
        WHILE str includes(next, alphabet + digits) AND counter < UPB source DO
            word +:= current
        OD;
        back;
        word
    );

    # Loops #

    # Reads one token starting at counter and classifies it by its first character. #
    PROC lex = TOKEN: (
        current := next;
        STRING buff := current;
        TOKEN res := (-1, "");
        callback(counter, ~);
        # Assignments #
        IF str includes(current, ":+=-*/<>") THEN
            res := TOKEN(assign token, buff)
        # Brackets #
        ELIF str includes(current, "[]();,") THEN
            res := TOKEN(brackets token, buff)
        # Comments: read up to the closing delimiter or the end guard #
        ELIF current = "#" THEN
            WHILE next /= "#" AND counter < UPB source DO
                buff +:= current
            OD;
            # The character that ended the loop is kept as part of the token #
            res := TOKEN(comment token, buff + current)
        # String and regex: closed by the same character that opened them #
        ELIF current = """" OR current = "$" THEN
            CHAR opened = current;
            WHILE next /= opened AND counter < UPB source DO
                buff +:= current
            OD;
            res := TOKEN(string token, buff + current)
        # Number #
        ELIF str includes(current, digits) THEN
            WHILE str includes(next, digits + ".") AND counter < UPB source DO
                buff +:= current
            OD;
            # Un-read the character that ended the number #
            back;
            res := TOKEN(number token, buff)
        # Identifiers: further words separated by spaces may be appended #
        ELIF str includes(current, alphabet) THEN
            STRING word := get word;
            INT kind := unknown token;
            buff +:= word;
            # Keep appending following words until one satisfies only uppercase #
            WHILE NOT only uppercase(word) DO
                WHILE next = " " DO
                    buff +:= current
                OD;
                back;
                word := get word;
                IF only uppercase(word) THEN
                    # That word is not part of this identifier: step back over it #
                    TO UPB word DO
                        back
                    OD
                ELSE
                    buff +:= word
                FI
            OD;
            # NOTE(review): steps back once per character of rtrim(buff); rtrim is #
            # defined elsewhere, so confirm the intended distance against it. #
            TO UPB rtrim(buff) DO
                back
            OD;
            IF str arr includes(buff, key words) THEN
                kind := keyword token
            ELIF buff = "VOID" THEN
                kind := assign token
            ELIF str arr includes(buff, sys funcs) THEN
                kind := function token
            ELIF str arr includes(buff, ("TRUE", "FALSE")) THEN
                kind := number token
            FI;
            res := TOKEN(kind, buff)
        # Unknown #
        ELSE
            res := TOKEN(unknown token, buff)
        FI;
        res
    );

    # Calls lex repeatedly and gathers the tokens into a row. #
    PROC lex all = [] TOKEN: (
        FLEX [0] TOKEN tokens;
        TOKEN token;
        # An empty source has no character to read, so lex must not be called #
        IF UPB source >= 1 THEN
            # Lex at least once, then continue while counter is below UPB source. #
            # A single lex can move counter past UPB source, so an equality test #
            # could be skipped over and the next read would leave the bounds. #
            WHILE
                token := lex;
                tokens +:= token;
                counter < UPB source
            DO
                SKIP
            OD
        FI;
        tokens
    );

    lex all
)