-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathtokenizer.py
More file actions
112 lines (93 loc) · 2.75 KB
/
tokenizer.py
File metadata and controls
112 lines (93 loc) · 2.75 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
import re
import sys
from typing import Pattern
from bffuncs import BrainFuck
def main():
    """Tokenize the program read from standard input and print its tokens.

    The input lines are joined with a single space before tokenizing. Each
    resulting (type, value) pair is printed on its own line, with the type
    label (followed by a colon) left-aligned in a 12-character column.
    """
    source_text = ' '.join(sys.stdin)
    for kind, value in tokenize(source_text):
        label = kind + ":"
        print(f'{label: <12}{value}')
# bf = BrainFuck()
# bf.puts('THIS IS A BIG ASS TEST\n')
# bf.assign('a', 35) # a = 35
# bf.assign('b', 33) # b = 33
# bf.puts('a=') # print(f"{a=}\n{b=}")
# bf.putv('a')
# bf.puts('\n')
# bf.puts('b=')
# bf.putv('b')
# bf.puts('\n')
# bf.addvars('a', 'b', 'a') # a = a + b
# bf.puts('a=')
# bf.putv('a')
# bf.puts('\n')
# bf.puts('b=')
# bf.putv('b')
# bf.puts('\n')
# Token definitions as (type, regex) pairs.
#
# Order matters: next_token tries the patterns in sequence and returns the
# first one that matches, so the keywords are listed before IDENT.
# tokenize prefixes every regex with r'\s*' before compiling it, which is why
# any top-level alternation below is wrapped in a group.
PATTERN_STRINGS = [
    # Keywords (each must be followed by one whitespace character)
    ('FUNSIG', r'fun\s'),
    ('VARSIG', r'var\s'),
    ('COND', r'(else if|else|if)\s'),
    ('LOOP', r'(for|while)\s'),
    # Literals
    ('NUMLIT', r'\d+'),
    # A string runs from a quote to the NEXT quote of the same kind.  Each
    # quote style gets its own negated character class because a
    # backreference cannot be used inside [...]: there, \1 is read as an
    # octal character escape, which would let the match run greedily to the
    # last matching quote in the input.
    ('STRLIT', r'(?:"[^"]*"|\'[^\']*\')'),
    # Identifier
    ('IDENT', r'[a-zA-Z_]\w*'),
    # Syntax tokens
    ('UNOP', r'!'),
    ('BIOP', r'(==|!=|<=|>=|[<>+\-*/=])'),
    ('BRACK', r'[{}]'),
    ('SEMIC', r';'),
    ('COMNT', r'#.*'),
]
def tokenize(program):
    """Split *program* into a list of (type, value) tokens.

    Every entry of PATTERN_STRINGS is compiled with a leading r'\\s*' so that
    whitespace before a token is skipped, then next_token is called on the
    unread remainder of the text until it reports the end of input or an
    error.

    Args:
        program: The source text to tokenize.

    Returns:
        The tokens in source order. The last element is always the
        terminating token: ('EOF', '') when the input was consumed, or the
        error token produced by next_token.
    """
    patterns = [
        (name, re.compile(r'\s*' + regex)) for name, regex in PATTERN_STRINGS
    ]
    tokens = []
    ptr = 0
    while True:
        # Fully consumed input is a clean end, not a syntax error.
        if ptr >= len(program):
            tokens.append(('EOF', ''))
            break
        token, size = next_token(program[ptr:], patterns)
        tokens.append(token)
        # next_token reports failures as 'ERR'.  A zero-length result can
        # never make progress either, so stop instead of looping forever.
        if token[0] in ('EOF', 'ERR') or size == 0:
            break
        ptr += size
    return tokens
def next_token(program, patterns):
    """Match a single token at the start of *program*.

    The patterns are tried in order and the first one that matches wins.

    Args:
        program: The unread remainder of the source text.
        patterns: Iterable of (type, compiled_regex) pairs.

    Returns:
        A pair ``((type, value), size)`` where *size* is the number of
        characters consumed by the match. Number literals are returned as
        ``int`` and string literals without their surrounding quotes.
        When nothing matches, the result is ``(('EOF', ''), 0)`` if only
        whitespace (or nothing) is left and ``(('ERR', 'Syntax error'), 0)``
        otherwise.
    """
    for kind, pattern in patterns:
        match = pattern.match(program)
        if not match:
            continue
        value = match.group().strip()
        # 'NUMLIT'/'STRLIT' are the names used in PATTERN_STRINGS; the short
        # forms 'NUM'/'STR' are still accepted for caller-supplied tables.
        if kind in ('NUMLIT', 'NUM'):
            value = int(value)
        elif kind in ('STRLIT', 'STR'):
            value = value[1:-1]  # drop the surrounding quotes
        return ((kind, value), match.end())
    # ''.isspace() is False, so the empty remainder is tested via strip().
    if not program.strip():
        return (('EOF', ''), 0)
    return (('ERR', 'Syntax error'), 0)
def parse(tokens):
    """Build a Parser over *tokens* and run it; nothing is returned."""
    Parser(tokens).parse()
class Parser:
    """Cursor-based walker over a list of (type, value) tokens.

    Work in progress: only declaration tokens are dispatched so far and
    function definitions are still a TODO.
    """

    def __init__(self, tokens):
        """Store *tokens* and place the cursor on the first one."""
        self.idx = 0  # index of the next token to consume
        self.tokens = tokens

    def parse(self):
        """Consume every token, dispatching on declaration tokens.

        Returns:
            None. ``self.idx`` equals ``len(self.tokens)`` afterwards unless
            a handler raises.
        """
        # Sketch of planned helpers, kept from the original:
        # def peek(n):
        #     return tokens[idx + n]
        # def eat(n):
        #     idx += 1
        #     return tokens[idx - 1]
        while self.idx < len(self.tokens):
            kind, value = self.tokens[self.idx]
            # Consume the token up front (as the `eat` sketch above does);
            # without this the cursor never moves and the loop never ends.
            self.idx += 1
            if kind != 'DCLR':
                # NOTE(review): PATTERN_STRINGS defines 'FUNSIG'/'VARSIG',
                # not 'DCLR' - confirm which token type is intended here.
                continue
            if value.startswith('fun'):
                pass  # TODO: func definition
            elif value.startswith('var'):
                # NOTE(review): parseAssignment is not defined in the visible
                # source; this call raises AttributeError until it exists.
                self.parseAssignment()
# Script entry point: call main() only when this file is executed directly,
# not when it is imported as a module.
if __name__ == '__main__':
    main()