if DEBUG:
pprint.pp(f"[DEBUG] {msg}")
class Loc:
    """Source position: a file name together with a line and a column."""

    def __init__(self, file, line, col):
        self.filename, self.line, self.col = file, line, col

    def __str__(self):
        # Rendered as "<filename>:<line>:<col>".
        parts = (self.filename, self.line, self.col)
        return ":".join(str(part) for part in parts)
+
class Token_Type(IntEnum):
IDENTIFIER = auto()
NUMBER = auto()
+ # SYMBOLS
OPEN_PAREN = auto()
CLOSE_PAREN = auto()
COMMA = auto()
OPEN_BRACE = auto()
CLOSE_BRACE = auto()
SEMICOLON = auto()
+ HASH = auto()
+ #
STRING = auto()
COUNT = auto()
+assert Token_Type.COUNT == 14, "Check if every symbols are handled here"
+symbols = "(),[]{}:;#"
+
class Token:
def __init__(self, typ, literal_string):
self.typ = typ
self.literal_string = literal_string
def type_as_str(self):
- assert Token_Type.COUNT == 13, "Every enum value is not handled!"
+ assert Token_Type.COUNT == 14, "Every enum value is not handled!"
if self.typ == Token_Type.IDENTIFIER: return "IDENTIFIER";
if self.typ == Token_Type.NUMBER: return "NUMBER";
if self.typ == Token_Type.OPEN_PAREN: return "OPEN_PAREN";
if self.typ == Token_Type.OPEN_BRACE: return "OPEN_BRACE";
if self.typ == Token_Type.CLOSE_BRACE: return "CLOSE_BRACE";
if self.typ == Token_Type.SEMICOLON: return "SEMICOLON";
+ if self.typ == Token_Type.HASH: return "HASH";
if self.typ == Token_Type.STRING: return "STRING";
def __repr__(self):
return f"(Token)'{self.literal_string}' ({self.type_as_str()})"
class Parser:
def __init__(self, filename):
    """Read the whole of `filename` into memory and reset the scan state."""
    with open(filename, mode='r') as handle:
        text = handle.read()
    self.src = text
    self.bol = 0  # beginning of line
    self.cur = 0  # cursor position
    self.loc = Loc(filename, 0, 0)
def peek_char(self, by=0):
    """Return the character `by` positions ahead of the cursor.

    Nothing is consumed. Raises Exception("Exhausted!") when the requested
    position lies past the end of the source text.
    """
    index = self.cur + by
    if index >= len(self.src):
        raise Exception("Exhausted!")
    return self.src[index]
def chop_char(self):
    """Consume the character under the cursor and return it.

    The cursor moves forward by one. When the consumed character is a
    newline, the beginning-of-line offset is set to the new cursor position
    and the line counter in `self.loc` is incremented.

    Propagates whatever `peek_char` raises when the input is exhausted.
    """
    current_ch = self.peek_char()
    self.cur += 1
    # Only a line feed starts a new line; spaces and tabs must not bump the
    # line counter or reset the beginning-of-line offset.
    if current_ch == '\n':
        self.bol = self.cur
        self.loc.line += 1
    return current_ch
def consume_comment(self) -> str:
    """Consume a `//` line comment and return its text.

    The cursor must be on the first `/`. Both slashes are dropped; the
    returned text is everything after them up to, but not including, the
    end of the line. If input remains after the comment, `trim_left` is
    called to skip the newline and any following whitespace.
    """
    assert self.peek_char() == '/' and self.peek_char(1) == '/'
    # Remove //
    self.chop_char()
    self.chop_char()

    chars = []
    # A comment on the last line may end at end-of-input instead of '\n'.
    while not self.exhausted() and self.peek_char() != '\n':
        chars.append(self.chop_char())

    if not self.exhausted():
        self.trim_left()
    return ''.join(chars)
def consume_identifier(self) -> str:
    """Consume an identifier starting at the cursor and return it.

    The cursor must be on a letter or underscore. Letters, digits and
    underscores are consumed until a different character or the end of
    input; that terminating character is left unconsumed.
    """
    c = self.peek_char()
    assert c.isalpha() or c == '_'
    chars = []
    # Peek before chopping: chop_char returns the character it just consumed,
    # so it cannot be used to look at the next one.
    while not self.exhausted():
        c = self.peek_char()
        if not (c.isalnum() or c == '_'):
            break
        chars.append(self.chop_char())
    return ''.join(chars)
def consume_number(self) -> str:
    """Consume a run of decimal digits starting at the cursor and return it.

    The cursor must be on a digit. Consumption stops at the first non-digit
    or at the end of input; that terminating character is left unconsumed.
    """
    assert self.peek_char().isdigit()
    digits = []
    # Peek before chopping: chop_char returns the character it just consumed,
    # so it cannot be used to look at the next one.
    while not self.exhausted() and self.peek_char().isdigit():
        digits.append(self.chop_char())
    return ''.join(digits)
def consume_symbol(self) -> str:
    """Consume the single character under the cursor and return it.

    The character must be one of those listed in `symbols`.
    """
    ch = self.peek_char()
    assert ch in symbols
    self.chop_char()
    return ch
def consume_string(self) -> str:
    """Consume a double-quoted string literal and return its contents.

    The cursor must be on the opening quote. Both quotes are consumed; the
    returned text is what lies between them. Raises Exception when the
    input ends before a closing quote is found.
    """
    assert self.peek_char() == '"'
    # Drop the opening quote first, otherwise the scan below would stop on
    # it immediately and yield an empty string.
    self.chop_char()
    # TODO: Does the string include the ""s? (for now it doesn't)
    chars = []
    while True:
        if self.exhausted():
            raise Exception("Unterminated string literal")
        if self.peek_char() == '"':
            break
        chars.append(self.chop_char())
    # Remove " at the end
    self.chop_char()
    return ''.join(chars)
def exhausted(self) -> bool:
    """Report whether the cursor has moved past the last source character."""
    return self.cur >= len(self.src)
+
def trim_left(self):
    """Advance the cursor past any run of whitespace.

    Stops at the first non-whitespace character or at the end of input.
    """
    # Test for end of input first: peek_char raises once the source is
    # exhausted, and whitespace running up to the end of the text is
    # ordinary input rather than an error.
    while not self.exhausted() and self.peek_char().isspace():
        self.chop_char()
def next_token(self) -> bool | Token:
- comment = self.consume_comment()
- c = self.current_char()
+ dlog(str(self.cur))
+ self.trim_left()
- # print(f"consume_comment-> cur: '{c}'")
- if (self.exhausted()):
- # dlog(f"cur: {self.cur}, src.len: {len(self.src)}")
- return None
+ if self.peek_char() == '/' and self.peek_char() == '/':
+ comment = self.consume_comment()
- while c.isspace():
- c = self.advance_char()
+ c = self.peek_char()
if c.isalpha() or c == '_':
return Token(Token_Type.IDENTIFIER, self.consume_identifier())
elif c.isdigit(): # TODO: Only handles base-10 numbers
return Token(Token_Type.NUMBER, self.consume_number())
- elif c in "(),[]{}:;":
+ elif c in symbols:
symbol = self.consume_symbol()
token = Token(Token_Type.COUNT, symbol)
if symbol == "(":
token.typ = Token_Type.CLOSE_BRACE
elif symbol == ";":
token.typ = Token_Type.SEMICOLON
+ elif symbol == "#":
+ token.typ = Token_Type.HASH
else:
raise Exception(f"Unexpected symbol '{symbol}'")
raise Exception("Please provide the filename!")
filename = sys.argv.pop(0)
# 1. Source
- src = ""
- with open(filename, mode='r') as file:
- src = file.read()
+ parser = Parser(filename)
# 2. Lexical Analysis
- parser = Parser(src)
tokens = parser.lex()
- # pprint.pp(tokens)
+ pprint.pp(tokens)
# 3. TODO: Syntactical Analysis
for i in range(0, len(tokens)-1):
# pprint.pp(f" Token: {token}")
# pprint.pp(f" Next: {next}")
-
if __name__ == '__main__':
main()