# Master switch for dlog(); set to False to silence debug output.
DEBUG = True


# Logging
def dlog(msg):
    """Print `msg` with a "[DEBUG] " prefix, but only while DEBUG is truthy.

    NOTE: output goes through pprint.pp, which prints the repr of the
    formatted string, so the line appears wrapped in quotes.
    """
    if DEBUG:
        pprint.pp(f"[DEBUG] {msg}")
def info(msg):
    """Print `msg` with an "[INFO] " prefix (unconditionally).

    NOTE: pprint.pp prints the repr of the string, so the line is quoted.
    """
    pprint.pp(f"[INFO] {msg}")

def error(msg):
    """Print `msg` with an "[ERROR] " prefix; does not stop the program.

    NOTE: pprint.pp prints the repr of the string, so the line is quoted.
    """
    pprint.pp(f"[ERROR] {msg}")

def fatal(msg, err_code=1):
    """Report `msg` through error() and terminate the process.

    Args:
        msg: Text to report.
        err_code: Process exit status (defaults to 1).

    Raises:
        SystemExit: always, carrying `err_code`.
    """
    error(msg)
    # sys.exit rather than the bare `exit`: the latter is injected by the
    # `site` module for interactive use and is not guaranteed to exist.
    sys.exit(err_code)

def usage(program: str):
    """Print the one-line command-line synopsis via info().

    Args:
        program: Name to show as the executable in the synopsis.
    """
    info(f"Usage: {program} <file>")

class Loc:
    """A position in a source file, rendered as "file:line:col"."""

    def __init__(self, file, line, col):
        """Store the file name, line and column of this location."""
        self.filename = file
        # __str__ reads both of these, so they must be stored alongside
        # the file name.
        self.line = line
        self.col = col

    def __str__(self):
        """Return the location as "filename:line:col"."""
        return f"{self.filename}:{self.line}:{self.col}"

    def get_row(self, cur, bol):
        """Return the distance of cursor index `cur` from line start `bol`."""
        return cur - bol
class TokenType(IntEnum):
    """Kinds of token the lexer can emit.

    Values are assigned by auto(), i.e. 1, 2, 3 in declaration order.
    """
    IDENT = auto()
    STRING = auto()
    # Trailing member; keep it last when adding new kinds.
    COUNT = auto()
class Token:
    """A single lexed token: its kind, its text and where it was found."""

    def __init__(self, typ: "TokenType", value: str, loc: "Loc"):
        """Store the token's kind, text and source location.

        Args:
            typ: Kind of the token.
            value: Text carried by the token.
            loc: Source location associated with the token.
        """
        self.typ = typ
        self.value = value
        self.loc = loc

    def __repr__(self):
        """Return a debugging representation showing kind, text and location."""
        return f"Token({self.typ!r}, {self.value!r}, loc={self.loc})"
class Parser:
    """Character-level reader over one source file that produces Tokens.

    Attributes:
        src: Full text of the file.
        cur: Index into `src` of the next unread character.
        bol: Index of the beginning of the current line; row() is measured
            from it.
        line: Line number used in diagnostics; starts at 1.
        filename: Path the source was read from.

    NOTE(review): nothing in this class advances `line` or `bol` yet, so
    reported locations are only meaningful on the first line.
    """

    def __init__(self, filename: str):
        """Read `filename` fully and place the cursor at its start.

        A missing file is reported through the module-level fatal(), which
        terminates the process.
        """
        try:
            with open(filename, mode='r') as f:
                self.src = f.read()
        except FileNotFoundError:
            fatal(f"File '{filename}' not found!")
        self.cur = 0
        self.bol = 0
        self.line = 1
        self.filename = filename

    def row(self):
        """Return the cursor's offset from the beginning of the current line."""
        return self.cur - self.bol

    def fatal(self, msg):
        """Report `msg` prefixed with "file:line:row: " and exit with status 1."""
        error(f"{self.filename}:{self.line}:{self.row()}: {msg}")
        # sys.exit rather than the bare `exit`, which only exists when the
        # `site` module has been loaded.
        sys.exit(1)

    def eof(self) -> bool:
        """Return True once the cursor has moved past the last character."""
        return self.cur >= len(self.src)

    def current_char(self) -> str:
        """Return the character under the cursor without consuming it.

        Callers must check eof() first; reading past the end is treated as
        an internal error.
        """
        assert self.cur < len(self.src), f"cur: {self.cur}, src_len: {len(self.src)}"
        return self.src[self.cur]

    def consume_char(self) -> str:
        """Return the character under the cursor and advance past it."""
        c = self.current_char()
        self.cur += 1
        return c

    def consume_string(self) -> str:
        """Consume a double-quoted string and return its contents.

        The cursor must be on the opening quote. On return it sits just
        past the closing quote. The quotes are not part of the result and
        no escape sequences are interpreted.

        A missing closing quote is reported through self.fatal().
        """
        assert self.current_char() == '"', "Called consume_string() at wrong character!"
        self.consume_char()  # opening quote

        closing = self.src.find('"', self.cur)
        if closing == -1:
            self.fatal("Unterminated string!")

        string = self.src[self.cur:closing]
        # Step past the closing quote exactly once.
        self.cur = closing + 1
        return string

    def next_token(self) -> "Token | None":
        """Return the token starting at the cursor, or None.

        None is returned at end of input. It is also returned, without
        advancing, when the character under the cursor does not start a
        string literal: only string tokens are recognised so far.
        """
        if self.eof():
            return None

        if self.current_char() == '"':
            # Capture the location before consuming so it points at the
            # start of the literal rather than its end.
            loc = Loc(self.filename, self.line, self.row())
            return Token(TokenType.STRING, self.consume_string(), loc)

        return None
def main():
    """Command-line entry point: validate arguments and open the source file.

    Expects the file to process as the first argument after the program
    name. When it is missing, an error and the usage line are printed and
    the process exits with status 1.
    """
    # Unpack instead of popping so sys.argv is left untouched.
    program, *args = sys.argv

    if not args:
        error("Please provide a filename!")
        usage(program)
        sys.exit(1)

    filename: str = args[0]

    parser = Parser(filename)

    # TODO: drive the lexer, e.g. `token = parser.next_token()`.
# Run main() only when executed as a script, not when imported.
if __name__ == '__main__':
    main()