From ab68320595fadd4ca7e9c4003984562e33718553 Mon Sep 17 00:00:00 2001 From: ahmedsamyh Date: Mon, 21 Oct 2024 12:54:15 +0500 Subject: [PATCH] Initial Commit. - Can parse, Identifiers, Numbers and Symbols. - TODO: Parse Strings. --- main.momo | 6 ++ main.py | 160 ++++++++++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 166 insertions(+) create mode 100644 main.momo create mode 100644 main.py diff --git a/main.momo b/main.momo new file mode 100644 index 0000000..aabfb08 --- /dev/null +++ b/main.momo @@ -0,0 +1,6 @@ +// # import "stdio"; + +func main(int argc, string argv[]): int { + print("Hello, World!"); + return 0; +} diff --git a/main.py b/main.py new file mode 100644 index 0000000..9dd8bd2 --- /dev/null +++ b/main.py @@ -0,0 +1,160 @@ +import sys +from enum import IntEnum, auto +import pprint + +DEBUG = True + +def dlog(msg): + if DEBUG: + pprint.pp(f"[DEBUG] {msg}") + +class Token_Type(IntEnum): + IDENTIFIER = auto() + NUMBER = auto() + SYMBOL = auto() + STRING = auto() + COUNT = auto() + +class Token: + def __init__(self, typ, literal_string): + self.typ = typ + self.literal_string = literal_string + + def type_as_str(self): + assert(Token_Type.COUNT == 4, "Every enum value is not handled!") + if self.typ == Token_Type.IDENTIFIER: return "IDENTIFIER"; + if self.typ == Token_Type.NUMBER: return "NUMBER"; + if self.typ == Token_Type.SYMBOL: return "SYMBOL"; + if self.typ == Token_Type.STRING: return "STRING"; + + def __repr__(self): + return f"(Token)'{self.literal_string}' ({self.type_as_str()})" + +class Lexer: + def __init__(self, src): + self.src = src + self.line = 0 + self.bol = 0 # beginning of line + self.cur = 0 # cursor position + + def current_char(self): + if self.cur > len(self.src)-1: return -1 + return self.src[self.cur] + + def next_char(self): + if self.cur > len(self.src)-2: return -1 + return self.src[self.cur + 1] + + # NOTE: Advances cursor and returns next char, NOT the current char. + def advance_char(self, by = 1): + self.cur += by + return self.current_char() + + def next_line(self): + c = self.current_char() + assert(c == '\n') + while c == '\n': + c = self.advance_char() + self.bol = self.cur + self.line += 1 + # print(f"next_line-> cur: '{c}'{self.cur}") + + def consume_comment(self) -> str: + c = self.current_char() + n = self.next_char() + comment = '' + if c == '/' and n == '/': + while c != '\n': + comment += c + c = self.advance_char() + self.next_line() + else: + return + c = self.current_char() + # print(f"consume_comment-> cur: '{c}'{self.cur}") + + # dlog(f"Comment: '{comment}'") + return comment + + def consume_identifier(self) -> str: + c = self.current_char() + assert(c.isalpha() or c == '_') + + identifier = c + c = self.advance_char() + + while c.isalnum() or c == '_': + identifier += c + c = self.advance_char() + # dlog(f"Identifier: '{identifier}'") + return identifier + + def consume_number(self) -> str: + c = self.current_char() + assert(c.isdigit()) + number = '' + + while c.isdigit(): + number += c + c = self.advance_char() + + # dlog(f"Number: '{number}'") + return number + + def consume_symbol(self) -> str: + c = self.current_char() + assert(c in "(),[]{}:;") + symbol = c + self.advance_char() + # dlog(f"Symbol: '{symbol}'") + return symbol + + def consume_string(self) -> str: + string = '' + return string + + def exhausted(self) -> bool: + return self.cur >= len(self.src)-1 + + def next_token(self) -> bool | Token: + comment = self.consume_comment() + c = self.current_char() + + # print(f"consume_comment-> cur: '{c}'") + if (self.exhausted()): + # dlog(f"cur: {self.cur}, src.len: {len(self.src)}") + return None + + while c.isspace(): + c = self.advance_char() + + if c.isalpha() or c == '_': + return Token(Token_Type.IDENTIFIER, self.consume_identifier()) + elif c.isdigit(): # TODO: Only handles base-10 numbers + return Token(Token_Type.NUMBER, self.consume_number()) + elif c in "(),[]{}:;": + return Token(Token_Type.SYMBOL, self.consume_symbol()) + elif c == '"': + return Token(Token_Type.STRING, self.consume_string()) + else: + raise Exception(f"Unexpected char '{c}'") + + return None + +def main(): + program = sys.argv.pop(0) + if (len(sys.argv) <= 0): + raise Exception("Please provide the filename!") + filename = sys.argv.pop(0) + src = "" + with open(filename, mode='r') as file: + src = file.read() + lexer = Lexer(src) + token = lexer.next_token() + while token: + dlog(token) + token = lexer.next_token() + + +if __name__ == '__main__': + main() -- 2.39.5