From: ahmedsamyh Date: Mon, 21 Oct 2024 11:14:28 +0000 (+0500) Subject: Started to refactor INCOMPLETE!!! X-Git-Url: https://www.git.momoyon.org/?a=commitdiff_plain;h=4c562931298c3b5976c390d63974b3425ff718bf;p=lang.git Started to refactor INCOMPLETE!!! --- diff --git a/main.momo b/main.momo index fa7f294..18916aa 100644 --- a/main.momo +++ b/main.momo @@ -1,6 +1,11 @@ -// # import "stdio"; + + // Comment test + +#import "stdio"; func main(argc: int, argv: string[]): int { + // This is a comment! print("Hello, World!"); + // This is another comment! return 0; } diff --git a/main.py b/main.py index a60e698..6b04e0c 100644 --- a/main.py +++ b/main.py @@ -8,9 +8,19 @@ def dlog(msg): if DEBUG: pprint.pp(f"[DEBUG] {msg}") +class Loc: + def __init__(self, file, line, col): + self.filename = file + self.line = line + self.col = col + + def __str__(self): + return f"{self.filename}:{self.line}:{self.col}" + class Token_Type(IntEnum): IDENTIFIER = auto() NUMBER = auto() + # SYMBOLS OPEN_PAREN = auto() CLOSE_PAREN = auto() COMMA = auto() @@ -20,16 +30,21 @@ class Token_Type(IntEnum): OPEN_BRACE = auto() CLOSE_BRACE = auto() SEMICOLON = auto() + HASH = auto() + # STRING = auto() COUNT = auto() +assert Token_Type.COUNT == 14, "Check if every symbols are handled here" +symbols = "(),[]{}:;#" + class Token: def __init__(self, typ, literal_string): self.typ = typ self.literal_string = literal_string def type_as_str(self): - assert Token_Type.COUNT == 13, "Every enum value is not handled!" + assert Token_Type.COUNT == 14, "Every enum value is not handled!" if self.typ == Token_Type.IDENTIFIER: return "IDENTIFIER"; if self.typ == Token_Type.NUMBER: return "NUMBER"; if self.typ == Token_Type.OPEN_PAREN: return "OPEN_PAREN"; @@ -41,125 +56,113 @@ class Token: if self.typ == Token_Type.OPEN_BRACE: return "OPEN_BRACE"; if self.typ == Token_Type.CLOSE_BRACE: return "CLOSE_BRACE"; if self.typ == Token_Type.SEMICOLON: return "SEMICOLON"; + if self.typ == Token_Type.HASH: return "HASH"; if self.typ == Token_Type.STRING: return "STRING"; def __repr__(self): return f"(Token)'{self.literal_string}' ({self.type_as_str()})" class Parser: - def __init__(self, src): - self.src = src - self.line = 0 + def __init__(self, filename): + with open(filename, mode='r') as file: + self.src = file.read() self.bol = 0 # beginning of line self.cur = 0 # cursor position + self.loc = Loc(filename, 0, 0) - def current_char(self): - if self.cur > len(self.src)-1: return -1 - return self.src[self.cur] - - def next_char(self): - if self.cur > len(self.src)-2: return -1 - return self.src[self.cur + 1] - - # NOTE: Advances cursor and returns next char, NOT the current char. - def advance_char(self, by = 1): - self.cur += by - return self.current_char() + def peek_char(self, by=0): + if (self.cur+by) > len(self.src)-1: + raise Exception("Exhausted!") + return self.src[self.cur + by] - def next_line(self): - c = self.current_char() - assert(c == '\n') - while c == '\n': - c = self.advance_char() + def chop_char(self): + current_ch = self.peek_char() + self.cur += 1 + if current_ch.isspace(): self.bol = self.cur - self.line += 1 - # print(f"next_line-> cur: '{c}'{self.cur}") + self.loc.line += 1 + return current_ch def consume_comment(self) -> str: - c = self.current_char() - n = self.next_char() + assert self.peek_char() == '/' and self.peek_char(1) == '/' comment = '' - if c == '/' and n == '/': - while c != '\n': - comment += c - c = self.advance_char() - self.next_line() - else: - return - c = self.current_char() - # print(f"consume_comment-> cur: '{c}'{self.cur}") + # Remove // + self.chop_char() + self.chop_char() + + while self.peek_char() != '\n': + comment += self.peek_char() + self.chop_char() - # dlog(f"Comment: '{comment}'") - return comment + assert self.peek_char() == '\n' + self.trim_left() def consume_identifier(self) -> str: - c = self.current_char() + c = self.peek_char() assert(c.isalpha() or c == '_') - identifier = c - c = self.advance_char() + identifier = '' while c.isalnum() or c == '_': identifier += c - c = self.advance_char() - # dlog(f"Identifier: '{identifier}'") + c = self.chop_char() return identifier def consume_number(self) -> str: - c = self.current_char() + c = self.peek_char() assert(c.isdigit()) number = '' while c.isdigit(): number += c - c = self.advance_char() + c = self.chop_char() # dlog(f"Number: '{number}'") return number def consume_symbol(self) -> str: - c = self.current_char() - assert(c in "(),[]{}:;") + c = self.peek_char() + assert(c in symbols) symbol = c - self.advance_char() - # dlog(f"Symbol: '{symbol}'") + self.chop_char() return symbol def consume_string(self) -> str: - c = self.current_char() + c = self.peek_char() assert(c == '"') # TODO: Does the string include the ""s? (for now it doesn't) string = '' - c = self.advance_char() while c != '"': string += c - c = self.advance_char() + c = self.chop_char() # Remove " at the end - self.advance_char() + assert self.peek_char() == '"' + self.chop_char() # dlog(f"String: '{string}'"); return string def exhausted(self) -> bool: - return self.cur >= len(self.src)-1 + return self.cur > len(self.src)-1 + + def trim_left(self): + while self.peek_char().isspace(): + self.chop_char() def next_token(self) -> bool | Token: - comment = self.consume_comment() - c = self.current_char() + dlog(str(self.cur)) + self.trim_left() - # print(f"consume_comment-> cur: '{c}'") - if (self.exhausted()): - # dlog(f"cur: {self.cur}, src.len: {len(self.src)}") - return None + if self.peek_char() == '/' and self.peek_char() == '/': + comment = self.consume_comment() - while c.isspace(): - c = self.advance_char() + c = self.peek_char() if c.isalpha() or c == '_': return Token(Token_Type.IDENTIFIER, self.consume_identifier()) elif c.isdigit(): # TODO: Only handles base-10 numbers return Token(Token_Type.NUMBER, self.consume_number()) - elif c in "(),[]{}:;": + elif c in symbols: symbol = self.consume_symbol() token = Token(Token_Type.COUNT, symbol) if symbol == "(": @@ -180,6 +183,8 @@ class Parser: token.typ = Token_Type.CLOSE_BRACE elif symbol == ";": token.typ = Token_Type.SEMICOLON + elif symbol == "#": + token.typ = Token_Type.HASH else: raise Exception(f"Unexpected symbol '{symbol}'") @@ -206,14 +211,11 @@ def main(): raise Exception("Please provide the filename!") filename = sys.argv.pop(0) # 1. Source - src = "" - with open(filename, mode='r') as file: - src = file.read() + parser = Parser(filename) # 2. Lexical Analysis - parser = Parser(src) tokens = parser.lex() - # pprint.pp(tokens) + pprint.pp(tokens) # 3. TODO: Syntactical Analysis for i in range(0, len(tokens)-1): @@ -224,6 +226,5 @@ def main(): # pprint.pp(f" Token: {token}") # pprint.pp(f" Next: {next}") - if __name__ == '__main__': main()