From: ahmedsamyh Date: Mon, 4 Nov 2024 17:09:12 +0000 (+0500) Subject: Merge remote-tracking branch 'origin/work' X-Git-Url: https://www.git.momoyon.org/?a=commitdiff_plain;h=b9d1d071202f62592565b29c8130645ad557ddf0;p=lang.git Merge remote-tracking branch 'origin/work' --- b9d1d071202f62592565b29c8130645ad557ddf0 diff --cc main.momo index 18916aa,7a2b3fe..89d0f63 --- a/main.momo +++ b/main.momo @@@ -1,11 -1,7 +1,7 @@@ - - // Comment test -// # import "stdio"; -- -func main(argc: int, argv: string[] -// ): int { -// print("Hello, World!"); -// return 0; -// } +#import "stdio"; - +func main(argc: int, argv: string[]): int { + // This is a comment! + print("Hello, World!"); + // This is another comment! + return 0; +} diff --cc main.py index 6b04e0c,74a0b5c..4fb6384 --- a/main.py +++ b/main.py @@@ -8,15 -8,66 +8,75 @@@ def dlog(msg) if DEBUG: pprint.pp(f"[DEBUG] {msg}") +class Loc: + def __init__(self, file, line, col): + self.filename = file + self.line = line + self.col = col + + def __str__(self): + return f"{self.filename}:{self.line}:{self.col}" + + def error(msg): + pprint.pp(f"[ERROR] {msg}") + + def handle_func(i, tokens): + def forward(i): + i += 1 + if i >= len(tokens)-2: + error(f"Incomplete function!") + token = tokens[i] + next = tokens[i+1] + + return i, token, next + + # ERROR: func is the last or second to last token + if i >= len(tokens)-2: + error(f"Incomplete function!") + token = tokens[i] + next = tokens[i+1] + args = [] + + func_name = 'INVALID' + while i < len(tokens)-2: + func_name = token.literal_string + if token.typ == Token_Type.IDENTIFIER: + # Check if function name is not a keyword + if token.literal_string in KEYWORDS: + error(f"Function name cannot be a keyword '{token.literal_string}'") + break + i, token, next = forward(i) + + if next.typ != Token_Type.OPEN_PAREN: + error(f"Expected '(' after function name but got '{next.literal_string}'") + i, token, next = forward(i) + + dlog(f"Function name '{func_name}'") + + # Get args + while i < len(tokens)-2: + if token.typ == Token_Type.CLOSE_PAREN: + break + args.append(token) + i, token, next = forward(i) + + # dlog(f"Function args: '{args}'") + + ret_type = 'void' + + # Get return type if present + if next.typ == Token_Type.COLON: + i, token, next = forward(i) + ret_type = next.literal_string + + dlog(f"Function return type: '{ret_type}'") + + exit(1) + # assert False, "RAAH" + KEYWORDS = { + 'func': handle_func, + } + class Token_Type(IntEnum): IDENTIFIER = auto() NUMBER = auto() @@@ -63,45 -107,54 +123,53 @@@ class Token return f"(Token)'{self.literal_string}' ({self.type_as_str()})" class Parser: - def __init__(self, src): - self.src = src + def __init__(self, filename): + with open(filename, mode='r') as file: + self.src = file.read() + self.line = 1 self.bol = 0 # beginning of line self.cur = 0 # cursor position - self.loc = Loc(filename, 0, 0) ++ self.loc = Loc(filename, self.line, 0) - def current_char(self): - if self.cur > len(self.src)-1: return -1 - return self.src[self.cur] + def peek_char(self, by=0): + if (self.cur+by) > len(self.src)-1: + raise Exception("Exhausted!") + return self.src[self.cur + by] - def chop_char(self): - current_ch = self.peek_char() - self.cur += 1 - if current_ch.isspace(): + def next_char(self): - if self.cur > len(self.src)-2: return -1 ++ if self.cur+1 > len(self.src)-1: return -1 + return self.src[self.cur + 1] + + # NOTE: Advances cursor and returns next char, NOT the current char. + def advance_char(self, by = 1): + self.cur += by + return self.current_char() + + def next_line(self): + c = self.current_char() + while c == '\n': + c = self.advance_char() self.bol = self.cur - self.line += 1 - # print(f"next_line-> cur: '{c}'{self.cur}") + self.loc.line += 1 + return current_ch def consume_comment(self) -> str: - c = self.current_char() - n = self.next_char() + assert self.peek_char() == '/' and self.peek_char(1) == '/' comment = '' - # Remove // - self.chop_char() - self.chop_char() - - while self.peek_char() != '\n': - comment += self.peek_char() - self.chop_char() - - assert self.peek_char() == '\n' - self.trim_left() + if c == '/' and n == '/': + while c != '\n': + comment += c + c = self.advance_char() + self.next_line() + else: + return - - # dlog(f"Comment: '{comment}'") + return comment def consume_identifier(self) -> str: - c = self.current_char() + c = self.peek_char() assert(c.isalpha() or c == '_') - identifier = c - c = self.advance_char() + identifier = '' while c.isalnum() or c == '_': identifier += c @@@ -143,20 -198,23 +211,27 @@@ return string def exhausted(self) -> bool: - return self.cur >= len(self.src)-1 + return self.cur > len(self.src)-1 + + def trim_left(self): + while self.peek_char().isspace(): + self.chop_char() def next_token(self) -> bool | Token: - dlog(str(self.cur)) - self.trim_left() - - if self.peek_char() == '/' and self.peek_char() == '/': + while self.current_char() == '/' and self.next_char() == '/': comment = self.consume_comment() + c = self.current_char() - c = self.peek_char() + if (self.exhausted()): + return None + + while c.isspace(): + if c == '\n': + self.next_line() + else: + # dlog(f"Skipped '{c}' at line {self.line}:{self.cur - self.bol}") + self.advance_char() + c = self.current_char() if c.isalpha() or c == '_': return Token(Token_Type.IDENTIFIER, self.consume_identifier()) @@@ -192,7 -248,8 +267,8 @@@ elif c == '"': return Token(Token_Type.STRING, self.consume_string()) else: - raise Exception(f"Unexpected char '{c}'") - error(f"Unexpected char '{c}'. At line {self.line}:{self.cur - self.bol}") ++ # error(f"Unexpected char '{c}'. At line {self.line}:{self.cur - self.bol}") + raise Exception(f"Unexpected char '{c}'. At line {self.line}:{self.cur - self.bol}") return None @@@ -211,12 -269,18 +288,15 @@@ def main() raise Exception("Please provide the filename!") filename = sys.argv.pop(0) # 1. Source - src = "" - with open(filename, mode='r') as file: - src = file.read() + parser = Parser(filename) # 2. Lexical Analysis - parser = Parser(src) tokens = parser.lex() - # pprint.pp(tokens) + pprint.pp(tokens) + output_filename = "output" + output = open(output_filename, 'w') + # 3. TODO: Syntactical Analysis for i in range(0, len(tokens)-1): token = tokens[i]