From: ahmedsamyh <ahmedsamyh10@gmail.com>
Date: Mon, 21 Oct 2024 11:14:28 +0000 (+0500)
Subject: Started to refactor INCOMPLETE!!!
X-Git-Url: https://www.git.momoyon.org/?a=commitdiff_plain;h=4c562931298c3b5976c390d63974b3425ff718bf;p=lang.git

Started to refactor INCOMPLETE!!!
---

diff --git a/main.momo b/main.momo
index fa7f294..18916aa 100644
--- a/main.momo
+++ b/main.momo
@@ -1,6 +1,11 @@
-// # import "stdio";
+
+    // Comment test
+
+#import "stdio";
 
 func main(argc: int, argv: string[]): int {
+    // This is a comment!
     print("Hello, World!");
+    // This is another comment!
     return 0;
 }
diff --git a/main.py b/main.py
index a60e698..6b04e0c 100644
--- a/main.py
+++ b/main.py
@@ -8,9 +8,19 @@ def dlog(msg):
     if DEBUG:
         pprint.pp(f"[DEBUG] {msg}")
 
+class Loc:
+    """A position in a source file: file name, line and column."""
+    def __init__(self, file, line, col):
+        self.filename, self.line, self.col = file, line, col
+
+    def __str__(self):
+        # Rendered as filename:line:col.
+        return "{}:{}:{}".format(self.filename, self.line, self.col)
+
 class Token_Type(IntEnum):
     IDENTIFIER         = auto()
     NUMBER             = auto()
+    # SYMBOLS
     OPEN_PAREN         = auto()
     CLOSE_PAREN        = auto()
     COMMA              = auto()
@@ -20,16 +30,21 @@ class Token_Type(IntEnum):
     OPEN_BRACE         = auto()
     CLOSE_BRACE        = auto()
     SEMICOLON          = auto()
+    HASH               = auto()
+    #
     STRING             = auto()
     COUNT              = auto()
 
+assert Token_Type.COUNT == 14, "Check if every symbols are handled here"
+symbols = "(),[]{}:;#"  # every single-character symbol the lexer accepts
+
 class Token:
     def __init__(self, typ, literal_string):
         self.typ = typ
         self.literal_string = literal_string
 
     def type_as_str(self):
-        assert Token_Type.COUNT == 13, "Every enum value is not handled!"
+        assert Token_Type.COUNT == 14, "Every enum value is not handled!"
         if self.typ == Token_Type.IDENTIFIER: return "IDENTIFIER";
         if self.typ == Token_Type.NUMBER: return "NUMBER";
         if self.typ == Token_Type.OPEN_PAREN: return "OPEN_PAREN";
@@ -41,125 +56,113 @@ class Token:
         if self.typ == Token_Type.OPEN_BRACE: return "OPEN_BRACE";
         if self.typ == Token_Type.CLOSE_BRACE: return "CLOSE_BRACE";
         if self.typ == Token_Type.SEMICOLON: return "SEMICOLON";
+        if self.typ == Token_Type.HASH: return "HASH";
         if self.typ == Token_Type.STRING: return "STRING";
 
     def __repr__(self):
         return f"(Token)'{self.literal_string}' ({self.type_as_str()})"
 
 class Parser:
-    def __init__(self, src):
-        self.src = src
-        self.line = 0
+    def __init__(self, filename):  # reads the whole file named `filename` into self.src
+        with open(filename, mode='r') as file:
+            self.src = file.read()
         self.bol  = 0 # beginning of line
         self.cur  = 0 # cursor position
+        self.loc = Loc(filename, 0, 0)  # line and column both start at 0
 
-    def current_char(self):
-        if self.cur > len(self.src)-1: return -1
-        return self.src[self.cur]
-
-    def next_char(self):
-        if self.cur > len(self.src)-2: return -1
-        return self.src[self.cur + 1]
-
-    # NOTE: Advances cursor and returns next char, NOT the current char.
-    def advance_char(self, by = 1):
-        self.cur += by
-        return self.current_char()
+    def peek_char(self, by=0):
+        pos = self.cur + by  # index of the char being looked at; nothing is consumed
+        if pos >= len(self.src): raise Exception("Exhausted!")
+        return self.src[pos]
 
-    def next_line(self):
-        c = self.current_char()
-        assert(c == '\n')
-        while c == '\n':
-            c = self.advance_char()
+    def chop_char(self):  # consume the char under the cursor and return it
+        current_ch = self.peek_char()
+        self.cur += 1
+        if current_ch == '\n':  # only a newline starts a new line, not every space/tab
             self.bol = self.cur
-            self.line += 1
-        # print(f"next_line-> cur: '{c}'{self.cur}")
+            self.loc.line += 1
+        return current_ch
 
     def consume_comment(self) -> str:
-        c = self.current_char()
-        n = self.next_char()
+        assert self.peek_char() == '/' and self.peek_char(1) == '/'
         comment = ''
-        if c == '/' and n == '/':
-            while c != '\n':
-                comment += c
-                c = self.advance_char()
-            self.next_line()
-        else:
-            return
-        c = self.current_char()
-        # print(f"consume_comment-> cur: '{c}'{self.cur}")
+        # Remove //
+        self.chop_char()
+        self.chop_char()
+
+        while self.peek_char() != '\n':
+            comment += self.peek_char()
+            self.chop_char()
 
-        # dlog(f"Comment: '{comment}'")
-        return comment
+        self.trim_left()  # the loop stopped on '\n'; skip it and any whitespace after it
+        return comment  # the text that followed the leading //
 
     def consume_identifier(self) -> str:
-        c = self.current_char()
+        c = self.peek_char()
         assert(c.isalpha() or c == '_')
 
-        identifier = c
-        c = self.advance_char()
+        identifier = ''  # c is always the char under the cursor; '' marks end of input
 
         while c.isalnum() or c == '_':
             identifier += c
-            c = self.advance_char()
-        # dlog(f"Identifier: '{identifier}'")
+            self.chop_char(); c = '' if self.exhausted() else self.peek_char()
         return identifier
 
     def consume_number(self) -> str:
-        c = self.current_char()
+        c = self.peek_char()  # c is always the char under the cursor; '' marks end of input
         assert(c.isdigit())
         number = ''
 
         while c.isdigit():
             number += c
-            c = self.advance_char()
+            self.chop_char(); c = '' if self.exhausted() else self.peek_char()
 
         # dlog(f"Number: '{number}'")
         return number
 
     def consume_symbol(self) -> str:
-        c = self.current_char()
-        assert(c in "(),[]{}:;")
+        c = self.peek_char()  # look at the symbol without consuming it
+        assert(c in symbols)
         symbol = c
-        self.advance_char()
-        # dlog(f"Symbol: '{symbol}'")
+        self.chop_char()  # step past the symbol
         return symbol
 
     def consume_string(self) -> str:
-        c = self.current_char()
+        c = self.peek_char()
         assert(c == '"')
         # TODO: Does the string include the ""s? (for now it doesn't)
         string = ''
-        c = self.advance_char()
+        self.chop_char(); c = self.peek_char()  # skip the opening ", look at the first content char
         while c != '"':
             string += c
-            c = self.advance_char()
+            self.chop_char(); c = self.peek_char()  # raises "Exhausted!" on an unterminated string
         # Remove " at the end
-        self.advance_char()
+        self.chop_char()
 
         # dlog(f"String: '{string}'");
         return string
 
     def exhausted(self) -> bool:
-        return self.cur >= len(self.src)-1
+        return self.cur >= len(self.src)  # true once the cursor is past the last char
+
+    def trim_left(self):  # skip whitespace under the cursor; stops quietly at end of input
+        while not self.exhausted() and self.peek_char().isspace():
+            self.chop_char()
 
     def next_token(self) -> bool | Token:
-        comment = self.consume_comment()
-        c = self.current_char()
+        dlog(str(self.cur))
+        self.trim_left()
 
-        # print(f"consume_comment-> cur: '{c}'")
-        if (self.exhausted()):
-            # dlog(f"cur: {self.cur}, src.len: {len(self.src)}")
-            return None
+        if self.peek_char() == '/' and self.peek_char(1) == '/':  # a comment needs two slashes
+            comment = self.consume_comment()
 
-        while c.isspace():
-            c = self.advance_char()
+        c = self.peek_char()
 
         if c.isalpha() or c == '_':
             return Token(Token_Type.IDENTIFIER, self.consume_identifier())
         elif c.isdigit(): # TODO: Only handles base-10 numbers
             return Token(Token_Type.NUMBER, self.consume_number())
-        elif c in "(),[]{}:;":
+        elif c in symbols:
             symbol = self.consume_symbol()
             token = Token(Token_Type.COUNT, symbol)
             if symbol == "(":
@@ -180,6 +183,8 @@ class Parser:
                 token.typ = Token_Type.CLOSE_BRACE
             elif symbol == ";":
                 token.typ = Token_Type.SEMICOLON
+            elif symbol == "#":
+                token.typ = Token_Type.HASH
             else:
                 raise Exception(f"Unexpected symbol '{symbol}'")
 
@@ -206,14 +211,11 @@ def main():
         raise Exception("Please provide the filename!")
     filename = sys.argv.pop(0)
     # 1. Source
-    src = ""
-    with open(filename, mode='r') as file:
-        src = file.read()
+    parser = Parser(filename)
 
     # 2. Lexical Analysis
-    parser = Parser(src)
     tokens = parser.lex()
-    # pprint.pp(tokens)
+    pprint.pp(tokens)
 
     # 3. TODO: Syntactical Analysis
     for i in range(0, len(tokens)-1):
@@ -224,6 +226,5 @@ def main():
         # pprint.pp(f"  Token: {token}")
         # pprint.pp(f"  Next:  {next}")
 
-
 if __name__ == '__main__':
     main()