Started to refactor — INCOMPLETE!!!

author ahmedsamyh <ahmedsamyh10@gmail.com>

Mon, 21 Oct 2024 11:14:28 +0000 (16:14 +0500)

committer ahmedsamyh <ahmedsamyh10@gmail.com>

Mon, 21 Oct 2024 11:14:28 +0000 (16:14 +0500)
author ahmedsamyh <ahmedsamyh10@gmail.com>
Mon, 21 Oct 2024 11:14:28 +0000 (16:14 +0500)
committer ahmedsamyh <ahmedsamyh10@gmail.com>
Mon, 21 Oct 2024 11:14:28 +0000 (16:14 +0500)
diff --git a/main.momo b/main.momo

index fa7f2946836cedbe5ac51cf487032abb09726c82..18916aa58d9a47b1627f99f0398352f16e4c6eae 100644 (file)
--- a/main.momo
+++ b/main.momo
@@ -1,6 +1,11 @@
-// # import "stdio";
+
+    // Comment test
+
+#import "stdio";
  
  func main(argc: int, argv: string[]): int {
+    // This is a comment!
      print("Hello, World!");
+    // This is another comment!
      return 0;
  }
diff --git a/main.py b/main.py

index a60e698bdd0817ee2a8ef6af2a4dff1100d32ee4..6b04e0c3c5358c1dde3381214930c1f072010c30 100644 (file)
--- a/main.py
+++ b/main.py
@@ -8,9 +8,19 @@ def dlog(msg):
      if DEBUG:
          pprint.pp(f"[DEBUG] {msg}")
  
+class Loc:
+    def __init__(self, file, line, col):
+        self.filename = file
+        self.line = line
+        self.col = col
+
+    def __str__(self):
+        return f"{self.filename}:{self.line}:{self.col}"
+
  class Token_Type(IntEnum):
      IDENTIFIER         = auto()
      NUMBER             = auto()
+    # SYMBOLS
      OPEN_PAREN         = auto()
      CLOSE_PAREN        = auto()
      COMMA              = auto()
@@ -20,16 +30,21 @@ class Token_Type(IntEnum):
      OPEN_BRACE         = auto()
      CLOSE_BRACE        = auto()
      SEMICOLON          = auto()
+    HASH               = auto()
+    #
      STRING             = auto()
      COUNT              = auto()
  
+assert Token_Type.COUNT == 14, "Check if every symbols are handled here"
+symbols = "(),[]{}:;#"
+
  class Token:
      def __init__(self, typ, literal_string):
          self.typ = typ
          self.literal_string = literal_string
  
      def type_as_str(self):
-        assert Token_Type.COUNT == 13, "Every enum value is not handled!"
+        assert Token_Type.COUNT == 14, "Every enum value is not handled!"
          if self.typ == Token_Type.IDENTIFIER: return "IDENTIFIER";
          if self.typ == Token_Type.NUMBER: return "NUMBER";
          if self.typ == Token_Type.OPEN_PAREN: return "OPEN_PAREN";
@@ -41,125 +56,113 @@ class Token:
          if self.typ == Token_Type.OPEN_BRACE: return "OPEN_BRACE";
          if self.typ == Token_Type.CLOSE_BRACE: return "CLOSE_BRACE";
          if self.typ == Token_Type.SEMICOLON: return "SEMICOLON";
+        if self.typ == Token_Type.HASH: return "HASH";
          if self.typ == Token_Type.STRING: return "STRING";
  
      def __repr__(self):
          return f"(Token)'{self.literal_string}' ({self.type_as_str()})"
  
  class Parser:
-    def __init__(self, src):
-        self.src = src
-        self.line = 0
+    def __init__(self, filename):
+        with open(filename, mode='r') as file:
+            self.src = file.read()
          self.bol  = 0 # beginning of line
          self.cur  = 0 # cursor position
+        self.loc = Loc(filename, 0, 0)
  
-    def current_char(self):
-        if self.cur > len(self.src)-1: return -1
-        return self.src[self.cur]
-
-    def next_char(self):
-        if self.cur > len(self.src)-2: return -1
-        return self.src[self.cur + 1]
-
-    # NOTE: Advances cursor and returns next char, NOT the current char.
-    def advance_char(self, by = 1):
-        self.cur += by
-        return self.current_char()
+    def peek_char(self, by=0):
+        if (self.cur+by) > len(self.src)-1:
+            raise Exception("Exhausted!")
+        return self.src[self.cur + by]
  
-    def next_line(self):
-        c = self.current_char()
-        assert(c == '\n')
-        while c == '\n':
-            c = self.advance_char()
+    def chop_char(self):
+        current_ch = self.peek_char()
+        self.cur += 1
+        if current_ch.isspace():
              self.bol = self.cur
-            self.line += 1
-        # print(f"next_line-> cur: '{c}'{self.cur}")
+            self.loc.line += 1
+        return current_ch
  
      def consume_comment(self) -> str:
-        c = self.current_char()
-        n = self.next_char()
+        assert self.peek_char() == '/' and self.peek_char(1) == '/'
          comment = ''
-        if c == '/' and n == '/':
-            while c != '\n':
-                comment += c
-                c = self.advance_char()
-            self.next_line()
-        else:
-            return
-        c = self.current_char()
-        # print(f"consume_comment-> cur: '{c}'{self.cur}")
+        # Remove //
+        self.chop_char()
+        self.chop_char()
+
+        while self.peek_char() != '\n':
+            comment += self.peek_char()
+            self.chop_char()
  
-        # dlog(f"Comment: '{comment}'")
-        return comment
+        assert self.peek_char() == '\n'
+        self.trim_left()
  
      def consume_identifier(self) -> str:
-        c = self.current_char()
+        c = self.peek_char()
          assert(c.isalpha() or c == '_')
  
-        identifier = c
-        c = self.advance_char()
+        identifier = ''
  
          while c.isalnum() or c == '_':
              identifier += c
-            c = self.advance_char()
-        # dlog(f"Identifier: '{identifier}'")
+            c = self.chop_char()
          return identifier
  
      def consume_number(self) -> str:
-        c = self.current_char()
+        c = self.peek_char()
          assert(c.isdigit())
          number = ''
  
          while c.isdigit():
              number += c
-            c = self.advance_char()
+            c = self.chop_char()
  
          # dlog(f"Number: '{number}'")
          return number
  
      def consume_symbol(self) -> str:
-        c = self.current_char()
-        assert(c in "(),[]{}:;")
+        c = self.peek_char()
+        assert(c in symbols)
          symbol = c
-        self.advance_char()
-        # dlog(f"Symbol: '{symbol}'")
+        self.chop_char()
          return symbol
  
      def consume_string(self) -> str:
-        c = self.current_char()
+        c = self.peek_char()
          assert(c == '"')
          # TODO: Does the string include the ""s? (for now it doesn't)
          string = ''
-        c = self.advance_char()
          while c != '"':
              string += c
-            c = self.advance_char()
+            c = self.chop_char()
          # Remove " at the end
-        self.advance_char()
+        assert self.peek_char() == '"'
+        self.chop_char()
  
          # dlog(f"String: '{string}'");
          return string
  
      def exhausted(self) -> bool:
-        return self.cur >= len(self.src)-1
+        return self.cur > len(self.src)-1
+
+    def trim_left(self):
+        while self.peek_char().isspace():
+            self.chop_char()
  
      def next_token(self) -> bool | Token:
-        comment = self.consume_comment()
-        c = self.current_char()
+        dlog(str(self.cur))
+        self.trim_left()
  
-        # print(f"consume_comment-> cur: '{c}'")
-        if (self.exhausted()):
-            # dlog(f"cur: {self.cur}, src.len: {len(self.src)}")
-            return None
+        if self.peek_char() == '/' and self.peek_char() == '/':
+            comment = self.consume_comment()
  
-        while c.isspace():
-            c = self.advance_char()
+        c = self.peek_char()
  
          if c.isalpha() or c == '_':
              return Token(Token_Type.IDENTIFIER, self.consume_identifier())
          elif c.isdigit(): # TODO: Only handles base-10 numbers
              return Token(Token_Type.NUMBER, self.consume_number())
-        elif c in "(),[]{}:;":
+        elif c in symbols:
              symbol = self.consume_symbol()
              token = Token(Token_Type.COUNT, symbol)
              if symbol == "(":
@@ -180,6 +183,8 @@ class Parser:
                  token.typ = Token_Type.CLOSE_BRACE
              elif symbol == ";":
                  token.typ = Token_Type.SEMICOLON
+            elif symbol == "#":
+                token.typ = Token_Type.HASH
              else:
                  raise Exception(f"Unexpected symbol '{symbol}'")
  
@@ -206,14 +211,11 @@ def main():
          raise Exception("Please provide the filename!")
      filename = sys.argv.pop(0)
      # 1. Source
-    src = ""
-    with open(filename, mode='r') as file:
-        src = file.read()
+    parser = Parser(filename)
  
      # 2. Lexical Analysis
-    parser = Parser(src)
      tokens = parser.lex()
-    # pprint.pp(tokens)
+    pprint.pp(tokens)
  
      # 3. TODO: Syntactical Analysis
      for i in range(0, len(tokens)-1):
@@ -224,6 +226,5 @@ def main():
          # pprint.pp(f"  Token: {token}")
          # pprint.pp(f"  Next:  {next}")
  
-
  if __name__ == '__main__':
      main()
author	ahmedsamyh <ahmedsamyh10@gmail.com>
	Mon, 21 Oct 2024 11:14:28 +0000 (16:14 +0500)
committer	ahmedsamyh <ahmedsamyh10@gmail.com>
	Mon, 21 Oct 2024 11:14:28 +0000 (16:14 +0500)
main.momo		patch | blob | history
main.py		patch | blob | history