www.git.momoyon.org Git - lang.git/commitdiff
Initial Commit.
author ahmedsamyh <ahmedsamyh10@gmail.com>
Mon, 21 Oct 2024 07:54:15 +0000 (12:54 +0500)
committer ahmedsamyh <ahmedsamyh10@gmail.com>
Mon, 21 Oct 2024 07:54:15 +0000 (12:54 +0500)
- Can parse Identifiers, Numbers and Symbols.
- TODO: Parse Strings.

main.momo [new file with mode: 0644]
main.py [new file with mode: 0644]

diff --git a/main.momo b/main.momo
new file mode 100644 (file)
index 0000000..aabfb08
--- /dev/null
+++ b/main.momo
@@ -0,0 +1,6 @@
+// # import "stdio";
+
+func main(int argc, string argv[]): int {
+    print("Hello, World!");
+    return 0;
+}
diff --git a/main.py b/main.py
new file mode 100644 (file)
index 0000000..9dd8bd2
--- /dev/null
+++ b/main.py
@@ -0,0 +1,160 @@
+import sys
+from enum import IntEnum, auto
+import pprint
+
+DEBUG = True  # Module-wide switch: when False, dlog() prints nothing.
+
+def dlog(msg):
+    """Print *msg* to stdout with a ``[DEBUG]`` prefix when DEBUG is enabled.
+
+    ``msg`` may be any object; it is interpolated into an f-string.
+    """
+    if DEBUG:
+        # Plain print(): pprint.pp() would emit the repr() of the formatted
+        # string, i.e. wrap every message in quotes and escape its contents.
+        print(f"[DEBUG] {msg}")
+
+class Token_Type(IntEnum):
+    """Kinds of token the lexer can produce.
+
+    Members are numbered from 1, the same values ``auto()`` assigns.
+    """
+    IDENTIFIER = 1
+    NUMBER = 2
+    SYMBOL = 3
+    STRING = 4
+    # Trailing marker checked by Token.type_as_str(); the lexer never emits it.
+    COUNT = 5
+
+class Token:
+    """A single lexeme: a token type plus the text captured for it."""
+    def __init__(self, typ, literal_string):
+        self.typ = typ  # Token_Type member classifying this token
+        self.literal_string = literal_string  # text captured for the token
+
+    def type_as_str(self):
+        """Return the name of this token's type, or None for an unhandled type."""
+        # auto() numbers enum members from 1, so with four real token types the
+        # trailing COUNT member equals 5.  The check must not be written as
+        # assert(cond, "msg"): that asserts a non-empty tuple, which is always
+        # true, so the check could never fire.
+        assert Token_Type.COUNT == 5, "Every enum value is not handled!"
+        handled = (
+            Token_Type.IDENTIFIER,
+            Token_Type.NUMBER,
+            Token_Type.SYMBOL,
+            Token_Type.STRING,
+        )
+        for known in handled:
+            if self.typ == known:
+                return known.name
+        return None
+
+    def __repr__(self):
+        return f"(Token)'{self.literal_string}' ({self.type_as_str()})"
+
+class Lexer:
+    """Hand-written lexer that turns source text into ``Token`` objects.
+
+    Call ``next_token()`` repeatedly; it returns None once the input is used up.
+    """
+    SYMBOLS = "(),[]{}:;"  # every character lexed as a one-character SYMBOL
+    EOF = -1  # returned by current_char()/next_char() past the end of src
+
+    def __init__(self, src):
+        self.src = src
+        self.line = 0 # zero-based index of the current line
+        self.bol  = 0 # beginning of line: index in src where that line starts
+        self.cur  = 0 # cursor position: index in src of the current char
+
+    def current_char(self):
+        """Return the char under the cursor, or ``EOF`` (-1) past the end."""
+        if self.cur >= len(self.src): return self.EOF
+        return self.src[self.cur]
+
+    def next_char(self):
+        """Return the char after the cursor without moving, or ``EOF`` (-1)."""
+        if self.cur + 1 >= len(self.src): return self.EOF
+        return self.src[self.cur + 1]
+
+    # NOTE: Advances cursor and returns next char, NOT the current char.
+    def advance_char(self, by = 1):
+        """Move the cursor forward ``by`` chars and return the char now under it."""
+        self.cur += by
+        return self.current_char()
+
+    def next_line(self):
+        """Consume a run of newlines, bumping ``line`` and ``bol`` for each one.
+
+        The cursor must be on a newline when this is called.
+        """
+        c = self.current_char()
+        assert c == '\n'
+        while c == '\n':
+            c = self.advance_char()
+            self.bol = self.cur
+            self.line += 1
+
+    def consume_comment(self) -> "str | None":
+        """Consume a ``//`` comment and the newline(s) that follow it.
+
+        Returns the comment text (including the ``//``, excluding the newline),
+        or None without moving the cursor when it is not on a comment.
+        """
+        if self.current_char() != '/' or self.next_char() != '/':
+            return None
+        start = self.cur
+        end = self.src.find('\n', start)
+        if end == -1:
+            # Comment on the last line with no trailing newline: it simply
+            # runs to the end of the input.
+            self.cur = len(self.src)
+            return self.src[start:]
+        self.cur = end
+        self.next_line()
+        return self.src[start:end]
+
+    def consume_identifier(self) -> str:
+        """Consume and return an identifier: ``[alpha or _][alnum or _]*``."""
+        c = self.current_char()
+        assert c != self.EOF and (c.isalpha() or c == '_')
+
+        start = self.cur
+        c = self.advance_char()
+        # The EOF sentinel is an int, so it must be ruled out before any str
+        # method is called on c.
+        while c != self.EOF and (c.isalnum() or c == '_'):
+            c = self.advance_char()
+        return self.src[start:self.cur]
+
+    def consume_number(self) -> str:
+        """Consume and return a run of digits as text."""
+        c = self.current_char()
+        assert c != self.EOF and c.isdigit()
+
+        start = self.cur
+        while c != self.EOF and c.isdigit():
+            c = self.advance_char()
+        return self.src[start:self.cur]
+
+    def consume_symbol(self) -> str:
+        """Consume and return the one-character symbol under the cursor."""
+        c = self.current_char()
+        assert c != self.EOF and c in self.SYMBOLS
+        self.advance_char()
+        return c
+
+    def consume_string(self) -> str:
+        """Consume a double-quoted string and return the text between the quotes.
+
+        The cursor must be on the opening quote.  No escape sequences are
+        recognised: the literal ends at the next ``"``.
+
+        Raises:
+            Exception: if there is no closing quote before the end of input.
+        """
+        assert self.current_char() == '"'
+        start = self.cur + 1
+        end = self.src.find('"', start)
+        if end == -1:
+            raise Exception("Unterminated string literal")
+        string = self.src[start:end]
+        # Keep line bookkeeping correct if the literal spans several lines.
+        newlines = string.count('\n')
+        if newlines:
+            self.line += newlines
+            self.bol = start + string.rfind('\n') + 1
+        # Step past the closing quote so the next token starts after it;
+        # leaving the cursor in place would yield the same token forever.
+        self.cur = end + 1
+        return string
+
+    def exhausted(self) -> bool:
+        """Return True once the cursor has moved past the last char of ``src``."""
+        return self.cur >= len(self.src)
+
+    def _skip_trivia(self):
+        """Skip whitespace and ``//`` comments in any order and quantity."""
+        while True:
+            c = self.current_char()
+            if c == '\n':
+                # Go through next_line() so line/bol stay accurate.
+                self.next_line()
+            elif c != self.EOF and c.isspace():
+                self.advance_char()
+            elif self.consume_comment() is None:
+                return
+
+    def next_token(self) -> "Token | None":
+        """Return the next token, or None when only whitespace/comments remain.
+
+        Raises:
+            Exception: on a char that cannot start any token.
+        """
+        self._skip_trivia()
+        # Checked after skipping, so trailing whitespace or a trailing comment
+        # ends the stream instead of reaching the classification below.
+        if self.exhausted():
+            return None
+
+        c = self.current_char()
+        if c.isalpha() or c == '_':
+            return Token(Token_Type.IDENTIFIER, self.consume_identifier())
+        if c.isdigit(): # TODO: Only handles base-10 numbers
+            return Token(Token_Type.NUMBER, self.consume_number())
+        if c in self.SYMBOLS:
+            return Token(Token_Type.SYMBOL, self.consume_symbol())
+        if c == '"':
+            return Token(Token_Type.STRING, self.consume_string())
+        raise Exception(f"Unexpected char '{c}'")
+
+def main():
+    """Lex the file named by the first command-line argument, logging each token.
+
+    Raises:
+        Exception: if no filename was given on the command line.
+    """
+    # Index into sys.argv rather than pop()-ing it, so the process-wide
+    # argument list is left intact.
+    if len(sys.argv) < 2:
+        raise Exception("Please provide the filename!")
+    filename = sys.argv[1]
+    # Decode explicitly instead of relying on the platform's locale default.
+    with open(filename, mode='r', encoding='utf-8') as file:
+        src = file.read()
+    lexer = Lexer(src)
+    # next_token() returns None once the source is used up.
+    while (token := lexer.next_token()) is not None:
+        dlog(token)
+
+
+if __name__ == '__main__':
+    main()