Can parse identifiers.

author ahmedsamyh <ahmedsamyh10@gmail.com>

Sat, 16 Nov 2024 16:54:12 +0000 (21:54 +0500)

committer ahmedsamyh <ahmedsamyh10@gmail.com>

Sat, 16 Nov 2024 16:54:12 +0000 (21:54 +0500)
author ahmedsamyh <ahmedsamyh10@gmail.com>
Sat, 16 Nov 2024 16:54:12 +0000 (21:54 +0500)
committer ahmedsamyh <ahmedsamyh10@gmail.com>
Sat, 16 Nov 2024 16:54:12 +0000 (21:54 +0500)
diff --git a/main.momo b/main.momo

index 794a03c174cbe3aa909d9c084470a670e003184a..dad80ffce9ff2e9e8164c4a0c838f6e73f9a1dbf 100644 (file)
--- a/main.momo
+++ b/main.momo
@@ -1,6 +1,2 @@
  
-
-
-
-
-"This is a long ass string"
+   foo : string = "Value of foo";
diff --git a/main.py b/main.py

index 0a40e28c16decd46b90f6b0cb5f61da4fef65ee8..0fee0a9507a37f0f790d028a3725606da068c408 100644 (file)
--- a/main.py
+++ b/main.py
@@ -100,13 +100,31 @@ class Parser:
  
          return (string, string_loc)
  
+    def consume_identifier(self) -> (str, Loc):
+        # Consume a letter or '_' then letters, digits or '_'; return (text, Loc of first char).
+        assert self.current_char().isalpha() or self.current_char() == '_', "Called consume_identifier() at the wrong character!"
+        ident_loc: Loc = Loc(self.filename, self.line, self.row())
+        ident: str = self.consume_char()
+
+        # Peek with current_char() and consume only identifier characters, so the
+        # delimiter that follows (e.g. ':' in "foo:") is left for the next token.
+        # NOTE(review): assumes eof() is true once all input is consumed - confirm.
+        while not self.eof() and (self.current_char().isalpha() or self.current_char() == '_' or self.current_char().isdigit()):
+            ident += self.consume_char()
+
+        return (ident, ident_loc)
+
      def left_trim(self):
          while self.current_char().isspace():
+            if self.current_char() == '\n':  # a newline is about to be skipped: update line bookkeeping
+                self.line += 1
+                self.bol = self.cur + 1  # presumably the index where the next line begins - confirm
              self.consume_char()
              # dlog(f"Skipping {self.current_char()}")
  
          # dlog(f"Char after left trim: '{self.current_char()}'")
  
+
      def next_token(self) -> Token | None:
          self.left_trim()
  
@@ -116,10 +134,14 @@ class Parser:
          if c == '"':
              value, loc = self.consume_string()
              t = Token(TokenType.STRING, value, loc)
+        elif c.isalpha() or c == '_':
+            ident, loc = self.consume_identifier()
+            t = Token(TokenType.IDENT, ident, loc)
          else:
              fatal(f"Unrecognized character '{c}'")
  
          return t
+
  def main():
      program: str = sys.argv.pop(0)
  
diff --git a/tests/03-whitespaced-string.momo b/tests/03-whitespaced-string.momo

new file mode 100644 (file)

index 0000000..c168e11
--- /dev/null
+++ b/tests/03-whitespaced-string.momo
@@ -0,0 +1,3 @@
+
+
+"This is a long ass string"
diff --git a/tests/03-whitespaced-string.momo.test b/tests/03-whitespaced-string.momo.test

new file mode 100644 (file)

index 0000000..0085efe
--- /dev/null
+++ b/tests/03-whitespaced-string.momo.test
@@ -0,0 +1 @@
+Token (2, 'This is a long ass string', ./tests/03-whitespaced-string.momo:3:0)
diff --git a/tests/04-identifier.momo b/tests/04-identifier.momo

new file mode 100644 (file)

index 0000000..dad80ff
--- /dev/null
+++ b/tests/04-identifier.momo
@@ -0,0 +1,2 @@
+
+   foo : string = "Value of foo";
diff --git a/tests/04-identifier.momo.test b/tests/04-identifier.momo.test

new file mode 100644 (file)

index 0000000..8eebbcc
--- /dev/null
+++ b/tests/04-identifier.momo.test
@@ -0,0 +1 @@
+Token (1, 'foo', ./tests/04-identifier.momo:2:3)
author	ahmedsamyh <ahmedsamyh10@gmail.com>
	Sat, 16 Nov 2024 16:54:12 +0000 (21:54 +0500)
committer	ahmedsamyh <ahmedsamyh10@gmail.com>
	Sat, 16 Nov 2024 16:54:12 +0000 (21:54 +0500)
main.momo		patch \| blob \| history
main.py		patch \| blob \| history
tests/03-whitespaced-string.momo	[new file with mode: 0644]	patch \| blob
tests/03-whitespaced-string.momo.test	[new file with mode: 0644]	patch \| blob
tests/04-identifier.momo	[new file with mode: 0644]	patch \| blob
tests/04-identifier.momo.test	[new file with mode: 0644]	patch \| blob