From 736ec0f40d2a0f229739cdfbd0a74cac6f23de76 Mon Sep 17 00:00:00 2001
From: aNNiMON <annimon119@gmail.com>
Date: Thu, 29 Feb 2024 00:16:13 +0200
Subject: [PATCH] Add lexer

---
 src/main.ts             |   5 +-
 src/parser/Lexer.ts     | 243 ++++++++++++++++++++++++++++++++++++++++
 src/parser/Token.ts     |  20 ++++
 src/parser/TokenType.ts |  76 +++++++++++++
 4 files changed, 343 insertions(+), 1 deletion(-)
 create mode 100644 src/parser/Lexer.ts
 create mode 100644 src/parser/Token.ts
 create mode 100644 src/parser/TokenType.ts
diff --git a/src/main.ts b/src/main.ts
index 4c1ad42..445f5c9 100644
--- a/src/main.ts
+++ b/src/main.ts
@@ -1 +1,5 @@
-console.log("It works!");
+import { Lexer } from './parser/Lexer';
+
+// Lex a sample expression and print the resulting token list.
+const lexer = new Lexer("10 / 2");
+console.log(lexer.process().getTokens());
\ No newline at end of file
diff --git a/src/parser/Lexer.ts b/src/parser/Lexer.ts
new file mode 100644
index 0000000..47d0c86
--- /dev/null
+++ b/src/parser/Lexer.ts
@@ -0,0 +1,300 @@
+import { Token } from "./Token";
+import { TokenType } from "./TokenType";
+
+/**
+ * Converts a script source string into a flat list of tokens.
+ *
+ * Words are matched case-insensitively against the keyword table, `#` starts a
+ * comment that runs to the end of the line, and a character that does not
+ * start a word, number, text literal, comment or known operator is skipped
+ * without producing a token.
+ */
+export class Lexer {
+    /** Lower-case keyword/command spelling mapped to its token type. */
+    private static readonly KEYWORDS: Readonly<Record<string, TokenType>> = {
+        "play": TokenType.PLAY,
+        "queue": TokenType.QUEUE,
+        "stop": TokenType.STOP,
+        "music": TokenType.MUSIC,
+        "ambience": TokenType.AMBIENCE,
+        "sound": TokenType.SOUND,
+        "sound_loop": TokenType.SOUNDLOOP,
+        "fadein": TokenType.FADEIN,
+        "fadeout": TokenType.FADEOUT,
+
+        "scene": TokenType.SCENE,
+        "anim": TokenType.ANIM,
+        "bg": TokenType.BG,
+        "cg": TokenType.CG,
+        "at": TokenType.AT,
+        "as": TokenType.AS,
+        "define": TokenType.DEFINE,
+        "window": TokenType.WINDOW,
+        "hide": TokenType.HIDE,
+        "show": TokenType.SHOW,
+        "with": TokenType.WITH,
+
+        "return": TokenType.RETURN,
+        "menu": TokenType.MENU,
+        "endmenu": TokenType.ENDMENU,
+        "jump": TokenType.JUMP,
+        "label": TokenType.LABEL,
+
+        "if": TokenType.IF,
+        "else": TokenType.ELSE,
+        "endif": TokenType.ENDIF,
+        "or": TokenType.OR,
+        "and": TokenType.AND,
+        "not": TokenType.NOT,
+
+        "renpy.pause": TokenType.RENPY_PAUSE,
+        "renpy.say": TokenType.RENPY_SAY,
+        "persistent.sprite_time": TokenType.PERSISTENT_SPRITE_TIME,
+        "prolog_time": TokenType.PROLOG_TIME,
+        "day_time": TokenType.DAY_TIME,
+        "sunset_time": TokenType.SUNSET_TIME,
+        "night_time": TokenType.NIGHT_TIME,
+        "make_names_known": TokenType.MAKE_NAMES_KNOWN,
+        "make_names_unknown": TokenType.MAKE_NAMES_UNKNOWN,
+        "set_name": TokenType.SET_NAME,
+        "meet": TokenType.SET_NAME,
+        "disable_all_zones": TokenType.DISABLE_ALL_ZONES,
+        "disable_current_zone": TokenType.DISABLE_CURRENT_ZONE,
+        "reset_zone": TokenType.RESET_ZONE,
+        "set_zone": TokenType.SET_ZONE,
+        "show_map": TokenType.SHOW_MAP
+    };
+
+    /** Single-character operators; the i-th character yields OPERATOR_TYPES[i]. */
+    private static readonly OPERATOR_CHARS = "=+-<>()[]!$:";
+    private static readonly OPERATOR_TYPES: ReadonlyArray<TokenType> = [
+        TokenType.EQ,
+        TokenType.PLUS, TokenType.MINUS,
+        TokenType.LT, TokenType.GT,
+        TokenType.LPAREN, TokenType.RPAREN, TokenType.LBRACKET, TokenType.RBRACKET,
+        TokenType.EXCL, TokenType.COMMAND, TokenType.COLON
+    ];
+
+    // Character classes, hoisted so no regex is rebuilt for every character.
+    private static readonly WORD_START = /[a-z]/i;
+    private static readonly WORD_PART = /[a-z0-9_.]/i;
+    private static readonly DIGIT = /[0-9]/;
+    private static readonly WHITESPACE = /\s/;
+
+    private tokens: Array<Token> = [];
+    private readonly length: number;
+    private pos = 0;
+    private buffer = "";
+
+    /**
+     * @param input source text to tokenize
+     */
+    constructor(
+        private readonly input: string
+    ) {
+        this.length = input.length;
+    }
+
+    /** Returns the tokens produced by the most recent {@link process} call. */
+    public getTokens(): Array<Token> { return this.tokens; }
+
+    /**
+     * Tokenizes the whole input from the beginning.
+     *
+     * The token list is restarted on every call, so calling this more than once
+     * does not accumulate duplicate tokens.
+     *
+     * @returns this lexer, to allow chaining with {@link getTokens}
+     */
+    public process(): Lexer {
+        this.tokens = [];
+        this.pos = 0;
+        while (this.pos < this.length) {
+            this.tokenize();
+        }
+        return this;
+    }
+
+    /** Skips leading whitespace and dispatches on the first significant character. */
+    private tokenize(): void {
+        this.skipWhitespaces();
+        const ch = this.peek(0);
+        if (Lexer.WORD_START.test(ch)) {
+            // Keyword/command
+            this.tokenizeWord();
+        } else if (Lexer.DIGIT.test(ch)) {
+            this.tokenizeNumber();
+        } else if (ch === '"' || ch === '\'') {
+            // Text in " '
+            this.tokenizeText(ch);
+        } else if (ch === '#') {
+            this.tokenizeComment();
+        } else {
+            // Operators/special symbols
+            this.tokenizeOperator();
+        }
+    }
+
+    /**
+     * Reads a run of letters, digits, `_` and `.` and emits either the matching
+     * keyword token or a WORD token carrying the original spelling.
+     */
+    private tokenizeWord(): void {
+        let ch = this.peek(0);
+        // Unicode u"text" or u'text'
+        if (ch === 'u') {
+            const textStartChar = this.peek(1);
+            if (textStartChar === '"' || textStartChar === '\'') {
+                this.next(); // u
+                this.tokenizeText(textStartChar);
+                return;
+            }
+        }
+
+        this.clearBuffer();
+        while (Lexer.WORD_PART.test(ch)) {
+            this.buffer += ch;
+            ch = this.next();
+        }
+
+        const word = this.buffer;
+        const key = word.toLowerCase();
+        // Own-property check: a plain `in` test would also accept names inherited
+        // from Object.prototype (e.g. `constructor`) and emit a bogus token type.
+        const keywordType = Object.prototype.hasOwnProperty.call(Lexer.KEYWORDS, key)
+            ? Lexer.KEYWORDS[key]
+            : undefined;
+        if (keywordType !== undefined) {
+            this.addToken(keywordType);
+        } else {
+            this.addToken(TokenType.WORD, word);
+        }
+    }
+
+    /** Reads an integer or decimal literal and emits it as a NUMBER token. */
+    private tokenizeNumber(): void {
+        let ch = this.peek(0);
+        this.clearBuffer();
+        let decimal = false;
+        while (true) {
+            // Integer or decimal
+            if (ch === '.') {
+                // Keep only the first decimal point; any further ones are dropped
+                if (!decimal) this.buffer += ch;
+                decimal = true;
+                ch = this.next();
+                continue;
+            } else if (!Lexer.DIGIT.test(ch)) {
+                break;
+            }
+            this.buffer += ch;
+            ch = this.next();
+        }
+        this.addToken(TokenType.NUMBER, this.buffer);
+    }
+
+    /**
+     * Emits the token for a known operator character and advances by one
+     * character; an unknown character is consumed without emitting anything.
+     */
+    private tokenizeOperator(): void {
+        const index = Lexer.OPERATOR_CHARS.indexOf(this.peek(0));
+        const type = index === -1 ? undefined : Lexer.OPERATOR_TYPES[index];
+        if (type !== undefined) {
+            this.addToken(type);
+        }
+        this.next();
+    }
+
+    /**
+     * Reads a quoted text literal and emits it as a TEXT token.
+     *
+     * Recognized escapes are `\n`, `\t`, `\\` and the escaped delimiter; for any
+     * other escape the backslash is kept and the following character is read as
+     * ordinary text. An unterminated literal yields what was read up to the end
+     * of the input.
+     *
+     * @param textStartChar quote character that opened the literal and closes it
+     */
+    private tokenizeText(textStartChar: string): void {
+        this.clearBuffer();
+        let ch = this.next(); // skip the opening quote
+        while (ch !== textStartChar && ch !== '\0') {
+            if (ch !== '\\') {
+                this.buffer += ch;
+                ch = this.next();
+                continue;
+            }
+            const escaped = this.next();
+            if (escaped === 'n') {
+                this.buffer += '\n';
+            } else if (escaped === 't') {
+                this.buffer += '\t';
+            } else if (escaped === '\\' || escaped === textStartChar) {
+                // Append the escaped character itself: the delimiter may be ' as
+                // well as ", and an escaped backslash must not open a new escape.
+                this.buffer += escaped;
+            } else {
+                // Unknown escape: keep the backslash, re-read the character as text
+                this.buffer += '\\';
+                ch = escaped;
+                continue;
+            }
+            ch = this.next();
+        }
+        this.next(); // skip the closing quote
+        this.addToken(TokenType.TEXT, this.buffer);
+    }
+
+    /** Skips a `#` comment up to, but not including, the line break or end of input. */
+    private tokenizeComment(): void {
+        let ch = this.peek(0);
+        while ("\n\r\0".indexOf(ch) === -1) {
+            ch = this.next();
+        }
+    }
+
+    /** Advances past any whitespace at the current position. */
+    private skipWhitespaces(): void {
+        let ch = this.peek(0);
+        while (ch !== '\0' && Lexer.WHITESPACE.test(ch)) {
+            ch = this.next();
+        }
+    }
+
+    /** Appends a token; `text` defaults to the empty string. */
+    private addToken(type: TokenType, text = ""): void {
+        this.tokens.push(new Token(text, type));
+    }
+
+    /** Empties the scratch buffer used while reading a single token. */
+    private clearBuffer(): void {
+        this.buffer = "";
+    }
+
+    /** Advances one character and returns the new current one, or `\0` past the end. */
+    private next(): string {
+        this.pos++;
+        if (this.pos >= this.length) return '\0';
+        return this.input.charAt(this.pos);
+    }
+
+    /** Returns the character at `pos + relativePosition` without advancing, or `\0` past the end. */
+    private peek(relativePosition: number): string {
+        const tempPos = this.pos + relativePosition;
+        if (tempPos >= this.length) return '\0';
+        return this.input.charAt(tempPos);
+    }
+}
+
+
+
+
+
+
+
+
+
+
+
+
diff --git a/src/parser/Token.ts b/src/parser/Token.ts
new file mode 100644
index 0000000..6f6a1db
--- /dev/null
+++ b/src/parser/Token.ts
@@ -0,0 +1,27 @@
+import { TokenType } from "./TokenType";
+
+/** An immutable token: its type together with the text it carries. */
+export class Token {
+    private readonly text: string;
+    private readonly type: TokenType;
+
+    /**
+     * @param text text carried by the token
+     * @param type kind of the token
+     */
+    constructor(text: string, type: TokenType) {
+        this.text = text;
+        this.type = type;
+    }
+
+    /** Returns the text carried by this token. */
+    getText(): string { return this.text; }
+
+    /** Returns the kind of this token. */
+    getType(): TokenType { return this.type; }
+
+    /** Formats the token as its type value, a space, then its text. */
+    toString(): string {
+        return String(this.type) + " " + this.text;
+    }
+}
\ No newline at end of file
diff --git a/src/parser/TokenType.ts b/src/parser/TokenType.ts
new file mode 100644
index 0000000..da392a1
--- /dev/null
+++ b/src/parser/TokenType.ts
@@ -0,0 +1,76 @@
+export enum TokenType { // Kinds of token; numeric members, numbered in declaration order
+    COMMAND, // '$' marker that starts a command
+    WORD, // word that is not one of the keywords below
+    TEXT, // quoted text literal
+    NUMBER, // integer or decimal literal
+    
+    // Operators and special symbols
+    EQ,
+    PLUS,
+    MINUS,
+    LT,
+    GT,
+    LPAREN,
+    RPAREN,
+    LBRACKET,
+    RBRACKET,
+    EXCL,
+    COLON,
+    
+    // Keywords (matched case-insensitively by the lexer)
+    PLAY,
+    QUEUE,
+    STOP,
+    MUSIC,
+    AMBIENCE,
+    SOUND,
+    SOUNDLOOP,
+    FADEIN,
+    FADEOUT,
+    
+    SCENE,
+    ANIM,
+    BG,
+    CG,
+    
+    WINDOW,
+    HIDE,
+    SHOW,
+    
+    AT,
+    AS,
+    WITH,
+    DEFINE,
+    
+    MENU,
+    ENDMENU,
+    JUMP,
+    LABEL,
+    RETURN,
+    
+    IF,
+    ELSE,
+    ENDIF,
+    OR,
+    AND,
+    NOT,
+    
+    // Commands (built-in names the lexer recognizes as single tokens)
+    RENPY_PAUSE,
+    RENPY_SAY,
+    PERSISTENT_SPRITE_TIME,
+    PROLOG_TIME,
+    DAY_TIME,
+    SUNSET_TIME,
+    NIGHT_TIME,
+    MAKE_NAMES_KNOWN,
+    MAKE_NAMES_UNKNOWN,
+    SET_NAME, // the lexer maps both "set_name" and "meet" to this type
+    DISABLE_ALL_ZONES,
+    DISABLE_CURRENT_ZONE,
+    RESET_ZONE,
+    SET_ZONE,
+    SHOW_MAP,
+    
+    EOF // presumably marks end of input; the lexer in this change never emits it (TODO confirm)
+};
\ No newline at end of file