/*
 * Reconstructed from a git format-patch whose line breaks had been collapsed.
 *
 *   From 736ec0f40d2a0f229739cdfbd0a74cac6f23de76 Mon Sep 17 00:00:00 2001
 *   From: aNNiMON
 *   Date: Thu, 29 Feb 2024 00:16:13 +0200
 *   Subject: [PATCH] Add lexer
 *
 * The patch modified src/main.ts and created src/parser/Lexer.ts,
 * src/parser/Token.ts and src/parser/TokenType.ts. Their contents are kept
 * together here as one module, so the imports between those files are not
 * needed. Declarations are ordered TokenType -> Token -> Lexer -> demo because
 * Lexer's static lookup tables read TokenType while the class is being defined.
 */

// ---------------------------------------------------------------------------
// src/parser/TokenType.ts
// ---------------------------------------------------------------------------

/**
 * Every kind of token the lexer can produce.
 *
 * This is a numeric enum: member order determines the runtime values, so new
 * members should be appended rather than inserted.
 */
export enum TokenType {
    COMMAND, // starts with $
    WORD,
    TEXT,
    NUMBER,

    // Operators and special symbols
    EQ,
    PLUS,
    MINUS,
    LT,
    GT,
    LPAREN,
    RPAREN,
    LBRACKET,
    RBRACKET,
    EXCL,
    COLON,

    // Keywords
    PLAY,
    QUEUE,
    STOP,
    MUSIC,
    AMBIENCE,
    SOUND,
    SOUNDLOOP,
    FADEIN,
    FADEOUT,

    SCENE,
    ANIM,
    BG,
    CG,

    WINDOW,
    HIDE,
    SHOW,

    AT,
    AS,
    WITH,
    DEFINE,

    MENU,
    ENDMENU,
    JUMP,
    LABEL,
    RETURN,

    IF,
    ELSE,
    ENDIF,
    OR,
    AND,
    NOT,

    // Commands
    RENPY_PAUSE,
    RENPY_SAY,
    PERSISTENT_SPRITE_TIME,
    PROLOG_TIME,
    DAY_TIME,
    SUNSET_TIME,
    NIGHT_TIME,
    MAKE_NAMES_KNOWN,
    MAKE_NAMES_UNKNOWN,
    SET_NAME,
    DISABLE_ALL_ZONES,
    DISABLE_CURRENT_ZONE,
    RESET_ZONE,
    SET_ZONE,
    SHOW_MAP,

    EOF
}

// ---------------------------------------------------------------------------
// src/parser/Token.ts
// ---------------------------------------------------------------------------

/** An immutable lexical token: a {@link TokenType} plus its (possibly empty) text. */
export class Token {
    /**
     * @param text Payload of the token; the lexer passes "" for keywords and operators.
     * @param type Kind of the token.
     */
    constructor(
        private readonly text: string,
        private readonly type: TokenType
    ) {}

    /** @returns The text this token was created with. */
    getText(): string {
        return this.text;
    }

    /** @returns The kind of this token. */
    getType(): TokenType {
        return this.type;
    }

    /** @returns The numeric token type followed by a space and the token text. */
    toString(): string {
        return `${this.type} ${this.text}`;
    }
}

// ---------------------------------------------------------------------------
// src/parser/Lexer.ts
// ---------------------------------------------------------------------------

/**
 * Splits a script source string into a flat list of {@link Token}s.
 *
 * Typical use: `new Lexer(source).process().getTokens()`.
 *
 * Characters that are not part of a word, number, string, comment or known
 * operator are skipped without producing a token.
 */
export class Lexer {
    /** Sentinel character returned when reading outside the input. */
    private static readonly END = "\0";

    // Hoisted so the regex objects are not rebuilt for every character.
    private static readonly WORD_START = /[a-z]/i;
    private static readonly WORD_PART = /[a-z0-9_.]/i;
    private static readonly DIGIT = /[0-9]/;
    private static readonly WHITESPACE = /\s/;

    /**
     * Keyword lookup, keyed by the lower-cased word.
     *
     * A Map is used instead of an object literal so that words matching
     * inherited object properties (e.g. "constructor") are not mistaken for
     * keywords.
     */
    private static readonly KEYWORDS: ReadonlyMap<string, TokenType> = new Map<string, TokenType>([
        ["play", TokenType.PLAY],
        ["queue", TokenType.QUEUE],
        ["stop", TokenType.STOP],
        ["music", TokenType.MUSIC],
        ["ambience", TokenType.AMBIENCE],
        ["sound", TokenType.SOUND],
        ["sound_loop", TokenType.SOUNDLOOP],
        ["fadein", TokenType.FADEIN],
        ["fadeout", TokenType.FADEOUT],

        ["scene", TokenType.SCENE],
        ["anim", TokenType.ANIM],
        ["bg", TokenType.BG],
        ["cg", TokenType.CG],
        ["at", TokenType.AT],
        ["as", TokenType.AS],
        ["define", TokenType.DEFINE],
        ["window", TokenType.WINDOW],
        ["hide", TokenType.HIDE],
        ["show", TokenType.SHOW],
        ["with", TokenType.WITH],

        ["return", TokenType.RETURN],
        ["menu", TokenType.MENU],
        ["endmenu", TokenType.ENDMENU],
        ["jump", TokenType.JUMP],
        ["label", TokenType.LABEL],

        ["if", TokenType.IF],
        ["else", TokenType.ELSE],
        ["endif", TokenType.ENDIF],
        ["or", TokenType.OR],
        ["and", TokenType.AND],
        ["not", TokenType.NOT],

        ["renpy.pause", TokenType.RENPY_PAUSE],
        ["renpy.say", TokenType.RENPY_SAY],
        ["persistent.sprite_time", TokenType.PERSISTENT_SPRITE_TIME],
        ["prolog_time", TokenType.PROLOG_TIME],
        ["day_time", TokenType.DAY_TIME],
        ["sunset_time", TokenType.SUNSET_TIME],
        ["night_time", TokenType.NIGHT_TIME],
        ["make_names_known", TokenType.MAKE_NAMES_KNOWN],
        ["make_names_unknown", TokenType.MAKE_NAMES_UNKNOWN],
        ["set_name", TokenType.SET_NAME],
        ["meet", TokenType.SET_NAME], // second spelling that yields SET_NAME
        ["disable_all_zones", TokenType.DISABLE_ALL_ZONES],
        ["disable_current_zone", TokenType.DISABLE_CURRENT_ZONE],
        ["reset_zone", TokenType.RESET_ZONE],
        ["set_zone", TokenType.SET_ZONE],
        ["show_map", TokenType.SHOW_MAP]
    ]);

    /** Single-character operators and the token type each one produces. */
    private static readonly OPERATORS: ReadonlyMap<string, TokenType> = new Map<string, TokenType>([
        ["=", TokenType.EQ],
        ["+", TokenType.PLUS],
        ["-", TokenType.MINUS],
        ["<", TokenType.LT],
        [">", TokenType.GT],
        ["(", TokenType.LPAREN],
        [")", TokenType.RPAREN],
        ["[", TokenType.LBRACKET],
        ["]", TokenType.RBRACKET],
        ["!", TokenType.EXCL],
        ["$", TokenType.COMMAND],
        [":", TokenType.COLON]
    ]);

    private tokens: Token[] = [];
    private readonly length: number;
    private pos = 0;
    private buffer = "";

    /** @param input Source text to tokenize. */
    constructor(
        private readonly input: string
    ) {
        this.length = input.length;
    }

    /** @returns The tokens produced by the most recent {@link process} call. */
    public getTokens(): Token[] { return this.tokens; }

    /**
     * Tokenizes the whole input from the beginning.
     *
     * Position and token list are both reset first, so calling this more than
     * once does not accumulate duplicate tokens.
     *
     * @returns This lexer, to allow chaining with {@link getTokens}.
     */
    public process(): Lexer {
        this.pos = 0;
        this.tokens = [];
        while (this.pos < this.length) {
            this.tokenize();
        }
        return this;
    }

    /** Skips leading whitespace, then dispatches on the current character. */
    private tokenize(): void {
        this.skipWhitespaces();
        const ch = this.peek(0);
        if (Lexer.WORD_START.test(ch)) {
            // Keyword/command
            this.tokenizeWord();
        } else if (Lexer.DIGIT.test(ch)) {
            this.tokenizeNumber();
        } else if (ch === '"' || ch === "'") {
            // Text in " '
            this.tokenizeText(ch);
        } else if (ch === "#") {
            this.tokenizeComment();
        } else {
            // Operators/special symbols (anything unknown is skipped)
            this.tokenizeOperator();
        }
    }

    /**
     * Reads a run of letters, digits, `_` and `.`; emits the matching keyword
     * token, or a WORD token carrying the original spelling.
     */
    private tokenizeWord(): void {
        let ch = this.peek(0);
        // Unicode-prefixed text: u"text" or u'text'
        if (ch === "u") {
            const textStartChar = this.peek(1);
            if (textStartChar === '"' || textStartChar === "'") {
                this.next(); // skip the u prefix
                this.tokenizeText(textStartChar);
                return;
            }
        }

        this.clearBuffer();
        while (Lexer.WORD_PART.test(ch)) {
            this.buffer += ch;
            ch = this.next();
        }

        const word = this.buffer;
        // Compare against undefined: TokenType.COMMAND is 0 and therefore falsy.
        const keyword = Lexer.KEYWORDS.get(word.toLowerCase());
        if (keyword !== undefined) {
            this.addToken(keyword);
        } else {
            this.addToken(TokenType.WORD, word);
        }
    }

    /**
     * Reads an integer or decimal literal and emits a NUMBER token with its text.
     * Only the first `.` is kept; any further dots are consumed and dropped.
     */
    private tokenizeNumber(): void {
        let ch = this.peek(0);
        this.clearBuffer();
        let decimal = false;
        while (true) {
            if (ch === ".") {
                // Skip the floating point if more than 1 is present
                if (!decimal) this.buffer += ch;
                decimal = true;
            } else if (Lexer.DIGIT.test(ch)) {
                this.buffer += ch;
            } else {
                break;
            }
            ch = this.next();
        }
        this.addToken(TokenType.NUMBER, this.buffer);
    }

    /** Consumes one character, emitting a token only if it is a known operator. */
    private tokenizeOperator(): void {
        const type = Lexer.OPERATORS.get(this.peek(0));
        if (type !== undefined) {
            this.addToken(type);
        }
        this.next();
    }

    /**
     * Reads a quoted string and emits a TEXT token with the unescaped content.
     *
     * Recognized escapes: `\n`, `\t`, `\\` and the escaped opening quote
     * character. Any other backslash sequence is kept verbatim. An
     * unterminated string yields whatever was read up to the end of input.
     *
     * @param textStartChar The quote character that opened the string.
     */
    private tokenizeText(textStartChar: string): void {
        this.clearBuffer();
        let ch = this.next(); // skip opening quote
        while (ch !== textStartChar && ch !== Lexer.END) {
            if (ch === "\\") {
                const escaped = this.next();
                if (escaped === Lexer.END) {
                    // Dangling backslash at end of input: keep it and stop.
                    this.buffer += "\\";
                    break;
                }
                switch (escaped) {
                    case "n":
                        this.buffer += "\n";
                        break;
                    case "t":
                        this.buffer += "\t";
                        break;
                    case "\\":
                        this.buffer += "\\";
                        break;
                    case textStartChar:
                        this.buffer += textStartChar;
                        break;
                    default:
                        // Unknown escape: keep the backslash and the character.
                        this.buffer += "\\" + escaped;
                        break;
                }
            } else {
                this.buffer += ch;
            }
            ch = this.next();
        }
        this.next(); // skip closing quote
        this.addToken(TokenType.TEXT, this.buffer);
    }

    /** Skips a `#` comment up to (not including) the line break or end of input. */
    private tokenizeComment(): void {
        let ch = this.peek(0);
        while (ch !== "\n" && ch !== "\r" && ch !== Lexer.END) {
            ch = this.next();
        }
    }

    /** Advances past any whitespace at the current position. */
    private skipWhitespaces(): void {
        // The end sentinel is not whitespace, so this also stops at end of input.
        while (Lexer.WHITESPACE.test(this.peek(0))) {
            this.next();
        }
    }

    /** Appends a new token to the output list. */
    private addToken(type: TokenType, text = ""): void {
        this.tokens.push(new Token(text, type));
    }

    private clearBuffer(): void {
        this.buffer = "";
    }

    /** Advances one character and returns the new current one (or the end sentinel). */
    private next(): string {
        this.pos++;
        return this.peek(0);
    }

    /**
     * Returns the character at an offset from the current position without
     * consuming it, or the end sentinel when that index is outside the input.
     */
    private peek(relativePosition: number): string {
        const index = this.pos + relativePosition;
        if (index < 0 || index >= this.length) return Lexer.END;
        return this.input.charAt(index);
    }
}

// ---------------------------------------------------------------------------
// src/main.ts
// ---------------------------------------------------------------------------

const tokens = new Lexer("10 / 2").process().getTokens();
console.log(tokens);