Add lexer

This commit is contained in:
aNNiMON 2024-02-29 00:16:13 +02:00
parent d72b431453
commit 736ec0f40d
4 changed files with 343 additions and 1 deletions

View File

@ -1 +1,4 @@
console.log("It works!");
// Smoke test for the lexer: tokenize a tiny expression and dump the tokens.
// Note: '/' is not listed in Lexer.OPERATOR_CHARS, so the lexer skips it and
// only the two NUMBER tokens ("10" and "2") end up in the output.
import { Lexer } from './parser/Lexer'
const tokens = new Lexer("10 / 2").process().getTokens();
console.log(tokens);

243
src/parser/Lexer.ts Normal file
View File

@ -0,0 +1,243 @@
import { Token } from "./Token";
import { TokenType } from "./TokenType";
/**
 * Converts a script source string into a flat list of {@link Token}s.
 *
 * Recognized lexemes:
 * - words (`[a-z][a-z0-9_.]*`, case-insensitive) — emitted as a keyword token
 *   when the lower-cased spelling is a known keyword, otherwise as `WORD`;
 * - numbers (digits with at most one `.` kept) — emitted as `NUMBER`;
 * - quoted text in `"` or `'`, optionally prefixed with `u` — emitted as `TEXT`;
 * - `#` comments — skipped up to the end of the line;
 * - single-character operators listed in {@link Lexer.OPERATORS}.
 *
 * Any other character (for example `/`) is silently skipped.
 *
 * Usage: `new Lexer(source).process().getTokens()`.
 */
export class Lexer {
  /**
   * Keywords and commands keyed by their lower-case spelling.
   *
   * A `Map` is used instead of a plain object so that words such as
   * `constructor` are not mistaken for keywords through the prototype chain.
   */
  private static readonly KEYWORDS: ReadonlyMap<string, TokenType> = new Map<string, TokenType>([
    ["play", TokenType.PLAY],
    ["queue", TokenType.QUEUE],
    ["stop", TokenType.STOP],
    ["music", TokenType.MUSIC],
    ["ambience", TokenType.AMBIENCE],
    ["sound", TokenType.SOUND],
    ["sound_loop", TokenType.SOUNDLOOP],
    ["fadein", TokenType.FADEIN],
    ["fadeout", TokenType.FADEOUT],
    ["scene", TokenType.SCENE],
    ["anim", TokenType.ANIM],
    ["bg", TokenType.BG],
    ["cg", TokenType.CG],
    ["at", TokenType.AT],
    ["as", TokenType.AS],
    ["define", TokenType.DEFINE],
    ["window", TokenType.WINDOW],
    ["hide", TokenType.HIDE],
    ["show", TokenType.SHOW],
    ["with", TokenType.WITH],
    ["return", TokenType.RETURN],
    ["menu", TokenType.MENU],
    ["endmenu", TokenType.ENDMENU],
    ["jump", TokenType.JUMP],
    ["label", TokenType.LABEL],
    ["if", TokenType.IF],
    ["else", TokenType.ELSE],
    ["endif", TokenType.ENDIF],
    ["or", TokenType.OR],
    ["and", TokenType.AND],
    ["not", TokenType.NOT],
    ["renpy.pause", TokenType.RENPY_PAUSE],
    ["renpy.say", TokenType.RENPY_SAY],
    ["persistent.sprite_time", TokenType.PERSISTENT_SPRITE_TIME],
    ["prolog_time", TokenType.PROLOG_TIME],
    ["day_time", TokenType.DAY_TIME],
    ["sunset_time", TokenType.SUNSET_TIME],
    ["night_time", TokenType.NIGHT_TIME],
    ["make_names_known", TokenType.MAKE_NAMES_KNOWN],
    ["make_names_unknown", TokenType.MAKE_NAMES_UNKNOWN],
    ["set_name", TokenType.SET_NAME],
    // NOTE(review): "meet" maps to the same token as "set_name" — presumably
    // an intentional alias; confirm.
    ["meet", TokenType.SET_NAME],
    ["disable_all_zones", TokenType.DISABLE_ALL_ZONES],
    ["disable_current_zone", TokenType.DISABLE_CURRENT_ZONE],
    ["reset_zone", TokenType.RESET_ZONE],
    ["set_zone", TokenType.SET_ZONE],
    ["show_map", TokenType.SHOW_MAP],
  ]);

  /** Single-character operators and the token type each one produces. */
  private static readonly OPERATORS: ReadonlyMap<string, TokenType> = new Map<string, TokenType>([
    ["=", TokenType.EQ],
    ["+", TokenType.PLUS],
    ["-", TokenType.MINUS],
    ["<", TokenType.LT],
    [">", TokenType.GT],
    ["(", TokenType.LPAREN],
    [")", TokenType.RPAREN],
    ["[", TokenType.LBRACKET],
    ["]", TokenType.RBRACKET],
    ["!", TokenType.EXCL],
    ["$", TokenType.COMMAND],
    [":", TokenType.COLON],
  ]);

  // Character classes, hoisted so they are not re-created on every loop step.
  private static readonly LETTER = /[a-z]/i;
  private static readonly DIGIT = /[0-9]/;
  private static readonly WORD_CHAR = /[a-z0-9_.]/i;
  private static readonly WHITESPACE = /\s/;

  /** Sentinel returned by {@link next} and {@link peek} past the end of input. */
  private static readonly END = '\0';

  private readonly tokens: Array<Token> = [];
  private readonly length: number;
  private pos = 0;

  /**
   * @param input the complete source text to tokenize
   */
  constructor(
    private readonly input: string
  ) {
    this.length = input.length;
  }

  /** Returns the tokens produced by the most recent {@link process} call. */
  public getTokens(): Array<Token> { return this.tokens; }

  /**
   * Tokenizes the whole input from the beginning.
   *
   * The token list is cleared in place first, so calling this method more
   * than once does not duplicate tokens and previously obtained references
   * from {@link getTokens} stay valid.
   *
   * @returns this lexer, to allow chaining with {@link getTokens}
   */
  public process(): Lexer {
    this.pos = 0;
    this.tokens.length = 0;
    while (this.pos < this.length) {
      this.tokenize();
    }
    return this;
  }

  /** Skips leading whitespace and dispatches on the first significant character. */
  private tokenize(): void {
    this.skipWhitespaces();
    const ch = this.peek(0);
    if (Lexer.LETTER.test(ch)) {
      // Keyword/command
      this.tokenizeWord();
    } else if (Lexer.DIGIT.test(ch)) {
      this.tokenizeNumber();
    } else if (ch === '"' || ch === '\'') {
      // Text in " '
      this.tokenizeText(ch);
    } else if (ch === '#') {
      this.tokenizeComment();
    } else {
      // Operators/special symbols (anything unknown is dropped there)
      this.tokenizeOperator();
    }
  }

  /** Reads a word and emits either its keyword token or a `WORD` token. */
  private tokenizeWord(): void {
    let ch = this.peek(0);
    // Unicode-prefixed text: u"text" or u'text'
    if (ch === 'u') {
      const textStartChar = this.peek(1);
      if (textStartChar === '"' || textStartChar === '\'') {
        this.next(); // skip the 'u' prefix
        this.tokenizeText(textStartChar);
        return;
      }
    }

    let word = "";
    while (Lexer.WORD_CHAR.test(ch)) {
      word += ch;
      ch = this.next();
    }

    // Keywords are matched case-insensitively; WORD keeps the original casing.
    const keyword = Lexer.KEYWORDS.get(word.toLowerCase());
    if (keyword !== undefined) {
      this.addToken(keyword);
    } else {
      this.addToken(TokenType.WORD, word);
    }
  }

  /**
   * Reads an integer or decimal number and emits a `NUMBER` token.
   * Only the first `.` is kept; any further dots are consumed and discarded.
   */
  private tokenizeNumber(): void {
    let digits = "";
    let decimal = false;
    for (let ch = this.peek(0); ; ch = this.next()) {
      if (ch === '.') {
        if (!decimal) digits += ch;
        decimal = true;
      } else if (Lexer.DIGIT.test(ch)) {
        digits += ch;
      } else {
        break;
      }
    }
    this.addToken(TokenType.NUMBER, digits);
  }

  /**
   * Emits the token for a known operator character and advances by one.
   * Unknown characters are consumed without producing a token.
   */
  private tokenizeOperator(): void {
    const type = Lexer.OPERATORS.get(this.peek(0));
    if (type !== undefined) {
      this.addToken(type);
    }
    this.next();
  }

  /**
   * Reads quoted text and emits a `TEXT` token.
   *
   * Supported escapes: `\n`, `\t`, `\\` and the escaped delimiter (the same
   * quote character that opened the text). Any other backslash is kept
   * verbatim together with the character that follows it. Unterminated text
   * runs to the end of input and is still emitted.
   *
   * @param textStartChar the quote character that opened the text
   */
  private tokenizeText(textStartChar: string): void {
    let text = "";
    let ch = this.next(); // skip the opening quote
    while (ch !== textStartChar && ch !== Lexer.END) {
      if (ch !== '\\') {
        text += ch;
        ch = this.next();
        continue;
      }
      const escaped = this.next();
      switch (escaped) {
        case 'n':
          text += '\n';
          ch = this.next();
          break;
        case 't':
          text += '\t';
          ch = this.next();
          break;
        case '\\':
          text += '\\';
          ch = this.next();
          break;
        case textStartChar:
          // Append the actual delimiter, not a hard-coded double quote.
          text += textStartChar;
          ch = this.next();
          break;
        default:
          // Unknown escape: keep the backslash and let the loop handle the
          // following character as ordinary text (or stop at end of input).
          text += '\\';
          ch = escaped;
      }
    }
    this.next(); // skip the closing quote
    this.addToken(TokenType.TEXT, text);
  }

  /** Skips a `#` comment up to (not including) the line break or end of input. */
  private tokenizeComment(): void {
    let ch = this.peek(0);
    while (ch !== '\n' && ch !== '\r' && ch !== Lexer.END) {
      ch = this.next();
    }
  }

  /** Advances past any run of whitespace at the current position. */
  private skipWhitespaces(): void {
    let ch = this.peek(0);
    while (ch !== Lexer.END && Lexer.WHITESPACE.test(ch)) {
      ch = this.next();
    }
  }

  /** Appends a token of the given type (with optional text) to the output. */
  private addToken(type: TokenType, text = ""): void {
    this.tokens.push(new Token(text, type));
  }

  /** Advances one character and returns it, or `'\0'` at end of input. */
  private next(): string {
    this.pos++;
    if (this.pos >= this.length) return Lexer.END;
    return this.input.charAt(this.pos);
  }

  /** Returns the character at `pos + relativePosition` without advancing, or `'\0'` past the end. */
  private peek(relativePosition: number): string {
    const tempPos = this.pos + relativePosition;
    if (tempPos >= this.length) return Lexer.END;
    return this.input.charAt(tempPos);
  }
}

20
src/parser/Token.ts Normal file
View File

@ -0,0 +1,20 @@
import { TokenType } from "./TokenType";
/**
 * Immutable lexeme: a token type paired with the source text it carries
 * (the text may be empty for tokens that need no payload).
 */
export class Token {
  private readonly text: string;
  private readonly type: TokenType;

  /**
   * @param text textual payload of the token
   * @param type kind of the token
   */
  constructor(text: string, type: TokenType) {
    this.text = text;
    this.type = type;
  }

  /** Textual payload of this token. */
  getText(): string {
    const { text } = this;
    return text;
  }

  /** Kind of this token. */
  getType(): TokenType {
    const { type } = this;
    return type;
  }

  /** Renders the token as "<type> <text>", separated by a single space. */
  toString(): string {
    return String(this.type) + " " + this.text;
  }
}

76
src/parser/TokenType.ts Normal file
View File

@ -0,0 +1,76 @@
/**
 * Kinds of tokens produced by the lexer.
 *
 * Numeric values are spelled out explicitly; they equal each member's
 * position in the list (0-based), so adding a member in the middle requires
 * renumbering the ones after it.
 */
export enum TokenType {
  /** Command marker: a line element that starts with `$`. */
  COMMAND = 0,
  WORD = 1,
  TEXT = 2,
  NUMBER = 3,

  // Operators and special symbols
  EQ = 4,
  PLUS = 5,
  MINUS = 6,
  LT = 7,
  GT = 8,
  LPAREN = 9,
  RPAREN = 10,
  LBRACKET = 11,
  RBRACKET = 12,
  EXCL = 13,
  COLON = 14,

  // Keywords
  PLAY = 15,
  QUEUE = 16,
  STOP = 17,
  MUSIC = 18,
  AMBIENCE = 19,
  SOUND = 20,
  SOUNDLOOP = 21,
  FADEIN = 22,
  FADEOUT = 23,
  SCENE = 24,
  ANIM = 25,
  BG = 26,
  CG = 27,
  WINDOW = 28,
  HIDE = 29,
  SHOW = 30,
  AT = 31,
  AS = 32,
  WITH = 33,
  DEFINE = 34,
  MENU = 35,
  ENDMENU = 36,
  JUMP = 37,
  LABEL = 38,
  RETURN = 39,
  IF = 40,
  ELSE = 41,
  ENDIF = 42,
  OR = 43,
  AND = 44,
  NOT = 45,

  // Commands
  RENPY_PAUSE = 46,
  RENPY_SAY = 47,
  PERSISTENT_SPRITE_TIME = 48,
  PROLOG_TIME = 49,
  DAY_TIME = 50,
  SUNSET_TIME = 51,
  NIGHT_TIME = 52,
  MAKE_NAMES_KNOWN = 53,
  MAKE_NAMES_UNKNOWN = 54,
  SET_NAME = 55,
  DISABLE_ALL_ZONES = 56,
  DISABLE_CURRENT_ZONE = 57,
  RESET_ZONE = 58,
  SET_ZONE = 59,
  SHOW_MAP = 60,

  EOF = 61,
}