package com.annimon.everlastingsummer; import java.util.ArrayList; import java.util.HashMap; import java.util.List; import java.util.Locale; import java.util.Map; /** * @author aNNiMON */ public final class Lexer { public static List tokenize(String input) { return new Lexer().process(input).getTokens(); } private static final String OPERATOR_CHARS = "=+-()[]!$:"; private static final TokenType[] OPERATOR_TYPES = new TokenType[] { TokenType.EQ, TokenType.PLUS, TokenType.MINUS, TokenType.LPAREN, TokenType.RPAREN, TokenType.LBRACKET, TokenType.RBRACKET, TokenType.EXCL, TokenType.COMMAND, TokenType.COLON, }; private static final Map KEYWORDS; static { KEYWORDS = new HashMap(); KEYWORDS.put("play", TokenType.PLAY); KEYWORDS.put("stop", TokenType.STOP); KEYWORDS.put("music", TokenType.MUSIC); KEYWORDS.put("ambience", TokenType.AMBIENCE); KEYWORDS.put("sound", TokenType.SOUND); KEYWORDS.put("sound_loop", TokenType.SOUNDLOOP); KEYWORDS.put("fadein", TokenType.FADEIN); KEYWORDS.put("fadeout", TokenType.FADEOUT); KEYWORDS.put("scene", TokenType.SCENE); KEYWORDS.put("anim", TokenType.ANIM); KEYWORDS.put("bg", TokenType.BG); KEYWORDS.put("cg", TokenType.CG); KEYWORDS.put("at", TokenType.AT); KEYWORDS.put("window", TokenType.WINDOW); KEYWORDS.put("hide", TokenType.HIDE); KEYWORDS.put("show", TokenType.SHOW); KEYWORDS.put("with", TokenType.WITH); KEYWORDS.put("return", TokenType.RETURN); KEYWORDS.put("menu", TokenType.MENU); KEYWORDS.put("endmenu", TokenType.ENDMENU); KEYWORDS.put("jump", TokenType.JUMP); KEYWORDS.put("label", TokenType.LABEL); KEYWORDS.put("if", TokenType.IF); KEYWORDS.put("else", TokenType.ELSE); KEYWORDS.put("endif", TokenType.ENDIF); KEYWORDS.put("renpy.pause", TokenType.RENPY_PAUSE); KEYWORDS.put("persistent.sprite_time", TokenType.PERSISTENT_SPRITE_TIME); KEYWORDS.put("prolog_time", TokenType.PROLOG_TIME); KEYWORDS.put("day_time", TokenType.DAY_TIME); KEYWORDS.put("sunset_time", TokenType.SUNSET_TIME); KEYWORDS.put("night_time", TokenType.NIGHT_TIME); } private final List tokens; private final StringBuilder buffer; private TokenizeState state; private int pos; private enum TokenizeState { DEFAULT, NUMBER, OPERATOR, WORD, TEXT, COMMENT } private Lexer() { tokens = new ArrayList(); buffer = new StringBuilder(); state = TokenizeState.DEFAULT; } public List getTokens() { return tokens; } public Lexer process(String input) { final int length = input.length(); for (pos = 0; pos < length; pos++) { tokenize(input.charAt(pos)); } tokenize('\0');// EOF addToken(TokenType.EOF, false); input = null; return this; } private void tokenize(char ch) { switch (state) { case DEFAULT: tokenizeDefault(ch); break; case WORD: tokenizeWord(ch); break; case NUMBER: tokenizeNumber(ch); break; case OPERATOR: tokenizeOperator(ch); break; case TEXT: tokenizeText(ch); break; case COMMENT: tokenizeComment(ch); break; } } private void tokenizeDefault(char ch) { if (Character.isLetter(ch)) { // Слово (ключевое слово или команда) buffer.append(ch); state = TokenizeState.WORD; } else if (Character.isDigit(ch)) { // Число buffer.append(ch); state = TokenizeState.NUMBER; } else if (ch == '"') { // Текст в "кавычках" state = TokenizeState.TEXT; } else if (ch == '#') { clearBuffer(); state = TokenizeState.COMMENT; } else { // Операторы и спецсимволы tokenizeOperator(ch); } } private void tokenizeWord(char ch) { if (Character.isLetterOrDigit(ch) || (ch == '_') || (ch == '.')) { buffer.append(ch); } else { final String word = buffer.toString().toLowerCase(Locale.ENGLISH); addToken(KEYWORDS.containsKey(word) ? KEYWORDS.get(word) : TokenType.WORD); } } private void tokenizeNumber(char ch) { // Целое или вещественное число. if (ch == '.') { // Пропускаем десятичные точки, если они уже были в числе. if (buffer.indexOf(".") == -1) buffer.append(ch); } else if (Character.isDigit(ch)) { buffer.append(ch); } else { addToken(TokenType.NUMBER); } } private void tokenizeOperator(char ch) { final int index = OPERATOR_CHARS.indexOf(ch); if (index != -1) { addToken(OPERATOR_TYPES[index], false); } } private void tokenizeText(char ch) { if (ch == '"') { final int len = buffer.length(); // Добавляем токен, если не было экранирования символа кавычки. if (len == 0 || ( (len > 0) && (buffer.charAt(len - 1) != '\\') )) { addToken(TokenType.TEXT, false); return; } // Экранируем символ кавычки. if (len > 0) { buffer.setCharAt(len - 1, '\"'); return; } } buffer.append(ch); } private void tokenizeComment(char ch) { if (ch == '\n' || ch == '\r') { state = TokenizeState.DEFAULT; } } private void addToken(TokenType type) { addToken(type, true); } private void addToken(TokenType type, boolean reprocessLastChar) { tokens.add(new Token(buffer.toString(), type)); clearBuffer(); if (reprocessLastChar) pos--; state = TokenizeState.DEFAULT; } private void clearBuffer() { buffer.setLength(0); } }