191 lines
5.8 KiB
Java
191 lines
5.8 KiB
Java
|
package com.annimon.everlastingsummer;
|
||
|
|
||
|
import java.util.ArrayList;
import java.util.LinkedList;
import java.util.List;
|
||
|
|
||
|
/**
|
||
|
* @author aNNiMON
|
||
|
*/
|
||
|
public final class Lexer {
|
||
|
|
||
|
public static List<Token> tokenize(String input) {
|
||
|
return new Lexer().process(input).getTokens();
|
||
|
}
|
||
|
|
||
|
private final List<Token> tokens;
|
||
|
private final StringBuilder buffer;
|
||
|
|
||
|
private static final String OPERATOR_CHARS = "=+-()[]$";
|
||
|
private static final TokenType[] OPERATOR_TYPES = new TokenType[] {
|
||
|
TokenType.EQ,
|
||
|
TokenType.PLUS, TokenType.MINUS,
|
||
|
TokenType.LPAREN, TokenType.RPAREN, TokenType.LBRACKET, TokenType.RBRACKET,
|
||
|
TokenType.COMMAND
|
||
|
};
|
||
|
|
||
|
private static final String[] KEYWORDS = {
|
||
|
"play", "stop",
|
||
|
"music", "ambience", "sound", "sound_loop",
|
||
|
"fadein", "fadeout",
|
||
|
|
||
|
"scene", "anim", "bg", "cg",
|
||
|
"at",
|
||
|
"window", "hide", "show",
|
||
|
"with",
|
||
|
"return",
|
||
|
|
||
|
"renpy.pause", "persistent.sprite_time",
|
||
|
"prolog_time", "day_time", "sunset_time", "night_time"
|
||
|
};
|
||
|
private static final TokenType[] KEYWORD_TYPES = new TokenType[] {
|
||
|
TokenType.PLAY, TokenType.STOP,
|
||
|
TokenType.MUSIC, TokenType.AMBIENCE, TokenType.SOUND, TokenType.SOUNDLOOP,
|
||
|
TokenType.FADEIN, TokenType.FADEOUT,
|
||
|
|
||
|
TokenType.SCENE, TokenType.ANIM, TokenType.BG, TokenType.CG,
|
||
|
TokenType.AT,
|
||
|
TokenType.WINDOW, TokenType.HIDE, TokenType.SHOW,
|
||
|
TokenType.WITH,
|
||
|
TokenType.RETURN,
|
||
|
|
||
|
TokenType.RENPY_PAUSE, TokenType.PERSISTENT_SPRITE_TIME,
|
||
|
TokenType.PROLOG_TIME, TokenType.DAY_TIME, TokenType.SUNSET_TIME, TokenType.NIGHT_TIME
|
||
|
};
|
||
|
|
||
|
private TokenizeState state;
|
||
|
private int pos;
|
||
|
|
||
|
private enum TokenizeState {
|
||
|
DEFAULT, NUMBER, OPERATOR, WORD, TEXT, COMMENT
|
||
|
}
|
||
|
|
||
|
private Lexer() {
|
||
|
tokens = new LinkedList<Token>();
|
||
|
buffer = new StringBuilder();
|
||
|
state = TokenizeState.DEFAULT;
|
||
|
}
|
||
|
|
||
|
public List<Token> getTokens() {
|
||
|
return tokens;
|
||
|
}
|
||
|
|
||
|
public Lexer process(String input) {
|
||
|
final int length = input.length();
|
||
|
for (pos = 0; pos < length; pos++) {
|
||
|
tokenize(input.charAt(pos));
|
||
|
}
|
||
|
tokenize('\0');// EOF
|
||
|
addToken(TokenType.EOF, false);
|
||
|
return this;
|
||
|
}
|
||
|
|
||
|
private void tokenize(char ch) {
|
||
|
switch (state) {
|
||
|
case DEFAULT: tokenizeDefault(ch); break;
|
||
|
case WORD: tokenizeWord(ch); break;
|
||
|
case NUMBER: tokenizeNumber(ch); break;
|
||
|
case OPERATOR: tokenizeOperator(ch); break;
|
||
|
case TEXT: tokenizeText(ch); break;
|
||
|
case COMMENT: tokenizeComment(ch); break;
|
||
|
}
|
||
|
}
|
||
|
|
||
|
private void tokenizeDefault(char ch) {
|
||
|
if (Character.isLetter(ch)) {
|
||
|
// Слово (ключевое слово или команда)
|
||
|
buffer.append(ch);
|
||
|
state = TokenizeState.WORD;
|
||
|
} else if (Character.isDigit(ch)) {
|
||
|
// Число
|
||
|
buffer.append(ch);
|
||
|
state = TokenizeState.NUMBER;
|
||
|
} else if (ch == '"') {
|
||
|
// Текст в "кавычках"
|
||
|
state = TokenizeState.TEXT;
|
||
|
} else if (ch == '#') {
|
||
|
clearBuffer();
|
||
|
state = TokenizeState.COMMENT;
|
||
|
} else {
|
||
|
// Операторы и спецсимволы
|
||
|
tokenizeOperator(ch);
|
||
|
}
|
||
|
}
|
||
|
|
||
|
private void tokenizeWord(char ch) {
|
||
|
if (ch == ':') {
|
||
|
addToken(TokenType.LABEL, false);
|
||
|
return;
|
||
|
}
|
||
|
if (Character.isLetterOrDigit(ch) || (ch == '_') || (ch == '.')) {
|
||
|
buffer.append(ch);
|
||
|
} else {
|
||
|
final String word = buffer.toString();
|
||
|
for (int i = 0; i < KEYWORDS.length; i++) {
|
||
|
if (KEYWORDS[i].equalsIgnoreCase(word)) {
|
||
|
addToken(KEYWORD_TYPES[i]);
|
||
|
return;
|
||
|
}
|
||
|
}
|
||
|
addToken(TokenType.WORD);
|
||
|
}
|
||
|
}
|
||
|
|
||
|
private void tokenizeNumber(char ch) {
|
||
|
// Целое или вещественное число.
|
||
|
if (ch == '.') {
|
||
|
// Пропускаем десятичные точки, если они уже были в числе.
|
||
|
if (buffer.indexOf(".") == -1) buffer.append(ch);
|
||
|
} else if (Character.isDigit(ch)) {
|
||
|
buffer.append(ch);
|
||
|
} else {
|
||
|
addToken(TokenType.NUMBER);
|
||
|
}
|
||
|
}
|
||
|
|
||
|
private void tokenizeOperator(char ch) {
|
||
|
final int index = OPERATOR_CHARS.indexOf(ch);
|
||
|
if (index != -1) {
|
||
|
addToken(OPERATOR_TYPES[index], false);
|
||
|
}
|
||
|
}
|
||
|
|
||
|
private void tokenizeText(char ch) {
|
||
|
if (ch == '"') {
|
||
|
final int len = buffer.length();
|
||
|
// Добавляем токен, если не было экранирования символа кавычки.
|
||
|
if (len == 0 ||
|
||
|
( (len > 0) && (buffer.charAt(len - 1) != '\\') )) {
|
||
|
addToken(TokenType.TEXT, false);
|
||
|
return;
|
||
|
}
|
||
|
// Экранируем символ кавычки.
|
||
|
if (len > 0) {
|
||
|
buffer.setCharAt(len - 1, '\"');
|
||
|
return;
|
||
|
}
|
||
|
}
|
||
|
buffer.append(ch);
|
||
|
}
|
||
|
|
||
|
private void tokenizeComment(char ch) {
|
||
|
if (ch == '\n' || ch == '\r') {
|
||
|
state = TokenizeState.DEFAULT;
|
||
|
}
|
||
|
}
|
||
|
|
||
|
private void addToken(TokenType type) {
|
||
|
addToken(type, true);
|
||
|
}
|
||
|
|
||
|
private void addToken(TokenType type, boolean reprocessLastChar) {
|
||
|
tokens.add(new Token(buffer.toString(), type));
|
||
|
clearBuffer();
|
||
|
if (reprocessLastChar) pos--;
|
||
|
state = TokenizeState.DEFAULT;
|
||
|
}
|
||
|
|
||
|
private void clearBuffer() {
|
||
|
buffer.setLength(0);
|
||
|
}
|
||
|
}
|