mirror of
https://github.com/aNNiMON/HotaruFX.git
synced 2024-09-19 14:14:21 +03:00
Add lexer
This commit is contained in:
parent
dc0dbe0db7
commit
37cf836c40
4
app/src/main/java/com/annimon/hotarufx/Main.java
Normal file
4
app/src/main/java/com/annimon/hotarufx/Main.java
Normal file
@ -0,0 +1,4 @@
|
||||
package com.annimon.hotarufx;
|
||||
|
||||
/**
 * Application entry-point class for HotaruFX.
 * Currently empty — no {@code main} method is defined in this commit;
 * presumably the application bootstrap is added later.
 */
public class Main {
}
|
@ -0,0 +1,14 @@
|
||||
package com.annimon.hotarufx.exceptions;
|
||||
|
||||
import com.annimon.hotarufx.lexer.SourcePosition;
|
||||
|
||||
public class LexerException extends RuntimeException {
|
||||
|
||||
public LexerException(String message) {
|
||||
super(message);
|
||||
}
|
||||
|
||||
public LexerException(SourcePosition position, String message) {
|
||||
super(position.toString() + " " + message);
|
||||
}
|
||||
}
|
200
app/src/main/java/com/annimon/hotarufx/lexer/HotaruLexer.java
Normal file
200
app/src/main/java/com/annimon/hotarufx/lexer/HotaruLexer.java
Normal file
@ -0,0 +1,200 @@
|
||||
package com.annimon.hotarufx.lexer;
|
||||
|
||||
import java.util.HashMap;
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
import lombok.val;
|
||||
|
||||
public class HotaruLexer extends Lexer {
|
||||
|
||||
public static List<Token> tokenize(String input) {
|
||||
val lexer = new HotaruLexer(input);
|
||||
lexer.tokenize();
|
||||
return lexer.getTokens();
|
||||
}
|
||||
|
||||
private static final String TEXT_CHARS = "'\"";
|
||||
private static final String OPERATOR_CHARS = "(){}=.,";
|
||||
|
||||
private static final Map<String, HotaruTokenId> OPERATORS;
|
||||
static {
|
||||
OPERATORS = new HashMap<>();
|
||||
OPERATORS.put("(", HotaruTokenId.LPAREN);
|
||||
OPERATORS.put(")", HotaruTokenId.RPAREN);
|
||||
OPERATORS.put("{", HotaruTokenId.LBRACE);
|
||||
OPERATORS.put("}", HotaruTokenId.RBRACE);
|
||||
OPERATORS.put("=", HotaruTokenId.EQ);
|
||||
OPERATORS.put(".", HotaruTokenId.DOT);
|
||||
OPERATORS.put(",", HotaruTokenId.COMMA);
|
||||
}
|
||||
|
||||
public HotaruLexer(String input) {
|
||||
super(input);
|
||||
}
|
||||
|
||||
public Token nextToken() {
|
||||
val current = peek(0);
|
||||
if (Character.isDigit(current)) return tokenizeNumber();
|
||||
else if (Character.isJavaIdentifierStart(current)) return tokenizeWord();
|
||||
else if (current == '#') return tokenizeComment();
|
||||
else if (current == '/' && peek(1) == '*') {
|
||||
return tokenizeMultilineComment();
|
||||
}
|
||||
else if (TEXT_CHARS.indexOf(current) != -1) {
|
||||
return tokenizeText(current);
|
||||
}
|
||||
else if (OPERATOR_CHARS.indexOf(current) != -1) {
|
||||
return tokenizeOperator();
|
||||
}
|
||||
else if (Character.isWhitespace(current)) {
|
||||
return tokenizeWhitespaces();
|
||||
}
|
||||
else {
|
||||
// other
|
||||
next();
|
||||
}
|
||||
return createToken(HotaruTokenId.WS, "", 1);
|
||||
}
|
||||
|
||||
private Token tokenizeNumber() {
|
||||
clearBuffer();
|
||||
char current = peek(0);
|
||||
while (true) {
|
||||
if (current == '.') {
|
||||
if (getBuffer().indexOf(".") != -1)
|
||||
throw error("Invalid float number");
|
||||
} else if (!Character.isDigit(current)) {
|
||||
break;
|
||||
}
|
||||
getBuffer().append(current);
|
||||
current = next();
|
||||
}
|
||||
return addToken(HotaruTokenId.NUMBER);
|
||||
}
|
||||
|
||||
private Token tokenizeWord() {
|
||||
clearBuffer();
|
||||
getBuffer().append(peek(0));
|
||||
char current = next();
|
||||
while (!isEOF()) {
|
||||
if (!Character.isJavaIdentifierPart(current)) {
|
||||
break;
|
||||
}
|
||||
getBuffer().append(current);
|
||||
current = next();
|
||||
}
|
||||
|
||||
val word = getBuffer().toString();
|
||||
return addToken(HotaruTokenId.WORD, word);
|
||||
}
|
||||
|
||||
private Token tokenizeText(char openChar) {
|
||||
next();// "
|
||||
clearBuffer();
|
||||
char current = peek(0);
|
||||
while (true) {
|
||||
if (current == '\\') {
|
||||
val buffer = getBuffer();
|
||||
current = next();
|
||||
if (current == openChar) {
|
||||
current = next();
|
||||
buffer.append(openChar);
|
||||
continue;
|
||||
}
|
||||
switch (current) {
|
||||
case '0': current = next(); buffer.append('\0'); continue;
|
||||
case 'b': current = next(); buffer.append('\b'); continue;
|
||||
case 'f': current = next(); buffer.append('\f'); continue;
|
||||
case 'n': current = next(); buffer.append('\n'); continue;
|
||||
case 'r': current = next(); buffer.append('\r'); continue;
|
||||
case 't': current = next(); buffer.append('\t'); continue;
|
||||
case 'u': // http://docs.oracle.com/javase/specs/jls/se8/html/jls-3.html#jls-3.3
|
||||
int rollbackPosition = getPos();
|
||||
while (current == 'u') current = next();
|
||||
int escapedValue = 0;
|
||||
for (int i = 12; i >= 0 && escapedValue != -1; i -= 4) {
|
||||
if (isHexNumber(current)) {
|
||||
escapedValue |= (Character.digit(current, 16) << i);
|
||||
} else {
|
||||
escapedValue = -1;
|
||||
}
|
||||
current = next();
|
||||
}
|
||||
if (escapedValue >= 0) {
|
||||
buffer.append((char) escapedValue);
|
||||
} else {
|
||||
// rollback
|
||||
buffer.append("\\u");
|
||||
setPos(rollbackPosition);
|
||||
}
|
||||
continue;
|
||||
}
|
||||
buffer.append('\\');
|
||||
continue;
|
||||
}
|
||||
if (current == openChar) break;
|
||||
if (current == '\0') {
|
||||
throw error("Reached end of file while parsing text");
|
||||
}
|
||||
getBuffer().append(current);
|
||||
current = next();
|
||||
}
|
||||
next(); // "
|
||||
return addToken(HotaruTokenId.TEXT, getBuffer().toString(), getBuffer().length() + 2);
|
||||
}
|
||||
|
||||
private Token tokenizeOperator() {
|
||||
char current = peek(0);
|
||||
clearBuffer();
|
||||
while (true) {
|
||||
val text = getBuffer().toString();
|
||||
if (!text.isEmpty() && !OPERATORS.containsKey(text + current)) {
|
||||
return addToken(OPERATORS.get(text), "", text.length());
|
||||
}
|
||||
getBuffer().append(current);
|
||||
current = next();
|
||||
}
|
||||
}
|
||||
|
||||
private Token tokenizeComment() {
|
||||
next(); // #
|
||||
clearBuffer();
|
||||
getBuffer().append("#");
|
||||
char current = peek(0);
|
||||
while ("\r\n\0".indexOf(current) == -1) {
|
||||
getBuffer().append(current);
|
||||
current = next();
|
||||
}
|
||||
return createToken(HotaruTokenId.SINGLE_LINE_COMMENT);
|
||||
}
|
||||
|
||||
private Token tokenizeMultilineComment() {
|
||||
next(); // /
|
||||
next(); // *
|
||||
clearBuffer();
|
||||
getBuffer().append("/*");
|
||||
char current = peek(0);
|
||||
while (true) {
|
||||
if (current == '*' && peek(1) == '/') break;
|
||||
if (current == '\0') {
|
||||
throw error("Reached end of file while parsing multiline comment");
|
||||
}
|
||||
getBuffer().append(current);
|
||||
current = next();
|
||||
}
|
||||
next(); // *
|
||||
next(); // /
|
||||
getBuffer().append("*/");
|
||||
return createToken(HotaruTokenId.MULTI_LINE_COMMENT);
|
||||
}
|
||||
|
||||
private Token tokenizeWhitespaces() {
|
||||
clearBuffer();
|
||||
char current = peek(0);
|
||||
while (Character.isWhitespace(current)) {
|
||||
getBuffer().append(current);
|
||||
current = next();
|
||||
}
|
||||
return createToken(HotaruTokenId.WS);
|
||||
}
|
||||
}
|
@ -0,0 +1,36 @@
|
||||
package com.annimon.hotarufx.lexer;
|
||||
|
||||
import lombok.AccessLevel;
|
||||
import lombok.AllArgsConstructor;
|
||||
|
||||
/**
 * Token types produced by the Hotaru lexer, each tagged with a broad
 * category (exposed via {@link #getPrimaryCategory()}).
 */
public enum HotaruTokenId {

    NUMBER(Category.NUMBER),
    WORD(Category.IDENTIFIER),
    TEXT(Category.STRING),

    EQ(Category.OPERATOR),
    LPAREN(Category.OPERATOR),
    RPAREN(Category.OPERATOR),
    LBRACE(Category.OPERATOR),
    RBRACE(Category.OPERATOR),
    COMMA(Category.OPERATOR),
    DOT(Category.OPERATOR),

    SINGLE_LINE_COMMENT(Category.COMMENT),
    MULTI_LINE_COMMENT(Category.COMMENT),

    WS(Category.WHITESPACE),
    EOF(Category.WHITESPACE);

    /** Broad classification shared by several token types. */
    private enum Category {
        NUMBER, IDENTIFIER, STRING, OPERATOR, COMMENT, WHITESPACE
    }

    private final Category category;

    // Replaces Lombok's @AllArgsConstructor(access = PACKAGE): enum
    // constructors are implicitly non-public, so plain Java suffices
    // and removes the code-generation dependency.
    HotaruTokenId(Category category) {
        this.category = category;
    }

    /**
     * Category name in lower case, e.g. {@code "operator"}.
     * Locale.ROOT keeps the result stable regardless of the default
     * locale (avoids the Turkish dotless-i problem for IDENTIFIER).
     *
     * @return lower-case category name
     */
    public String getPrimaryCategory() {
        return category.name().toLowerCase(java.util.Locale.ROOT);
    }
}
|
113
app/src/main/java/com/annimon/hotarufx/lexer/Lexer.java
Normal file
113
app/src/main/java/com/annimon/hotarufx/lexer/Lexer.java
Normal file
@ -0,0 +1,113 @@
|
||||
package com.annimon.hotarufx.lexer;
|
||||
|
||||
import com.annimon.hotarufx.exceptions.LexerException;
|
||||
import java.util.ArrayList;
|
||||
import java.util.List;
|
||||
import lombok.val;
|
||||
|
||||
public abstract class Lexer {
|
||||
|
||||
private final String input;
|
||||
private final int length;
|
||||
|
||||
private final List<Token> tokens;
|
||||
private final StringBuilder buffer;
|
||||
|
||||
private int pos;
|
||||
private int row, col;
|
||||
|
||||
public Lexer(String input) {
|
||||
this.input = input;
|
||||
length = input.length();
|
||||
tokens = new ArrayList<>();
|
||||
buffer = new StringBuilder();
|
||||
pos = 0;
|
||||
row = col = 1;
|
||||
}
|
||||
|
||||
public List<Token> getTokens() {
|
||||
return tokens;
|
||||
}
|
||||
|
||||
protected StringBuilder getBuffer() {
|
||||
return buffer;
|
||||
}
|
||||
|
||||
protected int getPos() {
|
||||
return pos;
|
||||
}
|
||||
|
||||
protected void setPos(int pos) {
|
||||
this.pos = pos;
|
||||
}
|
||||
|
||||
public boolean isEOF() {
|
||||
return pos >= length;
|
||||
}
|
||||
|
||||
public List<Token> tokenize() {
|
||||
final List<Token> allTokens = new ArrayList<>();
|
||||
while (!isEOF()) {
|
||||
allTokens.add(nextToken());
|
||||
}
|
||||
return allTokens;
|
||||
}
|
||||
|
||||
public abstract Token nextToken();
|
||||
|
||||
protected void clearBuffer() {
|
||||
buffer.setLength(0);
|
||||
}
|
||||
|
||||
protected char next() {
|
||||
pos++;
|
||||
final char result = peek(0);
|
||||
if (result == '\n') {
|
||||
row++;
|
||||
col = 1;
|
||||
} else col++;
|
||||
return result;
|
||||
}
|
||||
|
||||
protected char peek(int relativePosition) {
|
||||
final int position = pos + relativePosition;
|
||||
if (position >= length) return '\0';
|
||||
return input.charAt(position);
|
||||
}
|
||||
|
||||
protected SourcePosition currentPosition() {
|
||||
return new SourcePosition(pos, row, col);
|
||||
}
|
||||
|
||||
protected Token addToken(HotaruTokenId tokenId) {
|
||||
return addToken(tokenId, buffer.toString());
|
||||
}
|
||||
|
||||
protected Token addToken(HotaruTokenId tokenId, String text) {
|
||||
return addToken(tokenId, text, text.length());
|
||||
}
|
||||
|
||||
protected Token addToken(HotaruTokenId tokenId, String text, int length) {
|
||||
val token = createToken(tokenId, text, length);
|
||||
tokens.add(token);
|
||||
return token;
|
||||
}
|
||||
|
||||
protected Token createToken(HotaruTokenId tokenId) {
|
||||
return createToken(tokenId, buffer.toString(), buffer.length());
|
||||
}
|
||||
|
||||
protected Token createToken(HotaruTokenId tokenId, String text, int length) {
|
||||
return new Token(tokenId, text, length, currentPosition());
|
||||
}
|
||||
|
||||
protected LexerException error(String message) {
|
||||
return new LexerException(currentPosition(), message);
|
||||
}
|
||||
|
||||
protected boolean isHexNumber(char current) {
|
||||
return Character.isDigit(current)
|
||||
|| ('a' <= current && current <= 'f')
|
||||
|| ('A' <= current && current <= 'F');
|
||||
}
|
||||
}
|
@ -0,0 +1,16 @@
|
||||
package com.annimon.hotarufx.lexer;
|
||||
|
||||
import lombok.Data;
|
||||
|
||||
/**
 * Immutable location in the lexer input: absolute character offset plus
 * 1-based row and column.
 *
 * Replaces Lombok's {@code @Data} with explicit members so the value
 * class has no code-generation dependency; the generated API
 * (constructor, getters, equals/hashCode) is preserved.
 */
public class SourcePosition {

    private final int position;
    private final int row;
    private final int column;

    /**
     * @param position absolute character offset in the input
     * @param row      1-based line number
     * @param column   1-based column number
     */
    public SourcePosition(int position, int row, int column) {
        this.position = position;
        this.row = row;
        this.column = column;
    }

    public int getPosition() {
        return position;
    }

    public int getRow() {
        return row;
    }

    public int getColumn() {
        return column;
    }

    @Override
    public boolean equals(Object obj) {
        if (this == obj) return true;
        if (!(obj instanceof SourcePosition)) return false;
        final SourcePosition other = (SourcePosition) obj;
        return position == other.position
                && row == other.row
                && column == other.column;
    }

    @Override
    public int hashCode() {
        int result = 31 + position;
        result = 31 * result + row;
        result = 31 * result + column;
        return result;
    }

    /** Human-readable "[row, column]" form used in error messages. */
    @Override
    public String toString() {
        return "[" + row + ", " + column + "]";
    }
}
|
17
app/src/main/java/com/annimon/hotarufx/lexer/Token.java
Normal file
17
app/src/main/java/com/annimon/hotarufx/lexer/Token.java
Normal file
@ -0,0 +1,17 @@
|
||||
package com.annimon.hotarufx.lexer;
|
||||
|
||||
import lombok.Data;
|
||||
|
||||
/**
 * A single lexeme produced by the lexer.
 *
 * Lombok's {@code @Data} generates the all-final-fields constructor,
 * getters, equals and hashCode; toString is overridden manually below.
 */
@Data
public class Token {

    // Token type (number, word, operator, comment, whitespace, ...).
    private final HotaruTokenId type;
    // Raw text of the token; empty for operator tokens (see HotaruLexer).
    private final String text;
    // Length of the lexeme in the original input (may differ from
    // text.length(), e.g. quoted text includes its delimiters).
    private final int length;
    // Source position captured by the lexer when the token was created.
    private final SourcePosition position;

    /** Debug form: {@code TYPE [row, column] text}. */
    @Override
    public String toString() {
        return type.name() + " " + position + " " + text;
    }
}
|
@ -0,0 +1,160 @@
|
||||
package com.annimon.hotarufx.lexer;
|
||||
|
||||
import com.annimon.hotarufx.exceptions.LexerException;
|
||||
import java.util.List;
|
||||
import org.hamcrest.FeatureMatcher;
|
||||
import org.hamcrest.Matcher;
|
||||
import org.junit.jupiter.api.Test;
|
||||
import static org.hamcrest.MatcherAssert.assertThat;
|
||||
import static org.hamcrest.Matchers.contains;
|
||||
import static org.hamcrest.Matchers.is;
|
||||
import static org.junit.jupiter.api.Assertions.*;
|
||||
|
||||
/**
 * Unit tests for HotaruLexer.
 *
 * Helper naming: {@code t} returns only the significant tokens
 * (those registered via addToken — whitespace and comments excluded),
 * while {@code all} returns every token the lexer produced.
 */
class HotaruLexerTest {

    // Significant tokens only: HotaruLexer.tokenize returns getTokens().
    static List<Token> t(String input) {
        return HotaruLexer.tokenize(input);
    }

    // Every token, including WS and comments (raw tokenize() result).
    static List<Token> all(String input) {
        return new HotaruLexer(input).tokenize();
    }

    // First significant token of the input; fails if there is none.
    static Token single(String input) {
        List<Token> tokens = t(input);
        if (tokens.isEmpty()) {
            throw new AssertionError("Tokens list is empty");
        }
        return tokens.get(0);
    }

    @Test
    void testTokenizeNumbers() {
        assertThat(all("1 1.5 2"), contains(
                tokenId(HotaruTokenId.NUMBER),
                tokenId(HotaruTokenId.WS),
                tokenId(HotaruTokenId.NUMBER),
                tokenId(HotaruTokenId.WS),
                tokenId(HotaruTokenId.NUMBER)
        ));

        // A second dot inside one number literal is an error.
        assertThrows(LexerException.class, () -> {
            all("1.2.3");
        });
    }

    @Test
    void testTokenizeWords() {
        assertThat(all("a b c"), contains(
                tokenId(HotaruTokenId.WORD),
                tokenId(HotaruTokenId.WS),
                tokenId(HotaruTokenId.WORD),
                tokenId(HotaruTokenId.WS),
                tokenId(HotaruTokenId.WORD)
        ));
    }

    @Test
    void testTokenizeText() {
        // Text may span newlines and contain the other quote kind unescaped.
        assertThat(t("1 \" 1\n2 3 '\""), contains(
                tokenId(HotaruTokenId.NUMBER),
                tokenId(HotaruTokenId.TEXT)
        ));
        assertThat(t("1 ' 1\n2 3 ' 2"), contains(
                tokenId(HotaruTokenId.NUMBER),
                tokenId(HotaruTokenId.TEXT),
                tokenId(HotaruTokenId.NUMBER)
        ));
        // Unterminated text reaches end of file.
        assertThrows(LexerException.class, () -> {
            all("' ... ");
        });
        // Escaping the delimiter yields the bare quote; escaping the
        // other quote kind keeps the backslash in the token text.
        assertThat(single("'\\\''").getText(), is("'"));
        assertThat(single("\'\\\"\'").getText(), is("\\\""));
        assertThat(single("\"\\\"\"").getText(), is("\""));
        assertThat(single("\"\\\'\"").getText(), is("\\\'"));
    }

    @Test
    void testTokenizeOperators() {
        assertThat(t("(){}=,."), contains(
                tokenId(HotaruTokenId.LPAREN),
                tokenId(HotaruTokenId.RPAREN),
                tokenId(HotaruTokenId.LBRACE),
                tokenId(HotaruTokenId.RBRACE),
                tokenId(HotaruTokenId.EQ),
                tokenId(HotaruTokenId.COMMA),
                tokenId(HotaruTokenId.DOT)
        ));
    }

    @Test
    void testTokenizeComments() {
        // Comments appear in all() ...
        assertThat(all("1 # 2 3 4"), contains(
                tokenId(HotaruTokenId.NUMBER),
                tokenId(HotaruTokenId.WS),
                tokenId(HotaruTokenId.SINGLE_LINE_COMMENT)
        ));
        // ... but are filtered out of t().
        assertThat(t("1 # 2 3 4\n 2"), contains(
                tokenId(HotaruTokenId.NUMBER),
                tokenId(HotaruTokenId.NUMBER)
        ));
    }

    @Test
    void testTokenizeMultilineComments() {
        assertThat(all("1 /* 2\n3\n4 */"), contains(
                tokenId(HotaruTokenId.NUMBER),
                tokenId(HotaruTokenId.WS),
                tokenId(HotaruTokenId.MULTI_LINE_COMMENT)
        ));
        assertThat(t("1 /* 2 3 4 */ 2"), contains(
                tokenId(HotaruTokenId.NUMBER),
                tokenId(HotaruTokenId.NUMBER)
        ));
        // Unterminated block comment reaches end of file.
        assertThrows(LexerException.class, () -> {
            all("/* ... ");
        });
    }

    @Test
    void testStatements() {
        assertThat(t("A = node()"), contains(
                tokenId(HotaruTokenId.WORD),
                tokenId(HotaruTokenId.EQ),
                tokenId(HotaruTokenId.WORD),
                tokenId(HotaruTokenId.LPAREN),
                tokenId(HotaruTokenId.RPAREN)
        ));
        assertThat(t("B.x = 100"), contains(
                tokenId(HotaruTokenId.WORD),
                tokenId(HotaruTokenId.DOT),
                tokenId(HotaruTokenId.WORD),
                tokenId(HotaruTokenId.EQ),
                tokenId(HotaruTokenId.NUMBER)
        ));
        assertThat(t("G1 = group(A, B)"), contains(
                tokenId(HotaruTokenId.WORD),
                tokenId(HotaruTokenId.EQ),
                tokenId(HotaruTokenId.WORD),
                tokenId(HotaruTokenId.LPAREN),
                tokenId(HotaruTokenId.WORD),
                tokenId(HotaruTokenId.COMMA),
                tokenId(HotaruTokenId.WORD),
                tokenId(HotaruTokenId.RPAREN)
        ));
    }

    // Matcher asserting a token's type equals the given id.
    Matcher<Token> tokenId(HotaruTokenId tokenId) {
        return tokenId(is(tokenId));
    }

    // Hamcrest FeatureMatcher extracting Token.getType() for matching.
    Matcher<Token> tokenId(Matcher<HotaruTokenId> matcher) {
        return new FeatureMatcher<Token, HotaruTokenId>(matcher, "tokenId", "tokenId") {

            @Override
            protected HotaruTokenId featureValueOf(Token actual) {
                return actual.getType();
            }
        };
    }
}
|
Loading…
Reference in New Issue
Block a user