mirror of
https://github.com/aNNiMON/Own-Programming-Language-Tutorial.git
synced 2024-09-20 00:34:20 +03:00
More strict lexer, fixed HEX numbers and quote escaping
This commit is contained in:
parent
15c277d145
commit
2c0b19eb0a
@ -133,21 +133,20 @@ public final class Lexer {
|
|||||||
while (pos < length) {
|
while (pos < length) {
|
||||||
// Fast path for skipping whitespaces
|
// Fast path for skipping whitespaces
|
||||||
while (Character.isWhitespace(peek(0))) {
|
while (Character.isWhitespace(peek(0))) {
|
||||||
next();
|
skip();
|
||||||
}
|
}
|
||||||
|
|
||||||
final char current = peek(0);
|
final char current = peek(0);
|
||||||
if (Character.isDigit(current)) tokenizeNumber();
|
if (isNumber(current)) tokenizeNumber();
|
||||||
else if (isOwnLangIdentifierStart(current)) tokenizeWord();
|
else if (isOwnLangIdentifierStart(current)) tokenizeWord();
|
||||||
else if (current == '`') tokenizeExtendedWord();
|
|
||||||
else if (current == '"') tokenizeText();
|
else if (current == '"') tokenizeText();
|
||||||
|
else if (OPERATOR_CHARS.indexOf(current) != -1) tokenizeOperator();
|
||||||
|
else if (Character.isWhitespace(current)) skip();
|
||||||
|
else if (current == '`') tokenizeExtendedWord();
|
||||||
else if (current == '#') tokenizeHexNumber(1);
|
else if (current == '#') tokenizeHexNumber(1);
|
||||||
else if (OPERATOR_CHARS.indexOf(current) != -1) {
|
else if (current == ';') skip(); // ignore semicolon
|
||||||
tokenizeOperator();
|
else if (current == '\0') break;
|
||||||
} else {
|
else throw error("Unknown token " + current);
|
||||||
// whitespaces
|
|
||||||
next();
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
return tokens;
|
return tokens;
|
||||||
}
|
}
|
||||||
@ -163,7 +162,7 @@ public final class Lexer {
|
|||||||
boolean hasDot = false;
|
boolean hasDot = false;
|
||||||
while (true) {
|
while (true) {
|
||||||
if (current == '.') {
|
if (current == '.') {
|
||||||
if (hasDot) throw error("Invalid float number");
|
if (hasDot) throw error("Invalid float number " + buffer);
|
||||||
hasDot = true;
|
hasDot = true;
|
||||||
} else if (!Character.isDigit(current)) {
|
} else if (!Character.isDigit(current)) {
|
||||||
break;
|
break;
|
||||||
@ -178,7 +177,7 @@ public final class Lexer {
|
|||||||
clearBuffer();
|
clearBuffer();
|
||||||
final Pos startPos = markPos();
|
final Pos startPos = markPos();
|
||||||
// Skip HEX prefix 0x or #
|
// Skip HEX prefix 0x or #
|
||||||
for (int i = 0; i < skipChars; i++) next();
|
for (int i = 0; i < skipChars; i++) skip();
|
||||||
|
|
||||||
char current = peek(0);
|
char current = peek(0);
|
||||||
while (isHexNumber(current) || (current == '_')) {
|
while (isHexNumber(current) || (current == '_')) {
|
||||||
@ -188,13 +187,18 @@ public final class Lexer {
|
|||||||
}
|
}
|
||||||
current = next();
|
current = next();
|
||||||
}
|
}
|
||||||
if (!buffer.isEmpty()) {
|
|
||||||
addToken(TokenType.HEX_NUMBER, buffer.toString(), startPos);
|
if (buffer.isEmpty()) throw error("Empty HEX value");
|
||||||
}
|
if (peek(-1) == '_') throw error("HEX value cannot end with _");
|
||||||
|
addToken(TokenType.HEX_NUMBER, buffer.toString(), startPos);
|
||||||
|
}
|
||||||
|
|
||||||
|
private static boolean isNumber(char current) {
|
||||||
|
return ('0' <= current && current <= '9');
|
||||||
}
|
}
|
||||||
|
|
||||||
private static boolean isHexNumber(char current) {
|
private static boolean isHexNumber(char current) {
|
||||||
return Character.isDigit(current)
|
return ('0' <= current && current <= '9')
|
||||||
|| ('a' <= current && current <= 'f')
|
|| ('a' <= current && current <= 'f')
|
||||||
|| ('A' <= current && current <= 'F');
|
|| ('A' <= current && current <= 'F');
|
||||||
}
|
}
|
||||||
@ -203,13 +207,9 @@ public final class Lexer {
|
|||||||
char current = peek(0);
|
char current = peek(0);
|
||||||
if (current == '/') {
|
if (current == '/') {
|
||||||
if (peek(1) == '/') {
|
if (peek(1) == '/') {
|
||||||
next();
|
|
||||||
next();
|
|
||||||
tokenizeComment();
|
tokenizeComment();
|
||||||
return;
|
return;
|
||||||
} else if (peek(1) == '*') {
|
} else if (peek(1) == '*') {
|
||||||
next();
|
|
||||||
next();
|
|
||||||
tokenizeMultilineComment();
|
tokenizeMultilineComment();
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
@ -247,7 +247,7 @@ public final class Lexer {
|
|||||||
|
|
||||||
private void tokenizeExtendedWord() {
|
private void tokenizeExtendedWord() {
|
||||||
final Pos startPos = markPos();
|
final Pos startPos = markPos();
|
||||||
next();// skip `
|
skip();// skip `
|
||||||
clearBuffer();
|
clearBuffer();
|
||||||
char current = peek(0);
|
char current = peek(0);
|
||||||
while (current != '`') {
|
while (current != '`') {
|
||||||
@ -256,19 +256,20 @@ public final class Lexer {
|
|||||||
buffer.append(current);
|
buffer.append(current);
|
||||||
current = next();
|
current = next();
|
||||||
}
|
}
|
||||||
next(); // skip closing `
|
skip(); // skip closing `
|
||||||
addToken(TokenType.WORD, buffer.toString(), startPos);
|
addToken(TokenType.WORD, buffer.toString(), startPos);
|
||||||
}
|
}
|
||||||
|
|
||||||
private void tokenizeText() {
|
private void tokenizeText() {
|
||||||
final Pos startPos = markPos();
|
final Pos startPos = markPos();
|
||||||
next();// skip "
|
skip();// skip "
|
||||||
clearBuffer();
|
clearBuffer();
|
||||||
char current = peek(0);
|
char current = peek(0);
|
||||||
while (true) {
|
while (true) {
|
||||||
if (current == '\\') {
|
if (current == '\\') {
|
||||||
current = next();
|
current = next();
|
||||||
switch (current) {
|
switch (current) {
|
||||||
|
case '\\': current = next(); buffer.append('\\'); continue;
|
||||||
case '"': current = next(); buffer.append('"'); continue;
|
case '"': current = next(); buffer.append('"'); continue;
|
||||||
case '0': current = next(); buffer.append('\0'); continue;
|
case '0': current = next(); buffer.append('\0'); continue;
|
||||||
case 'b': current = next(); buffer.append('\b'); continue;
|
case 'b': current = next(); buffer.append('\b'); continue;
|
||||||
@ -305,12 +306,14 @@ public final class Lexer {
|
|||||||
buffer.append(current);
|
buffer.append(current);
|
||||||
current = next();
|
current = next();
|
||||||
}
|
}
|
||||||
next(); // skip closing "
|
skip(); // skip closing "
|
||||||
|
|
||||||
addToken(TokenType.TEXT, buffer.toString(), startPos);
|
addToken(TokenType.TEXT, buffer.toString(), startPos);
|
||||||
}
|
}
|
||||||
|
|
||||||
private void tokenizeComment() {
|
private void tokenizeComment() {
|
||||||
|
skip(); // /
|
||||||
|
skip(); // /
|
||||||
char current = peek(0);
|
char current = peek(0);
|
||||||
while ("\r\n\0".indexOf(current) == -1) {
|
while ("\r\n\0".indexOf(current) == -1) {
|
||||||
current = next();
|
current = next();
|
||||||
@ -318,13 +321,15 @@ public final class Lexer {
|
|||||||
}
|
}
|
||||||
|
|
||||||
private void tokenizeMultilineComment() {
|
private void tokenizeMultilineComment() {
|
||||||
|
skip(); // /
|
||||||
|
skip(); // *
|
||||||
char current = peek(0);
|
char current = peek(0);
|
||||||
while (current != '*' || peek(1) != '/') {
|
while (current != '*' || peek(1) != '/') {
|
||||||
if (current == '\0') throw error("Reached end of file while parsing multiline comment");
|
if (current == '\0') throw error("Reached end of file while parsing multiline comment");
|
||||||
current = next();
|
current = next();
|
||||||
}
|
}
|
||||||
next(); // *
|
skip(); // *
|
||||||
next(); // /
|
skip(); // /
|
||||||
}
|
}
|
||||||
|
|
||||||
private boolean isOwnLangIdentifierStart(char current) {
|
private boolean isOwnLangIdentifierStart(char current) {
|
||||||
@ -332,7 +337,7 @@ public final class Lexer {
|
|||||||
}
|
}
|
||||||
|
|
||||||
private boolean isOwnLangIdentifierPart(char current) {
|
private boolean isOwnLangIdentifierPart(char current) {
|
||||||
return (Character.isLetterOrDigit(current) || (current == '_') || (current == '$'));
|
return isOwnLangIdentifierStart(current) || isNumber(current);
|
||||||
}
|
}
|
||||||
|
|
||||||
private void clearBuffer() {
|
private void clearBuffer() {
|
||||||
@ -342,18 +347,22 @@ public final class Lexer {
|
|||||||
private Pos markPos() {
|
private Pos markPos() {
|
||||||
return new Pos(row, col);
|
return new Pos(row, col);
|
||||||
}
|
}
|
||||||
|
|
||||||
private char next() {
|
private void skip() {
|
||||||
final char result = peek(0);
|
if (pos >= length) return;
|
||||||
|
final char result = input.charAt(pos);
|
||||||
if (result == '\n') {
|
if (result == '\n') {
|
||||||
row++;
|
row++;
|
||||||
col = 1;
|
col = 1;
|
||||||
} else col++;
|
} else col++;
|
||||||
|
|
||||||
pos++;
|
pos++;
|
||||||
return peek(0);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
private char next() {
|
||||||
|
skip();
|
||||||
|
return peek(0);
|
||||||
|
}
|
||||||
|
|
||||||
private char peek(int relativePosition) {
|
private char peek(int relativePosition) {
|
||||||
final int position = pos + relativePosition;
|
final int position = pos + relativePosition;
|
||||||
if (position >= length) return '\0';
|
if (position >= length) return '\0';
|
||||||
|
@ -31,6 +31,26 @@ class LexerPositionsTest {
|
|||||||
text = "line1
|
text = "line1
|
||||||
line2
|
line2
|
||||||
line3"
|
line3"
|
||||||
|
a = 3
|
||||||
|
""".stripIndent();
|
||||||
|
List<Token> result = Lexer.tokenize(input);
|
||||||
|
|
||||||
|
assertThat(result)
|
||||||
|
.hasSize(6)
|
||||||
|
.extracting(s -> s.pos().row(), s -> s.pos().col(), Token::type)
|
||||||
|
.containsExactly(
|
||||||
|
tuple(1, 1, WORD), tuple(1, 6, EQ), tuple(1, 8, TEXT),
|
||||||
|
tuple(4, 1, WORD), tuple(4, 3, EQ), tuple(4, 5, NUMBER)
|
||||||
|
);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
void testMultilineComment() {
|
||||||
|
String input = """
|
||||||
|
/*
|
||||||
|
line2
|
||||||
|
line*/a =/*
|
||||||
|
*/3
|
||||||
""".stripIndent();
|
""".stripIndent();
|
||||||
List<Token> result = Lexer.tokenize(input);
|
List<Token> result = Lexer.tokenize(input);
|
||||||
|
|
||||||
@ -38,7 +58,7 @@ class LexerPositionsTest {
|
|||||||
.hasSize(3)
|
.hasSize(3)
|
||||||
.extracting(s -> s.pos().row(), s -> s.pos().col(), Token::type)
|
.extracting(s -> s.pos().row(), s -> s.pos().col(), Token::type)
|
||||||
.containsExactly(
|
.containsExactly(
|
||||||
tuple(1, 1, WORD), tuple(1, 6, EQ), tuple(1, 8, TEXT)
|
tuple(3, 9, WORD), tuple(3, 11, EQ), tuple(4, 3, NUMBER)
|
||||||
);
|
);
|
||||||
}
|
}
|
||||||
}
|
}
|
@ -2,181 +2,106 @@ package com.annimon.ownlang.parser;
|
|||||||
|
|
||||||
import com.annimon.ownlang.exceptions.LexerException;
|
import com.annimon.ownlang.exceptions.LexerException;
|
||||||
import org.junit.jupiter.api.Test;
|
import org.junit.jupiter.api.Test;
|
||||||
|
import org.junit.jupiter.params.ParameterizedTest;
|
||||||
import java.util.ArrayList;
|
import org.junit.jupiter.params.provider.Arguments;
|
||||||
|
import org.junit.jupiter.params.provider.MethodSource;
|
||||||
|
import java.io.IOException;
|
||||||
import java.util.List;
|
import java.util.List;
|
||||||
|
import java.util.stream.Stream;
|
||||||
import static com.annimon.ownlang.parser.TokenType.*;
|
import static com.annimon.ownlang.parser.TokenType.*;
|
||||||
import static org.junit.jupiter.api.Assertions.*;
|
import static org.assertj.core.api.Assertions.assertThat;
|
||||||
|
import static org.assertj.core.api.Assertions.assertThatThrownBy;
|
||||||
|
import static org.junit.jupiter.api.Assertions.assertEquals;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
*
|
*
|
||||||
* @author aNNiMON
|
* @author aNNiMON
|
||||||
*/
|
*/
|
||||||
public class LexerTest {
|
public class LexerTest {
|
||||||
|
|
||||||
|
public static Stream<Arguments> validData() {
|
||||||
|
return LexerValidDataProvider.getAll();
|
||||||
|
}
|
||||||
|
|
||||||
|
public static Stream<Arguments> invalidData() {
|
||||||
|
return Stream.<Arguments>builder()
|
||||||
|
.add(Arguments.of("Wrong float point", "3.14.15"))
|
||||||
|
.add(Arguments.of("Wrong HEX number", "0Xf7_p6_s5"))
|
||||||
|
.add(Arguments.of("HEX number ends with _", "0Xf7_"))
|
||||||
|
.add(Arguments.of("Empty rest of HEX number", "#"))
|
||||||
|
.add(Arguments.of("Unicode character identifier", "€ = 1"))
|
||||||
|
.add(Arguments.of("Unicode character only", "€"))
|
||||||
|
.add(Arguments.of("String error", "\"1\"\""))
|
||||||
|
.add(Arguments.of("Multiline comment EOF", "/* 1234 \n"))
|
||||||
|
.add(Arguments.of("Extended word EOF", "` 1234"))
|
||||||
|
.build();
|
||||||
|
}
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
public void testNumbers() {
|
public void testNumbers() {
|
||||||
String input = "0 3.1415 0xCAFEBABE 0Xf7_d6_c5 #FFFF #";
|
String input = "0 3.1415 0xCAFEBABE 0Xf7_d6_c5 #FFFF";
|
||||||
List<Token> expList = list(NUMBER, NUMBER, HEX_NUMBER, HEX_NUMBER, HEX_NUMBER);
|
|
||||||
List<Token> result = Lexer.tokenize(input);
|
List<Token> result = Lexer.tokenize(input);
|
||||||
assertTokens(expList, result);
|
assertTokens(result, NUMBER, NUMBER, HEX_NUMBER, HEX_NUMBER, HEX_NUMBER);
|
||||||
assertEquals("0", result.get(0).text());
|
assertThat(result)
|
||||||
assertEquals("3.1415", result.get(1).text());
|
.extracting(Token::text)
|
||||||
assertEquals("CAFEBABE", result.get(2).text());
|
.containsExactly("0", "3.1415", "CAFEBABE", "f7d6c5", "FFFF");
|
||||||
assertEquals("f7d6c5", result.get(3).text());
|
|
||||||
}
|
|
||||||
|
|
||||||
@Test
|
|
||||||
public void testNumbersError() {
|
|
||||||
final String input = "3.14.15 0Xf7_p6_s5";
|
|
||||||
assertThrows(LexerException.class, () -> Lexer.tokenize(input));
|
|
||||||
}
|
|
||||||
|
|
||||||
@Test
|
|
||||||
public void testArithmetic() {
|
|
||||||
String input = "x = -1 + 2 * 3 % 4 / 5";
|
|
||||||
List<Token> expList = list(WORD, EQ, MINUS, NUMBER, PLUS, NUMBER, STAR, NUMBER, PERCENT, NUMBER, SLASH, NUMBER);
|
|
||||||
List<Token> result = Lexer.tokenize(input);
|
|
||||||
assertTokens(expList, result);
|
|
||||||
assertEquals("x", result.get(0).text());
|
|
||||||
}
|
|
||||||
|
|
||||||
@Test
|
|
||||||
public void testKeywords() {
|
|
||||||
String input = "if else while for include";
|
|
||||||
List<Token> expList = list(IF, ELSE, WHILE, FOR, INCLUDE);
|
|
||||||
List<Token> result = Lexer.tokenize(input);
|
|
||||||
assertTokens(expList, result);
|
|
||||||
}
|
|
||||||
|
|
||||||
@Test
|
|
||||||
public void testWord() {
|
|
||||||
String input = "if bool include \"text\n\ntext\"";
|
|
||||||
List<Token> expList = list(IF, WORD, INCLUDE, TEXT);
|
|
||||||
List<Token> result = Lexer.tokenize(input);
|
|
||||||
assertTokens(expList, result);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
public void testString() {
|
public void testString() {
|
||||||
String input = "\"1\\\"2\"";
|
String input = "\"1\\\"2\"";
|
||||||
List<Token> expList = list(TEXT);
|
|
||||||
List<Token> result = Lexer.tokenize(input);
|
List<Token> result = Lexer.tokenize(input);
|
||||||
assertTokens(expList, result);
|
assertTokens(result, TEXT);
|
||||||
assertEquals("1\"2", result.get(0).text());
|
assertEquals("1\"2", result.get(0).text());
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void testEscapeString() {
|
||||||
|
String input = """
|
||||||
|
"\\\\/\\\\"
|
||||||
|
""".stripIndent();
|
||||||
|
List<Token> result = Lexer.tokenize(input);
|
||||||
|
assertTokens(result, TEXT);
|
||||||
|
assertEquals("\\/\\", result.get(0).text());
|
||||||
|
}
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
public void testEmptyString() {
|
public void testEmptyString() {
|
||||||
String input = "\"\"";
|
String input = "\"\"";
|
||||||
List<Token> expList = list(TEXT);
|
|
||||||
List<Token> result = Lexer.tokenize(input);
|
List<Token> result = Lexer.tokenize(input);
|
||||||
assertTokens(expList, result);
|
assertTokens(result, TEXT);
|
||||||
assertEquals("", result.get(0).text());
|
assertEquals("", result.get(0).text());
|
||||||
}
|
}
|
||||||
|
|
||||||
@Test
|
|
||||||
public void testStringError() {
|
|
||||||
String input = "\"1\"\"";
|
|
||||||
List<Token> expList = list(TEXT);
|
|
||||||
assertThrows(LexerException.class, () -> {
|
|
||||||
List<Token> result = Lexer.tokenize(input);
|
|
||||||
assertTokens(expList, result);
|
|
||||||
assertEquals("1", result.get(0).text());
|
|
||||||
});
|
|
||||||
}
|
|
||||||
|
|
||||||
@Test
|
|
||||||
public void testOperators() {
|
|
||||||
String input = "=+-*/%<>!&|";
|
|
||||||
List<Token> expList = list(EQ, PLUS, MINUS, STAR, SLASH, PERCENT, LT, GT, EXCL, AMP, BAR);
|
|
||||||
List<Token> result = Lexer.tokenize(input);
|
|
||||||
assertTokens(expList, result);
|
|
||||||
}
|
|
||||||
|
|
||||||
@Test
|
|
||||||
public void testOperators2Char() {
|
|
||||||
String input = "== != <= >= && || ==+ >=- ->";
|
|
||||||
List<Token> expList = list(EQEQ, EXCLEQ, LTEQ, GTEQ, AMPAMP, BARBAR,
|
|
||||||
EQEQ, PLUS, GTEQ, MINUS, MINUS, GT);
|
|
||||||
List<Token> result = Lexer.tokenize(input);
|
|
||||||
assertTokens(expList, result);
|
|
||||||
}
|
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
public void testComments() {
|
public void testComments() {
|
||||||
String input = "// 1234 \n /* */ 123 /* \n 12345 \n\n\n */";
|
String input = "// 1234 \n /* */ 123 /* \n 12345 \n\n\n */";
|
||||||
List<Token> expList = list(NUMBER);
|
|
||||||
List<Token> result = Lexer.tokenize(input);
|
List<Token> result = Lexer.tokenize(input);
|
||||||
assertTokens(expList, result);
|
assertTokens(result, NUMBER);
|
||||||
assertEquals("123", result.get(0).text());
|
assertEquals("123", result.get(0).text());
|
||||||
}
|
}
|
||||||
|
|
||||||
@Test
|
@ParameterizedTest
|
||||||
public void testComments2() {
|
@MethodSource("validData")
|
||||||
String input = "// /* 1234 \n */";
|
public void testValidInput(String name, String input, List<TokenType> tokenTypes) throws IOException {
|
||||||
List<Token> expList = list(STAR, SLASH);
|
|
||||||
List<Token> result = Lexer.tokenize(input);
|
List<Token> result = Lexer.tokenize(input);
|
||||||
assertTokens(expList, result);
|
assertThat(result)
|
||||||
}
|
.hasSize(tokenTypes.size())
|
||||||
|
.extracting(Token::type)
|
||||||
@Test
|
.containsAll(tokenTypes);
|
||||||
public void testCommentsError() {
|
|
||||||
final String input = "/* 1234 \n";
|
|
||||||
assertThrows(LexerException.class, () -> Lexer.tokenize(input));
|
|
||||||
}
|
}
|
||||||
|
|
||||||
@Test
|
@ParameterizedTest
|
||||||
public void testExtendedWordError() {
|
@MethodSource("invalidData")
|
||||||
final String input = "` 1234";
|
public void testInvalidInput(String name, String input) throws IOException {
|
||||||
assertThrows(LexerException.class, () -> Lexer.tokenize(input));
|
assertThatThrownBy(() -> Lexer.tokenize(input))
|
||||||
}
|
.isInstanceOf(LexerException.class);
|
||||||
|
|
||||||
@Test
|
|
||||||
public void testUnicodeCharacterIdentifier() {
|
|
||||||
String input = "€ = 1";
|
|
||||||
List<Token> expList = list(EQ, NUMBER);
|
|
||||||
List<Token> result = Lexer.tokenize(input);
|
|
||||||
assertTokens(expList, result);
|
|
||||||
}
|
|
||||||
|
|
||||||
@Test
|
|
||||||
public void testUnicodeCharacterExtendedWordIdentifier() {
|
|
||||||
String input = "`€` = 1";
|
|
||||||
List<Token> expList = list(WORD, EQ, NUMBER);
|
|
||||||
List<Token> result = Lexer.tokenize(input);
|
|
||||||
assertTokens(expList, result);
|
|
||||||
}
|
|
||||||
|
|
||||||
@Test
|
|
||||||
public void testUnicodeCharacterEOF() {
|
|
||||||
String input = "€";
|
|
||||||
assertTrue(Lexer.tokenize(input).isEmpty());
|
|
||||||
}
|
}
|
||||||
|
|
||||||
private static void assertTokens(List<Token> expList, List<Token> result) {
|
private static void assertTokens(List<Token> result, TokenType... tokenTypes) {
|
||||||
final int length = expList.size();
|
assertThat(result)
|
||||||
assertEquals(length, result.size());
|
.hasSize(tokenTypes.length)
|
||||||
for (int i = 0; i < length; i++) {
|
.extracting(Token::type)
|
||||||
assertEquals(expList.get(i).type(), result.get(i).type());
|
.containsExactly(tokenTypes);
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
private static List<Token> list(TokenType... types) {
|
|
||||||
final List<Token> list = new ArrayList<>();
|
|
||||||
for (TokenType t : types) {
|
|
||||||
list.add(token(t));
|
|
||||||
}
|
|
||||||
return list;
|
|
||||||
}
|
|
||||||
|
|
||||||
private static Token token(TokenType type) {
|
|
||||||
return token(type, "", new Pos(0, 0));
|
|
||||||
}
|
|
||||||
|
|
||||||
private static Token token(TokenType type, String text, Pos pos) {
|
|
||||||
return new Token(type, text, pos);
|
|
||||||
}
|
|
||||||
|
|
||||||
}
|
}
|
||||||
|
@ -0,0 +1,91 @@
|
|||||||
|
package com.annimon.ownlang.parser;
|
||||||
|
|
||||||
|
import org.junit.jupiter.params.provider.Arguments;
|
||||||
|
import java.util.ArrayList;
|
||||||
|
import java.util.List;
|
||||||
|
import java.util.stream.Stream;
|
||||||
|
import static com.annimon.ownlang.parser.TokenType.*;
|
||||||
|
|
||||||
|
public class LexerValidDataProvider {
|
||||||
|
|
||||||
|
public static Stream<Arguments> getAll() {
|
||||||
|
final var result = new ArrayList<Arguments>();
|
||||||
|
result.addAll(numbers());
|
||||||
|
result.addAll(keywords());
|
||||||
|
result.addAll(words());
|
||||||
|
result.addAll(operators());
|
||||||
|
result.addAll(comments());
|
||||||
|
result.addAll(other());
|
||||||
|
result.addAll(notSupported());
|
||||||
|
return result.stream();
|
||||||
|
}
|
||||||
|
|
||||||
|
private static List<Arguments> numbers() {
|
||||||
|
return List.of(
|
||||||
|
Arguments.of("Numbers",
|
||||||
|
"12 7.8 90000000 10.03",
|
||||||
|
List.of(NUMBER, NUMBER, NUMBER, NUMBER)),
|
||||||
|
Arguments.of("Hex numbers",
|
||||||
|
"#FF 0xCA 0x12fb 0xFF",
|
||||||
|
List.of(HEX_NUMBER, HEX_NUMBER, HEX_NUMBER, HEX_NUMBER))
|
||||||
|
);
|
||||||
|
}
|
||||||
|
|
||||||
|
private static List<Arguments> keywords() {
|
||||||
|
return List.of(
|
||||||
|
Arguments.of("Keywords",
|
||||||
|
"if else while for include",
|
||||||
|
List.of(IF, ELSE, WHILE, FOR, INCLUDE))
|
||||||
|
);
|
||||||
|
}
|
||||||
|
|
||||||
|
private static List<Arguments> words() {
|
||||||
|
return List.of(
|
||||||
|
Arguments.of("Word",
|
||||||
|
"if bool include \"text\n\ntext\"",
|
||||||
|
List.of(IF, WORD, INCLUDE, TEXT)),
|
||||||
|
Arguments.of("Extended word identifier",
|
||||||
|
"`€` = 1",
|
||||||
|
List.of(WORD, EQ, NUMBER))
|
||||||
|
);
|
||||||
|
}
|
||||||
|
|
||||||
|
private static List<Arguments> operators() {
|
||||||
|
return List.of(
|
||||||
|
Arguments.of("Operators",
|
||||||
|
"=+-*/%<>!&|",
|
||||||
|
List.of(EQ, PLUS, MINUS, STAR, SLASH, PERCENT, LT, GT, EXCL, AMP, BAR)),
|
||||||
|
Arguments.of("Operators 2 characters",
|
||||||
|
"== != <= >= && || ==+ >=- ->",
|
||||||
|
List.of(EQEQ, EXCLEQ, LTEQ, GTEQ, AMPAMP, BARBAR,
|
||||||
|
EQEQ, PLUS, GTEQ, MINUS, MINUS, GT))
|
||||||
|
);
|
||||||
|
}
|
||||||
|
|
||||||
|
private static List<Arguments> comments() {
|
||||||
|
return List.of(
|
||||||
|
Arguments.of("Comments",
|
||||||
|
"// /* 1234 \n */",
|
||||||
|
List.of(STAR, SLASH))
|
||||||
|
);
|
||||||
|
}
|
||||||
|
|
||||||
|
private static List<Arguments> other() {
|
||||||
|
return List.of(
|
||||||
|
Arguments.of("Arithmetic",
|
||||||
|
"x = -1 + 2 * 3 % 4 / 5",
|
||||||
|
List.of(WORD, EQ, MINUS, NUMBER, PLUS, NUMBER, STAR, NUMBER, PERCENT, NUMBER, SLASH, NUMBER))
|
||||||
|
);
|
||||||
|
}
|
||||||
|
|
||||||
|
private static List<Arguments> notSupported() {
|
||||||
|
return List.of(
|
||||||
|
Arguments.of("Float notation",
|
||||||
|
"7e8",
|
||||||
|
List.of(NUMBER, WORD)),
|
||||||
|
Arguments.of("Float hex numbers",
|
||||||
|
"0Xf7p6",
|
||||||
|
List.of(HEX_NUMBER, WORD))
|
||||||
|
);
|
||||||
|
}
|
||||||
|
}
|
Loading…
Reference in New Issue
Block a user