mirror of
https://github.com/aNNiMON/Own-Programming-Language-Tutorial.git
synced 2024-09-20 00:34:20 +03:00
Fix incorrect token positions in lexer
This commit is contained in:
parent
fc73bce943
commit
7baf9f6fc8
@ -7,7 +7,8 @@ ext {
|
|||||||
jline: '2.14.5', // jline:jline
|
jline: '2.14.5', // jline:jline
|
||||||
|
|
||||||
junit: '5.9.2', // org.junit:junit-bom
|
junit: '5.9.2', // org.junit:junit-bom
|
||||||
jmh: '1.37' // org.openjdk.jmh:jmh-core
|
jmh: '1.37', // org.openjdk.jmh:jmh-core
|
||||||
|
assertj: '3.24.2' // org.assertj:assertj-core
|
||||||
]
|
]
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -12,6 +12,7 @@ dependencies {
|
|||||||
testImplementation platform("org.junit:junit-bom:${versions.junit}")
|
testImplementation platform("org.junit:junit-bom:${versions.junit}")
|
||||||
testImplementation "org.junit.jupiter:junit-jupiter-params:${versions.junit}"
|
testImplementation "org.junit.jupiter:junit-jupiter-params:${versions.junit}"
|
||||||
testImplementation 'org.junit.jupiter:junit-jupiter'
|
testImplementation 'org.junit.jupiter:junit-jupiter'
|
||||||
|
testImplementation("org.assertj:assertj-core:${versions.assertj}")
|
||||||
testImplementation "org.openjdk.jmh:jmh-core:${versions.jmh}"
|
testImplementation "org.openjdk.jmh:jmh-core:${versions.jmh}"
|
||||||
testImplementation "org.openjdk.jmh:jmh-generator-annprocess:${versions.jmh}"
|
testImplementation "org.openjdk.jmh:jmh-generator-annprocess:${versions.jmh}"
|
||||||
testAnnotationProcessor "org.openjdk.jmh:jmh-generator-annprocess:${versions.jmh}"
|
testAnnotationProcessor "org.openjdk.jmh:jmh-generator-annprocess:${versions.jmh}"
|
||||||
|
@ -138,10 +138,7 @@ public final class Lexer {
|
|||||||
else if (isOwnLangIdentifierStart(current)) tokenizeWord();
|
else if (isOwnLangIdentifierStart(current)) tokenizeWord();
|
||||||
else if (current == '`') tokenizeExtendedWord();
|
else if (current == '`') tokenizeExtendedWord();
|
||||||
else if (current == '"') tokenizeText();
|
else if (current == '"') tokenizeText();
|
||||||
else if (current == '#') {
|
else if (current == '#') tokenizeHexNumber(1);
|
||||||
next();
|
|
||||||
tokenizeHexNumber(1);
|
|
||||||
}
|
|
||||||
else if (OPERATOR_CHARS.indexOf(current) != -1) {
|
else if (OPERATOR_CHARS.indexOf(current) != -1) {
|
||||||
tokenizeOperator();
|
tokenizeOperator();
|
||||||
} else {
|
} else {
|
||||||
@ -154,10 +151,9 @@ public final class Lexer {
|
|||||||
|
|
||||||
private void tokenizeNumber() {
|
private void tokenizeNumber() {
|
||||||
clearBuffer();
|
clearBuffer();
|
||||||
|
final Pos startPos = markPos();
|
||||||
char current = peek(0);
|
char current = peek(0);
|
||||||
if (current == '0' && (peek(1) == 'x' || (peek(1) == 'X'))) {
|
if (current == '0' && (peek(1) == 'x' || (peek(1) == 'X'))) {
|
||||||
next();
|
|
||||||
next();
|
|
||||||
tokenizeHexNumber(2);
|
tokenizeHexNumber(2);
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
@ -170,11 +166,15 @@ public final class Lexer {
|
|||||||
buffer.append(current);
|
buffer.append(current);
|
||||||
current = next();
|
current = next();
|
||||||
}
|
}
|
||||||
addToken(TokenType.NUMBER, buffer.toString());
|
addToken(TokenType.NUMBER, buffer.toString(), startPos);
|
||||||
}
|
}
|
||||||
|
|
||||||
private void tokenizeHexNumber(int skipped) {
|
private void tokenizeHexNumber(int skipChars) {
|
||||||
clearBuffer();
|
clearBuffer();
|
||||||
|
final Pos startPos = markPos();
|
||||||
|
// Skip HEX prefix 0x or #
|
||||||
|
for (int i = 0; i < skipChars; i++) next();
|
||||||
|
|
||||||
char current = peek(0);
|
char current = peek(0);
|
||||||
while (isHexNumber(current) || (current == '_')) {
|
while (isHexNumber(current) || (current == '_')) {
|
||||||
if (current != '_') {
|
if (current != '_') {
|
||||||
@ -185,7 +185,7 @@ public final class Lexer {
|
|||||||
}
|
}
|
||||||
final int length = buffer.length();
|
final int length = buffer.length();
|
||||||
if (length > 0) {
|
if (length > 0) {
|
||||||
addToken(TokenType.HEX_NUMBER, buffer.toString());
|
addToken(TokenType.HEX_NUMBER, buffer.toString(), startPos);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -210,11 +210,13 @@ public final class Lexer {
|
|||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
final Pos startPos = markPos();
|
||||||
clearBuffer();
|
clearBuffer();
|
||||||
while (true) {
|
while (true) {
|
||||||
final String text = buffer.toString();
|
final String text = buffer.toString();
|
||||||
if (!text.isEmpty() && !OPERATORS.containsKey(text + current)) {
|
if (!text.isEmpty() && !OPERATORS.containsKey(text + current)) {
|
||||||
addToken(OPERATORS.get(text));
|
addToken(OPERATORS.get(text), startPos);
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
buffer.append(current);
|
buffer.append(current);
|
||||||
@ -224,6 +226,7 @@ public final class Lexer {
|
|||||||
|
|
||||||
private void tokenizeWord() {
|
private void tokenizeWord() {
|
||||||
clearBuffer();
|
clearBuffer();
|
||||||
|
final Pos startPos = markPos();
|
||||||
buffer.append(peek(0));
|
buffer.append(peek(0));
|
||||||
char current = next();
|
char current = next();
|
||||||
while (true) {
|
while (true) {
|
||||||
@ -236,13 +239,14 @@ public final class Lexer {
|
|||||||
|
|
||||||
final String word = buffer.toString();
|
final String word = buffer.toString();
|
||||||
if (KEYWORDS.containsKey(word)) {
|
if (KEYWORDS.containsKey(word)) {
|
||||||
addToken(KEYWORDS.get(word));
|
addToken(KEYWORDS.get(word), startPos);
|
||||||
} else {
|
} else {
|
||||||
addToken(TokenType.WORD, word);
|
addToken(TokenType.WORD, word, startPos);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
private void tokenizeExtendedWord() {
|
private void tokenizeExtendedWord() {
|
||||||
|
final Pos startPos = markPos();
|
||||||
next();// skip `
|
next();// skip `
|
||||||
clearBuffer();
|
clearBuffer();
|
||||||
char current = peek(0);
|
char current = peek(0);
|
||||||
@ -254,10 +258,11 @@ public final class Lexer {
|
|||||||
current = next();
|
current = next();
|
||||||
}
|
}
|
||||||
next(); // skip closing `
|
next(); // skip closing `
|
||||||
addToken(TokenType.WORD, buffer.toString());
|
addToken(TokenType.WORD, buffer.toString(), startPos);
|
||||||
}
|
}
|
||||||
|
|
||||||
private void tokenizeText() {
|
private void tokenizeText() {
|
||||||
|
final Pos startPos = markPos();
|
||||||
next();// skip "
|
next();// skip "
|
||||||
clearBuffer();
|
clearBuffer();
|
||||||
char current = peek(0);
|
char current = peek(0);
|
||||||
@ -303,7 +308,7 @@ public final class Lexer {
|
|||||||
}
|
}
|
||||||
next(); // skip closing "
|
next(); // skip closing "
|
||||||
|
|
||||||
addToken(TokenType.TEXT, buffer.toString());
|
addToken(TokenType.TEXT, buffer.toString(), startPos);
|
||||||
}
|
}
|
||||||
|
|
||||||
private void tokenizeComment() {
|
private void tokenizeComment() {
|
||||||
@ -336,14 +341,19 @@ public final class Lexer {
|
|||||||
buffer.setLength(0);
|
buffer.setLength(0);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
private Pos markPos() {
|
||||||
|
return new Pos(row, col);
|
||||||
|
}
|
||||||
|
|
||||||
private char next() {
|
private char next() {
|
||||||
pos++;
|
|
||||||
final char result = peek(0);
|
final char result = peek(0);
|
||||||
if (result == '\n') {
|
if (result == '\n') {
|
||||||
row++;
|
row++;
|
||||||
col = 1;
|
col = 1;
|
||||||
} else col++;
|
} else col++;
|
||||||
return result;
|
|
||||||
|
pos++;
|
||||||
|
return peek(0);
|
||||||
}
|
}
|
||||||
|
|
||||||
private char peek(int relativePosition) {
|
private char peek(int relativePosition) {
|
||||||
@ -352,15 +362,15 @@ public final class Lexer {
|
|||||||
return input.charAt(position);
|
return input.charAt(position);
|
||||||
}
|
}
|
||||||
|
|
||||||
private void addToken(TokenType type) {
|
private void addToken(TokenType type, Pos startPos) {
|
||||||
addToken(type, "");
|
addToken(type, "", startPos);
|
||||||
}
|
}
|
||||||
|
|
||||||
private void addToken(TokenType type, String text) {
|
private void addToken(TokenType type, String text, Pos startRow) {
|
||||||
tokens.add(new Token(type, text, new Pos(row, col)));
|
tokens.add(new Token(type, text, startRow));
|
||||||
}
|
}
|
||||||
|
|
||||||
private LexerException error(String text) {
|
private LexerException error(String text) {
|
||||||
return new LexerException(new Pos(row, col), text);
|
return new LexerException(markPos(), text);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -0,0 +1,44 @@
|
|||||||
|
package com.annimon.ownlang.parser;
|
||||||
|
|
||||||
|
import org.junit.jupiter.api.Test;
|
||||||
|
import java.util.List;
|
||||||
|
import static com.annimon.ownlang.parser.TokenType.*;
|
||||||
|
import static org.assertj.core.api.Assertions.assertThat;
|
||||||
|
import static org.assertj.core.api.Assertions.tuple;
|
||||||
|
|
||||||
|
class LexerPositionsTest {
|
||||||
|
|
||||||
|
@Test
|
||||||
|
void testMultiline() {
|
||||||
|
String input = """
|
||||||
|
x = 123
|
||||||
|
y = "abc"
|
||||||
|
""".stripIndent();
|
||||||
|
List<Token> result = Lexer.tokenize(input);
|
||||||
|
|
||||||
|
assertThat(result)
|
||||||
|
.hasSize(6)
|
||||||
|
.extracting(s -> s.pos().row(), s -> s.pos().col(), Token::type)
|
||||||
|
.containsExactly(
|
||||||
|
tuple(1, 1, WORD), tuple(1, 3, EQ), tuple(1, 5, NUMBER),
|
||||||
|
tuple(2, 1, WORD), tuple(2, 3, EQ), tuple(2, 5, TEXT)
|
||||||
|
);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
void testMultilineText() {
|
||||||
|
String input = """
|
||||||
|
text = "line1
|
||||||
|
line2
|
||||||
|
line3"
|
||||||
|
""".stripIndent();
|
||||||
|
List<Token> result = Lexer.tokenize(input);
|
||||||
|
|
||||||
|
assertThat(result)
|
||||||
|
.hasSize(3)
|
||||||
|
.extracting(s -> s.pos().row(), s -> s.pos().col(), Token::type)
|
||||||
|
.containsExactly(
|
||||||
|
tuple(1, 1, WORD), tuple(1, 6, EQ), tuple(1, 8, TEXT)
|
||||||
|
);
|
||||||
|
}
|
||||||
|
}
|
Loading…
Reference in New Issue
Block a user