mirror of
https://github.com/aNNiMON/Own-Programming-Language-Tutorial.git
synced 2024-09-20 00:34:20 +03:00
Fix incorrect token positions in lexer
This commit is contained in:
parent
fc73bce943
commit
7baf9f6fc8
@ -7,7 +7,8 @@ ext {
|
||||
jline: '2.14.5', // jline:jline
|
||||
|
||||
junit: '5.9.2', // org.junit:junit-bom
|
||||
jmh: '1.37' // org.openjdk.jmh:jmh-core
|
||||
jmh: '1.37', // org.openjdk.jmh:jmh-core
|
||||
assertj: '3.24.2' // org.assertj:assertj-core
|
||||
]
|
||||
}
|
||||
|
||||
|
@ -12,6 +12,7 @@ dependencies {
|
||||
testImplementation platform("org.junit:junit-bom:${versions.junit}")
|
||||
testImplementation "org.junit.jupiter:junit-jupiter-params:${versions.junit}"
|
||||
testImplementation 'org.junit.jupiter:junit-jupiter'
|
||||
testImplementation("org.assertj:assertj-core:${versions.assertj}")
|
||||
testImplementation "org.openjdk.jmh:jmh-core:${versions.jmh}"
|
||||
testImplementation "org.openjdk.jmh:jmh-generator-annprocess:${versions.jmh}"
|
||||
testAnnotationProcessor "org.openjdk.jmh:jmh-generator-annprocess:${versions.jmh}"
|
||||
|
@ -138,10 +138,7 @@ public final class Lexer {
|
||||
else if (isOwnLangIdentifierStart(current)) tokenizeWord();
|
||||
else if (current == '`') tokenizeExtendedWord();
|
||||
else if (current == '"') tokenizeText();
|
||||
else if (current == '#') {
|
||||
next();
|
||||
tokenizeHexNumber(1);
|
||||
}
|
||||
else if (current == '#') tokenizeHexNumber(1);
|
||||
else if (OPERATOR_CHARS.indexOf(current) != -1) {
|
||||
tokenizeOperator();
|
||||
} else {
|
||||
@ -154,10 +151,9 @@ public final class Lexer {
|
||||
|
||||
private void tokenizeNumber() {
|
||||
clearBuffer();
|
||||
final Pos startPos = markPos();
|
||||
char current = peek(0);
|
||||
if (current == '0' && (peek(1) == 'x' || (peek(1) == 'X'))) {
|
||||
next();
|
||||
next();
|
||||
tokenizeHexNumber(2);
|
||||
return;
|
||||
}
|
||||
@ -170,11 +166,15 @@ public final class Lexer {
|
||||
buffer.append(current);
|
||||
current = next();
|
||||
}
|
||||
addToken(TokenType.NUMBER, buffer.toString());
|
||||
addToken(TokenType.NUMBER, buffer.toString(), startPos);
|
||||
}
|
||||
|
||||
private void tokenizeHexNumber(int skipped) {
|
||||
private void tokenizeHexNumber(int skipChars) {
|
||||
clearBuffer();
|
||||
final Pos startPos = markPos();
|
||||
// Skip HEX prefix 0x or #
|
||||
for (int i = 0; i < skipChars; i++) next();
|
||||
|
||||
char current = peek(0);
|
||||
while (isHexNumber(current) || (current == '_')) {
|
||||
if (current != '_') {
|
||||
@ -185,7 +185,7 @@ public final class Lexer {
|
||||
}
|
||||
final int length = buffer.length();
|
||||
if (length > 0) {
|
||||
addToken(TokenType.HEX_NUMBER, buffer.toString());
|
||||
addToken(TokenType.HEX_NUMBER, buffer.toString(), startPos);
|
||||
}
|
||||
}
|
||||
|
||||
@ -210,11 +210,13 @@ public final class Lexer {
|
||||
return;
|
||||
}
|
||||
}
|
||||
|
||||
final Pos startPos = markPos();
|
||||
clearBuffer();
|
||||
while (true) {
|
||||
final String text = buffer.toString();
|
||||
if (!text.isEmpty() && !OPERATORS.containsKey(text + current)) {
|
||||
addToken(OPERATORS.get(text));
|
||||
addToken(OPERATORS.get(text), startPos);
|
||||
return;
|
||||
}
|
||||
buffer.append(current);
|
||||
@ -224,6 +226,7 @@ public final class Lexer {
|
||||
|
||||
private void tokenizeWord() {
|
||||
clearBuffer();
|
||||
final Pos startPos = markPos();
|
||||
buffer.append(peek(0));
|
||||
char current = next();
|
||||
while (true) {
|
||||
@ -236,13 +239,14 @@ public final class Lexer {
|
||||
|
||||
final String word = buffer.toString();
|
||||
if (KEYWORDS.containsKey(word)) {
|
||||
addToken(KEYWORDS.get(word));
|
||||
addToken(KEYWORDS.get(word), startPos);
|
||||
} else {
|
||||
addToken(TokenType.WORD, word);
|
||||
addToken(TokenType.WORD, word, startPos);
|
||||
}
|
||||
}
|
||||
|
||||
private void tokenizeExtendedWord() {
|
||||
final Pos startPos = markPos();
|
||||
next();// skip `
|
||||
clearBuffer();
|
||||
char current = peek(0);
|
||||
@ -254,10 +258,11 @@ public final class Lexer {
|
||||
current = next();
|
||||
}
|
||||
next(); // skip closing `
|
||||
addToken(TokenType.WORD, buffer.toString());
|
||||
addToken(TokenType.WORD, buffer.toString(), startPos);
|
||||
}
|
||||
|
||||
private void tokenizeText() {
|
||||
final Pos startPos = markPos();
|
||||
next();// skip "
|
||||
clearBuffer();
|
||||
char current = peek(0);
|
||||
@ -303,7 +308,7 @@ public final class Lexer {
|
||||
}
|
||||
next(); // skip closing "
|
||||
|
||||
addToken(TokenType.TEXT, buffer.toString());
|
||||
addToken(TokenType.TEXT, buffer.toString(), startPos);
|
||||
}
|
||||
|
||||
private void tokenizeComment() {
|
||||
@ -336,14 +341,19 @@ public final class Lexer {
|
||||
buffer.setLength(0);
|
||||
}
|
||||
|
||||
private Pos markPos() {
|
||||
return new Pos(row, col);
|
||||
}
|
||||
|
||||
private char next() {
|
||||
pos++;
|
||||
final char result = peek(0);
|
||||
if (result == '\n') {
|
||||
row++;
|
||||
col = 1;
|
||||
} else col++;
|
||||
return result;
|
||||
|
||||
pos++;
|
||||
return peek(0);
|
||||
}
|
||||
|
||||
private char peek(int relativePosition) {
|
||||
@ -352,15 +362,15 @@ public final class Lexer {
|
||||
return input.charAt(position);
|
||||
}
|
||||
|
||||
private void addToken(TokenType type) {
|
||||
addToken(type, "");
|
||||
private void addToken(TokenType type, Pos startPos) {
|
||||
addToken(type, "", startPos);
|
||||
}
|
||||
|
||||
private void addToken(TokenType type, String text) {
|
||||
tokens.add(new Token(type, text, new Pos(row, col)));
|
||||
private void addToken(TokenType type, String text, Pos startRow) {
|
||||
tokens.add(new Token(type, text, startRow));
|
||||
}
|
||||
|
||||
private LexerException error(String text) {
|
||||
return new LexerException(new Pos(row, col), text);
|
||||
return new LexerException(markPos(), text);
|
||||
}
|
||||
}
|
||||
|
@ -0,0 +1,44 @@
|
||||
package com.annimon.ownlang.parser;
|
||||
|
||||
import org.junit.jupiter.api.Test;
|
||||
import java.util.List;
|
||||
import static com.annimon.ownlang.parser.TokenType.*;
|
||||
import static org.assertj.core.api.Assertions.assertThat;
|
||||
import static org.assertj.core.api.Assertions.tuple;
|
||||
|
||||
class LexerPositionsTest {
|
||||
|
||||
@Test
|
||||
void testMultiline() {
|
||||
String input = """
|
||||
x = 123
|
||||
y = "abc"
|
||||
""".stripIndent();
|
||||
List<Token> result = Lexer.tokenize(input);
|
||||
|
||||
assertThat(result)
|
||||
.hasSize(6)
|
||||
.extracting(s -> s.pos().row(), s -> s.pos().col(), Token::type)
|
||||
.containsExactly(
|
||||
tuple(1, 1, WORD), tuple(1, 3, EQ), tuple(1, 5, NUMBER),
|
||||
tuple(2, 1, WORD), tuple(2, 3, EQ), tuple(2, 5, TEXT)
|
||||
);
|
||||
}
|
||||
|
||||
@Test
|
||||
void testMultilineText() {
|
||||
String input = """
|
||||
text = "line1
|
||||
line2
|
||||
line3"
|
||||
""".stripIndent();
|
||||
List<Token> result = Lexer.tokenize(input);
|
||||
|
||||
assertThat(result)
|
||||
.hasSize(3)
|
||||
.extracting(s -> s.pos().row(), s -> s.pos().col(), Token::type)
|
||||
.containsExactly(
|
||||
tuple(1, 1, WORD), tuple(1, 6, EQ), tuple(1, 8, TEXT)
|
||||
);
|
||||
}
|
||||
}
|
Loading…
Reference in New Issue
Block a user