mirror of
https://github.com/aNNiMON/Own-Programming-Language-Tutorial.git
synced 2024-09-20 08:44:20 +03:00
Исправлено зависание лексера на некоторых юникодных символах
This commit is contained in:
parent
1dcabe6d61
commit
3b0f1f009e
@ -120,7 +120,7 @@ public final class Lexer {
|
|||||||
while (pos < length) {
|
while (pos < length) {
|
||||||
final char current = peek(0);
|
final char current = peek(0);
|
||||||
if (Character.isDigit(current)) tokenizeNumber();
|
if (Character.isDigit(current)) tokenizeNumber();
|
||||||
else if (Character.isJavaIdentifierStart(current)) tokenizeWord();
|
else if (isOwnLangIdentifierStart(current)) tokenizeWord();
|
||||||
else if (current == '`') tokenizeExtendedWord();
|
else if (current == '`') tokenizeExtendedWord();
|
||||||
else if (current == '"') tokenizeText();
|
else if (current == '"') tokenizeText();
|
||||||
else if (current == '#') {
|
else if (current == '#') {
|
||||||
@ -208,9 +208,10 @@ public final class Lexer {
|
|||||||
|
|
||||||
private void tokenizeWord() {
|
private void tokenizeWord() {
|
||||||
clearBuffer();
|
clearBuffer();
|
||||||
char current = peek(0);
|
buffer.append(peek(0));
|
||||||
|
char current = next();
|
||||||
while (true) {
|
while (true) {
|
||||||
if (!Character.isLetterOrDigit(current) && (current != '_') && (current != '$')) {
|
if (!isOwnLangIdentifierPart(current)) {
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
buffer.append(current);
|
buffer.append(current);
|
||||||
@ -224,7 +225,7 @@ public final class Lexer {
|
|||||||
addToken(TokenType.WORD, word);
|
addToken(TokenType.WORD, word);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
private void tokenizeExtendedWord() {
|
private void tokenizeExtendedWord() {
|
||||||
next();// skip `
|
next();// skip `
|
||||||
clearBuffer();
|
clearBuffer();
|
||||||
@ -306,6 +307,14 @@ public final class Lexer {
|
|||||||
next(); // *
|
next(); // *
|
||||||
next(); // /
|
next(); // /
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
 * Tells whether {@code current} may begin an identifier (WORD token).
 *
 * <p>Accepted: any character for which {@link Character#isLetter(char)} is
 * true, plus the underscore and the dollar sign. This is narrower than
 * {@link Character#isJavaIdentifierStart(char)}, which also accepts other
 * currency symbols (such as '€') and connecting punctuation.
 *
 * <p>The check works on a single UTF-16 {@code char}, so supplementary
 * code points (surrogate pairs) are not recognised as letters here.
 *
 * @param current the character to classify
 * @return {@code true} if the character is a letter, '_' or '$'
 */
private boolean isOwnLangIdentifierStart(char current) {
|
||||||
|
return (Character.isLetter(current) || (current == '_') || (current == '$'));
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
 * Tells whether {@code current} may appear inside an identifier (WORD token).
 *
 * <p>Accepted: any character for which
 * {@link Character#isLetterOrDigit(char)} is true, plus the underscore and
 * the dollar sign.
 *
 * <p>The check works on a single UTF-16 {@code char}, so supplementary
 * code points (surrogate pairs) are not recognised as letters or digits here.
 *
 * @param current the character to classify
 * @return {@code true} if the character is a letter, a digit, '_' or '$'
 */
private boolean isOwnLangIdentifierPart(char current) {
|
||||||
|
return (Character.isLetterOrDigit(current) || (current == '_') || (current == '$'));
|
||||||
|
}
|
||||||
|
|
||||||
private void clearBuffer() {
|
private void clearBuffer() {
|
||||||
buffer.setLength(0);
|
buffer.setLength(0);
|
||||||
|
@ -122,6 +122,34 @@ public class LexerTest {
|
|||||||
String input = "/* 1234 \n";
|
String input = "/* 1234 \n";
|
||||||
Lexer.tokenize(input);
|
Lexer.tokenize(input);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
 * Tokenizing input that contains a backtick with no closing backtick
 * must fail with a {@code LexerException}.
 */
@Test(expected = LexerException.class)
|
||||||
|
public void testExtendedWordError() {
|
||||||
|
String input = "` 1234";
|
||||||
|
Lexer.tokenize(input);
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
 * A bare '€' in front of an assignment must not produce a token of its own:
 * the expected token stream for {@code "€ = 1"} is just EQ followed by NUMBER.
 */
@Test
|
||||||
|
public void testUnicodeCharacterIdentifier() {
|
||||||
|
String input = "€ = 1";
|
||||||
|
List<Token> expList = list(EQ, NUMBER);
|
||||||
|
List<Token> result = Lexer.tokenize(input);
|
||||||
|
assertTokens(expList, result);
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
 * The same '€' character enclosed in backticks must be tokenized as a
 * WORD: the expected token stream for the input is WORD, EQ, NUMBER.
 */
@Test
|
||||||
|
public void testUnicodeCharacterExtendedWordIdentifier() {
|
||||||
|
String input = "`€` = 1";
|
||||||
|
List<Token> expList = list(WORD, EQ, NUMBER);
|
||||||
|
List<Token> result = Lexer.tokenize(input);
|
||||||
|
assertTokens(expList, result);
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
 * Input consisting of a single '€' and nothing else must tokenize to an
 * empty list (the call has to return rather than loop at end of input).
 */
@Test
|
||||||
|
public void testUnicodeCharacterEOF() {
|
||||||
|
String input = "€";
|
||||||
|
assertTrue(Lexer.tokenize(input).isEmpty());
|
||||||
|
}
|
||||||
|
|
||||||
private static void assertTokens(List<Token> expList, List<Token> result) {
|
private static void assertTokens(List<Token> expList, List<Token> result) {
|
||||||
final int length = expList.size();
|
final int length = expList.size();
|
||||||
|
Loading…
Reference in New Issue
Block a user