Исправлено зависание лексера на некоторых юникодных символах

This commit is contained in:
Victor 2016-06-29 12:39:04 +03:00
parent 1dcabe6d61
commit 3b0f1f009e
2 changed files with 41 additions and 4 deletions

View File

@ -120,7 +120,7 @@ public final class Lexer {
while (pos < length) { while (pos < length) {
final char current = peek(0); final char current = peek(0);
if (Character.isDigit(current)) tokenizeNumber(); if (Character.isDigit(current)) tokenizeNumber();
else if (Character.isJavaIdentifierStart(current)) tokenizeWord(); else if (isOwnLangIdentifierStart(current)) tokenizeWord();
else if (current == '`') tokenizeExtendedWord(); else if (current == '`') tokenizeExtendedWord();
else if (current == '"') tokenizeText(); else if (current == '"') tokenizeText();
else if (current == '#') { else if (current == '#') {
@ -208,9 +208,10 @@ public final class Lexer {
private void tokenizeWord() { private void tokenizeWord() {
clearBuffer(); clearBuffer();
char current = peek(0); buffer.append(peek(0));
char current = next();
while (true) { while (true) {
if (!Character.isLetterOrDigit(current) && (current != '_') && (current != '$')) { if (!isOwnLangIdentifierPart(current)) {
break; break;
} }
buffer.append(current); buffer.append(current);
@ -307,6 +308,14 @@ public final class Lexer {
next(); // / next(); // /
} }
/**
 * Tells whether a character may begin an identifier.
 *
 * <p>Accepted characters are an underscore, a dollar sign, or anything for
 * which {@link Character#isLetter(char)} returns {@code true}.
 *
 * @param current the character to classify
 * @return {@code true} if {@code current} can start an identifier
 */
private boolean isOwnLangIdentifierStart(char current) {
    if (current == '_' || current == '$') {
        return true;
    }
    return Character.isLetter(current);
}
/**
 * Tells whether a character may appear inside an identifier.
 *
 * <p>Accepted characters are an underscore, a dollar sign, or anything for
 * which {@link Character#isLetterOrDigit(char)} returns {@code true}.
 *
 * @param current the character to classify
 * @return {@code true} if {@code current} can continue an identifier
 */
private boolean isOwnLangIdentifierPart(char current) {
    if (current == '_' || current == '$') {
        return true;
    }
    return Character.isLetterOrDigit(current);
}
private void clearBuffer() { private void clearBuffer() {
buffer.setLength(0); buffer.setLength(0);
} }

View File

@ -123,6 +123,34 @@ public class LexerTest {
Lexer.tokenize(input); Lexer.tokenize(input);
} }
// Tokenizing input with an opening backtick that is never closed is expected
// to fail with a LexerException.
@Test(expected = LexerException.class)
public void testExtendedWordError() {
    Lexer.tokenize("` 1234");
}
// A bare euro sign outside backticks is expected to yield no token of its own:
// only the EQ and NUMBER tokens should come back.
@Test
public void testUnicodeCharacterIdentifier() {
    assertTokens(list(EQ, NUMBER), Lexer.tokenize("€ = 1"));
}
// The same euro sign wrapped in backticks is expected to be tokenized as a
// WORD, followed by EQ and NUMBER.
@Test
public void testUnicodeCharacterExtendedWordIdentifier() {
    assertTokens(list(WORD, EQ, NUMBER), Lexer.tokenize("`€` = 1"));
}
// Checks that tokenizing the input below produces an empty token list.
// NOTE(review): the test name refers to a Unicode character, but the literal
// visible here is an empty string — the original character may have been lost
// when this page was extracted; confirm against the repository.
@Test
public void testUnicodeCharacterEOF() {
String input = "";
assertTrue(Lexer.tokenize(input).isEmpty());
}
private static void assertTokens(List<Token> expList, List<Token> result) { private static void assertTokens(List<Token> expList, List<Token> result) {
final int length = expList.size(); final int length = expList.size();
assertEquals(length, result.size()); assertEquals(length, result.size());