diff --git a/src/com/annimon/ownlang/parser/Lexer.java b/src/com/annimon/ownlang/parser/Lexer.java
index ebb9428..8ad26ca 100644
--- a/src/com/annimon/ownlang/parser/Lexer.java
+++ b/src/com/annimon/ownlang/parser/Lexer.java
@@ -120,7 +120,7 @@ public final class Lexer {
         while (pos < length) {
             final char current = peek(0);
             if (Character.isDigit(current)) tokenizeNumber();
-            else if (Character.isJavaIdentifierStart(current)) tokenizeWord();
+            else if (isOwnLangIdentifierStart(current)) tokenizeWord();
             else if (current == '`') tokenizeExtendedWord();
             else if (current == '"') tokenizeText();
             else if (current == '#') {
@@ -208,9 +208,10 @@ public final class Lexer {
 
     private void tokenizeWord() {
         clearBuffer();
-        char current = peek(0);
+        buffer.append(peek(0));
+        char current = next();
         while (true) {
-            if (!Character.isLetterOrDigit(current) && (current != '_') && (current != '$')) {
+            if (!isOwnLangIdentifierPart(current)) {
                 break;
             }
             buffer.append(current);
@@ -224,7 +225,7 @@
             addToken(TokenType.WORD, word);
         }
     }
-    
+
     private void tokenizeExtendedWord() {
         next();// skip `
         clearBuffer();
@@ -306,6 +307,14 @@
         next(); // *
         next(); // /
     }
+
+    private boolean isOwnLangIdentifierStart(char current) {
+        return (Character.isLetter(current) || (current == '_') || (current == '$'));
+    }
+
+    private boolean isOwnLangIdentifierPart(char current) {
+        return (Character.isLetterOrDigit(current) || (current == '_') || (current == '$'));
+    }
 
     private void clearBuffer() {
         buffer.setLength(0);
diff --git a/test/com/annimon/ownlang/parser/LexerTest.java b/test/com/annimon/ownlang/parser/LexerTest.java
index 6633a23..6e9b5eb 100644
--- a/test/com/annimon/ownlang/parser/LexerTest.java
+++ b/test/com/annimon/ownlang/parser/LexerTest.java
@@ -122,6 +122,34 @@ public class LexerTest {
         String input = "/* 1234 \n";
         Lexer.tokenize(input);
     }
+
+    @Test(expected = LexerException.class)
+    public void testExtendedWordError() {
+        String input = "` 1234";
+        Lexer.tokenize(input);
+    }
+
+    @Test
+    public void testUnicodeCharacterIdentifier() {
+        String input = "€ = 1";
+        List<TokenType> expList = list(EQ, NUMBER);
+        List<Token> result = Lexer.tokenize(input);
+        assertTokens(expList, result);
+    }
+
+    @Test
+    public void testUnicodeCharacterExtendedWordIdentifier() {
+        String input = "`€` = 1";
+        List<TokenType> expList = list(WORD, EQ, NUMBER);
+        List<Token> result = Lexer.tokenize(input);
+        assertTokens(expList, result);
+    }
+
+    @Test
+    public void testUnicodeCharacterEOF() {
+        String input = "€";
+        assertTrue(Lexer.tokenize(input).isEmpty());
+    }
 
     private static void assertTokens(List<TokenType> expList, List<Token> result) {
         final int length = expList.size();
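
For context (not part of the patch): the JDK's `Character.isJavaIdentifierStart` accepts currency symbols such as '€', while the old word-body check only accepted letters, digits, '_' and '$'. A character like '€' therefore passed the start check but was never consumed by the old `tokenizeWord()` loop, which appears to leave the main loop stuck at the same position. The sketch below, with a hypothetical class name, simply prints the relevant `Character` predicate results; it assumes nothing about OwnLang beyond what the diff shows.

```java
// Illustrative sketch only, not part of the patch; the class name is hypothetical.
public class IdentifierPredicateDemo {
    public static void main(String[] args) {
        char euro = '€'; // U+20AC, Unicode category Sc (currency symbol)

        // true: Java identifier starts include currency symbols
        System.out.println(Character.isJavaIdentifierStart(euro));

        // false: the old word-body check (letter/digit, '_', '$') rejects it,
        // so the old tokenizeWord() broke out before consuming the character
        System.out.println(Character.isLetterOrDigit(euro) || euro == '_' || euro == '$');

        // false: an isOwnLangIdentifierStart-style check (letter, '_', '$')
        // never dispatches to tokenizeWord() for '€' in the first place
        System.out.println(Character.isLetter(euro) || euro == '_' || euro == '$');
    }
}
```

This matches the new tests: a bare `€` now tokenizes to nothing rather than a WORD, while the backquoted extended-word form still yields a WORD token. The restructured `tokenizeWord()` also appends the already-validated first character and calls `next()` before checking `isOwnLangIdentifierPart`, so the lexer always makes progress.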