From 664c9a4f315f3be4a0f24976d6a64579457d1ea6 Mon Sep 17 00:00:00 2001 From: Victor Date: Thu, 15 Nov 2018 18:10:05 +0200 Subject: [PATCH] Initial --- pom.xml | 25 + .../com/annimon/parboiled/calc/AstNode.java | 10 + .../com/annimon/parboiled/calc/BlockNode.java | 36 + .../annimon/parboiled/calc/NumberNode.java | 25 + .../com/annimon/parboiled/calc/OkaNode.java | 19 + .../com/annimon/parboiled/calc/OkaParser.java | 73 ++ .../com/annimon/parboiled/calc/OriNode.java | 28 + .../org/parboiled/examples/abc/AbcParser.java | 50 + .../java/org/parboiled/examples/abc/Main.java | 49 + .../calculators/CalculatorParser.java | 75 ++ .../calculators/CalculatorParser0.java | 63 + .../calculators/CalculatorParser1.java | 103 ++ .../calculators/CalculatorParser2.java | 151 +++ .../calculators/CalculatorParser3.java | 183 +++ .../calculators/CalculatorParser4.java | 109 ++ .../examples/indenting/IndentNode.java | 32 + .../parboiled/examples/indenting/Main.java | 30 + .../examples/indenting/SimpleIndent.java | 61 + .../java/AbstractJavaCharacterMatcher.java | 58 + .../examples/java/JavaLetterMatcher.java | 29 + .../java/JavaLetterOrDigitMatcher.java | 29 + .../parboiled/examples/java/JavaParser.java | 1141 +++++++++++++++++ .../examples/java/JavaParserProfiler.java | 46 + .../org/parboiled/examples/java/Main.java | 171 +++ .../examples/sparql/SparqlParser.java | 796 ++++++++++++ .../org/parboiled/examples/time/Main.java | 48 + .../parboiled/examples/time/TimeParser.java | 103 ++ 27 files changed, 3543 insertions(+) create mode 100644 pom.xml create mode 100644 src/main/java/com/annimon/parboiled/calc/AstNode.java create mode 100644 src/main/java/com/annimon/parboiled/calc/BlockNode.java create mode 100644 src/main/java/com/annimon/parboiled/calc/NumberNode.java create mode 100644 src/main/java/com/annimon/parboiled/calc/OkaNode.java create mode 100644 src/main/java/com/annimon/parboiled/calc/OkaParser.java create mode 100644 src/main/java/com/annimon/parboiled/calc/OriNode.java 
create mode 100644 src/main/java/org/parboiled/examples/abc/AbcParser.java create mode 100644 src/main/java/org/parboiled/examples/abc/Main.java create mode 100644 src/main/java/org/parboiled/examples/calculators/CalculatorParser.java create mode 100644 src/main/java/org/parboiled/examples/calculators/CalculatorParser0.java create mode 100644 src/main/java/org/parboiled/examples/calculators/CalculatorParser1.java create mode 100644 src/main/java/org/parboiled/examples/calculators/CalculatorParser2.java create mode 100644 src/main/java/org/parboiled/examples/calculators/CalculatorParser3.java create mode 100644 src/main/java/org/parboiled/examples/calculators/CalculatorParser4.java create mode 100644 src/main/java/org/parboiled/examples/indenting/IndentNode.java create mode 100644 src/main/java/org/parboiled/examples/indenting/Main.java create mode 100644 src/main/java/org/parboiled/examples/indenting/SimpleIndent.java create mode 100644 src/main/java/org/parboiled/examples/java/AbstractJavaCharacterMatcher.java create mode 100644 src/main/java/org/parboiled/examples/java/JavaLetterMatcher.java create mode 100644 src/main/java/org/parboiled/examples/java/JavaLetterOrDigitMatcher.java create mode 100644 src/main/java/org/parboiled/examples/java/JavaParser.java create mode 100644 src/main/java/org/parboiled/examples/java/JavaParserProfiler.java create mode 100644 src/main/java/org/parboiled/examples/java/Main.java create mode 100644 src/main/java/org/parboiled/examples/sparql/SparqlParser.java create mode 100644 src/main/java/org/parboiled/examples/time/Main.java create mode 100644 src/main/java/org/parboiled/examples/time/TimeParser.java diff --git a/pom.xml b/pom.xml new file mode 100644 index 0000000..2e3c71c --- /dev/null +++ b/pom.xml @@ -0,0 +1,25 @@ + + + 4.0.0 + com.annimon + ParboiledExample + 1.0-SNAPSHOT + jar + + UTF-8 + 1.8 + 1.8 + + + + org.parboiled + parboiled-core + 1.1.7 + + + org.parboiled + parboiled-java + 1.1.7 + + + \ No newline at end of file 
diff --git a/src/main/java/com/annimon/parboiled/calc/AstNode.java b/src/main/java/com/annimon/parboiled/calc/AstNode.java
new file mode 100644
index 0000000..2d87b80
--- /dev/null
+++ b/src/main/java/com/annimon/parboiled/calc/AstNode.java
@@ -0,0 +1,10 @@
+package com.annimon.parboiled.calc;
+
+/**
+ * A node of the AST built by {@link OkaParser}; a program is run by calling {@link #eval()}.
+ * @author aNNiMON
+ */
+public interface AstNode {
+    /** Evaluates this node and returns its integer result. */
+    int eval();
+}
diff --git a/src/main/java/com/annimon/parboiled/calc/BlockNode.java b/src/main/java/com/annimon/parboiled/calc/BlockNode.java
new file mode 100644
index 0000000..f61d8e4
--- /dev/null
+++ b/src/main/java/com/annimon/parboiled/calc/BlockNode.java
@@ -0,0 +1,36 @@
+package com.annimon.parboiled.calc;
+
+import java.util.Arrays;
+import java.util.List;
+import org.parboiled.trees.ImmutableTreeNode;
+
+/**
+ * A sequence of statements; evaluating the block evaluates each non-null child in order.
+ * @author aNNiMON
+ */
+public final class BlockNode extends ImmutableTreeNode implements AstNode {
+
+    private final List<AstNode> nodes;
+    /** Wraps a single statement and prints that statement's simple class name. */
+    public BlockNode(AstNode node) {
+        this(Arrays.asList(node));
+        System.out.println(node.getClass().getSimpleName());
+    }
+    /** Wraps the given statements, keeping their order. */
+    public BlockNode(AstNode... nodes) {
+        this(Arrays.asList(nodes));
+    }
+    /** Uses {@code nodes} both as the tree children and as the evaluation order. */
+    public BlockNode(List<AstNode> nodes) {
+        super(nodes);
+        this.nodes = nodes;
+    }
+    /** Evaluates every non-null child in order; the block itself always yields 0. */
+    @Override
+    public int eval() {
+        nodes.stream()
+                .filter(n -> n != null)
+                .forEach(AstNode::eval);
+        return 0;
+    }
+}
diff --git a/src/main/java/com/annimon/parboiled/calc/NumberNode.java b/src/main/java/com/annimon/parboiled/calc/NumberNode.java
new file mode 100644
index 0000000..f9bb697
--- /dev/null
+++ b/src/main/java/com/annimon/parboiled/calc/NumberNode.java
@@ -0,0 +1,25 @@
+package com.annimon.parboiled.calc;
+
+import org.parboiled.trees.ImmutableTreeNode;
+
+/**
+ * Leaf node holding an integer literal.
+ * @author aNNiMON
+ */
+public final class NumberNode extends ImmutableTreeNode implements AstNode {
+
+    private final int value;
+
+    public NumberNode(int value) {
+        this.value = value;
+    }
+
+    public int getValue() {
+        return value;
+    }
+    /** Returns the literal value. */
+    @Override
+    public int eval() {
+        return value;
+    }
+}
diff --git a/src/main/java/com/annimon/parboiled/calc/OkaNode.java b/src/main/java/com/annimon/parboiled/calc/OkaNode.java
new file mode 100644
index 0000000..5217c69
--- /dev/null
+++ b/src/main/java/com/annimon/parboiled/calc/OkaNode.java
@@ -0,0 +1,19 @@
+package com.annimon.parboiled.calc;
+
+import org.parboiled.trees.ImmutableTreeNode;
+
+/**
+ * Statement node created for the "ока" keyword; evaluating it prints a fixed line.
+ * @author aNNiMON
+ */
+public final class OkaNode extends ImmutableTreeNode implements AstNode {
+
+    public OkaNode() {
+    }
+    /** Prints the statement's message to standard output and returns 0. */
+    @Override
+    public int eval() {
+        System.out.println("Я Ока");
+        return 0;
+    }
+}
diff --git a/src/main/java/com/annimon/parboiled/calc/OkaParser.java b/src/main/java/com/annimon/parboiled/calc/OkaParser.java
new file mode 100644
index 0000000..05930ac
--- /dev/null
+++ b/src/main/java/com/annimon/parboiled/calc/OkaParser.java
@@ -0,0 +1,73 @@
+package com.annimon.parboiled.calc;
+
+import java.util.ArrayList;
+import java.util.List;
+import org.parboiled.BaseParser;
+import static org.parboiled.BaseParser.EOI;
+import org.parboiled.Parboiled;
+import org.parboiled.Rule;
+import static org.parboiled.errors.ErrorUtils.printParseErrors;
+import org.parboiled.parserunners.RecoveringParseRunner;
+import org.parboiled.support.ParsingResult;
+import org.parboiled.support.Var;
+public class OkaParser extends BaseParser<AstNode> {
+    // Grammar: Program <- Block EOI, Block <- Statement+, Statement <- Oka / Ori, Ori <- "ори " Number
+    public static void main(String[] args) {
+        final OkaParser parser = Parboiled.createParser(OkaParser.class);
+        ParsingResult<AstNode> result = new RecoveringParseRunner<AstNode>(parser.Program())
+                .run("ок ори 3 ори 2 ока ока ори 2");
+        if (result.hasErrors()) {
+            System.out.println("\nParse Errors:\n" + printParseErrors(result));
+        }
+
+        AstNode node = result.resultValue;
+        if (node != null) node.eval(); // resultValue is null when the parse left nothing on the value stack
+    }
+
+    public Rule Program() {
+        return Sequence(Block(), EOI);
+    }
+
+    Rule Block() {
+        // Rule methods are cached, so a plain local list would be shared by every parse; a Var is re-initialised per run.
+        Var<List<AstNode>> nodes = new Var<List<AstNode>>(new ArrayList<AstNode>());
+        return Sequence(
+                OneOrMore(Statement(), nodes.get().add(pop())),
+                push(new BlockNode(nodes.get()))
+        );
+    }
+
+    Rule Statement() {
+        return FirstOf(Oka(), Ori());
+    }
+    // Matches the keyword "ока " (case-insensitive, trailing blank required) and pushes an OkaNode.
+    Rule Oka() {
+        return Sequence(
+                IgnoreCase("ока "),
+                push(new OkaNode())
+        );
+    }
+    // Matches "ори " followed by a number and replaces the pushed NumberNode with an OriNode wrapping it.
+    Rule Ori() {
+        return Sequence(
+                Sequence(IgnoreCase("ори "), Number()),
+                push(new OriNode(pop()))
+        );
+    }
+    // Matches one or more digits, pushes the parsed NumberNode, then skips optional trailing blanks.
+    Rule Number() {
+        return Sequence(
+                OneOrMore(Digit()),
+                push(new NumberNode(Integer.parseInt(matchOrDefault("0")))),
+                WhiteSpace()
+        );
+    }
+
+    Rule Digit() {
+        return CharRange('0', '9');
+    }
+
+    Rule WhiteSpace() {
+        return ZeroOrMore(AnyOf(" \t\f"));
+    }
+}
diff --git a/src/main/java/com/annimon/parboiled/calc/OriNode.java b/src/main/java/com/annimon/parboiled/calc/OriNode.java
new file mode 100644
index 0000000..8cc1052
--- /dev/null
+++ b/src/main/java/com/annimon/parboiled/calc/OriNode.java
@@ -0,0 +1,28 @@
+package com.annimon.parboiled.calc;
+
+import java.util.Arrays;
+import java.util.stream.IntStream;
+import org.parboiled.trees.ImmutableTreeNode;
+
+/**
+ * Statement node that prints "ору" as many times as its child node evaluates to.
+ * @author aNNiMON
+ */
+public final class OriNode extends ImmutableTreeNode implements AstNode {
+
+    private final AstNode node;
+
+    public OriNode(AstNode node) {
+        super(Arrays.asList(node));
+        this.node = node;
+    }
+    /** Prints one line per repetition (none when the count is not positive) and returns 0. */
+    @Override
+    public int eval() {
+        if (node == null) return 0;
+
+        IntStream.range(0, node.eval())
+                .forEach(i -> System.out.println("ору"));
+        return 0;
+    }
+}
diff --git a/src/main/java/org/parboiled/examples/abc/AbcParser.java b/src/main/java/org/parboiled/examples/abc/AbcParser.java
new file mode 100644
index 0000000..a648c9d
--- /dev/null
+++ b/src/main/java/org/parboiled/examples/abc/AbcParser.java
@@ -0,0 +1,50 @@
+/*
+ * Copyright (C) 2009-2011 Mathias Doenitz
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.parboiled.examples.abc;
+
+import org.parboiled.BaseParser;
+import org.parboiled.Rule;
+import org.parboiled.annotations.BuildParseTree;
+
+/**
+ * A parser for the classic non-context free language example { a^n b^n c^n : n >= 1 }
+ * S <- &(A c) a+ B !(a|b|c)
+ * A <- a A? b
+ * B <- b B? 
c
+ */
+@SuppressWarnings({"InfiniteRecursion"})
+@BuildParseTree
+public class AbcParser extends BaseParser {
+    // S <- &(A c) a+ B !(a|b|c) : the Test/TestNot calls are lookaheads and consume no input
+    public Rule S() {
+        return Sequence(
+                Test(A(), 'c'),
+                OneOrMore('a'),
+                B(),
+                TestNot(AnyOf("abc"))
+        );
+    }
+    // A <- a A? b
+    public Rule A() {
+        return Sequence('a', Optional(A()), 'b');
+    }
+    // B <- b B? c
+    public Rule B() {
+        return Sequence('b', Optional(B()), 'c');
+    }
+
+}
diff --git a/src/main/java/org/parboiled/examples/abc/Main.java b/src/main/java/org/parboiled/examples/abc/Main.java
new file mode 100644
index 0000000..b89b041
--- /dev/null
+++ b/src/main/java/org/parboiled/examples/abc/Main.java
@@ -0,0 +1,49 @@
+/*
+ * Copyright (C) 2009-2011 Mathias Doenitz
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.parboiled.examples.abc;
+
+import org.parboiled.Parboiled;
+import org.parboiled.common.StringUtils;
+import org.parboiled.errors.ErrorUtils;
+import org.parboiled.parserunners.RecoveringParseRunner;
+import static org.parboiled.support.ParseTreeUtils.printNodeTree;
+
+import org.parboiled.parserunners.ReportingParseRunner;
+import org.parboiled.support.ParsingResult;
+
+import java.util.Scanner;
+
+public class Main {
+    // Reads expressions until an empty line or end of input; prints the first parse error or the parse tree.
+    public static void main(String[] args) {
+        AbcParser parser = Parboiled.createParser(AbcParser.class);
+        Scanner scanner = new Scanner(System.in); // shared: a new Scanner per line would drop buffered input
+        while (true) {
+            System.out.print("Enter an a^n b^n c^n expression (single RETURN to exit)!\n");
+            String input = scanner.hasNextLine() ? scanner.nextLine() : ""; // end of input ends the loop
+            if (StringUtils.isEmpty(input)) break;
+
+            ParsingResult<?> result = new ReportingParseRunner(parser.S()).run(input);
+
+            if (!result.parseErrors.isEmpty())
+                System.out.println(ErrorUtils.printParseError(result.parseErrors.get(0)));
+            else
+                System.out.println(printNodeTree(result) + '\n');
+        }
+    }
+
+}
\ No newline at end of file
diff --git a/src/main/java/org/parboiled/examples/calculators/CalculatorParser.java b/src/main/java/org/parboiled/examples/calculators/CalculatorParser.java
new file mode 100644
index 0000000..aaf9600
--- /dev/null
+++ b/src/main/java/org/parboiled/examples/calculators/CalculatorParser.java
@@ -0,0 +1,75 @@
+/*
+ * Copyright (C) 2009-2011 Mathias Doenitz
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.parboiled.examples.calculators;
+
+import org.parboiled.BaseParser;
+import org.parboiled.Parboiled;
+import org.parboiled.parserunners.RecoveringParseRunner;
+import org.parboiled.Rule;
+import org.parboiled.common.StringUtils;
+import org.parboiled.support.ParsingResult;
+import org.parboiled.support.ToStringFormatter;
+import org.parboiled.trees.GraphNode;
+
+import java.util.Scanner;
+
+import static org.parboiled.errors.ErrorUtils.printParseErrors;
+import static org.parboiled.support.ParseTreeUtils.printNodeTree;
+import static org.parboiled.trees.GraphUtils.printTree;
+
+/**
+ * Base class of all calculator parsers in the org.parboiled.examples.calculators package.
+ * Simply adds the public static main entry point.
+ *
+ * @param <V> the type of the main value object created by the parser
+ */
+public abstract class CalculatorParser<V> extends BaseParser<V> {
+
+    public abstract Rule InputLine();
+    /** Interactive loop: parses each entered line with a parser of the given class, then prints its value and tree. */
+    @SuppressWarnings({"unchecked"})
+    public static <V, P extends CalculatorParser<V>> void main(Class<P> parserClass) {
+        CalculatorParser<V> parser = Parboiled.createParser(parserClass);
+        Scanner scanner = new Scanner(System.in); // shared: a new Scanner per line would drop buffered input
+        while (true) {
+            System.out.print("Enter a calculators expression (single RETURN to exit)!\n");
+            String input = scanner.hasNextLine() ? scanner.nextLine() : ""; // end of input ends the loop
+            if (StringUtils.isEmpty(input)) break;
+
+            ParsingResult<?> result = new RecoveringParseRunner(parser.InputLine()).run(input);
+
+            if (result.hasErrors()) {
+                System.out.println("\nParse Errors:\n" + printParseErrors(result));
+            }
+
+            Object value = result.parseTreeRoot.getValue();
+            if (value != null) {
+                String str = value.toString();
+                int ix = str.indexOf('|');
+                if (ix >= 0) str = str.substring(ix + 2); // extract value part of AST node toString()
+                System.out.println(input + " = " + str + '\n');
+            }
+            if (value instanceof GraphNode) {
+                System.out.println("\nAbstract Syntax Tree:\n" +
+                        printTree((GraphNode) value, new ToStringFormatter(null)) + '\n');
+            } else {
+                System.out.println("\nParse Tree:\n" + printNodeTree(result) + '\n');
+            }
+        }
+    }
+
+}
diff --git a/src/main/java/org/parboiled/examples/calculators/CalculatorParser0.java b/src/main/java/org/parboiled/examples/calculators/CalculatorParser0.java
new file mode 100644
index 0000000..8634f8e
--- /dev/null
+++ b/src/main/java/org/parboiled/examples/calculators/CalculatorParser0.java
@@ -0,0 +1,63 @@
+/*
+ * Copyright (C) 2009-2011 Mathias Doenitz
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.parboiled.examples.calculators;
+
+import org.parboiled.Rule;
+import org.parboiled.annotations.BuildParseTree;
+
+/**
+ * A basic calculator parser without any actions.
+ */
+@BuildParseTree
+public class CalculatorParser0 extends CalculatorParser {
+    // InputLine <- Expression EOI
+    @Override
+    public Rule InputLine() {
+        return Sequence(Expression(), EOI);
+    }
+    // Expression <- Term (('+' / '-') Term)*
+    Rule Expression() {
+        return Sequence(Term(), ZeroOrMore(AnyOf("+-"), Term()));
+    }
+    // Term <- Factor (('*' / '/') Factor)*
+    Rule Term() {
+        return Sequence(Factor(), ZeroOrMore(AnyOf("*/"), Factor()));
+    }
+    // Factor <- Number / Parens
+    Rule Factor() {
+        return FirstOf(Number(), Parens());
+    }
+    // Parens <- '(' Expression ')'
+    Rule Parens() {
+        return Sequence('(', Expression(), ')');
+    }
+    // Number <- Digit+
+    Rule Number() {
+        return OneOrMore(Digit());
+    }
+    // Digit <- [0-9]
+    Rule Digit() {
+        return CharRange('0', '9');
+    }
+
+    //**************** MAIN ****************
+
+    public static void main(String[] args) {
+        main(CalculatorParser0.class);
+    }
+
+}
\ No newline at end of file
diff --git a/src/main/java/org/parboiled/examples/calculators/CalculatorParser1.java b/src/main/java/org/parboiled/examples/calculators/CalculatorParser1.java
new file mode 100644
index 0000000..308e904
--- /dev/null
+++ b/src/main/java/org/parboiled/examples/calculators/CalculatorParser1.java
@@ -0,0 +1,103 @@
+/*
+ * Copyright (C) 2009-2011 Mathias Doenitz
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.parboiled.examples.calculators;
+
+import org.parboiled.Rule;
+import org.parboiled.annotations.BuildParseTree;
+import org.parboiled.annotations.SuppressSubnodes;
+
+/**
+ * A calculator parser building calculation results directly in the parsers value stack.
+ * All calculations are implemented directly in action expressions.
+ */
+@BuildParseTree
+public class CalculatorParser1 extends CalculatorParser {
+    // NOTE(review): the actions below do arithmetic on pop(), so the superclass presumably carried an Integer type argument that is missing here - confirm against upstream
+    @Override
+    public Rule InputLine() {
+        return Sequence(Expression(), EOI);
+    }
+    // Expression <- Term (('+' Term) / ('-' Term))*, each operation folding two stack values into one
+    public Rule Expression() {
+        return Sequence(
+                Term(), // a successful match of a Term pushes one Integer value onto the value stack
+                ZeroOrMore(
+                        FirstOf(
+                                // the action that is run after the '+' and the Term have been matched consumes the
+                                // two top value stack elements and replaces them with the calculation result
+                                Sequence('+', Term(), push(pop() + pop())),
+
+                                // same for the '-' operator, however, here the order of the "pop"s matters, we need to
+                                // retrieve the second to last value first, which is what the pop(1) call does
+                                Sequence('-', Term(), push(pop(1) - pop()))
+                        )
+                )
+        );
+    }
+    // Term <- Factor (('*' Factor) / ('/' Factor))*, folded the same way as in Expression
+    public Rule Term() {
+        return Sequence(
+                Factor(), // a successful match of a Factor pushes one Integer value onto the value stack
+                ZeroOrMore(
+                        FirstOf(
+                                // the action that is run after the '*' and the Factor have been matched consumes the
+                                // two top value stack elements and replaces them with the calculation result
+                                Sequence('*', Factor(), push(pop() * pop())),
+
+                                // same for the '/' operator, however, here the order of the "pop"s matters, we need to
+                                // retrieve the second to last value first, which is what the pop(1) call does
+                                Sequence('/', Factor(), push(pop(1) / pop()))
+                        )
+                )
+        );
+    }
+
+    public Rule Factor() {
+        return FirstOf(Number(), Parens()); // a factor "produces" exactly one Integer value on the value stack
+    }
+    // Parens <- '(' Expression ')'
+    public Rule Parens() {
+        return Sequence('(', Expression(), ')');
+    }
+
+    public Rule Number() {
+        return Sequence(
+                Digits(),
+
+                // parse the input text matched by the preceding "Digits" rule,
+                // convert it into an Integer and push it onto the value stack
+                // the action uses a default string in case it is run during error recovery (resynchronization)
+                push(Integer.parseInt(matchOrDefault("0")))
+        );
+    }
+    // Digits <- Digit+
+    @SuppressSubnodes
+    public Rule Digits() {
+        return OneOrMore(Digit());
+    }
+    // Digit <- [0-9]
+    public Rule Digit() {
+        return CharRange('0', '9');
+    }
+
+    //**************** MAIN ****************
+
+    public static void main(String[] args) {
+        main(CalculatorParser1.class);
+    }
+
+}
\ No newline at end of file
diff --git a/src/main/java/org/parboiled/examples/calculators/CalculatorParser2.java b/src/main/java/org/parboiled/examples/calculators/CalculatorParser2.java
new file mode 100644
index 0000000..470461f
--- /dev/null
+++ b/src/main/java/org/parboiled/examples/calculators/CalculatorParser2.java
@@ -0,0 +1,151 @@
+/*
+ * Copyright (C) 2009-2011 Mathias Doenitz
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.parboiled.examples.calculators;
+
+import org.parboiled.Rule;
+import org.parboiled.annotations.BuildParseTree;
+import org.parboiled.annotations.SuppressSubnodes;
+import org.parboiled.examples.calculators.CalculatorParser2.CalcNode;
+import org.parboiled.support.Var;
+import org.parboiled.trees.ImmutableBinaryTreeNode;
+
+/**
+ * A calculator parser building an AST representing the expression structure before performing the actual calculation.
+ * The parser value stack is used to build the AST nodes of type CalcNode.
+ */
+@BuildParseTree
+public class CalculatorParser2 extends CalculatorParser {
+    // NOTE(review): CalculatorParser, Var and ImmutableBinaryTreeNode appear without type arguments although op.get() and pop() feed CalcNode's typed constructor - presumably lost, confirm against upstream
+    @Override
+    public Rule InputLine() {
+        return Sequence(Expression(), EOI);
+    }
+    // Expression <- Term (('+' / '-') Term)*, building one CalcNode per matched operator
+    public Rule Expression() {
+        Var op = new Var(); // we use an action variable to hold the operator character
+        return Sequence(
+                Term(),
+                ZeroOrMore(
+                        AnyOf("+-"),
+                        op.set(matchedChar()), // set the action variable to the matched operator char
+                        Term(),
+
+                        // create an AST node for the operation that was just matched
+                        // we consume the two top stack elements and replace them with a new AST node
+                        // we use an alternative technique to the one shown in CalculatorParser1 to reverse
+                        // the order of the two top value stack elements
+                        swap() && push(new CalcNode(op.get(), pop(), pop()))
+                )
+        );
+    }
+    // Term <- Factor (('*' / '/') Factor)*, building one CalcNode per matched operator
+    public Rule Term() {
+        Var op = new Var(); // we use an action variable to hold the operator character
+        return Sequence(
+                Factor(),
+                ZeroOrMore(
+                        AnyOf("*/"),
+                        op.set(matchedChar()), // set the action variable to the matched operator char
+                        Factor(),
+
+                        // create an AST node for the operation that was just matched
+                        // we consume the two top stack elements and replace them with a new AST node
+                        // we use an alternative technique to the one shown in CalculatorParser1 to reverse
+                        // the order of the two top value stack elements
+                        swap() && push(new CalcNode(op.get(), pop(), pop()))
+                )
+        );
+    }
+    // Factor <- Number / Parens
+    public Rule Factor() {
+        return FirstOf(Number(), Parens());
+    }
+    // Parens <- '(' Expression ')'
+    public Rule Parens() {
+        return Sequence('(', Expression(), ')');
+    }
+
+    public Rule Number() {
+        return Sequence(
+                Digits(),
+
+                // parse the input text matched by the preceding "Digits" rule,
+                // convert it into an Integer and push a new AST node for it onto the value stack
+                // the action uses a default string in case it is run during error recovery (resynchronization)
+                push(new CalcNode(Integer.parseInt(matchOrDefault("0"))))
+        );
+    }
+    // Digits <- Digit+
+    @SuppressSubnodes
+    public Rule Digits() {
+        return OneOrMore(Digit());
+    }
+    // Digit <- [0-9]
+    public Rule Digit() {
+        return CharRange('0', '9');
+    }
+
+    //****************************************************************
+
+    /**
+     * The AST node for the calculators. The type of the node is carried as a Character that can either contain
+     * an operator char or be null. In the latter case the AST node is a leaf directly containing a value.
+     */
+    public static class CalcNode extends ImmutableBinaryTreeNode {
+        private int value;
+        private Character operator;
+        // leaf node: operator stays null and there are no children
+        public CalcNode(int value) {
+            super(null, null);
+            this.value = value;
+        }
+        // operator node over the two operand subtrees
+        public CalcNode(Character operator, CalcNode left, CalcNode right) {
+            super(left, right);
+            this.operator = operator;
+        }
+        // evaluates the subtree recursively; an operator other than + - * / is an IllegalStateException
+        public int getValue() {
+            if (operator == null) return value;
+            switch (operator) {
+                case '+':
+                    return left().getValue() + right().getValue();
+                case '-':
+                    return left().getValue() - right().getValue();
+                case '*':
+                    return left().getValue() * right().getValue();
+                case '/':
+                    return left().getValue() / right().getValue();
+                default:
+                    throw new IllegalStateException();
+            }
+        }
+        // format is "<description> | <value>"
+        @Override
+        public String toString() {
+            return (operator == null ? "Value " + value : "Operator '" + operator + '\'') + " | " + getValue();
+        }
+
+    }
+
+    //**************** MAIN ****************
+
+    public static void main(String[] args) {
+        main(CalculatorParser2.class);
+    }
+
+}
\ No newline at end of file
diff --git a/src/main/java/org/parboiled/examples/calculators/CalculatorParser3.java b/src/main/java/org/parboiled/examples/calculators/CalculatorParser3.java
new file mode 100644
index 0000000..f76f2d1
--- /dev/null
+++ b/src/main/java/org/parboiled/examples/calculators/CalculatorParser3.java
@@ -0,0 +1,183 @@
+/*
+ * Copyright (C) 2009-2011 Mathias Doenitz
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.parboiled.examples.calculators; + +import org.parboiled.Rule; +import org.parboiled.annotations.BuildParseTree; +import org.parboiled.examples.calculators.CalculatorParser3.CalcNode; +import org.parboiled.support.Var; +import org.parboiled.trees.ImmutableBinaryTreeNode; + +/** + * A calculator parser building an AST representing the expression structure before performing the actual calculation. + * The value field of the parse tree nodes is used for AST nodes. + * As opposed to the CalculatorParser2 this parser also supports floating point operations, negative numbers, a "power" + * and a "SQRT" operation as well as optional whitespace between the various expressions components. 
+ */
+@BuildParseTree
+public class CalculatorParser3 extends CalculatorParser {
+    // NOTE(review): CalculatorParser, Var and ImmutableBinaryTreeNode appear without type arguments although op.get() and pop() feed CalcNode's typed constructor - presumably lost, confirm against upstream
+    @Override
+    public Rule InputLine() {
+        return Sequence(Expression(), EOI);
+    }
+    // Expression <- Term (("+ " / "- ") Term)*
+    Rule Expression() {
+        Var op = new Var();
+        return Sequence(
+                Term(),
+                ZeroOrMore(
+                        // we use a FirstOf(String, String) instead of a AnyOf(String) so we can use the
+                        // fromStringLiteral transformation (see below), which automatically consumes trailing whitespace
+                        FirstOf("+ ", "- "), op.set(matchedChar()),
+                        Term(),
+
+                        // same as in CalculatorParser2
+                        push(new CalcNode(op.get(), pop(1), pop()))
+                )
+        );
+    }
+    // Term <- Factor (("* " / "/ ") Factor)*
+    Rule Term() {
+        Var op = new Var();
+        return Sequence(
+                Factor(),
+                ZeroOrMore(
+                        FirstOf("* ", "/ "), op.set(matchedChar()),
+                        Factor(),
+                        push(new CalcNode(op.get(), pop(1), pop()))
+                )
+        );
+    }
+    // Factor <- Atom ("^ " Atom)*
+    Rule Factor() {
+        return Sequence(
+                Atom(),
+                ZeroOrMore(
+                        "^ ",
+                        Atom(),
+                        push(new CalcNode('^', pop(1), pop()))
+                )
+        );
+    }
+    // Atom <- Number / SquareRoot / Parens
+    Rule Atom() {
+        return FirstOf(Number(), SquareRoot(), Parens());
+    }
+    // SquareRoot <- "SQRT " Parens
+    Rule SquareRoot() {
+        return Sequence(
+                "SQRT ",
+                Parens(),
+
+                // create a new AST node with a special operator 'R' and only one child
+                push(new CalcNode('R', pop(), null))
+        );
+    }
+    // Parens <- "( " Expression ") "
+    Rule Parens() {
+        return Sequence("( ", Expression(), ") ");
+    }
+
+    Rule Number() {
+        return Sequence(
+                // we use another Sequence in the "Number" Sequence so we can easily access the input text matched
+                // by the three enclosed rules with "match()" or "matchOrDefault()"
+                Sequence(
+                        Optional('-'),
+                        OneOrMore(Digit()),
+                        Optional('.', OneOrMore(Digit()))
+                ),
+
+                // the matchOrDefault() call returns the matched input text of the immediately preceding rule
+                // or a default string (in this case if it is run during error recovery (resynchronization))
+                push(new CalcNode(Double.parseDouble(matchOrDefault("0")))),
+                WhiteSpace()
+        );
+    }
+    // Digit <- [0-9]
+    Rule Digit() {
+        return CharRange('0', '9');
+    }
+    // WhiteSpace <- (' ' / tab / form feed)*
+    Rule WhiteSpace() {
+        return ZeroOrMore(AnyOf(" \t\f"));
+    }
+
+    // we redefine the rule creation for string literals to automatically match trailing whitespace if the string
+    // literal ends with a space character, this way we don't have to insert extra whitespace() rules after each
+    // character or string literal
+
+    @Override
+    protected Rule fromStringLiteral(String string) {
+        return string.endsWith(" ") ?
+                Sequence(String(string.substring(0, string.length() - 1)), WhiteSpace()) :
+                String(string);
+    }
+
+    //****************************************************************
+
+    /**
+     * The AST node for the calculators. The type of the node is carried as a Character that can either contain
+     * an operator char or be null. In the latter case the AST node is a leaf directly containing a value.
+     */
+    public static class CalcNode extends ImmutableBinaryTreeNode {
+        private double value;
+        private Character operator;
+        // leaf node: operator stays null and there are no children
+        public CalcNode(double value) {
+            super(null, null);
+            this.value = value;
+        }
+        // operator node; 'R' nodes are created with a null right operand (see SquareRoot)
+        public CalcNode(Character operator, CalcNode left, CalcNode right) {
+            super(left, right);
+            this.operator = operator;
+        }
+        // evaluates the subtree recursively; an unknown operator is an IllegalStateException
+        public double getValue() {
+            if (operator == null) return value;
+            switch (operator) {
+                case '+':
+                    return left().getValue() + right().getValue();
+                case '-':
+                    return left().getValue() - right().getValue();
+                case '*':
+                    return left().getValue() * right().getValue();
+                case '/':
+                    return left().getValue() / right().getValue();
+                case '^':
+                    return Math.pow(left().getValue(), right().getValue());
+                case 'R':
+                    return Math.sqrt(left().getValue());
+                default:
+                    throw new IllegalStateException();
+            }
+        }
+        // format is "<description> | <value>"
+        @Override
+        public String toString() {
+            return (operator == null ? "Value " + value : "Operator '" + operator + '\'') + " | " + getValue();
+        }
+    }
+
+    //**************** MAIN ****************
+
+    public static void main(String[] args) {
+        main(CalculatorParser3.class);
+    }
+}
\ No newline at end of file
diff --git a/src/main/java/org/parboiled/examples/calculators/CalculatorParser4.java b/src/main/java/org/parboiled/examples/calculators/CalculatorParser4.java
new file mode 100644
index 0000000..477762c
--- /dev/null
+++ b/src/main/java/org/parboiled/examples/calculators/CalculatorParser4.java
@@ -0,0 +1,109 @@
+/*
+ * Copyright (C) 2009-2011 Mathias Doenitz
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.parboiled.examples.calculators;
+
+import org.parboiled.Rule;
+import org.parboiled.annotations.BuildParseTree;
+import org.parboiled.examples.calculators.CalculatorParser3.CalcNode;
+import org.parboiled.support.Var;
+
+/**
+ * A calculator parser defining the same language as the CalculatorParser3 but using a rule building helper methods
+ * to Factor out common constructs.
+ */ +@BuildParseTree +public class CalculatorParser4 extends CalculatorParser { + + @Override + public Rule InputLine() { + return Sequence(Expression(), EOI); + } + + public Rule Expression() { + return OperatorRule(Term(), FirstOf("+ ", "- ")); + } + + public Rule Term() { + return OperatorRule(Factor(), FirstOf("* ", "/ ")); + } + + public Rule Factor() { + // by using toRule("^ ") instead of Ch('^') we make use of the fromCharLiteral(...) transformation below + return OperatorRule(Atom(), toRule("^ ")); + } + + public Rule OperatorRule(Rule subRule, Rule operatorRule) { + Var op = new Var(); + return Sequence( + subRule, + ZeroOrMore( + operatorRule, op.set(matchedChar()), + subRule, + push(new CalcNode(op.get(), pop(1), pop())) + ) + ); + } + + public Rule Atom() { + return FirstOf(Number(), SquareRoot(), Parens()); + } + + public Rule SquareRoot() { + return Sequence("SQRT", Parens(), push(new CalcNode('R', pop(), null))); + } + + public Rule Parens() { + return Sequence("( ", Expression(), ") "); + } + + public Rule Number() { + return Sequence( + Sequence( + Optional(Ch('-')), + OneOrMore(Digit()), + Optional(Ch('.'), OneOrMore(Digit())) + ), + // the action uses a default string in case it is run during error recovery (resynchronization) + push(new CalcNode(Double.parseDouble(matchOrDefault("0")))), + WhiteSpace() + ); + } + + public Rule Digit() { + return CharRange('0', '9'); + } + + public Rule WhiteSpace() { + return ZeroOrMore(AnyOf(" \t\f")); + } + + // we redefine the rule creation for string literals to automatically match trailing whitespace if the string + // literal ends with a space character, this way we don't have to insert extra whitespace() rules after each + // character or string literal + @Override + protected Rule fromStringLiteral(String string) { + return string.endsWith(" ") ? 
+ Sequence(String(string.substring(0, string.length() - 1)), WhiteSpace()) : + String(string); + } + + //**************** MAIN **************** + + public static void main(String[] args) { + main(CalculatorParser4.class); + } +} \ No newline at end of file diff --git a/src/main/java/org/parboiled/examples/indenting/IndentNode.java b/src/main/java/org/parboiled/examples/indenting/IndentNode.java new file mode 100644 index 0000000..eaacd78 --- /dev/null +++ b/src/main/java/org/parboiled/examples/indenting/IndentNode.java @@ -0,0 +1,32 @@ +package org.parboiled.examples.indenting; + +import java.util.ArrayList; +import java.util.List; + +public class IndentNode { + + private final String name; + private final List children = new ArrayList(); + + public IndentNode(String name) { + this.name = name; + } + + public String getName() { + return name; + } + + public boolean addChild(IndentNode child) { + children.add(child); + return true; + } + + List getChildren() { + return children; + } + + @Override + public String toString() { + return "IndentNode [name=" + name + ", children=" + children + "]"; + } +} \ No newline at end of file diff --git a/src/main/java/org/parboiled/examples/indenting/Main.java b/src/main/java/org/parboiled/examples/indenting/Main.java new file mode 100644 index 0000000..23edd6c --- /dev/null +++ b/src/main/java/org/parboiled/examples/indenting/Main.java @@ -0,0 +1,30 @@ +package org.parboiled.examples.indenting; + +import static org.parboiled.support.ParseTreeUtils.printNodeTree; + +import org.parboiled.Parboiled; +import org.parboiled.buffers.IndentDedentInputBuffer; +import org.parboiled.errors.ErrorUtils; +import org.parboiled.parserunners.ReportingParseRunner; +import org.parboiled.support.ParsingResult; + +public class Main { + + public static void main(String[] args) { + SimpleIndent parser = Parboiled.createParser(SimpleIndent.class); + String input = "NodeA \n\tNodeB\n\tNodeC \n\t\tNodeD \nNodeE"; + + ParsingResult result = new 
ReportingParseRunner(parser.Parent()) + .run(new IndentDedentInputBuffer(input.toCharArray(), 2, ";", true, true)); + + if (!result.parseErrors.isEmpty()) { + System.out.println(ErrorUtils.printParseError(result.parseErrors + .get(0))); + } else { + System.out.println("NodeTree: " + printNodeTree(result) + '\n'); + Object value = result.parseTreeRoot.getValue(); + System.out.println(value.toString()); + } + + } +} \ No newline at end of file diff --git a/src/main/java/org/parboiled/examples/indenting/SimpleIndent.java b/src/main/java/org/parboiled/examples/indenting/SimpleIndent.java new file mode 100644 index 0000000..732efa2 --- /dev/null +++ b/src/main/java/org/parboiled/examples/indenting/SimpleIndent.java @@ -0,0 +1,61 @@ +package org.parboiled.examples.indenting; + +import org.parboiled.BaseParser; +import org.parboiled.Rule; +import org.parboiled.annotations.BuildParseTree; + +@BuildParseTree +public class SimpleIndent extends BaseParser { + + Rule Parent() { + return Sequence(push(new IndentNode("root")), OneOrMore(Data()), EOI); + } + + Rule Data() { + return Sequence(Identifier(), push(new IndentNode(match())), peek(1) + .addChild(peek()), + Optional(Sequence(Spacing(), ChildNodeList())), drop()); + } + + Rule ChildNodeList() { + return Sequence(INDENT, Spacing(), OneOrMore(Data(), Spacing()), DEDENT); + } + + Rule Identifier() { + return Sequence(PN_CHARS_U(), ZeroOrMore(PN_CHARS_DIGIT_U())); + } + + public Rule PN_CHARS_DIGIT_U() { + return FirstOf(PN_CHARS_U(), DIGIT()); + } + + public Rule PN_CHARS_U() { + return FirstOf(PN_CHARS_BASE(), '_'); + } + + public Rule PN_CHARS_BASE() { + return FirstOf( + CharRange('A', 'Z'), + CharRange('a', 'z'), + CharRange('\u00C0', '\u00D6'), + CharRange('\u00D8', '\u00F6'), + CharRange('\u00F8', '\u02FF'), + CharRange('\u0370', '\u037D'), + CharRange('\u037F', '\u1FFF'), + CharRange('\u200C', '\u200D'), + CharRange('\u2070', '\u218F'), + CharRange('\u2C00', '\u2FEF'), + CharRange('\u3001', '\uD7FF'), + 
CharRange('\uF900', '\uFDCF'), + CharRange('\uFDF0', '\uFFFD') + ); + } + + public Rule DIGIT() { + return CharRange('0', '9'); + } + + Rule Spacing() { + return ZeroOrMore(AnyOf(" \t\r\n\f").label("Whitespace")); + } +} diff --git a/src/main/java/org/parboiled/examples/java/AbstractJavaCharacterMatcher.java b/src/main/java/org/parboiled/examples/java/AbstractJavaCharacterMatcher.java new file mode 100644 index 0000000..fe63c06 --- /dev/null +++ b/src/main/java/org/parboiled/examples/java/AbstractJavaCharacterMatcher.java @@ -0,0 +1,58 @@ +/* + * Copyright (C) 2009-2011 Mathias Doenitz + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.parboiled.examples.java; + +import org.parboiled.MatcherContext; +import org.parboiled.matchers.CustomMatcher; + +public abstract class AbstractJavaCharacterMatcher extends CustomMatcher { + + protected AbstractJavaCharacterMatcher(String label) { + super(label); + } + + @Override + public final boolean isSingleCharMatcher() { + return true; + } + + @Override + public final boolean canMatchEmpty() { + return false; + } + + @Override + public boolean isStarterChar(char c) { + return acceptChar(c); + } + + @Override + public final char getStarterChar() { + return 'a'; + } + + public final boolean match(MatcherContext context) { + if (!acceptChar(context.getCurrentChar())) { + return false; + } + context.advanceIndex(1); + context.createNode(); + return true; + } + + protected abstract boolean acceptChar(char c); +} diff --git a/src/main/java/org/parboiled/examples/java/JavaLetterMatcher.java b/src/main/java/org/parboiled/examples/java/JavaLetterMatcher.java new file mode 100644 index 0000000..55ddbcd --- /dev/null +++ b/src/main/java/org/parboiled/examples/java/JavaLetterMatcher.java @@ -0,0 +1,29 @@ +/* + * Copyright (C) 2009-2011 Mathias Doenitz + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.parboiled.examples.java; + +public class JavaLetterMatcher extends AbstractJavaCharacterMatcher { + + public JavaLetterMatcher() { + super("Letter"); + } + + @Override + protected boolean acceptChar(char c) { + return Character.isJavaIdentifierStart(c); + } +} diff --git a/src/main/java/org/parboiled/examples/java/JavaLetterOrDigitMatcher.java b/src/main/java/org/parboiled/examples/java/JavaLetterOrDigitMatcher.java new file mode 100644 index 0000000..fdf89a7 --- /dev/null +++ b/src/main/java/org/parboiled/examples/java/JavaLetterOrDigitMatcher.java @@ -0,0 +1,29 @@ +/* + * Copyright (C) 2009-2011 Mathias Doenitz + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.parboiled.examples.java; + +public class JavaLetterOrDigitMatcher extends AbstractJavaCharacterMatcher { + + public JavaLetterOrDigitMatcher() { + super("LetterOrDigit"); + } + + @Override + protected boolean acceptChar(char c) { + return Character.isJavaIdentifierPart(c); + } +} diff --git a/src/main/java/org/parboiled/examples/java/JavaParser.java b/src/main/java/org/parboiled/examples/java/JavaParser.java new file mode 100644 index 0000000..37c93ee --- /dev/null +++ b/src/main/java/org/parboiled/examples/java/JavaParser.java @@ -0,0 +1,1141 @@ +//=========================================================================== +// +// Parsing Expression Grammar for Java 1.6 as a parboiled parser. 
+// Based on Chapters 3 and 18 of Java Language Specification, Third Edition (JLS) +// at http://java.sun.com/docs/books/jls/third_edition/html/j3TOC.html. +// +//--------------------------------------------------------------------------- +// +// Copyright (C) 2010 by Mathias Doenitz +// Based on the Mouse 1.3 grammar for Java 1.6, which is +// Copyright (C) 2006, 2009, 2010, 2011 by Roman R Redziejowski (www.romanredz.se). +// +// The author gives unlimited permission to copy and distribute +// this file, with or without modifications, as long as this notice +// is preserved, and any changes are properly documented. +// +// This file is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. +// +//--------------------------------------------------------------------------- +// +// Change log +// 2006-12-06 Posted on Internet. +// 2009-04-04 Modified to conform to Mouse syntax: +// Underscore removed from names +// \f in Space replaced by Unicode for FormFeed. +// 2009-07-10 Unused rule THREADSAFE removed. +// 2009-07-10 Copying and distribution conditions relaxed by the author. +// 2010-01-28 Transcribed to parboiled +// 2010-02-01 Fixed problem in rule "FormalParameterDecls" +// 2010-03-29 Fixed problem in "annotation" +// 2010-03-31 Fixed problem in unicode escapes, String literals and line comments +// (Thanks to Reinier Zwitserloot for the finds) +// 2010-07-26 Fixed problem in LocalVariableDeclarationStatement (accept annotations), +// HexFloat (HexSignificant) and AnnotationTypeDeclaration (bug in the JLS!) 
+// 2010-10-07 Added full support of Unicode Identifiers as set forth in the JLS +// (Thanks for Ville Peurala for the patch) +// 2011-07-23 Transcribed all missing fixes from Romans Mouse grammar (http://www.romanredz.se/papers/Java.1.6.peg) +// +//=========================================================================== + +package org.parboiled.examples.java; + +import org.parboiled.BaseParser; +import org.parboiled.Rule; +import org.parboiled.annotations.*; + +@SuppressWarnings({"InfiniteRecursion"}) +@BuildParseTree +public class JavaParser extends BaseParser { + + //------------------------------------------------------------------------- + // Compilation Unit + //------------------------------------------------------------------------- + + public Rule CompilationUnit() { + return Sequence( + Spacing(), + Optional(PackageDeclaration()), + ZeroOrMore(ImportDeclaration()), + ZeroOrMore(TypeDeclaration()), + EOI + ); + } + + Rule PackageDeclaration() { + return Sequence(ZeroOrMore(Annotation()), Sequence(PACKAGE, QualifiedIdentifier(), SEMI)); + } + + Rule ImportDeclaration() { + return Sequence( + IMPORT, + Optional(STATIC), + QualifiedIdentifier(), + Optional(DOT, STAR), + SEMI + ); + } + + Rule TypeDeclaration() { + return FirstOf( + Sequence( + ZeroOrMore(Modifier()), + FirstOf( + ClassDeclaration(), + EnumDeclaration(), + InterfaceDeclaration(), + AnnotationTypeDeclaration() + ) + ), + SEMI + ); + } + + //------------------------------------------------------------------------- + // Class Declaration + //------------------------------------------------------------------------- + + Rule ClassDeclaration() { + return Sequence( + CLASS, + Identifier(), + Optional(TypeParameters()), + Optional(EXTENDS, ClassType()), + Optional(IMPLEMENTS, ClassTypeList()), + ClassBody() + ); + } + + Rule ClassBody() { + return Sequence(LWING, ZeroOrMore(ClassBodyDeclaration()), RWING); + } + + Rule ClassBodyDeclaration() { + return FirstOf( + SEMI, + Sequence(Optional(STATIC), 
Block()), + Sequence(ZeroOrMore(Modifier()), MemberDecl()) + ); + } + + Rule MemberDecl() { + return FirstOf( + Sequence(TypeParameters(), GenericMethodOrConstructorRest()), + Sequence(Type(), Identifier(), MethodDeclaratorRest()), + Sequence(Type(), VariableDeclarators(), SEMI), + Sequence(VOID, Identifier(), VoidMethodDeclaratorRest()), + Sequence(Identifier(), ConstructorDeclaratorRest()), + InterfaceDeclaration(), + ClassDeclaration(), + EnumDeclaration(), + AnnotationTypeDeclaration() + ); + } + + Rule GenericMethodOrConstructorRest() { + return FirstOf( + Sequence(FirstOf(Type(), VOID), Identifier(), MethodDeclaratorRest()), + Sequence(Identifier(), ConstructorDeclaratorRest()) + ); + } + + Rule MethodDeclaratorRest() { + return Sequence( + FormalParameters(), + ZeroOrMore(Dim()), + Optional(THROWS, ClassTypeList()), + FirstOf(MethodBody(), SEMI) + ); + } + + Rule VoidMethodDeclaratorRest() { + return Sequence( + FormalParameters(), + Optional(THROWS, ClassTypeList()), + FirstOf(MethodBody(), SEMI) + ); + } + + Rule ConstructorDeclaratorRest() { + return Sequence(FormalParameters(), Optional(THROWS, ClassTypeList()), MethodBody()); + } + + Rule MethodBody() { + return Block(); + } + + //------------------------------------------------------------------------- + // Interface Declaration + //------------------------------------------------------------------------- + + Rule InterfaceDeclaration() { + return Sequence( + INTERFACE, + Identifier(), + Optional(TypeParameters()), + Optional(EXTENDS, ClassTypeList()), + InterfaceBody() + ); + } + + Rule InterfaceBody() { + return Sequence(LWING, ZeroOrMore(InterfaceBodyDeclaration()), RWING); + } + + Rule InterfaceBodyDeclaration() { + return FirstOf( + Sequence(ZeroOrMore(Modifier()), InterfaceMemberDecl()), + SEMI + ); + } + + Rule InterfaceMemberDecl() { + return FirstOf( + InterfaceMethodOrFieldDecl(), + InterfaceGenericMethodDecl(), + Sequence(VOID, Identifier(), VoidInterfaceMethodDeclaratorsRest()), + 
InterfaceDeclaration(), + AnnotationTypeDeclaration(), + ClassDeclaration(), + EnumDeclaration() + ); + } + + Rule InterfaceMethodOrFieldDecl() { + return Sequence(Sequence(Type(), Identifier()), InterfaceMethodOrFieldRest()); + } + + Rule InterfaceMethodOrFieldRest() { + return FirstOf( + Sequence(ConstantDeclaratorsRest(), SEMI), + InterfaceMethodDeclaratorRest() + ); + } + + Rule InterfaceMethodDeclaratorRest() { + return Sequence( + FormalParameters(), + ZeroOrMore(Dim()), + Optional(THROWS, ClassTypeList()), + SEMI + ); + } + + Rule InterfaceGenericMethodDecl() { + return Sequence(TypeParameters(), FirstOf(Type(), VOID), Identifier(), InterfaceMethodDeclaratorRest()); + } + + Rule VoidInterfaceMethodDeclaratorsRest() { + return Sequence(FormalParameters(), Optional(THROWS, ClassTypeList()), SEMI); + } + + Rule ConstantDeclaratorsRest() { + return Sequence(ConstantDeclaratorRest(), ZeroOrMore(COMMA, ConstantDeclarator())); + } + + Rule ConstantDeclarator() { + return Sequence(Identifier(), ConstantDeclaratorRest()); + } + + Rule ConstantDeclaratorRest() { + return Sequence(ZeroOrMore(Dim()), EQU, VariableInitializer()); + } + + //------------------------------------------------------------------------- + // Enum Declaration + //------------------------------------------------------------------------- + + Rule EnumDeclaration() { + return Sequence( + ENUM, + Identifier(), + Optional(IMPLEMENTS, ClassTypeList()), + EnumBody() + ); + } + + Rule EnumBody() { + return Sequence( + LWING, + Optional(EnumConstants()), + Optional(COMMA), + Optional(EnumBodyDeclarations()), + RWING + ); + } + + Rule EnumConstants() { + return Sequence(EnumConstant(), ZeroOrMore(COMMA, EnumConstant())); + } + + Rule EnumConstant() { + return Sequence( + ZeroOrMore(Annotation()), + Identifier(), + Optional(Arguments()), + Optional(ClassBody()) + ); + } + + Rule EnumBodyDeclarations() { + return Sequence(SEMI, ZeroOrMore(ClassBodyDeclaration())); + } + + 
//------------------------------------------------------------------------- + // Variable Declarations + //------------------------------------------------------------------------- + + Rule LocalVariableDeclarationStatement() { + return Sequence(ZeroOrMore(FirstOf(FINAL, Annotation())), Type(), VariableDeclarators(), SEMI); + } + + Rule VariableDeclarators() { + return Sequence(VariableDeclarator(), ZeroOrMore(COMMA, VariableDeclarator())); + } + + Rule VariableDeclarator() { + return Sequence(Identifier(), ZeroOrMore(Dim()), Optional(EQU, VariableInitializer())); + } + + //------------------------------------------------------------------------- + // Formal Parameters + //------------------------------------------------------------------------- + + Rule FormalParameters() { + return Sequence(LPAR, Optional(FormalParameterDecls()), RPAR); + } + + Rule FormalParameter() { + return Sequence(ZeroOrMore(FirstOf(FINAL, Annotation())), Type(), VariableDeclaratorId()); + } + + Rule FormalParameterDecls() { + return Sequence(ZeroOrMore(FirstOf(FINAL, Annotation())), Type(), FormalParameterDeclsRest()); + } + + Rule FormalParameterDeclsRest() { + return FirstOf( + Sequence(VariableDeclaratorId(), Optional(COMMA, FormalParameterDecls())), + Sequence(ELLIPSIS, VariableDeclaratorId()) + ); + } + + Rule VariableDeclaratorId() { + return Sequence(Identifier(), ZeroOrMore(Dim())); + } + + //------------------------------------------------------------------------- + // Statements + //------------------------------------------------------------------------- + + Rule Block() { + return Sequence(LWING, BlockStatements(), RWING); + } + + Rule BlockStatements() { + return ZeroOrMore(BlockStatement()); + } + + Rule BlockStatement() { + return FirstOf( + LocalVariableDeclarationStatement(), + Sequence(ZeroOrMore(Modifier()), FirstOf(ClassDeclaration(), EnumDeclaration())), + Statement() + ); + } + + Rule Statement() { + return FirstOf( + Block(), + Sequence(ASSERT, Expression(), 
Optional(COLON, Expression()), SEMI), + Sequence(IF, ParExpression(), Statement(), Optional(ELSE, Statement())), + Sequence(FOR, LPAR, Optional(ForInit()), SEMI, Optional(Expression()), SEMI, Optional(ForUpdate()), + RPAR, Statement()), + Sequence(FOR, LPAR, FormalParameter(), COLON, Expression(), RPAR, Statement()), + Sequence(WHILE, ParExpression(), Statement()), + Sequence(DO, Statement(), WHILE, ParExpression(), SEMI), + Sequence(TRY, Block(), + FirstOf(Sequence(OneOrMore(Catch_()), Optional(Finally_())), Finally_())), + Sequence(SWITCH, ParExpression(), LWING, SwitchBlockStatementGroups(), RWING), + Sequence(SYNCHRONIZED, ParExpression(), Block()), + Sequence(RETURN, Optional(Expression()), SEMI), + Sequence(THROW, Expression(), SEMI), + Sequence(BREAK, Optional(Identifier()), SEMI), + Sequence(CONTINUE, Optional(Identifier()), SEMI), + Sequence(Sequence(Identifier(), COLON), Statement()), + Sequence(StatementExpression(), SEMI), + SEMI + ); + } + + Rule Catch_() { + return Sequence(CATCH, LPAR, FormalParameter(), RPAR, Block()); + } + + Rule Finally_() { + return Sequence(FINALLY, Block()); + } + + Rule SwitchBlockStatementGroups() { + return ZeroOrMore(SwitchBlockStatementGroup()); + } + + Rule SwitchBlockStatementGroup() { + return Sequence(SwitchLabel(), BlockStatements()); + } + + Rule SwitchLabel() { + return FirstOf( + Sequence(CASE, ConstantExpression(), COLON), + Sequence(CASE, EnumConstantName(), COLON), + Sequence(DEFAULT, COLON) + ); + } + + Rule ForInit() { + return FirstOf( + Sequence(ZeroOrMore(FirstOf(FINAL, Annotation())), Type(), VariableDeclarators()), + Sequence(StatementExpression(), ZeroOrMore(COMMA, StatementExpression())) + ); + } + + Rule ForUpdate() { + return Sequence(StatementExpression(), ZeroOrMore(COMMA, StatementExpression())); + } + + Rule EnumConstantName() { + return Identifier(); + } + + //------------------------------------------------------------------------- + // Expressions + 
//------------------------------------------------------------------------- + + // The following is more generous than the definition in section 14.8, + // which allows only specific forms of Expression. + + Rule StatementExpression() { + return Expression(); + } + + Rule ConstantExpression() { + return Expression(); + } + + // The following definition is part of the modification in JLS Chapter 18 + // to minimize look ahead. In JLS Chapter 15.27, Expression is defined + // as AssignmentExpression, which is effectively defined as + // (LeftHandSide AssignmentOperator)* ConditionalExpression. + // The following is obtained by allowing ANY ConditionalExpression + // as LeftHandSide, which results in accepting statements like 5 = a. + + Rule Expression() { + return Sequence( + ConditionalExpression(), + ZeroOrMore(AssignmentOperator(), ConditionalExpression()) + ); + } + + Rule AssignmentOperator() { + return FirstOf(EQU, PLUSEQU, MINUSEQU, STAREQU, DIVEQU, ANDEQU, OREQU, HATEQU, MODEQU, SLEQU, SREQU, BSREQU); + } + + Rule ConditionalExpression() { + return Sequence( + ConditionalOrExpression(), + ZeroOrMore(QUERY, Expression(), COLON, ConditionalOrExpression()) + ); + } + + Rule ConditionalOrExpression() { + return Sequence( + ConditionalAndExpression(), + ZeroOrMore(OROR, ConditionalAndExpression()) + ); + } + + Rule ConditionalAndExpression() { + return Sequence( + InclusiveOrExpression(), + ZeroOrMore(ANDAND, InclusiveOrExpression()) + ); + } + + Rule InclusiveOrExpression() { + return Sequence( + ExclusiveOrExpression(), + ZeroOrMore(OR, ExclusiveOrExpression()) + ); + } + + Rule ExclusiveOrExpression() { + return Sequence( + AndExpression(), + ZeroOrMore(HAT, AndExpression()) + ); + } + + Rule AndExpression() { + return Sequence( + EqualityExpression(), + ZeroOrMore(AND, EqualityExpression()) + ); + } + + Rule EqualityExpression() { + return Sequence( + RelationalExpression(), + ZeroOrMore(FirstOf(EQUAL, NOTEQUAL), RelationalExpression()) + ); + } + + Rule 
RelationalExpression() { + return Sequence( + ShiftExpression(), + ZeroOrMore( + FirstOf( + Sequence(FirstOf(LE, GE, LT, GT), ShiftExpression()), + Sequence(INSTANCEOF, ReferenceType()) + ) + ) + ); + } + + Rule ShiftExpression() { + return Sequence( + AdditiveExpression(), + ZeroOrMore(FirstOf(SL, SR, BSR), AdditiveExpression()) + ); + } + + Rule AdditiveExpression() { + return Sequence( + MultiplicativeExpression(), + ZeroOrMore(FirstOf(PLUS, MINUS), MultiplicativeExpression()) + ); + } + + Rule MultiplicativeExpression() { + return Sequence( + UnaryExpression(), + ZeroOrMore(FirstOf(STAR, DIV, MOD), UnaryExpression()) + ); + } + + Rule UnaryExpression() { + return FirstOf( + Sequence(PrefixOp(), UnaryExpression()), + Sequence(LPAR, Type(), RPAR, UnaryExpression()), + Sequence(Primary(), ZeroOrMore(Selector()), ZeroOrMore(PostFixOp())) + ); + } + + Rule Primary() { + return FirstOf( + ParExpression(), + Sequence( + NonWildcardTypeArguments(), + FirstOf(ExplicitGenericInvocationSuffix(), Sequence(THIS, Arguments())) + ), + Sequence(THIS, Optional(Arguments())), + Sequence(SUPER, SuperSuffix()), + Literal(), + Sequence(NEW, Creator()), + Sequence(QualifiedIdentifier(), Optional(IdentifierSuffix())), + Sequence(BasicType(), ZeroOrMore(Dim()), DOT, CLASS), + Sequence(VOID, DOT, CLASS) + ); + } + + Rule IdentifierSuffix() { + return FirstOf( + Sequence(LBRK, + FirstOf( + Sequence(RBRK, ZeroOrMore(Dim()), DOT, CLASS), + Sequence(Expression(), RBRK) + ) + ), + Arguments(), + Sequence( + DOT, + FirstOf( + CLASS, + ExplicitGenericInvocation(), + THIS, + Sequence(SUPER, Arguments()), + Sequence(NEW, Optional(NonWildcardTypeArguments()), InnerCreator()) + ) + ) + ); + } + + Rule ExplicitGenericInvocation() { + return Sequence(NonWildcardTypeArguments(), ExplicitGenericInvocationSuffix()); + } + + Rule NonWildcardTypeArguments() { + return Sequence(LPOINT, ReferenceType(), ZeroOrMore(COMMA, ReferenceType()), RPOINT); + } + + Rule ExplicitGenericInvocationSuffix() { + return 
FirstOf( + Sequence(SUPER, SuperSuffix()), + Sequence(Identifier(), Arguments()) + ); + } + + Rule PrefixOp() { + return FirstOf(INC, DEC, BANG, TILDA, PLUS, MINUS); + } + + Rule PostFixOp() { + return FirstOf(INC, DEC); + } + + Rule Selector() { + return FirstOf( + Sequence(DOT, Identifier(), Optional(Arguments())), + Sequence(DOT, ExplicitGenericInvocation()), + Sequence(DOT, THIS), + Sequence(DOT, SUPER, SuperSuffix()), + Sequence(DOT, NEW, Optional(NonWildcardTypeArguments()), InnerCreator()), + DimExpr() + ); + } + + Rule SuperSuffix() { + return FirstOf(Arguments(), Sequence(DOT, Identifier(), Optional(Arguments()))); + } + + @MemoMismatches + Rule BasicType() { + return Sequence( + FirstOf("byte", "short", "char", "int", "long", "float", "double", "boolean"), + TestNot(LetterOrDigit()), + Spacing() + ); + } + + Rule Arguments() { + return Sequence( + LPAR, + Optional(Expression(), ZeroOrMore(COMMA, Expression())), + RPAR + ); + } + + Rule Creator() { + return FirstOf( + Sequence(Optional(NonWildcardTypeArguments()), CreatedName(), ClassCreatorRest()), + Sequence(Optional(NonWildcardTypeArguments()), FirstOf(ClassType(), BasicType()), ArrayCreatorRest()) + ); + } + + Rule CreatedName() { + return Sequence( + Identifier(), Optional(NonWildcardTypeArguments()), + ZeroOrMore(DOT, Identifier(), Optional(NonWildcardTypeArguments())) + ); + } + + Rule InnerCreator() { + return Sequence(Identifier(), ClassCreatorRest()); + } + + // The following is more generous than JLS 15.10. According to that definition, + // BasicType must be followed by at least one DimExpr or by ArrayInitializer. 
+ Rule ArrayCreatorRest() { + return Sequence( + LBRK, + FirstOf( + Sequence(RBRK, ZeroOrMore(Dim()), ArrayInitializer()), + Sequence(Expression(), RBRK, ZeroOrMore(DimExpr()), ZeroOrMore(Dim())) + ) + ); + } + + Rule ClassCreatorRest() { + return Sequence(Arguments(), Optional(ClassBody())); + } + + Rule ArrayInitializer() { + return Sequence( + LWING, + Optional( + VariableInitializer(), + ZeroOrMore(COMMA, VariableInitializer()) + ), + Optional(COMMA), + RWING + ); + } + + Rule VariableInitializer() { + return FirstOf(ArrayInitializer(), Expression()); + } + + Rule ParExpression() { + return Sequence(LPAR, Expression(), RPAR); + } + + Rule QualifiedIdentifier() { + return Sequence(Identifier(), ZeroOrMore(DOT, Identifier())); + } + + Rule Dim() { + return Sequence(LBRK, RBRK); + } + + Rule DimExpr() { + return Sequence(LBRK, Expression(), RBRK); + } + + //------------------------------------------------------------------------- + // Types and Modifiers + //------------------------------------------------------------------------- + + Rule Type() { + return Sequence(FirstOf(BasicType(), ClassType()), ZeroOrMore(Dim())); + } + + Rule ReferenceType() { + return FirstOf( + Sequence(BasicType(), OneOrMore(Dim())), + Sequence(ClassType(), ZeroOrMore(Dim())) + ); + } + + Rule ClassType() { + return Sequence( + Identifier(), Optional(TypeArguments()), + ZeroOrMore(DOT, Identifier(), Optional(TypeArguments())) + ); + } + + Rule ClassTypeList() { + return Sequence(ClassType(), ZeroOrMore(COMMA, ClassType())); + } + + Rule TypeArguments() { + return Sequence(LPOINT, TypeArgument(), ZeroOrMore(COMMA, TypeArgument()), RPOINT); + } + + Rule TypeArgument() { + return FirstOf( + ReferenceType(), + Sequence(QUERY, Optional(FirstOf(EXTENDS, SUPER), ReferenceType())) + ); + } + + Rule TypeParameters() { + return Sequence(LPOINT, TypeParameter(), ZeroOrMore(COMMA, TypeParameter()), RPOINT); + } + + Rule TypeParameter() { + return Sequence(Identifier(), Optional(EXTENDS, Bound())); + 
} + + Rule Bound() { + return Sequence(ClassType(), ZeroOrMore(AND, ClassType())); + } + + // the following common definition of Modifier is part of the modification + // in JLS Chapter 18 to minimize look ahead. The main body of JLS has + // different lists of modifiers for different language elements. + Rule Modifier() { + return FirstOf( + Annotation(), + Sequence( + FirstOf("public", "protected", "private", "static", "abstract", "final", "native", + "synchronized", "transient", "volatile", "strictfp"), + TestNot(LetterOrDigit()), + Spacing() + ) + ); + } + + //------------------------------------------------------------------------- + // Annotations + //------------------------------------------------------------------------- + + Rule AnnotationTypeDeclaration() { + return Sequence(AT, INTERFACE, Identifier(), AnnotationTypeBody()); + } + + Rule AnnotationTypeBody() { + return Sequence(LWING, ZeroOrMore(AnnotationTypeElementDeclaration()), RWING); + } + + Rule AnnotationTypeElementDeclaration() { + return FirstOf( + Sequence(ZeroOrMore(Modifier()), AnnotationTypeElementRest()), + SEMI + ); + } + + Rule AnnotationTypeElementRest() { + return FirstOf( + Sequence(Type(), AnnotationMethodOrConstantRest(), SEMI), + ClassDeclaration(), + EnumDeclaration(), + InterfaceDeclaration(), + AnnotationTypeDeclaration() + ); + } + + Rule AnnotationMethodOrConstantRest() { + return FirstOf(AnnotationMethodRest(), AnnotationConstantRest()); + } + + Rule AnnotationMethodRest() { + return Sequence(Identifier(), LPAR, RPAR, Optional(DefaultValue())); + } + + Rule AnnotationConstantRest() { + return VariableDeclarators(); + } + + Rule DefaultValue() { + return Sequence(DEFAULT, ElementValue()); + } + + @MemoMismatches + Rule Annotation() { + return Sequence(AT, QualifiedIdentifier(), Optional(AnnotationRest())); + } + + Rule AnnotationRest() { + return FirstOf(NormalAnnotationRest(), SingleElementAnnotationRest()); + } + + Rule NormalAnnotationRest() { + return Sequence(LPAR, 
Optional(ElementValuePairs()), RPAR); + } + + Rule ElementValuePairs() { + return Sequence(ElementValuePair(), ZeroOrMore(COMMA, ElementValuePair())); + } + + Rule ElementValuePair() { + return Sequence(Identifier(), EQU, ElementValue()); + } + + Rule ElementValue() { + return FirstOf(ConditionalExpression(), Annotation(), ElementValueArrayInitializer()); + } + + Rule ElementValueArrayInitializer() { + return Sequence(LWING, Optional(ElementValues()), Optional(COMMA), RWING); + } + + Rule ElementValues() { + return Sequence(ElementValue(), ZeroOrMore(COMMA, ElementValue())); + } + + Rule SingleElementAnnotationRest() { + return Sequence(LPAR, ElementValue(), RPAR); + } + + //------------------------------------------------------------------------- + // JLS 3.6-7 Spacing + //------------------------------------------------------------------------- + + @SuppressNode + Rule Spacing() { + return ZeroOrMore(FirstOf( + + // whitespace + OneOrMore(AnyOf(" \t\r\n\f").label("Whitespace")), + + // traditional comment + Sequence("/*", ZeroOrMore(TestNot("*/"), ANY), "*/"), + + // end of line comment + Sequence( + "//", + ZeroOrMore(TestNot(AnyOf("\r\n")), ANY), + FirstOf("\r\n", '\r', '\n', EOI) + ) + )); + } + + //------------------------------------------------------------------------- + // JLS 3.8 Identifiers + //------------------------------------------------------------------------- + + @SuppressSubnodes + @MemoMismatches + Rule Identifier() { + return Sequence(TestNot(Keyword()), Letter(), ZeroOrMore(LetterOrDigit()), Spacing()); + } + + // JLS defines letters and digits as Unicode characters recognized + // as such by special Java procedures. 
+ + Rule Letter() { + // switch to this "reduced" character space version for a ~10% parser performance speedup + //return FirstOf(CharRange('a', 'z'), CharRange('A', 'Z'), '_', '$'); + return FirstOf(Sequence('\\', UnicodeEscape()), new JavaLetterMatcher()); + } + + @MemoMismatches + Rule LetterOrDigit() { + // switch to this "reduced" character space version for a ~10% parser performance speedup + //return FirstOf(CharRange('a', 'z'), CharRange('A', 'Z'), CharRange('0', '9'), '_', '$'); + return FirstOf(Sequence('\\', UnicodeEscape()), new JavaLetterOrDigitMatcher()); + } + + //------------------------------------------------------------------------- + // JLS 3.9 Keywords + //------------------------------------------------------------------------- + + @MemoMismatches + Rule Keyword() { + return Sequence( + FirstOf("assert", "break", "case", "catch", "class", "const", "continue", "default", "do", "else", + "enum", "extends", "finally", "final", "for", "goto", "if", "implements", "import", "interface", + "instanceof", "new", "package", "return", "static", "super", "switch", "synchronized", "this", + "throws", "throw", "try", "void", "while"), + TestNot(LetterOrDigit()) + ); + } + + public final Rule ASSERT = Keyword("assert"); + public final Rule BREAK = Keyword("break"); + public final Rule CASE = Keyword("case"); + public final Rule CATCH = Keyword("catch"); + public final Rule CLASS = Keyword("class"); + public final Rule CONTINUE = Keyword("continue"); + public final Rule DEFAULT = Keyword("default"); + public final Rule DO = Keyword("do"); + public final Rule ELSE = Keyword("else"); + public final Rule ENUM = Keyword("enum"); + public final Rule EXTENDS = Keyword("extends"); + public final Rule FINALLY = Keyword("finally"); + public final Rule FINAL = Keyword("final"); + public final Rule FOR = Keyword("for"); + public final Rule IF = Keyword("if"); + public final Rule IMPLEMENTS = Keyword("implements"); + public final Rule IMPORT = Keyword("import"); + 
public final Rule INTERFACE = Keyword("interface");
+    public final Rule INSTANCEOF = Keyword("instanceof");
+    public final Rule NEW = Keyword("new");
+    public final Rule PACKAGE = Keyword("package");
+    public final Rule RETURN = Keyword("return");
+    public final Rule STATIC = Keyword("static");
+    public final Rule SUPER = Keyword("super");
+    public final Rule SWITCH = Keyword("switch");
+    public final Rule SYNCHRONIZED = Keyword("synchronized");
+    public final Rule THIS = Keyword("this");
+    public final Rule THROWS = Keyword("throws");
+    public final Rule THROW = Keyword("throw");
+    public final Rule TRY = Keyword("try");
+    public final Rule VOID = Keyword("void");
+    public final Rule WHILE = Keyword("while");
+
+    /**
+     * Builds the terminal for one keyword: the given text, not followed by a
+     * LetterOrDigit (delegates to the two-argument Terminal).
+     */
+    @SuppressNode
+    @DontLabel
+    Rule Keyword(String keyword) {
+        return Terminal(keyword, LetterOrDigit());
+    }
+
+    //-------------------------------------------------------------------------
+    // JLS 3.10 Literals
+    //-------------------------------------------------------------------------
+
+    /**
+     * Matches any literal followed by Spacing. The words true, false and null only match
+     * when not followed by a LetterOrDigit.
+     */
+    Rule Literal() {
+        return Sequence(
+                FirstOf(
+                        // FloatLiteral is listed before IntegerLiteral; presumably so that the
+                        // integer part of a floating point literal is not taken as an integer.
+                        FloatLiteral(),
+                        IntegerLiteral(),
+                        CharLiteral(),
+                        StringLiteral(),
+                        Sequence("true", TestNot(LetterOrDigit())),
+                        Sequence("false", TestNot(LetterOrDigit())),
+                        Sequence("null", TestNot(LetterOrDigit()))
+                ),
+                Spacing()
+        );
+    }
+
+    /** Matches a hex, octal or decimal numeral (tried in that order) with an optional l/L suffix. */
+    @SuppressSubnodes
+    Rule IntegerLiteral() {
+        return Sequence(FirstOf(HexNumeral(), OctalNumeral(), DecimalNumeral()), Optional(AnyOf("lL")));
+    }
+
+    /** Matches a single '0', or a digit 1-9 followed by any number of digits. */
+    @SuppressSubnodes
+    Rule DecimalNumeral() {
+        return FirstOf('0', Sequence(CharRange('1', '9'), ZeroOrMore(Digit())));
+    }
+
+    /** Matches '0', then x or X, then one or more hex digits. */
+    @SuppressSubnodes
+
+    @MemoMismatches
+    Rule HexNumeral() {
+        return Sequence('0', IgnoreCase('x'), OneOrMore(HexDigit()));
+    }
+
+    /** Matches one character out of a-f, A-F or 0-9. */
+    Rule HexDigit() {
+        return FirstOf(CharRange('a', 'f'), CharRange('A', 'F'), CharRange('0', '9'));
+    }
+
+    /** Matches '0' followed by one or more digits 0-7. */
+    @SuppressSubnodes
+    Rule OctalNumeral() {
+        return Sequence('0', OneOrMore(CharRange('0', '7')));
+    }
+
+    /** Matches a hexadecimal or a decimal floating point literal (tried in that order). */
+    Rule FloatLiteral() {
+        return FirstOf(HexFloat(), DecimalFloat());
+    }
+
+    /**
+     * Matches a decimal floating point literal in one of four shapes:
+     * digits '.' [digits] [exponent] [suffix], '.' digits [exponent] [suffix],
+     * digits exponent [suffix], or digits [exponent] suffix,
+     * where the suffix is one of f, F, d, D.
+     */
+    @SuppressSubnodes
+    Rule DecimalFloat() {
+        return FirstOf(
+                Sequence(OneOrMore(Digit()), '.', ZeroOrMore(Digit()), Optional(Exponent()), Optional(AnyOf("fFdD"))),
+                Sequence('.', OneOrMore(Digit()), Optional(Exponent()), Optional(AnyOf("fFdD"))),
+                Sequence(OneOrMore(Digit()), Exponent(), Optional(AnyOf("fFdD"))),
+                Sequence(OneOrMore(Digit()), Optional(Exponent()), AnyOf("fFdD"))
+        );
+    }
+
+    /** Matches e or E, an optional sign, and one or more digits. */
+    Rule Exponent() {
+        return Sequence(AnyOf("eE"), Optional(AnyOf("+-")), OneOrMore(Digit()));
+    }
+
+    /** Matches one decimal digit 0-9. */
+    Rule Digit() {
+        return CharRange('0', '9');
+    }
+
+    /** Matches a hex significand, a mandatory binary exponent and an optional f/F/d/D suffix. */
+    @SuppressSubnodes
+    Rule HexFloat() {
+        return Sequence(HexSignificant(), BinaryExponent(), Optional(AnyOf("fFdD")));
+    }
+
+    /**
+     * Matches the significand of a hex float: either 0x/0X, optional hex digits, '.', and
+     * one or more hex digits; or a HexNumeral with an optional trailing '.'.
+     */
+    Rule HexSignificant() {
+        return FirstOf(
+                Sequence(FirstOf("0x", "0X"), ZeroOrMore(HexDigit()), '.', OneOrMore(HexDigit())),
+                Sequence(HexNumeral(), Optional('.'))
+        );
+    }
+
+    /** Matches p or P, an optional sign, and one or more decimal digits. */
+    Rule BinaryExponent() {
+        return Sequence(AnyOf("pP"), Optional(AnyOf("+-")), OneOrMore(Digit()));
+    }
+
+    /** Matches a single-quoted Escape or any one character other than a quote or backslash. */
+    Rule CharLiteral() {
+        return Sequence(
+                '\'',
+                FirstOf(Escape(), Sequence(TestNot(AnyOf("'\\")), ANY)).suppressSubnodes(),
+                '\''
+        );
+    }
+
+    /**
+     * Matches a double-quoted string whose content is any number of Escapes or characters
+     * other than CR, LF, double quote and backslash.
+     */
+    Rule StringLiteral() {
+        return Sequence(
+                '"',
+                ZeroOrMore(
+                        FirstOf(
+                                Escape(),
+                                Sequence(TestNot(AnyOf("\r\n\"\\")), ANY)
+                        )
+                ).suppressSubnodes(),
+                '"'
+        );
+    }
+
+    /** Matches a backslash followed by a simple escape character, an octal or a unicode escape. */
+    Rule Escape() {
+        return Sequence('\\', FirstOf(AnyOf("btnfr\"\'\\"), OctalEscape(), UnicodeEscape()));
+    }
+
+    /**
+     * Matches one to three octal digits; the three-digit form requires a first digit of 0-3.
+     * Longer forms are listed first.
+     */
+    Rule OctalEscape() {
+        return FirstOf(
+                Sequence(CharRange('0', '3'), CharRange('0', '7'), CharRange('0', '7')),
+                Sequence(CharRange('0', '7'), CharRange('0', '7')),
+                CharRange('0', '7')
+        );
+    }
+
+    /** Matches one or more 'u' characters followed by exactly four hex digits. */
+    Rule UnicodeEscape() {
+        return Sequence(OneOrMore('u'), HexDigit(), HexDigit(), HexDigit(), HexDigit());
+    }
+
+    //-------------------------------------------------------------------------
+    // JLS 3.11-12 Separators, Operators
+    //-------------------------------------------------------------------------
+
+    // Each terminal is built by Terminal(String) or Terminal(String, Rule); the optional
+    // second argument names what must NOT directly follow the text for it to match.
+    final Rule AT = Terminal("@");
+    final Rule AND = Terminal("&", AnyOf("=&"));
+    // Operator and separator terminals. Where a second argument is given, the operator only
+    // matches if it is not directly followed by that rule, which keeps a short operator
+    // from matching the start of a longer one (e.g. ">" inside ">=" or ">>").
+    final Rule ANDAND = Terminal("&&");
+    final Rule ANDEQU = Terminal("&=");
+    final Rule BANG = Terminal("!", Ch('='));
+    final Rule BSR = Terminal(">>>", Ch('='));
+    final Rule BSREQU = Terminal(">>>=");
+    final Rule COLON = Terminal(":");
+    final Rule COMMA = Terminal(",");
+    final Rule DEC = Terminal("--");
+    final Rule DIV = Terminal("/", Ch('='));
+    final Rule DIVEQU = Terminal("/=");
+    final Rule DOT = Terminal(".");
+    final Rule ELLIPSIS = Terminal("...");
+    final Rule EQU = Terminal("=", Ch('='));
+    final Rule EQUAL = Terminal("==");
+    final Rule GE = Terminal(">=");
+    final Rule GT = Terminal(">", AnyOf("=>"));
+    final Rule HAT = Terminal("^", Ch('='));
+    final Rule HATEQU = Terminal("^=");
+    final Rule INC = Terminal("++");
+    final Rule LBRK = Terminal("[");
+    final Rule LE = Terminal("<=");
+    final Rule LPAR = Terminal("(");
+    // LPOINT is a bare "<" with no follow restriction, unlike LT below.
+    final Rule LPOINT = Terminal("<");
+    final Rule LT = Terminal("<", AnyOf("=<"));
+    final Rule LWING = Terminal("{");
+    final Rule MINUS = Terminal("-", AnyOf("=-"));
+    final Rule MINUSEQU = Terminal("-=");
+    final Rule MOD = Terminal("%", Ch('='));
+    final Rule MODEQU = Terminal("%=");
+    final Rule NOTEQUAL = Terminal("!=");
+    final Rule OR = Terminal("|", AnyOf("=|"));
+    final Rule OREQU = Terminal("|=");
+    final Rule OROR = Terminal("||");
+    final Rule PLUS = Terminal("+", AnyOf("=+"));
+    final Rule PLUSEQU = Terminal("+=");
+    final Rule QUERY = Terminal("?");
+    final Rule RBRK = Terminal("]");
+    final Rule RPAR = Terminal(")");
+    // RPOINT is a bare ">" with no follow restriction, unlike GT above.
+    final Rule RPOINT = Terminal(">");
+    final Rule RWING = Terminal("}");
+    final Rule SEMI = Terminal(";");
+    final Rule SL = Terminal("<<", Ch('='));
+    final Rule SLEQU = Terminal("<<=");
+    final Rule SR = Terminal(">>", AnyOf("=>"));
+    final Rule SREQU = Terminal(">>=");
+    final Rule STAR = Terminal("*", Ch('='));
+    final Rule STAREQU = Terminal("*=");
+    final Rule TILDA = Terminal("~");
+
+    //-------------------------------------------------------------------------
+    // helper methods
+
//-------------------------------------------------------------------------
+
+    /**
+     * Wraps the rule the superclass creates for a char literal so that it is marked
+     * with suppressNode().
+     */
+    @Override
+    protected Rule fromCharLiteral(char c) {
+        // turn off creation of parse tree nodes for single characters
+        return super.fromCharLiteral(c).suppressNode();
+    }
+
+    /**
+     * Builds a terminal: the given text followed by Spacing, labelled with the text
+     * in single quotes.
+     */
+    @SuppressNode
+    @DontLabel
+    Rule Terminal(String string) {
+        return Sequence(string, Spacing()).label('\'' + string + '\'');
+    }
+
+    /**
+     * Builds a terminal like {@code Terminal(String)}, but one that only matches when the
+     * text is not directly followed by {@code mustNotFollow}.
+     */
+    @SuppressNode
+    @DontLabel
+    Rule Terminal(String string, Rule mustNotFollow) {
+        return Sequence(string, TestNot(mustNotFollow), Spacing()).label('\'' + string + '\'');
+    }
+
+}
diff --git a/src/main/java/org/parboiled/examples/java/JavaParserProfiler.java b/src/main/java/org/parboiled/examples/java/JavaParserProfiler.java
new file mode 100644
index 0000000..1072937
--- /dev/null
+++ b/src/main/java/org/parboiled/examples/java/JavaParserProfiler.java
@@ -0,0 +1,46 @@
+/*
+ * Copyright (C) 2009-2011 Mathias Doenitz
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.parboiled.examples.java;
+
+import org.parboiled.parserunners.ProfilingParseRunner;
+import org.parboiled.Rule;
+import org.parboiled.support.ParsingResult;
+
+/**
+ * Variant of {@link Main} that runs every parse through one shared
+ * {@link ProfilingParseRunner} and prints that runner's report once
+ * {@link Main#run(String[])} has finished.
+ */
+public class JavaParserProfiler extends Main {
+
+    // Created lazily on the first call to run(Rule, String); stays null if nothing is parsed.
+    private ProfilingParseRunner parseRunner;
+
+    public static void main(String[] args) {
+        new JavaParserProfiler().run(args);
+    }
+
+    /**
+     * Runs the performance test of the superclass and then prints the profiling report.
+     * If no source text was parsed there is no runner and therefore no report; a short
+     * notice is printed instead of failing with a NullPointerException.
+     *
+     * @param args command line arguments, passed on unchanged to {@link Main#run(String[])}
+     */
+    @Override
+    protected void run(String[] args) {
+        super.run(args);
+        if (parseRunner == null) {
+            // run(Rule, String) was never invoked, e.g. because no .java file was found
+            System.out.println();
+            System.out.println("No source file was parsed, no profiling report available.");
+            return;
+        }
+        ProfilingParseRunner.Report report = parseRunner.getReport();
+        System.out.println();
+        System.out.println(report.print());
+    }
+
+    /**
+     * Parses the given text with the shared profiling runner, creating the runner for
+     * {@code rootRule} on first use. Later calls reuse that runner and ignore their
+     * {@code rootRule} argument.
+     *
+     * @param rootRule   the rule the runner is created for on the first call
+     * @param sourceText the text to parse
+     * @return the result returned by the profiling runner
+     */
+    @Override
+    protected ParsingResult run(Rule rootRule, String sourceText) {
+        if (parseRunner == null) {
+            parseRunner = new ProfilingParseRunner(rootRule);
+        }
+        return parseRunner.run(sourceText);
+    }
+
+}
diff --git a/src/main/java/org/parboiled/examples/java/Main.java b/src/main/java/org/parboiled/examples/java/Main.java
new file mode 100644
index 0000000..0ade9ec
--- /dev/null
+++ b/src/main/java/org/parboiled/examples/java/Main.java
@@ -0,0 +1,171 @@
+/*
+ * Copyright (C) 2009-2011 Mathias Doenitz
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.parboiled.examples.java;
+
+import static org.parboiled.common.Preconditions.*;
+import org.parboiled.Parboiled;
+import org.parboiled.parserunners.ReportingParseRunner;
+import org.parboiled.Rule;
+import org.parboiled.support.ParsingResult;
+
+import java.io.*;
+import java.nio.charset.Charset;
+import java.nio.charset.StandardCharsets;
+import java.util.ArrayList;
+import java.util.List;
+
+import static org.parboiled.errors.ErrorUtils.printParseErrors;
+
+/**
+ * Performance test for {@link JavaParser}: times parser creation, collects all
+ * {@code .java} files below a base directory, parses each of them and prints
+ * throughput figures. Subclasses can swap the parse runner by overriding
+ * {@link #run(Rule, String)}.
+ */
+public class Main {
+
+    public static void main(String[] args) {
+        new Main().run(args);
+    }
+
+    /**
+     * Runs the complete performance test. Terminates the JVM with exit code 1 as soon as
+     * a file cannot be read, a parse throws, or a parse reports errors.
+     *
+     * @param args if exactly one argument is given and it names an existing path, that
+     *             path is the base directory; otherwise the current directory is used
+     */
+    @SuppressWarnings({"ConstantConditions"})
+    protected void run(String[] args) {
+        System.out.println("parboiled Java parser, performance test");
+        System.out.println("---------------------------------------");
+
+        System.out.print("Creating parser... :");
+        long start = System.currentTimeMillis();
+        Parboiled.createParser(JavaParser.class);
+        time(start);
+
+        System.out.print("Creating 100 more parser instances... :");
+        JavaParser parser = null;
+        start = System.currentTimeMillis();
+        for (int i = 0; i < 100; i++) {
+            parser = Parboiled.createParser(JavaParser.class);
+        }
+        time(start);
+
+        System.out.print("Creating 100 more parser instances using BaseParser.newInstance() ... :");
+        start = System.currentTimeMillis();
+        for (int i = 0; i < 100; i++) {
+            parser = parser.newInstance();
+        }
+        time(start);
+
+        start = System.currentTimeMillis();
+        File baseDir = args.length == 1 ? new File(args[0]) : null;
+        if (baseDir == null || !baseDir.exists()) baseDir = new File(".");
+        System.out.printf("Retrieving file list from '%s'", baseDir);
+        List<File> sources = recursiveGetAllJavaSources(baseDir, new ArrayList<File>());
+        time(start);
+
+        System.out.printf("Parsing all %s given java sources", sources.size());
+        Rule rootRule = parser.CompilationUnit().suppressNode(); // we want to see the parse-tree-less performance
+        start = System.currentTimeMillis();
+        long lines = 0, characters = 0;
+        for (File sourceFile : sources) {
+            long dontCountStart = System.currentTimeMillis();
+            String sourceText = readAllText(sourceFile);
+            start += System.currentTimeMillis() - dontCountStart; // do not count the time for reading the text file
+
+            if (sourceText == null) {
+                // readAllText returns null when the file vanished or cannot be opened
+                System.out.printf("\nCould not read file '%s'\n", sourceFile);
+                System.exit(1);
+            }
+
+            ParsingResult result = null;
+            try {
+                result = run(rootRule, sourceText);
+            } catch (Exception e) {
+                System.out.printf("\nException while parsing file '%s':\n%s", sourceFile, e);
+                System.exit(1);
+            }
+            if (!result.matched) {
+                System.out.printf("\nParse error(s) in file '%s':\n%s", sourceFile, printParseErrors(result));
+                System.exit(1);
+            } else {
+                System.out.print('.');
+            }
+            lines += result.inputBuffer.getLineCount();
+            characters += sourceText.length();
+        }
+        long time = time(start);
+        // The measured time is 0 when nothing (or very little) was parsed; clamp it so the
+        // long divisions below cannot throw ArithmeticException.
+        long divisor = Math.max(1, time);
+
+        System.out.println("Parsing performance:");
+        System.out.printf(" %6d Files -> %6.2f Files/sec\n", sources.size(), sources.size() * 1000.0 / divisor);
+        System.out.printf(" %6d Lines -> %6d Lines/sec\n", lines, lines * 1000 / divisor);
+        System.out.printf(" %6d Chars -> %6d Chars/sec\n", characters, characters * 1000 / divisor);
+    }
+
+    /**
+     * Parses one source text. This implementation creates a new
+     * {@link ReportingParseRunner} for every call.
+     *
+     * @param rootRule   the rule to start parsing with
+     * @param sourceText the text to parse
+     * @return the parsing result
+     */
+    protected ParsingResult run(Rule rootRule, String sourceText) {
+        return new ReportingParseRunner(rootRule).run(sourceText);
+    }
+
+    /** Prints the milliseconds elapsed since {@code start} and returns them. */
+    private static long time(long start) {
+        long end = System.currentTimeMillis();
+        System.out.printf(" %s ms\n", end - start);
+        return end - start;
+    }
+
+    // Accepts directories (so the search can descend) and files whose name ends in ".java".
+    private static final FileFilter fileFilter = new FileFilter() {
+        public boolean accept(File file) {
+            return file.isDirectory() || file.getName().endsWith(".java");
+        }
+    };
+
+    /**
+     * Adds {@code file} to {@code list} if it is not a directory, otherwise descends into
+     * every child accepted by {@link #fileFilter}. A directory that cannot be listed
+     * contributes nothing.
+     *
+     * @return {@code list}, for call chaining
+     */
+    private static List<File> recursiveGetAllJavaSources(File file, ArrayList<File> list) {
+        if (file.isDirectory()) {
+            File[] children = file.listFiles(fileFilter);
+            if (children != null) { // listFiles returns null on I/O errors
+                for (File f : children) {
+                    recursiveGetAllJavaSources(f, list);
+                }
+            }
+        } else {
+            list.add(file);
+        }
+        return list;
+    }
+
+    /**
+     * Reads the whole file as UTF-8 text.
+     *
+     * @return the file content, or null if the file cannot be opened
+     */
+    public static String readAllText(File file) {
+        checkArgNotNull(file, "file");
+        return readAllText(file, StandardCharsets.UTF_8);
+    }
+
+    /**
+     * Reads the whole file as text in the given charset.
+     *
+     * @return the file content, or null if the file cannot be opened
+     * @throws RuntimeException wrapping the IOException if reading fails
+     */
+    public static String readAllText(File file, Charset charset) {
+        checkArgNotNull(file, "file");
+        checkArgNotNull(charset, "charset");
+        try {
+            return readAllText(new FileInputStream(file), charset);
+        }
+        catch (FileNotFoundException e) {
+            return null;
+        }
+    }
+
+    /**
+     * Reads the whole stream as text in the given charset and closes the stream.
+     *
+     * @return the stream content, or null if {@code stream} is null
+     * @throws RuntimeException wrapping the IOException if reading fails
+     */
+    public static String readAllText(InputStream stream, Charset charset) {
+        checkArgNotNull(charset, "charset");
+        if (stream == null) return null;
+        BufferedReader reader = new BufferedReader(new InputStreamReader(stream, charset));
+        StringWriter writer = new StringWriter();
+        copyAll(reader, writer);
+        return writer.toString();
+    }
+
+    /**
+     * Copies everything from {@code reader} to {@code writer} and closes both, also when
+     * the copy fails part-way.
+     *
+     * @throws RuntimeException wrapping the IOException if reading, writing or closing fails
+     */
+    public static void copyAll(Reader reader, Writer writer) {
+        checkArgNotNull(reader, "reader");
+        checkArgNotNull(writer, "writer");
+        // try-with-resources guarantees both ends are closed even if read/write throws
+        try (Reader in = reader; Writer out = writer) {
+            char[] data = new char[4096]; // copy in chunks of 4K
+            int count;
+            while ((count = in.read(data)) >= 0) out.write(data, 0, count);
+        }
+        catch (IOException e) {
+            throw new RuntimeException(e);
+        }
+    }
+
+}
\ No newline at end of file
diff --git a/src/main/java/org/parboiled/examples/sparql/SparqlParser.java b/src/main/java/org/parboiled/examples/sparql/SparqlParser.java
new file mode 100644
index 0000000..3038960
--- /dev/null
+++ b/src/main/java/org/parboiled/examples/sparql/SparqlParser.java
@@ -0,0 +1,796 @@
+/*
+ * Copyright (c) 2009 Ken Wenzel
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction,
including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+package org.parboiled.examples.sparql;
+
+import org.parboiled.BaseParser;
+import org.parboiled.Rule;
+
+/**
+ * SPARQL Parser
+ * <p>
+ * Each public method builds the rule for one grammar production out of other rules.
+ * {@link #Query()} matches a complete query up to the end of input.
+ *
+ * @author Ken Wenzel, adapted by Mathias Doenitz
+ */
+@SuppressWarnings({"InfiniteRecursion"})
+public class SparqlParser extends BaseParser {
+    //
+    /**
+     * Matches a whole query: leading whitespace, the prologue, one of the four query
+     * forms (SELECT, CONSTRUCT, DESCRIBE, ASK) and then the end of input.
+     */
+    public Rule Query() {
+        return Sequence(WS(), Prologue(), FirstOf(SelectQuery(),
+                ConstructQuery(), DescribeQuery(), AskQuery()), EOI);
+    }
+
+    /** Matches an optional BASE declaration followed by any number of PREFIX declarations. */
+    public Rule Prologue() {
+        return Sequence(Optional(BaseDecl()), ZeroOrMore(PrefixDecl()));
+    }
+
+    public Rule BaseDecl() {
+        return Sequence(BASE(), IRI_REF());
+    }
+
+    public Rule PrefixDecl() {
+        return Sequence(PREFIX(), PNAME_NS(), IRI_REF());
+    }
+
+    /**
+     * Matches SELECT, an optional DISTINCT or REDUCED, either one or more variables or
+     * an asterisk, any number of dataset clauses, the where clause and the solution modifier.
+     */
+    public Rule SelectQuery() {
+        return Sequence(SELECT(), Optional(FirstOf(DISTINCT(),
+                REDUCED())), FirstOf(OneOrMore(Var()), ASTERISK()),
+                ZeroOrMore(DatasetClause()), WhereClause(), SolutionModifier());
+    }
+
+    public Rule ConstructQuery() {
+        return Sequence(CONSTRUCT(), ConstructTemplate(),
+                ZeroOrMore(DatasetClause()), WhereClause(), SolutionModifier());
+    }
+
+    /** Like the other query forms, except that the where clause is optional here. */
+    public Rule DescribeQuery() {
+        return Sequence(DESCRIBE(), FirstOf(OneOrMore(VarOrIRIref()),
+                ASTERISK()), ZeroOrMore(DatasetClause()),
+                Optional(WhereClause()), SolutionModifier());
+    }
+
+    public Rule AskQuery() {
+        return Sequence(ASK(), ZeroOrMore(DatasetClause()), WhereClause());
+    }
+
+    /** Matches FROM followed by a default graph clause or a NAMED graph clause. */
+    public Rule DatasetClause() {
+        return Sequence(FROM(), FirstOf(DefaultGraphClause(),
+                NamedGraphClause()));
+    }
+
+    public Rule DefaultGraphClause() {
+        return SourceSelector();
+    }
+
+    public Rule NamedGraphClause() {
+        return Sequence(NAMED(), SourceSelector());
+    }
+
+    public Rule SourceSelector() {
+        return IriRef();
+    }
+
+    /** Matches a group graph pattern, optionally preceded by WHERE. */
+    public Rule WhereClause() {
+        return Sequence(Optional(WHERE()), GroupGraphPattern());
+    }
+
+    /** Matches an optional order clause followed by optional limit/offset clauses. */
+    public Rule SolutionModifier() {
+        return Sequence(Optional(OrderClause()), Optional(LimitOffsetClauses()));
+    }
+
+    /** Matches LIMIT and OFFSET in either order; whichever comes second is optional. */
+    public Rule LimitOffsetClauses() {
+        return FirstOf(Sequence(LimitClause(), Optional(OffsetClause())),
+                Sequence(OffsetClause(), Optional(LimitClause())));
+    }
+
+    public Rule OrderClause() {
+        return Sequence(ORDER(), BY(), OneOrMore(OrderCondition()));
+    }
+
+    /** Matches ASC or DESC with a bracketted expression, or else a constraint or a variable. */
+    public Rule OrderCondition() {
+        return FirstOf(
+                Sequence(FirstOf(ASC(), DESC()), BrackettedExpression()),
+                FirstOf(Constraint(), Var()));
+    }
+
+    public Rule LimitClause() {
+        return Sequence(LIMIT(), INTEGER());
+    }
+
+    public Rule OffsetClause() {
+        return Sequence(OFFSET(), INTEGER());
+    }
+
+    /**
+     * Matches a brace-enclosed pattern: an optional triples block, then any number of
+     * (graph pattern or filter, optional dot, optional triples block) groups.
+     */
+    public Rule GroupGraphPattern() {
+        return Sequence(OPEN_CURLY_BRACE(), Optional(TriplesBlock()),
+                ZeroOrMore(Sequence(
+                        FirstOf(GraphPatternNotTriples(), Filter()),
+                        Optional(DOT()), Optional(TriplesBlock()))),
+                CLOSE_CURLY_BRACE());
+    }
+
+    /** Matches one TriplesSameSubject, optionally followed by a dot and a further (optional) block. */
+    public Rule TriplesBlock() {
+        return Sequence(TriplesSameSubject(), Optional(Sequence(DOT(),
+                Optional(TriplesBlock()))));
+    }
+
+    public Rule GraphPatternNotTriples() {
+        return FirstOf(OptionalGraphPattern(), GroupOrUnionGraphPattern(),
+                GraphGraphPattern());
+    }
+
+    public Rule OptionalGraphPattern() {
+        return Sequence(OPTIONAL(), GroupGraphPattern());
+    }
+
+    public Rule GraphGraphPattern() {
+        return Sequence(GRAPH(), VarOrIRIref(), GroupGraphPattern());
+    }
+
+    /** Matches one group graph pattern followed by any number of UNION-joined further ones. */
+    public Rule GroupOrUnionGraphPattern() {
+        return Sequence(GroupGraphPattern(), ZeroOrMore(Sequence(UNION(),
+                GroupGraphPattern())));
+    }
+
+    public Rule Filter() {
+        return Sequence(FILTER(), Constraint());
+    }
+
+    public Rule Constraint() {
+        return FirstOf(BrackettedExpression(), BuiltInCall(), FunctionCall());
+    }
+
+    public Rule FunctionCall() {
+        return Sequence(IriRef(), ArgList());
+    }
+
+    /** Matches an empty pair of parentheses or a parenthesised, comma-separated expression list. */
+    public Rule ArgList() {
+        return FirstOf(Sequence(OPEN_BRACE(), CLOSE_BRACE()), Sequence(
+                OPEN_BRACE(), Expression(), ZeroOrMore(Sequence(COMMA(),
+                        Expression())), CLOSE_BRACE()));
+    }
+
+    public Rule ConstructTemplate() {
+        return Sequence(OPEN_CURLY_BRACE(), Optional(ConstructTriples()),
+                CLOSE_CURLY_BRACE());
+    }
+
+    public Rule ConstructTriples() {
+        return Sequence(TriplesSameSubject(), Optional(Sequence(DOT(),
+                Optional(ConstructTriples()))));
+    }
+
+    /**
+     * Matches either a variable/term with a non-empty property list, or a triples node
+     * with a possibly empty property list.
+     */
+    public Rule TriplesSameSubject() {
+        return FirstOf(Sequence(VarOrTerm(), PropertyListNotEmpty()), Sequence(
+                TriplesNode(), PropertyList()));
+    }
+
+    /**
+     * Matches verb and object list, followed by any number of semicolons, each of which
+     * may be followed by a further verb and object list.
+     */
+    public Rule PropertyListNotEmpty() {
+        return Sequence(Verb(), ObjectList(), ZeroOrMore(Sequence(SEMICOLON(),
+                Optional(Sequence(Verb(), ObjectList())))));
+    }
+
+    public Rule PropertyList() {
+        return Optional(PropertyListNotEmpty());
+    }
+
+    public Rule ObjectList() {
+        return Sequence(Object_(), ZeroOrMore(Sequence(COMMA(), Object_())));
+    }
+
+    public Rule Object_() {
+        return GraphNode();
+    }
+
+    public Rule Verb() {
+        return FirstOf(VarOrIRIref(), A());
+    }
+
+    public Rule TriplesNode() {
+        return FirstOf(Collection(), BlankNodePropertyList());
+    }
+
+    public Rule BlankNodePropertyList() {
+        return Sequence(OPEN_SQUARE_BRACE(), PropertyListNotEmpty(),
+                CLOSE_SQUARE_BRACE());
+    }
+
+    /** Matches one or more graph nodes enclosed in parentheses. */
+    public Rule Collection() {
+        return Sequence(OPEN_BRACE(), OneOrMore(GraphNode()), CLOSE_BRACE());
+    }
+
+    public Rule GraphNode() {
+        return FirstOf(VarOrTerm(), TriplesNode());
+    }
+
+    public Rule VarOrTerm() {
+        return FirstOf(Var(), GraphTerm());
+    }
+
+    public Rule VarOrIRIref() {
+        return FirstOf(Var(), IriRef());
+    }
+
+    public Rule Var() {
+        return FirstOf(VAR1(), VAR2());
+    }
+
+    /**
+     * Matches an IRI reference, an RDF literal, a numeric or boolean literal, a blank node,
+     * or an empty pair of parentheses.
+     */
+    public Rule GraphTerm() {
+        return FirstOf(IriRef(), RdfLiteral(), NumericLiteral(),
+                BooleanLiteral(), BlankNode(), Sequence(OPEN_BRACE(),
+                        CLOSE_BRACE()));
+    }
+
+    public Rule Expression() {
+        return ConditionalOrExpression();
+    }
+
+    /** Matches one or more and-expressions joined by OR. */
+    public Rule ConditionalOrExpression() {
+        return Sequence(ConditionalAndExpression(), ZeroOrMore(Sequence(OR(),
+                ConditionalAndExpression())));
+    }
+
+    /** Matches one or more value-logical expressions joined by AND. */
+    public Rule ConditionalAndExpression() {
+        return Sequence(ValueLogical(), ZeroOrMore(Sequence(AND(),
+                ValueLogical())));
+    }
+
+    public Rule ValueLogical() {
+        return RelationalExpression();
+    }
+
+    /**
+     * Matches a numeric expression, optionally followed by one comparison operator and a
+     * second numeric expression.
+     */
+    public Rule RelationalExpression() {
+        return Sequence(NumericExpression(), Optional(FirstOf(//
+                Sequence(EQUAL(), NumericExpression()), //
+                Sequence(NOT_EQUAL(), NumericExpression()), //
+                Sequence(LESS(), NumericExpression()), //
+                Sequence(GREATER(), NumericExpression()), //
+                Sequence(LESS_EQUAL(), NumericExpression()), //
+                Sequence(GREATER_EQUAL(), NumericExpression()) //
+                ) //
+        ));
+    }
+
+    public Rule NumericExpression() {
+        return AdditiveExpression();
+    }
+
+    /**
+     * Matches a multiplicative expression followed by any number of additions or
+     * subtractions. A directly following signed numeric literal is also accepted as a
+     * continuation, without a separate PLUS or MINUS.
+     */
+    public Rule AdditiveExpression() {
+        return Sequence(MultiplicativeExpression(), //
+                ZeroOrMore(FirstOf(
+                        Sequence(PLUS(), MultiplicativeExpression()), //
+                        Sequence(MINUS(), MultiplicativeExpression()), //
+                        NumericLiteralPositive(), NumericLiteralNegative()) //
+                ));
+    }
+
+    /** Matches a unary expression followed by any number of multiplications or divisions. */
+    public Rule MultiplicativeExpression() {
+        return Sequence(UnaryExpression(), ZeroOrMore(FirstOf(Sequence(
+                ASTERISK(), UnaryExpression()), Sequence(DIVIDE(),
+                UnaryExpression()))));
+    }
+
+    /** Matches a primary expression, optionally prefixed by NOT, PLUS or MINUS. */
+    public Rule UnaryExpression() {
+        return FirstOf(Sequence(NOT(), PrimaryExpression()), Sequence(PLUS(),
+                PrimaryExpression()), Sequence(MINUS(), PrimaryExpression()),
+                PrimaryExpression());
+    }
+
+    public Rule PrimaryExpression() {
+        return FirstOf(BrackettedExpression(), BuiltInCall(),
+                IriRefOrFunction(), RdfLiteral(), NumericLiteral(),
+                BooleanLiteral(), Var());
+    }
+
+    /** Matches an expression enclosed in parentheses. */
+    public Rule BrackettedExpression() {
+        return Sequence(OPEN_BRACE(), Expression(), CLOSE_BRACE());
+    }
+
+    /**
+     * Matches a call of one of the built-in functions. LANGMATCHES and SAMETERM take two
+     * expressions, BOUND takes a variable, REGEX is handled by RegexExpression, and the
+     * remaining functions take one expression.
+     */
+    public Rule BuiltInCall() {
+        return FirstOf(
+                Sequence(STR(), OPEN_BRACE(), Expression(), CLOSE_BRACE()),
+                Sequence(LANG(), OPEN_BRACE(), Expression(), CLOSE_BRACE()),
+                Sequence(LANGMATCHES(), OPEN_BRACE(), Expression(), COMMA(),
+                        Expression(), CLOSE_BRACE()),
+                Sequence(DATATYPE(), OPEN_BRACE(), Expression(), CLOSE_BRACE()),
+                Sequence(BOUND(), OPEN_BRACE(), Var(), CLOSE_BRACE()),
+                Sequence(SAMETERM(), OPEN_BRACE(), Expression(), COMMA(),
+                        Expression(), CLOSE_BRACE()),
+                Sequence(ISIRI(), OPEN_BRACE(), Expression(), CLOSE_BRACE()),
+                Sequence(ISURI(), OPEN_BRACE(), Expression(), CLOSE_BRACE()),
+                Sequence(ISBLANK(), OPEN_BRACE(), Expression(), CLOSE_BRACE()),
+                Sequence(ISLITERAL(), OPEN_BRACE(), Expression(), CLOSE_BRACE()),
+                RegexExpression());
+    }
+
+    /** Matches REGEX with two expressions and an optional third one. */
+    public Rule RegexExpression() {
+        return Sequence(REGEX(), OPEN_BRACE(), Expression(), COMMA(),
+                Expression(), Optional(Sequence(COMMA(), Expression())),
+                CLOSE_BRACE());
+    }
+
+    /** Matches an IRI reference, optionally followed by an argument list. */
+    public Rule IriRefOrFunction() {
+        return Sequence(IriRef(), Optional(ArgList()));
+    }
+
+    /** Matches a string, optionally followed by a language tag or by REFERENCE and an IRI reference. */
+    public Rule RdfLiteral() {
+        return Sequence(String(), Optional(FirstOf(LANGTAG(), Sequence(
+                REFERENCE(), IriRef()))));
+    }
+
+    public Rule NumericLiteral() {
+        return FirstOf(NumericLiteralUnsigned(), NumericLiteralPositive(),
+                NumericLiteralNegative());
+    }
+
+    // In the three rules below DOUBLE is tried before DECIMAL and DECIMAL before INTEGER.
+    public Rule NumericLiteralUnsigned() {
+        return FirstOf(DOUBLE(), DECIMAL(), INTEGER());
+    }
+
+    public Rule NumericLiteralPositive() {
+        return FirstOf(DOUBLE_POSITIVE(), DECIMAL_POSITIVE(),
+                INTEGER_POSITIVE());
+    }
+
+    public Rule NumericLiteralNegative() {
+        return FirstOf(DOUBLE_NEGATIVE(), DECIMAL_NEGATIVE(),
+                INTEGER_NEGATIVE());
+    }
+
+    public Rule BooleanLiteral() {
+        return FirstOf(TRUE(), FALSE());
+    }
+
+    /** Matches any of the four string literal forms; each long form is tried before its short form. */
+    public Rule String() {
+        return FirstOf(STRING_LITERAL_LONG1(), STRING_LITERAL1(),
+                STRING_LITERAL_LONG2(), STRING_LITERAL2());
+    }
+
+    public Rule IriRef() {
+        return FirstOf(IRI_REF(), PrefixedName());
+    }
+
+    public Rule PrefixedName() {
+        return FirstOf(PNAME_LN(), PNAME_NS());
+    }
+
+    /** Matches a blank node label or an empty pair of square brackets. */
+    public Rule BlankNode() {
+        return FirstOf(BLANK_NODE_LABEL(), Sequence(OPEN_SQUARE_BRACE(),
+                CLOSE_SQUARE_BRACE()));
+    }
+    //
+
+    //
+
+    /** Matches any (possibly empty) run of comments and whitespace characters. */
+    public Rule WS() {
+        return ZeroOrMore(FirstOf(COMMENT(), WS_NO_COMMENT()));
+    }
+
+    /** Matches one space, tab, form feed or end of line. */
+    public Rule WS_NO_COMMENT() {
+        return FirstOf(Ch(' '), Ch('\t'), Ch('\f'), EOL());
+    }
+
+    /** Matches an optional prefix followed by a colon. */
+    public Rule PNAME_NS() {
+        return Sequence(Optional(PN_PREFIX()), ChWS(':'));
+    }
+
+    public Rule PNAME_LN() {
+        return Sequence(PNAME_NS(), PN_LOCAL());
+    }
+
+    // Keyword rules. Each one delegates to StringIgnoreCaseWS, which is defined outside
+    // this excerpt; going by its name it presumably matches the keyword case-insensitively
+    // and then skips whitespace - TODO confirm against its definition.
+    public Rule BASE() {
+        return StringIgnoreCaseWS("BASE");
+    }
+
+    public Rule PREFIX() {
+        return StringIgnoreCaseWS("PREFIX");
+    }
+
+    public Rule SELECT() {
+        return StringIgnoreCaseWS("SELECT");
+    }
+
+    public Rule DISTINCT() {
+        return StringIgnoreCaseWS("DISTINCT");
+    }
+
+    public Rule REDUCED() {
+        return StringIgnoreCaseWS("REDUCED");
+    }
+
+    public Rule CONSTRUCT() {
+        return StringIgnoreCaseWS("CONSTRUCT");
+    }
+
+    public Rule DESCRIBE() {
+        return StringIgnoreCaseWS("DESCRIBE");
+    }
+
+    public Rule ASK() {
+        return StringIgnoreCaseWS("ASK");
+    }
+
+    public Rule FROM() {
+        return StringIgnoreCaseWS("FROM");
+    }
+
+    public Rule NAMED() {
+        return StringIgnoreCaseWS("NAMED");
+    }
+
+    public Rule WHERE() {
+        return StringIgnoreCaseWS("WHERE");
+    }
+
+    public Rule ORDER() {
+        return StringIgnoreCaseWS("ORDER");
+    }
+
+    public Rule BY() {
+        return StringIgnoreCaseWS("BY");
+    }
+
+    public Rule ASC() {
+        return StringIgnoreCaseWS("ASC");
+    }
+
+    public Rule DESC() {
+        return StringIgnoreCaseWS("DESC");
+    }
+
+    public Rule LIMIT() {
+        return StringIgnoreCaseWS("LIMIT");
+    }
+
+    public Rule OFFSET() {
+        return StringIgnoreCaseWS("OFFSET");
+    }
+
+    public Rule OPTIONAL() {
+        return StringIgnoreCaseWS("OPTIONAL");
+    }
+
+
public Rule GRAPH() {
+        return StringIgnoreCaseWS("GRAPH");
+    }
+
+    public Rule UNION() {
+        return StringIgnoreCaseWS("UNION");
+    }
+
+    public Rule FILTER() {
+        return StringIgnoreCaseWS("FILTER");
+    }
+
+    /** Matches the single lower-case character {@code a} followed by optional whitespace. */
+    public Rule A() {
+        return ChWS('a');
+    }
+
+    // Built-in function names and boolean keywords (case-insensitive, trailing whitespace skipped).
+
+    public Rule STR() {
+        return StringIgnoreCaseWS("STR");
+    }
+
+    public Rule LANG() {
+        return StringIgnoreCaseWS("LANG");
+    }
+
+    public Rule LANGMATCHES() {
+        return StringIgnoreCaseWS("LANGMATCHES");
+    }
+
+    public Rule DATATYPE() {
+        return StringIgnoreCaseWS("DATATYPE");
+    }
+
+    public Rule BOUND() {
+        return StringIgnoreCaseWS("BOUND");
+    }
+
+    public Rule SAMETERM() {
+        return StringIgnoreCaseWS("SAMETERM");
+    }
+
+    public Rule ISIRI() {
+        return StringIgnoreCaseWS("ISIRI");
+    }
+
+    public Rule ISURI() {
+        return StringIgnoreCaseWS("ISURI");
+    }
+
+    public Rule ISBLANK() {
+        return StringIgnoreCaseWS("ISBLANK");
+    }
+
+    public Rule ISLITERAL() {
+        return StringIgnoreCaseWS("ISLITERAL");
+    }
+
+    public Rule REGEX() {
+        return StringIgnoreCaseWS("REGEX");
+    }
+
+    public Rule TRUE() {
+        return StringIgnoreCaseWS("TRUE");
+    }
+
+    public Rule FALSE() {
+        return StringIgnoreCaseWS("FALSE");
+    }
+
+    /**
+     * Matches a bracketed IRI reference: {@code '<'}, any run of characters that are not one of
+     * the excluded characters (angle brackets, double quote, curly braces, pipe, caret, backslash,
+     * backtick, or anything up to and including the space character), then {@code '>'}.
+     * The opening bracket uses {@link #LESS_NO_COMMENT()} so that a {@code '#'} inside the IRI
+     * is not skipped as a comment.
+     */
+    public Rule IRI_REF() {
+        return Sequence(LESS_NO_COMMENT(),
+                ZeroOrMore(Sequence(TestNot(FirstOf(LESS_NO_COMMENT(), GREATER(), '"', OPEN_CURLY_BRACE(),
+                        CLOSE_CURLY_BRACE(), '|', '^', '\\', '`', CharRange('\u0000', '\u0020'))), ANY)),
+                GREATER());
+    }
+
+    /** Matches a labelled blank node: {@code _:} followed by a local name. */
+    public Rule BLANK_NODE_LABEL() {
+        return Sequence("_:", PN_LOCAL(), WS());
+    }
+
+    /** Matches a variable written with a leading {@code ?}. */
+    public Rule VAR1() {
+        return Sequence('?', VARNAME(), WS());
+    }
+
+    /** Matches a variable written with a leading {@code $}. */
+    public Rule VAR2() {
+        return Sequence('$', VARNAME(), WS());
+    }
+
+    /**
+     * Matches a language tag: {@code '@'}, one or more letters, then any number of
+     * {@code '-'}-separated subtags, each made of one or more letters or digits.
+     *
+     * <p>Each subtag character is a letter OR a digit ({@code FirstOf}); requiring a letter
+     * followed by a digit would reject ordinary tags such as {@code @en-US}. The hyphen is
+     * matched as a plain character so that no whitespace is accepted inside the tag.
+     */
+    public Rule LANGTAG() {
+        return Sequence('@', OneOrMore(PN_CHARS_BASE()),
+                ZeroOrMore(Sequence('-', OneOrMore(FirstOf(PN_CHARS_BASE(), DIGIT())))),
+                WS());
+    }
+
+    /** Matches one or more digits followed by optional whitespace. */
+    public Rule INTEGER() {
+        return Sequence(OneOrMore(DIGIT()), WS());
+    }
+
+    /**
+     * Matches {@code digits '.' digits*} or {@code '.' digits}, followed by optional whitespace.
+     * The decimal point is matched as a plain character: whitespace is only skipped after the
+     * complete token, never between the point and the fraction digits.
+     */
+    public Rule DECIMAL() {
+        return Sequence(FirstOf(
+                Sequence(OneOrMore(DIGIT()), '.', ZeroOrMore(DIGIT())),
+                Sequence('.', OneOrMore(DIGIT()))
+        ), WS());
+    }
+
+    /**
+     * Matches a number with an exponent: {@code digits '.' digits* exp}, {@code '.' digits exp}
+     * or {@code digits exp}, followed by optional whitespace. No whitespace is accepted inside
+     * the token.
+     */
+    public Rule DOUBLE() {
+        return Sequence(FirstOf(
+                Sequence(OneOrMore(DIGIT()), '.', ZeroOrMore(DIGIT()),
+                        EXPONENT()),
+                Sequence('.', OneOrMore(DIGIT()), EXPONENT()),
+                Sequence(OneOrMore(DIGIT()), EXPONENT())), WS());
+    }
+
+    // Signed numeric forms: the sign rule followed by the unsigned form.
+
+    public Rule INTEGER_POSITIVE() {
+        return Sequence(PLUS(), INTEGER());
+    }
+
+    public Rule DECIMAL_POSITIVE() {
+        return Sequence(PLUS(), DECIMAL());
+    }
+
+    public Rule DOUBLE_POSITIVE() {
+        return Sequence(PLUS(), DOUBLE());
+    }
+
+    public Rule INTEGER_NEGATIVE() {
+        return Sequence(MINUS(), INTEGER());
+    }
+
+    public Rule DECIMAL_NEGATIVE() {
+        return Sequence(MINUS(), DECIMAL());
+    }
+
+    public Rule DOUBLE_NEGATIVE() {
+        return Sequence(MINUS(), DOUBLE());
+    }
+
+    /**
+     * Matches an exponent: {@code e} or {@code E}, an optional sign, and one or more digits.
+     * The sign is matched as a plain character so that no whitespace is accepted between the
+     * sign and the digits.
+     */
+    public Rule EXPONENT() {
+        return Sequence(IgnoreCase('e'), Optional(AnyOf("+-")),
+                OneOrMore(DIGIT()));
+    }
+
+    /** Single-quoted string on one line; backslash escapes are handled by {@link #ECHAR()}. */
+    public Rule STRING_LITERAL1() {
+        return Sequence("'", ZeroOrMore(FirstOf(Sequence(TestNot(FirstOf("'",
+                '\\', '\n', '\r')), ANY), ECHAR())), "'", WS());
+    }
+
+    /** Double-quoted string on one line; backslash escapes are handled by {@link #ECHAR()}. */
+    public Rule STRING_LITERAL2() {
+        return Sequence('"', ZeroOrMore(FirstOf(Sequence(TestNot(AnyOf("\"\\\n\r")), ANY), ECHAR())), '"', WS());
+    }
+
+    /** Triple-single-quoted string; may contain line breaks and up to two adjacent quotes. */
+    public Rule STRING_LITERAL_LONG1() {
+        return Sequence("'''", ZeroOrMore(Sequence(
+                Optional(FirstOf("''", "'")), FirstOf(Sequence(TestNot(FirstOf(
+                "'", "\\")), ANY), ECHAR()))), "'''", WS());
+    }
+
+    /** Triple-double-quoted string; may contain line breaks and up to two adjacent quotes. */
+    public Rule STRING_LITERAL_LONG2() {
+        return Sequence("\"\"\"", ZeroOrMore(Sequence(Optional(FirstOf("\"\"", "\"")),
+                FirstOf(Sequence(TestNot(FirstOf("\"", "\\")), ANY), ECHAR()))), "\"\"\"", WS());
+    }
+
+    /** Matches a backslash followed by one of {@code t b n r f \ " '}. */
+    public Rule ECHAR() {
+        return Sequence('\\', AnyOf("tbnrf\\\"\'"));
+    }
+
+    /** Matches a base name character or an underscore. */
+    public Rule PN_CHARS_U() {
+        return FirstOf(PN_CHARS_BASE(), '_');
+    }
+
+    /** Matches a variable name (without the leading {@code ?} or {@code $}) and trailing whitespace. */
+    public Rule VARNAME() {
+        return Sequence(FirstOf(PN_CHARS_U(), DIGIT()), ZeroOrMore(FirstOf(
+                PN_CHARS_U(), DIGIT(), '\u00B7', CharRange('\u0300', '\u036F'), CharRange('\u203F', '\u2040'))),
+                WS());
+    }
+
+    /**
+     * Matches one character allowed inside a prefix or local name. The hyphen is matched as a
+     * plain character; using the whitespace-skipping {@link #MINUS()} here would let whitespace
+     * and comments become part of a name.
+     */
+    public Rule PN_CHARS() {
+        return FirstOf('-', DIGIT(), PN_CHARS_U(), '\u00B7',
+                CharRange('\u0300', '\u036F'), CharRange('\u203F', '\u2040'));
+    }
+
+    /**
+     * Matches a namespace prefix: a base character followed by name characters, where a dot is
+     * only accepted when directly followed by another name character.
+     */
+    public Rule PN_PREFIX() {
+        return Sequence(PN_CHARS_BASE(), Optional(ZeroOrMore(FirstOf(PN_CHARS(), Sequence('.', PN_CHARS())))));
+    }
+
+    /**
+     * Matches the local part of a prefixed name, followed by optional whitespace.
+     *
+     * <p>A dot is only part of the name when it is directly followed by another name character.
+     * It is matched as a plain character: with the whitespace-skipping {@link #DOT()} an input
+     * such as {@code ex:name. foo:bar} would be read as the single local name
+     * {@code name. foo} instead of a name followed by a statement-terminating dot.
+     */
+    public Rule PN_LOCAL() {
+        return Sequence(FirstOf(PN_CHARS_U(), DIGIT()),
+                Optional(ZeroOrMore(FirstOf(PN_CHARS(), Sequence('.', PN_CHARS())))), WS());
+    }
+
+    /** Matches one base name character: ASCII letters plus the listed non-ASCII ranges. */
+    public Rule PN_CHARS_BASE() {
+        return FirstOf(
+                CharRange('A', 'Z'),
+                CharRange('a', 'z'),
+                CharRange('\u00C0', '\u00D6'),
+                CharRange('\u00D8', '\u00F6'),
+                CharRange('\u00F8', '\u02FF'),
+                CharRange('\u0370', '\u037D'),
+                CharRange('\u037F', '\u1FFF'),
+                CharRange('\u200C', '\u200D'),
+                CharRange('\u2070', '\u218F'),
+                CharRange('\u2C00', '\u2FEF'),
+                CharRange('\u3001', '\uD7FF'),
+                CharRange('\uF900', '\uFDCF'),
+                CharRange('\uFDF0', '\uFFFD')
+        );
+    }
+
+    /** Matches one ASCII digit. */
+    public Rule DIGIT() {
+        return CharRange('0', '9');
+    }
+
+    /**
+     * Matches a comment: {@code '#'} up to and including the end-of-line character, or up to the
+     * end of the input so that a comment on the last line does not need a trailing newline.
+     */
+    public Rule COMMENT() {
+        return Sequence('#', ZeroOrMore(Sequence(TestNot(EOL()), ANY)), FirstOf(EOL(), Test(EOI)));
+    }
+
+    /** Matches a single line-feed or carriage-return character. */
+    public Rule EOL() {
+        return AnyOf("\n\r");
+    }
+
+    // Operators and punctuation. Each rule matches its symbol and then skips
+    // trailing whitespace and comments (see ChWS / StringWS).
+
+    public Rule REFERENCE() {
+        return StringWS("^^");
+    }
+
+    public Rule LESS_EQUAL() {
+        return StringWS("<=");
+    }
+
+    public Rule GREATER_EQUAL() {
+        return StringWS(">=");
+    }
+
+    public Rule NOT_EQUAL() {
+        return StringWS("!=");
+    }
+
+    public Rule AND() {
+        return StringWS("&&");
+    }
+
+    public Rule OR() {
+        return StringWS("||");
+    }
+
+    public Rule OPEN_BRACE() {
+        return ChWS('(');
+    }
+
+    public Rule CLOSE_BRACE() {
+        return ChWS(')');
+    }
+
+    public Rule OPEN_CURLY_BRACE() {
+        return ChWS('{');
+    }
+
+    public Rule CLOSE_CURLY_BRACE() {
+        return ChWS('}');
+    }
+
+    public Rule OPEN_SQUARE_BRACE() {
+        return ChWS('[');
+    }
+
+    public Rule CLOSE_SQUARE_BRACE() {
+        return ChWS(']');
+    }
+
+    public Rule SEMICOLON() {
+        return ChWS(';');
+    }
+
+    public Rule DOT() {
+        return ChWS('.');
+    }
+
+    public Rule PLUS() {
+        return ChWS('+');
+    }
+
+    public Rule MINUS() {
+        return ChWS('-');
+    }
+
+    public Rule ASTERISK() {
+        return ChWS('*');
+    }
+
+    public Rule COMMA() {
+        return ChWS(',');
+    }
+
+    public Rule NOT() {
+        return ChWS('!');
+    }
+
+    public Rule DIVIDE() {
+        return ChWS('/');
+    }
+
+    public Rule EQUAL() {
+        return ChWS('=');
+    }
+
+    /** Matches {@code '<'} and skips following whitespace characters, but not comments. */
+    public Rule LESS_NO_COMMENT() {
+        return Sequence(Ch('<'), ZeroOrMore(WS_NO_COMMENT()));
+    }
+
+    public Rule LESS() {
+        return ChWS('<');
+    }
+
+    public Rule GREATER() {
+        return ChWS('>');
+    }
+    // End of the terminal rules; the whitespace-skipping helpers follow.
+
+    /** Matches the character {@code c} followed by optional whitespace and comments. */
+    public Rule ChWS(char c) {
+        return Sequence(Ch(c), WS());
+    }
+
+    /** Matches the exact string {@code s} followed by optional whitespace and comments. */
+    public Rule StringWS(String s) {
+        return Sequence(String(s), WS());
+    }
+
+    /** Matches {@code string} case-insensitively, followed by optional whitespace and comments. */
+    public Rule StringIgnoreCaseWS(String string) {
+        return Sequence(IgnoreCase(string), WS());
+    }
+
+}
\ No newline at end of file
diff --git a/src/main/java/org/parboiled/examples/time/Main.java b/src/main/java/org/parboiled/examples/time/Main.java
new file mode 100644
index 0000000..73f2aa3
--- /dev/null
+++ b/src/main/java/org/parboiled/examples/time/Main.java
@@ -0,0 +1,48 @@
+/*
+ * Copyright (C) 2009-2011 Mathias Doenitz
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.parboiled.examples.time;
+
+import org.parboiled.Parboiled;
+import org.parboiled.parserunners.RecoveringParseRunner;
+import org.parboiled.common.StringUtils;
+import static org.parboiled.support.ParseTreeUtils.printNodeTree;
+import org.parboiled.support.ParsingResult;
+
+import java.util.Scanner;
+
+/**
+ * Interactive console demo for {@link TimeParser}: repeatedly reads a time expression from
+ * standard input, parses it with a recovering parse runner and prints the resulting value,
+ * the parse tree and, if the input did not match cleanly, the parse errors.
+ */
+public class Main {
+
+    /**
+     * Runs the prompt loop until an empty line is entered or standard input is exhausted.
+     *
+     * @param args ignored
+     */
+    public static void main(String[] args) {
+        TimeParser parser = Parboiled.createParser(TimeParser.class);
+        // One Scanner for the whole session: a Scanner may read ahead and buffer more than the
+        // line it returns, so creating a new one per prompt can silently drop piped input.
+        Scanner scanner = new Scanner(System.in);
+
+        while (true) {
+            System.out.print("Enter a time expression (hh:mm(:ss)?, hh(mm(ss)?)? or h(mm)?, single RETURN to exit)!\n");
+            // Closed or exhausted stdin ends the session instead of throwing NoSuchElementException.
+            if (!scanner.hasNextLine()) {
+                break;
+            }
+            String input = scanner.nextLine();
+            if (StringUtils.isEmpty(input)) {
+                break;
+            }
+
+            ParsingResult<?> result = new RecoveringParseRunner<Object>(parser.Time()).run(input);
+
+            System.out.println(input + " = " + result.parseTreeRoot.getValue() + '\n');
+            System.out.println(printNodeTree(result) + '\n');
+
+            if (!result.matched) {
+                System.out.println(StringUtils.join(result.parseErrors, "---\n"));
+            }
+        }
+    }
+
+}
\ No newline at end of file
diff --git a/src/main/java/org/parboiled/examples/time/TimeParser.java b/src/main/java/org/parboiled/examples/time/TimeParser.java
new file mode 100644
index 0000000..2e5e280
--- /dev/null
+++ b/src/main/java/org/parboiled/examples/time/TimeParser.java
@@ -0,0 +1,103 @@
+/*
+ * Copyright (C) 2009-2011 Mathias Doenitz
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.parboiled.examples.time;
+
+import org.parboiled.BaseParser;
+import org.parboiled.Rule;
+import org.parboiled.annotations.BuildParseTree;
+
+/**
+ * Parser for very relaxed time literals. Demonstrates usage of the value stack with default values for unmatched rules.
+ *
+ * <p>Every digit group pushes its numeric value as an {@link Integer}; missing groups are
+ * replaced by a pushed {@code 0}. Each top-level alternative finally replaces those integers
+ * with one formatted {@link String}. The value type is therefore {@code Object}, since both
+ * integers and strings live on the value stack.
+ */
+@BuildParseTree
+public class TimeParser extends BaseParser<Object> {
+
+    /**
+     * Entry rule: tries the colon-separated form first, then the compact two-digit-hour form,
+     * then the single-digit-hour form.
+     */
+    public Rule Time() {
+        return FirstOf(Time_HH_MM_SS(), Time_HHMMSS(), Time_HMM());
+    }
+
+    // h(h)?:mm(:ss)?
+    /**
+     * Colon-separated time. Pushes hours, minutes and seconds (0 when the seconds part is
+     * absent); the stack is then reordered with {@code swap3()} so that the three pops deliver
+     * hours, minutes, seconds in that order to {@code convertToTime}.
+     */
+    Rule Time_HH_MM_SS() {
+        return Sequence(
+                OneOrTwoDigits(), ':',
+                TwoDigits(),
+                FirstOf(Sequence(':', TwoDigits()), push(0)),
+                EOI,
+                swap3() && push(convertToTime(popAsInt(), popAsInt(), popAsInt()))
+        );
+    }
+
+    // hh(mm(ss)?)?
+    /**
+     * Compact time with a two-digit hour. Missing seconds are replaced by a pushed 0; when the
+     * minutes are missing as well, two zeros are pushed, so three integers are always on the
+     * stack before the final action runs.
+     */
+    Rule Time_HHMMSS() {
+        return Sequence(
+                TwoDigits(),
+                FirstOf(
+                        Sequence(
+                                TwoDigits(),
+                                FirstOf(TwoDigits(), push(0))
+                        ),
+                        pushAll(0, 0)
+                ),
+                EOI,
+                swap3() && push(convertToTime(popAsInt(), popAsInt(), popAsInt()))
+        );
+    }
+
+    // h(mm)?
+    /**
+     * Compact time with a single-digit hour and optional two-digit minutes (0 when absent).
+     * {@code swap()} reorders the two stack values so that the pops deliver hours, then minutes.
+     */
+    Rule Time_HMM() {
+        return Sequence(
+                OneDigit(),
+                FirstOf(TwoDigits(), push(0)),
+                EOI,
+                swap() && push(convertToTime(popAsInt(), popAsInt()))
+        );
+    }
+
+    /** Matches two digits if possible, otherwise one; pushes the matched number. */
+    Rule OneOrTwoDigits() {
+        return FirstOf(TwoDigits(), OneDigit());
+    }
+
+    /** Matches one digit and pushes its value; {@code "0"} is parsed when there is no match text. */
+    Rule OneDigit() {
+        return Sequence(Digit(), push(Integer.parseInt(matchOrDefault("0"))));
+    }
+
+    /**
+     * Matches exactly two digits and pushes their combined value. The digits are wrapped in an
+     * inner {@code Sequence} so that the match text seen by the action covers both of them.
+     */
+    Rule TwoDigits() {
+        return Sequence(Sequence(Digit(), Digit()), push(Integer.parseInt(matchOrDefault("0"))));
+    }
+
+    /** Matches one ASCII digit. */
+    Rule Digit() {
+        return CharRange('0', '9');
+    }
+
+    // ************************* ACTIONS *****************************
+
+    /**
+     * Pops the top of the value stack as an {@link Integer}.
+     *
+     * @return the popped value
+     * @throws ClassCastException if the top of the stack is not an {@code Integer}
+     */
+    protected Integer popAsInt() {
+        return (Integer) pop();
+    }
+
+    /**
+     * Formats a time without a seconds part; the seconds are reported as 0.
+     *
+     * @param hours   hours, {@code null} is treated as 0
+     * @param minutes minutes, {@code null} is treated as 0
+     * @return the formatted time
+     */
+    protected String convertToTime(Integer hours, Integer minutes) {
+        return convertToTime(hours, minutes, 0);
+    }
+
+    /**
+     * Formats a time as {@code "<h> h, <m> min, <s> s"}. No range checks are applied.
+     *
+     * @param hours   hours, {@code null} is treated as 0
+     * @param minutes minutes, {@code null} is treated as 0
+     * @param seconds seconds, {@code null} is treated as 0
+     * @return the formatted time
+     */
+    protected String convertToTime(Integer hours, Integer minutes, Integer seconds) {
+        return String.format("%s h, %s min, %s s",
+                hours != null ? hours : 0,
+                minutes != null ? minutes : 0,
+                seconds != null ? seconds : 0);
+    }
+
+}