Difference between revisions of "Antlr"
(→AST TREE und Tree Grammer) |
|||
(9 intermediate revisions by the same user not shown) | |||
Line 1: | Line 1: | ||
+ | == Hello World == | ||
Generieren einer einfachen Sprache. | Generieren einer einfachen Sprache. | ||
Line 52: | Line 53: | ||
Es werden die Java-Dateien ''SimpleCalcLexer.java'' und ''SimpleCalcParser.java'' generiert. | Es werden die Java-Dateien ''SimpleCalcLexer.java'' und ''SimpleCalcParser.java'' generiert. | ||
+ | |||
+ | == Kleiner Rechner == | ||
+ | <source lang="java5"> | ||
+ | grammar SimpleCalc; | ||
+ | |||
+ | program returns [int result]: | ||
+ | |||
+ | x=INT {$result = $result + Integer.parseInt($x.text);} | ||
+ | '+' | ||
+ | y=INT { | ||
+ | $result = $result + Integer.parseInt($y.text); | ||
+ | System.out.println($result); | ||
+ | }; | ||
+ | |||
+ | INT : '0'..'9'+; | ||
+ | </source> | ||
+ | |||
+ | ''x='' Man kann im Code dieses Token referenzieren: ''$x.text''. | ||
+ | ''program returns [int result]'' Die Produktion hat ''int'' als Rückgabewert. | ||
+ | Mehr als ein Attribut ist auch möglich. | ||
+ | |||
+ | Programm: | ||
+ | |||
+ | <source lang="java5"> | ||
+ | ANTLRStringStream st = new ANTLRStringStream("8+9"); | ||
+ | SimpleCalcLexer lex = new SimpleCalcLexer(st); | ||
+ | CommonTokenStream tokens = new CommonTokenStream(lex); | ||
+ | SimpleCalcParser parser = new SimpleCalcParser(tokens); | ||
+ | int result = parser.program(); //resultat ist 17 | ||
+ | </source> | ||
+ | |||
+ | == AST Tree == | ||
+ | AST: Abstract Syntax Tree. ANTLR baut den Baum automatisch auf, man muss es nicht selber machen. | ||
+ | Das Caret ^ hinter einem Token bedeutet: Dieses Token wird zum Wurzelknoten im Baum. | ||
+ | <source lang="java5"> | ||
+ | grammar MyGrammer; | ||
+ | |||
+ | options { | ||
+ | language = Java; | ||
+ | output = AST; | ||
+ | ASTLabelType=CommonTree; | ||
+ | } | ||
+ | |||
+ | @header { | ||
+ | package sample; | ||
+ | } | ||
+ | |||
+ | @lexer::header { | ||
+ | package sample; | ||
+ | } | ||
+ | |||
+ | INT : '0'..'9'+; | ||
+ | |||
+ | program: | ||
+ | |||
+ | INT | ||
+ | '+'^ | ||
+ | INT; | ||
+ | </source> | ||
+ | |||
+ | Und das Programm: | ||
+ | <source lang="java5"> | ||
+ | package sample; | ||
+ | |||
+ | import org.antlr.runtime.ANTLRStringStream; | ||
+ | import org.antlr.runtime.CommonTokenStream; | ||
+ | import org.antlr.runtime.RecognitionException; | ||
+ | |||
+ | import sample.MyGrammerParser.program_return; | ||
+ | |||
+ | |||
+ | public class Main { | ||
+ | |||
+ | /** | ||
+ | * @param args | ||
+ | */ | ||
+ | public static void main(String[] args) { | ||
+ | //generate(); | ||
+ | doIt(); | ||
+ | } | ||
+ | |||
+ | public static void doIt() | ||
+ | { | ||
+ | |||
+ | ANTLRStringStream st = new ANTLRStringStream("8+9"); | ||
+ | MyGrammerLexer lexer = new MyGrammerLexer(st); | ||
+ | CommonTokenStream tokens = new CommonTokenStream(lexer); | ||
+ | MyGrammerParser parser = new MyGrammerParser(tokens); | ||
+ | try { | ||
+ | program_return treeContainer = parser.program(); | ||
+ | System.out.println(treeContainer.tree.toStringTree()); | ||
+ | } catch (RecognitionException e) { | ||
+ | e.printStackTrace(); | ||
+ | } | ||
+ | } | ||
+ | |||
+ | public static void generate() | ||
+ | { | ||
+ | String[] startarg = {".\\src\\sample\\MyGrammer.g"}; | ||
+ | org.antlr.Tool.main(startarg); | ||
+ | } | ||
+ | } | ||
+ | </source> | ||
+ | |||
+ | == AST und Tree Grammer == | ||
+ | Ablauf: Lexer -> Parser (erzeugt den AST) -> Tree Grammar | ||
+ | |||
+ | AST und Tree Grammar arbeiten zusammen. Die Tree Grammar verarbeitet einen NodeStream. | ||
+ | |||
+ | AST: | ||
+ | <source lang="java5"> | ||
+ | grammar MyGrammer; | ||
+ | |||
+ | options { | ||
+ | language = Java; | ||
+ | output = AST; | ||
+ | ASTLabelType=CommonTree; | ||
+ | } | ||
+ | |||
+ | @header { | ||
+ | package sample; | ||
+ | } | ||
+ | |||
+ | @lexer::header { | ||
+ | package sample; | ||
+ | } | ||
+ | |||
+ | INT : '0'..'9'+; | ||
+ | |||
+ | program: | ||
+ | |||
+ | INT | ||
+ | '+'^ | ||
+ | INT; | ||
+ | </source> | ||
+ | |||
+ | TreeGrammer: | ||
+ | <source lang="java5"> | ||
+ | tree grammar MyTreeGrammer; | ||
+ | |||
+ | options { | ||
+ | language = Java; | ||
+ | tokenVocab=MyGrammer; | ||
+ | ASTLabelType=CommonTree; | ||
+ | } | ||
+ | |||
+ | @header { | ||
+ | package sample; | ||
+ | } | ||
+ | |||
+ | intval returns [int result]: | ||
+ | INT {result=Integer.parseInt($INT.text);}; | ||
+ | |||
+ | evaluate returns [int result]: | ||
+ | ^('+' op1=intval op2=intval) {result = op1 + op2;} | ||
+ | ; | ||
+ | </source> | ||
+ | |||
+ | Programm: | ||
+ | <source lang="java5"> | ||
+ | public static void doIt() | ||
+ | { | ||
+ | |||
+ | ANTLRStringStream st = new ANTLRStringStream("8+9"); | ||
+ | MyGrammerLexer lexer = new MyGrammerLexer(st); | ||
+ | CommonTokenStream tokens = new CommonTokenStream(lexer); | ||
+ | MyGrammerParser parser = new MyGrammerParser(tokens); | ||
+ | |||
+ | try { | ||
+ | program_return treeContainer = parser.program(); | ||
+ | System.out.println(treeContainer.tree.toStringTree()); | ||
+ | CommonTreeNodeStream nodeStream = new CommonTreeNodeStream(treeContainer.tree); | ||
+ | MyTreeGrammer treeg = new MyTreeGrammer(nodeStream); | ||
+ | int result = treeg.evaluate(); | ||
+ | System.out.println(result); | ||
+ | } catch (RecognitionException e) { | ||
+ | e.printStackTrace(); | ||
+ | } | ||
+ | </source> | ||
+ | |||
+ | == Lexer brauchen == | ||
+ | <source lang="java5"> | ||
+ | ANTLRStringStream st = new ANTLRStringStream("123+123"); | ||
+ | SimpleCalcLexer lex = new SimpleCalcLexer(st); | ||
+ | CommonTokenStream tokens = new CommonTokenStream(lex); | ||
+ | |||
+ | List<CommonToken> tokenList = tokens.getTokens(); //org.antlr.runtime.CommonToken | ||
+ | for(CommonToken token : tokenList) | ||
+ | { | ||
+ | System.out.println(token); | ||
+ | } | ||
+ | </source> | ||
+ | |||
+ | Der Output ist: | ||
+ | [@0,0:2='123',<8>,1:0] | ||
+ | [@1,3:3='+',<4>,1:3] | ||
+ | [@2,4:6='123',<8>,1:4] | ||
== Resourcen == | == Resourcen == | ||
* Javadoc [http://www.antlr.org/api/Java/annotated.html http://www.antlr.org/api/Java/annotated.html] | * Javadoc [http://www.antlr.org/api/Java/annotated.html http://www.antlr.org/api/Java/annotated.html] | ||
+ | * [http://javadude.com/articles/antlr3xtut/ super-video-tut] |
Latest revision as of 10:49, 30 July 2010
Contents
Hello World
Generieren einer einfachen Sprache.
Das File SimpleCalc.g:
// Combined lexer/parser grammar for simple arithmetic expressions over numbers.
grammar SimpleCalc;
// Named tokens for the four operator literals.
tokens {
PLUS = '+' ;
MINUS = '-' ;
MULT = '*' ;
DIV = '/' ;
}
// Java code added to the generated parser class: a main() that lexes and
// parses the fixed input "123+123".
@members {
public static void main(String[] args) throws Exception {
ANTLRStringStream st = new ANTLRStringStream("123+123");
SimpleCalcLexer lex = new SimpleCalcLexer(st);
CommonTokenStream tokens = new CommonTokenStream(lex);
SimpleCalcParser parser = new SimpleCalcParser(tokens);
// Parsing starts at rule expr; nothing is returned or printed here.
parser.expr();
}
}
/*------------------------------------------------------------------
* PARSER RULES
*------------------------------------------------------------------*/
// expr: one term followed by any number of (PLUS | MINUS) term pairs.
expr : term ( ( PLUS | MINUS ) term )* ;
// term: one factor followed by any number of (MULT | DIV) factor pairs.
term : factor ( ( MULT | DIV ) factor )* ;
// factor: a single NUMBER token.
factor : NUMBER ;
/*------------------------------------------------------------------
* LEXER RULES
*------------------------------------------------------------------*/
// NUMBER: one or more digits.
NUMBER : (DIGIT)+ ;
// WHITESPACE: tabs, spaces, line breaks and form feeds are put on the HIDDEN channel.
WHITESPACE : ( '\t' | ' ' | '\r' | '\n'| '\u000C' )+ { $channel = HIDDEN; } ;
// DIGIT: helper fragment used by NUMBER.
fragment DIGIT : '0'..'9' ;
Der Lexer und Parser werden so generiert:
C:\Users\Claude Glauser\Downloads\antlr>java -cp antlr-3.2.jar org.antlr.Tool SimpleCalc.g
Es werden die Java-Dateien SimpleCalcLexer.java und SimpleCalcParser.java generiert.
Kleiner Rechner
// Minimal calculator grammar: matches INT '+' INT and returns the sum.
grammar SimpleCalc;
// program declares an int return value named result.
program returns [int result]:
// The label x lets the action read the first INT token's text via $x.text.
x=INT {$result = $result + Integer.parseInt($x.text);}
'+'
// The label y does the same for the second INT; the total is then printed.
y=INT {
$result = $result + Integer.parseInt($y.text);
System.out.println($result);
};
// INT: one or more decimal digits.
INT : '0'..'9'+;
x= Man kann im Code dieses Token referenzieren: $x.text. program returns [int result]: Die Produktion hat int als Rückgabewert. Mehr als ein Attribut ist auch möglich.
Programm:
// Feed the text "8+9" through the generated lexer into the generated parser.
SimpleCalcLexer lexer = new SimpleCalcLexer(new ANTLRStringStream("8+9"));
SimpleCalcParser parser = new SimpleCalcParser(new CommonTokenStream(lexer));
// program() hands back the rule's result value; for "8+9" that is 17.
int result = parser.program();
AST Tree
AST: Abstract Syntax Tree. ANTLR baut den Baum automatisch auf, man muss es nicht selber machen. Das Caret ^ hinter einem Token bedeutet: Dieses Token wird zum Wurzelknoten im Baum.
// Combined grammar whose parser builds an AST for the input INT '+' INT.
grammar MyGrammer;
// output = AST turns on tree building; ASTLabelType sets the node type to CommonTree.
options {
language = Java;
output = AST;
ASTLabelType=CommonTree;
}
// Package declaration for the generated parser source.
@header {
package sample;
}
// Package declaration for the generated lexer source.
@lexer::header {
package sample;
}
// INT: one or more decimal digits.
INT : '0'..'9'+;
// program: two INT tokens separated by '+'; the ^ suffix makes '+' the root of the tree.
program:
INT
'+'^
INT;
Und das Programm:
package sample;
import org.antlr.runtime.ANTLRStringStream;
import org.antlr.runtime.CommonTokenStream;
import org.antlr.runtime.RecognitionException;
import sample.MyGrammerParser.program_return;
/**
 * Demo driver: parses the text "8+9" with the lexer and parser generated from
 * MyGrammer and prints the resulting tree.
 */
public class Main {

    /**
     * Program entry point; runs the parse demo.
     *
     * @param args command-line arguments (unused)
     */
    public static void main(String[] args) {
        // generate() is intentionally not called here; invoke it separately
        // when the lexer/parser sources need to be (re)generated.
        doIt();
    }

    /**
     * Parses "8+9" and prints the tree returned by the {@code program} rule
     * via {@code toStringTree()}. A {@link RecognitionException} is reported
     * by printing its stack trace.
     */
    public static void doIt() {
        MyGrammerLexer lexer = new MyGrammerLexer(new ANTLRStringStream("8+9"));
        MyGrammerParser parser = new MyGrammerParser(new CommonTokenStream(lexer));
        try {
            program_return parsed = parser.program();
            System.out.println(parsed.tree.toStringTree());
        } catch (RecognitionException e) {
            e.printStackTrace();
        }
    }

    /**
     * Runs the ANTLR tool on the grammar file at the fixed relative path.
     */
    public static void generate() {
        org.antlr.Tool.main(new String[] {".\\src\\sample\\MyGrammer.g"});
    }
}
AST und Tree Grammer
Ablauf: Lexer -> Parser (erzeugt den AST) -> Tree Grammar
AST und Tree Grammar arbeiten zusammen. Die Tree Grammar verarbeitet einen NodeStream.
AST:
// Parser grammar side of the AST / tree-grammar example: builds a tree for INT '+' INT.
grammar MyGrammer;
// Tree output is enabled and CommonTree is used as the node type.
options {
language = Java;
output = AST;
ASTLabelType=CommonTree;
}
// Generated parser goes into package sample.
@header {
package sample;
}
// Generated lexer goes into package sample.
@lexer::header {
package sample;
}
// INT: one or more decimal digits.
INT : '0'..'9'+;
// program: INT '+' INT, with '+' marked by ^ as the root node of the tree.
program:
INT
'+'^
INT;
TreeGrammer:
// Tree grammar that walks the tree built by MyGrammer and computes the sum.
tree grammar MyTreeGrammer;

// tokenVocab=MyGrammer reuses the token vocabulary of the parser grammar;
// the node type is CommonTree, matching the parser's ASTLabelType.
options {
    language = Java;
    tokenVocab=MyGrammer;
    ASTLabelType=CommonTree;
}

@header {
package sample;
}

// intval: a single INT node; returns its text converted to an int.
// The return value is written through $result rather than a bare Java
// variable, so the action does not depend on generated variable names.
intval returns [int result]:
    INT {$result = Integer.parseInt($INT.text);};

// evaluate: a '+' root with two intval children; returns the sum of both.
// The operands are read through the rule labels ($op1.result, $op2.result).
evaluate returns [int result]:
    ^('+' op1=intval op2=intval) {$result = $op1.result + $op2.result;}
    ;
Programm:
public static void doIt()
{
ANTLRStringStream st = new ANTLRStringStream("8+9");
MyGrammerLexer lexer = new MyGrammerLexer(st);
CommonTokenStream tokens = new CommonTokenStream(lexer);
MyGrammerParser parser = new MyGrammerParser(tokens);
try {
program_return treeContainer = parser.program();
System.out.println(treeContainer.tree.toStringTree());
CommonTreeNodeStream nodeStream = new CommonTreeNodeStream(treeContainer.tree);
MyTreeGrammer treeg = new MyTreeGrammer(nodeStream);
int result = treeg.evaluate();
System.out.println(result);
} catch (RecognitionException e) {
e.printStackTrace();
}
Lexer brauchen
// Run only the generated lexer over "123+123" and collect its tokens.
SimpleCalcLexer lexer = new SimpleCalcLexer(new ANTLRStringStream("123+123"));
CommonTokenStream tokens = new CommonTokenStream(lexer);

// The list elements are org.antlr.runtime.CommonToken instances.
List<CommonToken> tokenList = tokens.getTokens();
for (CommonToken token : tokenList) {
    System.out.println(token);
}
Der Output ist:
[@0,0:2='123',<8>,1:0] [@1,3:3='+',<4>,1:3] [@2,4:6='123',<8>,1:4]