Differences From Artifact [51dd3bd106fd9ecb]:
- File
polemy/parse.d
- 2010-11-08 12:26:39 - part of checkin [80ff567c75] on branch trunk - Testing easyAST. (user: kinaba) [annotate]
To Artifact [10baa46251b8a0b6]:
- File
polemy/parse.d
- 2010-11-08 14:59:30 - part of checkin [b985f3bf91] on branch trunk - refactored parser to change AST to ML-like one. (user: kinaba) [annotate]
5 * Parser for Polemy programming language 5 * Parser for Polemy programming language
6 */ 6 */
7 module polemy.parse; 7 module polemy.parse;
8 import polemy._common; 8 import polemy._common;
9 import polemy.lex; 9 import polemy.lex;
10 import polemy.ast; 10 import polemy.ast;
11 11
12 /// Parsing Failure | 12 /// Exception from this module
13 13
14 class ParserException : Exception | 14 class ParseException : Exception
15 { 15 {
16 private: | 16 const LexPosition pos;
17 this(string msg) { super(msg); } <
18 static ParserException create(Lexer)(Lexer lex, string msg) <
19 { | 17
20 return new ParserException(sprintf!"[%s] %s"( <
21 lex.empty ? "EOF" : to!string(lex.front.pos), msg)); <
22 } <
> 18 private this( const LexPosition pos, string msg )
> 19 { super(sprintf!"%s [%s]"(msg, pos)); this.pos = pos; }
23 } 20 }
> 21
> 22 private auto createException(Lexer)(Lexer lex, string msg)
> 23 { return new ParseException(lex.empty?null:lex.front.pos, msg); }
> 24
> 25 /// Entry point of this module
> 26
> 27 auto parseString(S, T...)(S str, T fn_ln_cn)
> 28 { return parserFromString(str, fn_ln_cn).parse(); }
> 29
> 30 auto parseFile(S, T...)(S filename,T ln_cn)
> 31 { return parserFromString(filename, ln_cn).parse(); }
24 32
25 /// Named Constructor of Parser 33 /// Named Constructor of Parser
26 34
27 auto parserFromLexer(Lexer)(Lexer lex) | 35 private auto parserFromLexer(Lexer)(Lexer lex)
28 { <
29 return new Parser!Lexer(lex); | 36 { return new Parser!Lexer(lex); }
30 } <
31 37
32 /// Named Constructor of Parser (just fwd to lexerFromString) | 38 private auto parserFromString(T...)(T params)
> 39 { return parserFromLexer(polemy.lex.lexerFromString(params)); }
33 40
34 auto parserFromString(T...)(T params) <
35 { <
36 return parserFromLexer(polemy.lex.lexerFromString(params)); <
37 } <
38 <
39 /// Named Constructor of Parser (just fwd to lexerFromFile) <
40 <
41 auto parserFromFile(T...)(T params) | 41 private auto parserFromFile(T...)(T params)
42 { <
43 return parserFromLexer(polemy.lex.lexerFromFile(params)); | 42 { return parserFromLexer(polemy.lex.lexerFromFile(params)); }
44 } <
45 43
46 /// Parser 44 /// Parser
47 45
48 class Parser(Lexer) | 46 private class Parser(Lexer)
> 47 if( isForwardRange!(Lexer) && is(ElementType!(Lexer) == Token) )
49 { 48 {
50 this(Lexer lex) | 49 AST parse()
51 { 50 {
52 this.lex = lex; | 51 auto e = Body();
53 } <
54 <
55 Program parseProgram() <
56 { <
57 Program p = parseStatements(); <
58 if( !lex.empty ) { | 52 if( !lex.empty )
59 auto e = ParserException.create(lex, "cannot reach eof") <
60 throw e; <
61 } <
> 53 throw createException(lex, "input is not ended but parse
62 return p; | 54 return e;
63 } 55 }
64 56
65 Program parseStatements() | 57 AST Body()
66 { 58 {
67 Program p; | 59 if( lex.empty || !lex.front.quoted && lex.front.str=="}" )
68 while( !lex.empty && (lex.front.quoted || lex.front.str!="}") ) <
69 p ~= parseStatement(); <
70 return p; | 60 return doNothingExpression();
71 } <
72 61
73 Statement parseStatement() <
74 { <
75 auto saved = lex.save; <
76 scope(failure) lex = saved; <
77 <
78 if( lex.empty ) <
79 throw new ParserException("EOF during parsing a statemen <
80 auto pos = lex.front.pos; 62 auto pos = lex.front.pos;
81 <
82 if( !lex.front.quoted && lex.front.str=="var" ) <
> 63 if( tryEat("var") )
83 { 64 {
84 // "var" Var "=" Expression ";" | 65 immutable LexPosition varpos = (lex.empty ? null : lex.f
85 lex.popFront; <
86 string var = lex.front.str; | 66 string var = eatId("after var");
87 lex.popFront; <
88 eat("=", "for variable declaration"); | 67 eat("=", "after var");
> 68 auto e = E(0);
> 69 if( tryEat(";") && !lex.empty && (lex.front.quoted || (l
89 auto parsed = new DeclStatement(pos, var, parseExpressio | 70 return new LetExpression(pos, var, e, Body());
90 eat(";", "after variable declaration"); | 71 else
91 return parsed; | 72 return new LetExpression(pos, var, e, new VarExp
92 } 73 }
93 else 74 else
94 { 75 {
95 // Expression ";" | 76 auto e = E(0);
96 auto parsed = new ExprStatement(pos, parseExpression()); | 77 if( tryEat(";") && !lex.empty && (lex.front.quoted || (l
97 eat(";", "after statement"); | 78 return new LetExpression(pos, "_", e, Body());
> 79 else
98 return parsed; | 80 return e;
99 } 81 }
100 } 82 }
101 83
102 Expression parseExpression() | 84 // [TODO] make customizable from program
103 { <
104 auto saved = lex.save; <
105 scope(failure) lex = saved; <
106 return parseE(0); <
107 } <
108 <
109 // [TODO] multi-char operators are not supported by the lexer... <
110 static immutable string[][] operator_perferences = [ 85 static immutable string[][] operator_perferences = [
111 ["="], | 86 ["||"],
112 ["or"], | 87 ["&&"],
113 ["and"], <
114 ["!="], 88 ["!="],
115 ["=="], 89 ["=="],
116 ["<","<=",">",">="], 90 ["<","<=",">",">="],
117 ["|"], 91 ["|"],
118 ["^"], 92 ["^"],
119 ["&"], 93 ["&"],
120 ["<<", ">>"], 94 ["<<", ">>"],
121 ["+","-"], 95 ["+","-"],
> 96 ["~"],
122 ["*","/","%"] | 97 ["*","/","%"],
> 98 ["^^"]
123 ]; 99 ];
124 100
125 Expression parseE(int level = 0) | 101 AST E(int level)
126 { 102 {
127 if( operator_perferences.length <= level ) 103 if( operator_perferences.length <= level )
128 return parseBaseExpression(); | 104 return Funcall();
129 else 105 else
130 { 106 {
131 auto ops = operator_perferences[level]; 107 auto ops = operator_perferences[level];
132 auto e = parseE(level+1); | 108 auto e = E(level+1);
133 seq: 109 seq:
134 while( !lex.empty ) 110 while( !lex.empty )
135 { 111 {
136 auto pos = lex.front.pos; 112 auto pos = lex.front.pos;
137 foreach(op; ops) 113 foreach(op; ops)
138 if( tryEat(op) ) 114 if( tryEat(op) )
139 { 115 {
140 if( op == "=" ) // right assoc <
141 return new AssignExpress <
142 else <
143 e = new FuncallExpressio | 116 e = new FuncallExpression(e.pos,
144 continue seq; 117 continue seq;
145 } 118 }
146 break; 119 break;
147 } 120 }
148 return e; 121 return e;
149 } 122 }
150 } 123 }
151 124
152 Expression parseBaseExpression() | 125 AST Funcall()
153 { 126 {
154 if( lex.empty ) <
155 throw new ParserException("EOF during parsing an express <
156 auto pos = lex.front.pos; <
157 Expression e = parseBaseBaseExpression(); | 127 auto e = BaseExpression();
158 while( tryEat("(") ) // funcall | 128 while( tryEat("(") )
159 { 129 {
160 Expression[] args; | 130 AST[] args;
161 while( !tryEat(")") ) { 131 while( !tryEat(")") ) {
162 if( lex.empty ) { | 132 if( lex.empty )
163 auto ex = ParserException.create(lex,"Un | 133 throw createException(lex,"Unexpected EO
164 throw ex; <
165 } <
166 args ~= parseE(); | 134 args ~= E(0);
167 if( !tryEat(",") ) { 135 if( !tryEat(",") ) {
168 eat(")", "after function parameters"); 136 eat(")", "after function parameters");
169 break; 137 break;
170 } 138 }
171 } 139 }
172 e = new FuncallExpression(pos, e, args); | 140 e = new FuncallExpression(e.pos, e, args);
173 } 141 }
174 return e; 142 return e;
175 } 143 }
176 144
177 Expression parseBaseBaseExpression() | 145 AST BaseExpression()
178 { 146 {
179 if( lex.empty ) 147 if( lex.empty )
180 throw new ParserException("EOF during parsing an express | 148 throw createException(lex, "Reached EOF when tried to pa
> 149
181 auto pos = lex.front.pos; 150 auto pos = lex.front.pos;
182 <
183 if( lex.front.quoted ) 151 if( lex.front.quoted )
184 { 152 {
185 scope(exit) lex.popFront; 153 scope(exit) lex.popFront;
186 return new StrLiteralExpression(pos, lex.front.str); | 154 return new StrLiteral(pos, lex.front.str);
187 } 155 }
188 if( isNumber(lex.front.str) ) // is_number | 156 if( isNumber(lex.front.str) )
189 { 157 {
190 scope(exit) lex.popFront; 158 scope(exit) lex.popFront;
191 return new IntLiteralExpression(pos, BigInt(cast(string) | 159 return new IntLiteral(pos, BigInt(cast(string)lex.front.
192 } 160 }
193 if( tryEat("(") ) 161 if( tryEat("(") )
194 { 162 {
195 auto e = parseE(); | 163 auto e = Body();
196 eat(")", "after parenthesized expression"); 164 eat(")", "after parenthesized expression");
197 return e; 165 return e;
198 } 166 }
199 if( tryEat("if") ) 167 if( tryEat("if") )
200 { 168 {
201 eat("(", "after if"); 169 eat("(", "after if");
202 auto cond = parseE(); | 170 auto cond = E(0);
203 eat(")", "after if condition"); 171 eat(")", "after if condition");
204 auto thenPos = lex.front.pos; 172 auto thenPos = lex.front.pos;
205 eat("{", "after if condition"); 173 eat("{", "after if condition");
206 Statement[] th = parseStatements(); | 174 auto th = Body();
207 eat("}", "after if-then body"); 175 eat("}", "after if-then body");
208 Statement[] el; | 176 auto el = doNothingExpression();
209 auto elsePos = lex.front.pos; | 177 auto elsePos = (lex.empty ? LexPosition.dummy : lex.fron
210 if( tryEat("else") ) { 178 if( tryEat("else") ) {
211 eat("{", "after else"); 179 eat("{", "after else");
212 el = parseStatements(); | 180 el = Body();
213 eat("}", "after else body"); 181 eat("}", "after else body");
214 } 182 }
215 return new FuncallExpression(pos, 183 return new FuncallExpression(pos,
216 new VarExpression(pos, "if"), 184 new VarExpression(pos, "if"),
217 cond, 185 cond,
218 new FunLiteralExpression(thenPos, [], th), | 186 new FunLiteral(thenPos, [], th),
219 new FunLiteralExpression(elsePos, [], el) | 187 new FunLiteral(elsePos, [], el)
220 ); 188 );
221 } 189 }
222 <
223 if( tryEat("fun") ) 190 if( tryEat("fun") )
224 { 191 {
225 eat("(", "after fun"); 192 eat("(", "after fun");
226 string[] params; 193 string[] params;
227 while(!tryEat(")")) | 194 while( !tryEat(")") )
228 { 195 {
229 if( lex.empty ) { | 196 params ~= eatId("for function parameter");
230 auto e = ParserException.create(lex,"Une <
231 throw e; <
232 } <
233 if( lex.front.quoted ) { <
234 auto e = ParserException.create(lex,"Ide <
235 throw e; <
236 } <
237 params ~= lex.front.str; <
238 lex.popFront; <
239 if( !tryEat(",") ) { 197 if( !tryEat(",") ) {
240 eat(")", "after function parameters"); 198 eat(")", "after function parameters");
241 break; 199 break;
242 } 200 }
243 } 201 }
244 eat("{", "after function parameters"); 202 eat("{", "after function parameters");
245 Statement[] funbody; | 203 auto funbody = Body();
246 while(!tryEat("}")) { | 204 eat("}", "after function body");
247 if( lex.empty ) { <
248 auto e = ParserException.create(lex,"Une <
249 throw e; <
250 } <
251 funbody ~= parseStatement(); <
252 } <
253 return new FunLiteralExpression(pos, params, funbody); | 205 return new FunLiteral(pos, params, funbody);
254 } 206 }
255 scope(exit) lex.popFront; 207 scope(exit) lex.popFront;
256 return new VarExpression(pos, lex.front.str); 208 return new VarExpression(pos, lex.front.str);
257 } 209 }
258 210
259 private: 211 private:
260 Lexer lex; 212 Lexer lex;
> 213 this(Lexer lex) { this.lex = lex; }
261 214
262 void eat(string kwd, lazy string msg) 215 void eat(string kwd, lazy string msg)
263 { 216 {
264 if( !tryEat(kwd) ) 217 if( !tryEat(kwd) )
265 { <
266 auto e = ParserException.create(lex, "'"~kwd~"' is expec | 218 throw createException(lex, "'"~kwd~"' is expected "~msg~
267 ~(lex.empty ? "EOF" : lex.front.str)~"' occured" 219 ~(lex.empty ? "EOF" : lex.front.str)~"' occured"
268 throw e; <
269 } <
270 } 220 }
271 221
272 bool tryEat(string kwd) 222 bool tryEat(string kwd)
273 { 223 {
274 if( lex.empty || lex.front.quoted || lex.front.str!=kwd ) 224 if( lex.empty || lex.front.quoted || lex.front.str!=kwd )
275 return false; 225 return false;
276 lex.popFront; 226 lex.popFront;
277 return true; 227 return true;
278 } 228 }
> 229
> 230 string eatId(lazy string msg)
> 231 {
> 232 if( lex.empty || lex.front.quoted )
> 233 throw createException(lex, "identifier is expected but n
> 234 string id = lex.front.str;
> 235 lex.popFront;
> 236 return id;
> 237 }
279 238
280 bool isNumber(string s) 239 bool isNumber(string s)
281 { 240 {
282 return find!(`a<'0'||'9'<a`)(s).empty; 241 return find!(`a<'0'||'9'<a`)(s).empty;
283 } 242 }
284 } <
285 243
286 unittest | 244 AST doNothingExpression()
287 { | 245 {
288 auto p = parserFromString(` | 246 return new IntLiteral(lex.empty?null:lex.front.pos, BigInt(178))
289 var x = 100; <
290 var y = 200; <
291 `); <
292 Program prog = p.parseProgram(); <
293 assert( prog.length == 2 ); <
294 auto p0 = cast(DeclStatement)prog[0]; <
295 auto p1 = cast(DeclStatement)prog[1]; <
296 assert( p0.var == "x" ); <
297 assert( p1.var == "y" ); <
298 assert( (cast(IntLiteralExpression)p0.expr).data == 100 ); <
299 assert( (cast(IntLiteralExpression)p1.expr).data == 200 ); <
> 247 }
300 } 248 }
301 249
302 unittest 250 unittest
303 { 251 {
304 auto p = parserFromString(` | 252 mixin EasyAST;
305 var zzz = 100; # comment <
> 253
306 zzz = zzz + zzz * "fo\no"; # comment | 254 assert_eq(parseString(`123`), intl(123));
307 42; <
> 255 assert_eq(parseString(`"foo"`), strl("foo"));
308 `); | 256 assert_eq(parseString(`fun(){1}`), fun([],intl(1)));
> 257 assert_eq(parseString(`fun(x){1}`), fun(["x"],intl(1)));
> 258 assert_eq(parseString(`1;2`), let("_",intl(1),intl(2)));
> 259 assert_eq(parseString(`1;2;`), let("_",intl(1),intl(2)));
> 260 assert_eq(parseString(`var x=1;2`), let("x",intl(1),intl(2)));
> 261 assert_eq(parseString(`var x=1;2;`), let("x",intl(1),intl(2)));
> 262 assert_eq(parseString(`var x=1`), let("x",intl(1),var("x")));
> 263 assert_eq(parseString(`var x=1;`), let("x",intl(1),var("x")));
> 264 assert_eq(parseString(`f(1,2)`), call(var("f"),intl(1),intl(2)));
> 265 assert_eq(parseString(`if(1){2}`), call(var("if"),intl(1),fun([],intl(2)
> 266 assert_eq(parseString(`if(1){2}else{3}`), call(var("if"),intl(1),fun([],
> 267 assert_eq(parseString(`if(1){}else{3}()()`),
> 268 call(call(call(var("if"),intl(1),fun([],intl(178)),fun([],intl(3
> 269 assert_eq(parseString(`1+2*3`), call(var("+"),intl(1),call(var("*"),intl
> 270 assert_eq(parseString(`(1+2)*3`), call(var("*"),call(var("+"),intl(1),in
> 271 assert_eq(parseString(`1*(2+3)`), call(var("*"),intl(1),call(var("+"),in
> 272 assert_eq(parseString(`1*2+3`), call(var("+"),call(var("*"),intl(1),intl
309 273
310 mixin EasyAst; | 274 assert_eq(parseString(`
311 auto s0 = decl("zzz", intl(BigInt(100))); | 275 var x = 100; #comment
312 auto s1 = expr( new AssignExpression(null, | 276 var y = 200; #comment!!!!!
313 new VarExpression(null, "zzz"), | 277 x+y
314 new FuncallExpression(null, new VarExpression(null,"+"), | 278 `),
315 new VarExpression(null, "zzz"), | 279 let("x", intl(100), let("y", intl(200), call(var("+"), var("x"),
316 new FuncallExpression(null, new VarExpression(null,"*"), | 280 );
317 new VarExpression(null, "zzz"), <
318 new StrLiteralExpression(null, "fo\\no") <
319 )))); <
320 auto s2 = new ExprStatement(null, new IntLiteralExpression(null, BigInt( <
321 281
322 Program prog = p.parseProgram(); | 282 assert_eq(parseString(`
323 assert( prog.length == 3 ); | 283 var fac = fun(x){ if(x <= 1) {1} else {x*fac(x-1)} };
324 assert( prog[0] == s0 ); | 284 fac(10)
325 assert( prog[1] == s1 ); | 285 `),
326 assert( prog[2] == s2 ); | 286 let("fac", fun(["x"],
> 287 call(var("if"),
> 288 call(var("<="), var("x"), intl(1)),
> 289 fun([], intl(1)),
> 290 fun([], call(var("*"), var("x"), call(var("fac")
> 291 )),
> 292 call(var("fac"),intl(10))
> 293 )
> 294 );
327 } 295 }
328 296
329 unittest 297 unittest
330 { 298 {
331 auto p = parserFromString(` | 299 assert_throw!ParseException(parseString(`1+`));
332 var f = fun(x,y){x+y;}; | 300 assert_throw!ParseException(parseString(`1+2}`));
333 f(1,fun(abc){}(4)); | 301 assert_throw!ParseException(parseString(`var "x"`));
334 `); | 302 assert_throw!ParseException(parseString(`var`));
335 mixin EasyAst; | 303 assert_throw!ParseException(parseString(`var x ==`));
336 Program prog = p.parseProgram(); | 304 assert_throw!ParseException(parseString(`if(){1}`));
337 assert( prog.length == 2 ); | 305 assert_throw!ParseException(parseString(`f(`));
338 assert( prog[0] == decl("f", fun( <
339 ["x","y"], [expr(funcall(var("+"), var("x"), var("y")))] <
340 ))); <
341 assert( prog[1] == expr(funcall(var("f"), <
342 intl(BigInt(1)), <
343 funcall( <
344 fun(["abc"], cast(Statement[])[]), <
345 intl(BigInt(4)) <
346 )))); <
347 } <
348 <
349 unittest <
350 { <
351 auto p = parserFromString(`var x = 1; var f = fun(){x=x+1;}; f(); f(); x <
352 Program prog = p.parseProgram(); <
353 } <
354 <
355 unittest <
356 { <
357 auto p = parserFromString(`if(x<2){1;}else{x;};`); <
358 Program prog = p.parseProgram(); <
359 assert( prog[0] == new ExprStatement(null, new FuncallExpression(null, <
360 new VarExpression(null, "if"), <
361 new FuncallExpression(null, new VarExpression(null,"<"), new Var <
362 new IntLiteralExpression(null, BigInt(2))), <
363 new FunLiteralExpression(null, [], [new ExprStatement(null, new <
364 new FunLiteralExpression(null, [], [new ExprStatement(null, new <
365 ))); <
366 } 306 }