Differences From Artifact [51dd3bd106fd9ecb]:
- File
polemy/parse.d
- 2010-11-08 12:26:39 - part of checkin [80ff567c75] on branch trunk - Testing easyAST. (user: kinaba) [annotate]
To Artifact [10baa46251b8a0b6]:
- File
polemy/parse.d
- 2010-11-08 14:59:30 - part of checkin [b985f3bf91] on branch trunk - refactored parser to change AST to ML-like one. (user: kinaba) [annotate]
5 5 * Parser for Polemy programming language
6 6 */
7 7 module polemy.parse;
8 8 import polemy._common;
9 9 import polemy.lex;
10 10 import polemy.ast;
11 11
12 -/// Parsing Failure
12 +/// Exception from this module
13 13
14 -class ParserException : Exception
14 +class ParseException : Exception
15 15 {
16 -private:
17 - this(string msg) { super(msg); }
18 - static ParserException create(Lexer)(Lexer lex, string msg)
19 - {
20 - return new ParserException(sprintf!"[%s] %s"(
21 - lex.empty ? "EOF" : to!string(lex.front.pos), msg));
22 - }
16 + const LexPosition pos;
17 +
18 + private this( const LexPosition pos, string msg )
19 + { super(sprintf!"%s [%s]"(msg, pos)); this.pos = pos; }
23 20 }
21 +
22 +private auto createException(Lexer)(Lexer lex, string msg)
23 + { return new ParseException(lex.empty?null:lex.front.pos, msg); }
24 +
25 +/// Entry point of this module
26 +
27 +auto parseString(S, T...)(S str, T fn_ln_cn)
28 + { return parserFromString(str, fn_ln_cn).parse(); }
29 +
30 +auto parseFile(S, T...)(S filename,T ln_cn)
31 + { return parserFromString(filename, ln_cn).parse(); }
24 32
25 33 /// Named Constructor of Parser
26 34
27 -auto parserFromLexer(Lexer)(Lexer lex)
28 -{
29 - return new Parser!Lexer(lex);
30 -}
35 +private auto parserFromLexer(Lexer)(Lexer lex)
36 + { return new Parser!Lexer(lex); }
31 37
32 -/// Named Constructor of Parser (just fwd to lexerFromString)
38 +private auto parserFromString(T...)(T params)
39 + { return parserFromLexer(polemy.lex.lexerFromString(params)); }
33 40
34 -auto parserFromString(T...)(T params)
35 -{
36 - return parserFromLexer(polemy.lex.lexerFromString(params));
37 -}
38 -
39 -/// Named Constructor of Parser (just fwd to lexerFromFile)
40 -
41 -auto parserFromFile(T...)(T params)
42 -{
43 - return parserFromLexer(polemy.lex.lexerFromFile(params));
44 -}
41 +private auto parserFromFile(T...)(T params)
42 + { return parserFromLexer(polemy.lex.lexerFromFile(params)); }
45 43
46 44 /// Parser
47 45
48 -class Parser(Lexer)
46 +private class Parser(Lexer)
47 + if( isForwardRange!(Lexer) && is(ElementType!(Lexer) == Token) )
49 48 {
50 - this(Lexer lex)
49 + AST parse()
51 50 {
52 - this.lex = lex;
53 - }
54 -
55 - Program parseProgram()
56 - {
57 - Program p = parseStatements();
58 - if( !lex.empty ) {
59 - auto e = ParserException.create(lex, "cannot reach eof");
60 - throw e;
61 - }
62 - return p;
51 + auto e = Body();
52 + if( !lex.empty )
53 + throw createException(lex, "input is not ended but parser came to the end");
54 + return e;
63 55 }
64 56
65 - Program parseStatements()
57 + AST Body()
66 58 {
67 - Program p;
68 - while( !lex.empty && (lex.front.quoted || lex.front.str!="}") )
69 - p ~= parseStatement();
70 - return p;
71 - }
59 + if( lex.empty || !lex.front.quoted && lex.front.str=="}" )
60 + return doNothingExpression();
72 61
73 - Statement parseStatement()
74 - {
75 - auto saved = lex.save;
76 - scope(failure) lex = saved;
77 -
78 - if( lex.empty )
79 - throw new ParserException("EOF during parsing a statement");
80 62 auto pos = lex.front.pos;
81 -
82 - if( !lex.front.quoted && lex.front.str=="var" )
63 + if( tryEat("var") )
83 64 {
84 - // "var" Var "=" Expression ";"
85 - lex.popFront;
86 - string var = lex.front.str;
87 - lex.popFront;
88 - eat("=", "for variable declaration");
89 - auto parsed = new DeclStatement(pos, var, parseExpression());
90 - eat(";", "after variable declaration");
91 - return parsed;
65 + immutable LexPosition varpos = (lex.empty ? null : lex.front.pos);
66 + string var = eatId("after var");
67 + eat("=", "after var");
68 + auto e = E(0);
69 + if( tryEat(";") && !lex.empty && (lex.front.quoted || (lex.front.str!="}" && lex.front.str!=")")) )
70 + return new LetExpression(pos, var, e, Body());
71 + else
72 + return new LetExpression(pos, var, e, new VarExpression(varpos, var));
92 73 }
93 74 else
94 75 {
95 - // Expression ";"
96 - auto parsed = new ExprStatement(pos, parseExpression());
97 - eat(";", "after statement");
98 - return parsed;
76 + auto e = E(0);
77 + if( tryEat(";") && !lex.empty && (lex.front.quoted || (lex.front.str!="}" && lex.front.str!=")")) )
78 + return new LetExpression(pos, "_", e, Body());
79 + else
80 + return e;
99 81 }
100 82 }
101 83
102 - Expression parseExpression()
103 - {
104 - auto saved = lex.save;
105 - scope(failure) lex = saved;
106 - return parseE(0);
107 - }
108 -
109 - // [TODO] multi-char operators are not supported by the lexer...
84 + // [TODO] make customizable from program
110 85 static immutable string[][] operator_perferences = [
111 - ["="],
112 - ["or"],
113 - ["and"],
86 + ["||"],
87 + ["&&"],
114 88 ["!="],
115 89 ["=="],
116 90 ["<","<=",">",">="],
117 91 ["|"],
118 92 ["^"],
119 93 ["&"],
120 94 ["<<", ">>"],
121 95 ["+","-"],
122 - ["*","/","%"]
96 + ["~"],
97 + ["*","/","%"],
98 + ["^^"]
123 99 ];
124 100
125 - Expression parseE(int level = 0)
101 + AST E(int level)
126 102 {
127 103 if( operator_perferences.length <= level )
128 - return parseBaseExpression();
104 + return Funcall();
129 105 else
130 106 {
131 107 auto ops = operator_perferences[level];
132 - auto e = parseE(level+1);
108 + auto e = E(level+1);
133 109 seq:
134 110 while( !lex.empty )
135 111 {
136 112 auto pos = lex.front.pos;
137 113 foreach(op; ops)
138 114 if( tryEat(op) )
139 115 {
140 - if( op == "=" ) // right assoc
141 - return new AssignExpression(e.pos, e, parseE(level));
142 - else
143 - e = new FuncallExpression(e.pos, new VarExpression(pos, op), e, parseE(level+1));
116 + e = new FuncallExpression(e.pos, new VarExpression(pos, op), e, E(level+1));
144 117 continue seq;
145 118 }
146 119 break;
147 120 }
148 121 return e;
149 122 }
150 123 }
151 124
152 - Expression parseBaseExpression()
125 + AST Funcall()
153 126 {
154 - if( lex.empty )
155 - throw new ParserException("EOF during parsing an expression");
156 - auto pos = lex.front.pos;
157 - Expression e = parseBaseBaseExpression();
158 - while( tryEat("(") ) // funcall
127 + auto e = BaseExpression();
128 + while( tryEat("(") )
159 129 {
160 - Expression[] args;
130 + AST[] args;
161 131 while( !tryEat(")") ) {
162 - if( lex.empty ) {
163 - auto ex = ParserException.create(lex,"Unexpected EOF");
164 - throw ex;
165 - }
166 - args ~= parseE();
132 + if( lex.empty )
133 + throw createException(lex,"Unexpected EOF");
134 + args ~= E(0);
167 135 if( !tryEat(",") ) {
168 136 eat(")", "after function parameters");
169 137 break;
170 138 }
171 139 }
172 - e = new FuncallExpression(pos, e, args);
140 + e = new FuncallExpression(e.pos, e, args);
173 141 }
174 142 return e;
175 143 }
176 144
177 - Expression parseBaseBaseExpression()
145 + AST BaseExpression()
178 146 {
179 147 if( lex.empty )
180 - throw new ParserException("EOF during parsing an expression");
148 + throw createException(lex, "Reached EOF when tried to parse an expression");
149 +
181 150 auto pos = lex.front.pos;
182 -
183 151 if( lex.front.quoted )
184 152 {
185 153 scope(exit) lex.popFront;
186 - return new StrLiteralExpression(pos, lex.front.str);
154 + return new StrLiteral(pos, lex.front.str);
187 155 }
188 - if( isNumber(lex.front.str) ) // is_number
156 + if( isNumber(lex.front.str) )
189 157 {
190 158 scope(exit) lex.popFront;
191 - return new IntLiteralExpression(pos, BigInt(cast(string)lex.front.str));
159 + return new IntLiteral(pos, BigInt(cast(string)lex.front.str));
192 160 }
193 161 if( tryEat("(") )
194 162 {
195 - auto e = parseE();
163 + auto e = Body();
196 164 eat(")", "after parenthesized expression");
197 165 return e;
198 166 }
199 167 if( tryEat("if") )
200 168 {
201 169 eat("(", "after if");
202 - auto cond = parseE();
170 + auto cond = E(0);
203 171 eat(")", "after if condition");
204 172 auto thenPos = lex.front.pos;
205 173 eat("{", "after if condition");
206 - Statement[] th = parseStatements();
174 + auto th = Body();
207 175 eat("}", "after if-then body");
208 - Statement[] el;
209 - auto elsePos = lex.front.pos;
176 + auto el = doNothingExpression();
177 + auto elsePos = (lex.empty ? LexPosition.dummy : lex.front.pos);
210 178 if( tryEat("else") ) {
211 179 eat("{", "after else");
212 - el = parseStatements();
180 + el = Body();
213 181 eat("}", "after else body");
214 182 }
215 183 return new FuncallExpression(pos,
216 184 new VarExpression(pos, "if"),
217 185 cond,
218 - new FunLiteralExpression(thenPos, [], th),
219 - new FunLiteralExpression(elsePos, [], el)
186 + new FunLiteral(thenPos, [], th),
187 + new FunLiteral(elsePos, [], el)
220 188 );
221 189 }
222 -
223 190 if( tryEat("fun") )
224 191 {
225 192 eat("(", "after fun");
226 193 string[] params;
227 - while(!tryEat(")"))
194 + while( !tryEat(")") )
228 195 {
229 - if( lex.empty ) {
230 - auto e = ParserException.create(lex,"Unexpected EOF");
231 - throw e;
232 - }
233 - if( lex.front.quoted ) {
234 - auto e = ParserException.create(lex,"Identifier Expected for parameters");
235 - throw e;
236 - }
237 - params ~= lex.front.str;
238 - lex.popFront;
196 + params ~= eatId("for function parameter");
239 197 if( !tryEat(",") ) {
240 198 eat(")", "after function parameters");
241 199 break;
242 200 }
243 201 }
244 202 eat("{", "after function parameters");
245 - Statement[] funbody;
246 - while(!tryEat("}")) {
247 - if( lex.empty ) {
248 - auto e = ParserException.create(lex,"Unexpected EOF");
249 - throw e;
250 - }
251 - funbody ~= parseStatement();
252 - }
253 - return new FunLiteralExpression(pos, params, funbody);
203 + auto funbody = Body();
204 + eat("}", "after function body");
205 + return new FunLiteral(pos, params, funbody);
254 206 }
255 207 scope(exit) lex.popFront;
256 208 return new VarExpression(pos, lex.front.str);
257 209 }
258 210
259 211 private:
260 212 Lexer lex;
213 + this(Lexer lex) { this.lex = lex; }
261 214
262 215 void eat(string kwd, lazy string msg)
263 216 {
264 217 if( !tryEat(kwd) )
265 - {
266 - auto e = ParserException.create(lex, "'"~kwd~"' is expected "~msg~" but '"
218 + throw createException(lex, "'"~kwd~"' is expected "~msg~" but '"
267 219 ~(lex.empty ? "EOF" : lex.front.str)~"' occured");
268 - throw e;
269 - }
270 220 }
271 221
272 222 bool tryEat(string kwd)
273 223 {
274 224 if( lex.empty || lex.front.quoted || lex.front.str!=kwd )
275 225 return false;
276 226 lex.popFront;
277 227 return true;
278 228 }
229 +
230 + string eatId(lazy string msg)
231 + {
232 + if( lex.empty || lex.front.quoted )
233 + throw createException(lex, "identifier is expected but not found "~msg);
234 + string id = lex.front.str;
235 + lex.popFront;
236 + return id;
237 + }
279 238
280 239 bool isNumber(string s)
281 240 {
282 241 return find!(`a<'0'||'9'<a`)(s).empty;
283 242 }
284 -}
285 243
286 -unittest
287 -{
288 - auto p = parserFromString(`
289 - var x = 100;
290 - var y = 200;
291 - `);
292 - Program prog = p.parseProgram();
293 - assert( prog.length == 2 );
294 - auto p0 = cast(DeclStatement)prog[0];
295 - auto p1 = cast(DeclStatement)prog[1];
296 - assert( p0.var == "x" );
297 - assert( p1.var == "y" );
298 - assert( (cast(IntLiteralExpression)p0.expr).data == 100 );
299 - assert( (cast(IntLiteralExpression)p1.expr).data == 200 );
244 + AST doNothingExpression()
245 + {
246 + return new IntLiteral(lex.empty?null:lex.front.pos, BigInt(178));
247 + }
300 248 }
301 249
302 250 unittest
303 251 {
304 - auto p = parserFromString(`
305 - var zzz = 100; # comment
306 - zzz = zzz + zzz * "fo\no"; # comment
307 - 42;
308 - `);
252 + mixin EasyAST;
253 +
254 + assert_eq(parseString(`123`), intl(123));
255 + assert_eq(parseString(`"foo"`), strl("foo"));
256 + assert_eq(parseString(`fun(){1}`), fun([],intl(1)));
257 + assert_eq(parseString(`fun(x){1}`), fun(["x"],intl(1)));
258 + assert_eq(parseString(`1;2`), let("_",intl(1),intl(2)));
259 + assert_eq(parseString(`1;2;`), let("_",intl(1),intl(2)));
260 + assert_eq(parseString(`var x=1;2`), let("x",intl(1),intl(2)));
261 + assert_eq(parseString(`var x=1;2;`), let("x",intl(1),intl(2)));
262 + assert_eq(parseString(`var x=1`), let("x",intl(1),var("x")));
263 + assert_eq(parseString(`var x=1;`), let("x",intl(1),var("x")));
264 + assert_eq(parseString(`f(1,2)`), call(var("f"),intl(1),intl(2)));
265 + assert_eq(parseString(`if(1){2}`), call(var("if"),intl(1),fun([],intl(2)),fun([],intl(178))));
266 + assert_eq(parseString(`if(1){2}else{3}`), call(var("if"),intl(1),fun([],intl(2)),fun([],intl(3))));
267 + assert_eq(parseString(`if(1){}else{3}()()`),
268 + call(call(call(var("if"),intl(1),fun([],intl(178)),fun([],intl(3))))));
269 + assert_eq(parseString(`1+2*3`), call(var("+"),intl(1),call(var("*"),intl(2),intl(3))));
270 + assert_eq(parseString(`(1+2)*3`), call(var("*"),call(var("+"),intl(1),intl(2)),intl(3)));
271 + assert_eq(parseString(`1*(2+3)`), call(var("*"),intl(1),call(var("+"),intl(2),intl(3))));
272 + assert_eq(parseString(`1*2+3`), call(var("+"),call(var("*"),intl(1),intl(2)),intl(3)));
309 273
310 - mixin EasyAst;
311 - auto s0 = decl("zzz", intl(BigInt(100)));
312 - auto s1 = expr( new AssignExpression(null,
313 - new VarExpression(null, "zzz"),
314 - new FuncallExpression(null, new VarExpression(null,"+"),
315 - new VarExpression(null, "zzz"),
316 - new FuncallExpression(null, new VarExpression(null,"*"),
317 - new VarExpression(null, "zzz"),
318 - new StrLiteralExpression(null, "fo\\no")
319 - ))));
320 - auto s2 = new ExprStatement(null, new IntLiteralExpression(null, BigInt(42)));
274 + assert_eq(parseString(`
275 + var x = 100; #comment
276 + var y = 200; #comment!!!!!
277 + x+y
278 + `),
279 + let("x", intl(100), let("y", intl(200), call(var("+"), var("x"), var("y"))))
280 + );
321 281
322 - Program prog = p.parseProgram();
323 - assert( prog.length == 3 );
324 - assert( prog[0] == s0 );
325 - assert( prog[1] == s1 );
326 - assert( prog[2] == s2 );
282 + assert_eq(parseString(`
283 + var fac = fun(x){ if(x <= 1) {1} else {x*fac(x-1)} };
284 + fac(10)
285 + `),
286 + let("fac", fun(["x"],
287 + call(var("if"),
288 + call(var("<="), var("x"), intl(1)),
289 + fun([], intl(1)),
290 + fun([], call(var("*"), var("x"), call(var("fac"),call(var("-"),var("x"),intl(1)))))
291 + )),
292 + call(var("fac"),intl(10))
293 + )
294 + );
327 295 }
328 296
329 297 unittest
330 298 {
331 - auto p = parserFromString(`
332 - var f = fun(x,y){x+y;};
333 - f(1,fun(abc){}(4));
334 - `);
335 - mixin EasyAst;
336 - Program prog = p.parseProgram();
337 - assert( prog.length == 2 );
338 - assert( prog[0] == decl("f", fun(
339 - ["x","y"], [expr(funcall(var("+"), var("x"), var("y")))]
340 - )));
341 - assert( prog[1] == expr(funcall(var("f"),
342 - intl(BigInt(1)),
343 - funcall(
344 - fun(["abc"], cast(Statement[])[]),
345 - intl(BigInt(4))
346 - ))));
347 -}
348 -
349 -unittest
350 -{
351 - auto p = parserFromString(`var x = 1; var f = fun(){x=x+1;}; f(); f(); x;`);
352 - Program prog = p.parseProgram();
353 -}
354 -
355 -unittest
356 -{
357 - auto p = parserFromString(`if(x<2){1;}else{x;};`);
358 - Program prog = p.parseProgram();
359 - assert( prog[0] == new ExprStatement(null, new FuncallExpression(null,
360 - new VarExpression(null, "if"),
361 - new FuncallExpression(null, new VarExpression(null,"<"), new VarExpression(null,"x"),
362 - new IntLiteralExpression(null, BigInt(2))),
363 - new FunLiteralExpression(null, [], [new ExprStatement(null, new IntLiteralExpression(null, BigInt(1)))]),
364 - new FunLiteralExpression(null, [], [new ExprStatement(null, new VarExpression(null, "x"))])
365 - )));
299 + assert_throw!ParseException(parseString(`1+`));
300 + assert_throw!ParseException(parseString(`1+2}`));
301 + assert_throw!ParseException(parseString(`var "x"`));
302 + assert_throw!ParseException(parseString(`var`));
303 + assert_throw!ParseException(parseString(`var x ==`));
304 + assert_throw!ParseException(parseString(`if(){1}`));
305 + assert_throw!ParseException(parseString(`f(`));
366 306 }