Differences From Artifact [d3a7b70fa2f3b803]:
- File
polemy/parse.d
- 2010-11-11 15:22:55 - part of checkin [6f0ec5b7c9] on branch trunk - Custom Test Runner (user: kinaba) [annotate]
To Artifact [8212de2433d6e818]:
- File
polemy/parse.d
- 2010-11-12 04:40:33 - part of checkin [a7b5d1d95a] on branch trunk - refactored the parser, and added layerd params fun(x @t){...} (user: kinaba) [annotate]
5 5 * Parser for Polemy programming language
6 6 */
7 7 module polemy.parse;
8 8 import polemy._common;
9 9 import polemy.lex;
10 10 import polemy.ast;
11 11
12 -///
12 +/// Thrown when a syntax error is encountered
13 +
13 14 class ParseException : Exception
14 15 {
15 16 mixin ExceptionWithPosition;
16 17 }
17 18
18 -/// Entry points of this module
19 +/// Parse a string and return its AST
20 +/// Throws: ParseException, LexException, UnexpectedEOF
19 21
20 22 AST parseString(S, T...)(S str, T fn_ln_cn)
21 - { return parserFromString(str, fn_ln_cn).parse(); }
23 +{
24 + return parserFromString(str, fn_ln_cn).parse();
25 +}
22 26
23 -/// Entry points of this module
27 +/// Parse the content of a file and return its AST
28 +/// Throws: ParseException, LexException, UnexpectedEOF
24 29
25 30 AST parseFile(S, T...)(S filename, T ln_cn)
26 - { return parserFromFile(filename, ln_cn).parse(); }
31 +{
32 + return parserFromFile(filename, ln_cn).parse();
33 +}
27 34
28 35 // Named Constructors of Parser
29 36
30 37 private auto parserFromLexer(Lexer)(Lexer lex)
31 38 { return new Parser!Lexer(lex); }
32 39
33 40 private auto parserFromString(T...)(T params)
34 - { return parserFromLexer(polemy.lex.lexerFromString(params)); }
41 + { return parserFromLexer(lexerFromString(params)); }
35 42
36 43 private auto parserFromFile(T...)(T params)
37 - { return parserFromLexer(polemy.lex.lexerFromFile(params)); }
44 + { return parserFromLexer(lexerFromFile(params)); }
38 45
39 46 // Parser
40 47
41 48 private class Parser(Lexer)
42 49 if( isForwardRange!(Lexer) && is(ElementType!(Lexer) == Token) )
43 50 {
44 51 AST parse()
................................................................................
47 54 if( !lex.empty )
48 55 throw genex!ParseException(currentPosition(), "parsing ended but some tokens left");
49 56 return e;
50 57 }
51 58
52 59 AST Body()
53 60 {
54 - if( lex.empty || !lex.front.quoted && ["}",")","]"].canFind(lex.front.str) )
61 + /// Body ::= Declaration
62 + /// | TopLevelExpression
63 +
64 + if( closingBracket() )
55 65 return doNothingExpression();
56 66
57 67 auto saved = lex.save;
58 - auto pos = lex.front.pos;
59 - string kwd = lex.front.str;
60 - if( tryEat("let") || tryEat("var") || tryEat("def") || tryEat("@") )
68 + if( auto e = Declaration() )
69 + return e;
70 + lex = saved;
71 + return TopLevelExpression();
72 + }
73 +
74 + AST Declaration() // returns null if it is not a declaration
75 + {
76 + /// Declaration ::=
77 + /// ["@" Layer|"let"|"var"|"def"] Var "=" Expression ([";"|"in"] Body?)?
78 + /// | ["@" Layer|"let"|"var"|"def"] Var "(" Param%"," ")" "{" Body "}" ([";"|"in"] Body?)?
79 +
80 + auto pos = currentPosition();
81 + string layer = "";
82 +
83 + if( tryEat("@") )
61 84 {
62 - if( kwd == "@" ) {
63 - kwd ~= eatId("after @",true);
64 - if( tryEat("(") ) {
65 - lex = saved;
66 - goto asExpression;
67 - }
68 - }
69 - immutable LexPosition varpos = (lex.empty ? null : lex.front.pos);
70 - string var = eatId("after "~kwd,true);
71 - // [TODO] refactor. only auto e = ... differ
72 - if(tryEat("(")) {
73 - kwd = (kwd[0]=='@' ? kwd : ""); // "let, var, def ==> neutral layer"
74 - auto e = parseLambdaAfterOpenParen(varpos);
75 - if( tryEat(";") && !lex.empty && (lex.front.quoted || !["}",")","]"].canFind(lex.front.str)) )
76 - return new LetExpression(pos, var, kwd, e, Body());
77 - else
78 - return new LetExpression(pos, var, kwd, e, new VarExpression(varpos, var));
79 - } else {
80 - eat("=", "after "~kwd);
81 - kwd = (kwd[0]=='@' ? kwd : ""); // "let, var, def ==> neutral layer"
82 - auto e = E(0);
83 - if( tryEat(";") && !lex.empty && (lex.front.quoted || !["}",")","]"].canFind(lex.front.str)) )
84 - return new LetExpression(pos, var, kwd, e, Body());
85 - else
86 - return new LetExpression(pos, var, kwd, e, new VarExpression(varpos, var));
87 - }
85 + layer = "@" ~ eatId("after @", AllowQuoted);
86 + if( tryEat("(") )
87 + return null; // @lay(...) expression, not a declaration
88 88 }
89 +
90 + string kwd = layer;
91 + if( layer.empty && !tryEat(kwd="let") && !tryEat(kwd="var") && !tryEat(kwd="def") )
92 + return null; // none of {@lay, let, var, def} occurred, it's not a declaration
93 +
94 + auto varpos = currentPosition();
95 + string var = eatId("after "~kwd, AllowQuoted); // name of the declared variable
96 +
97 + auto e = tryEat("(")
98 + ? parseLambdaAfterOpenParen(varpos) // let var ( ...
99 + : (eat("=", "after "~kwd), E(0)); // let var = ...
100 +
101 + if( moreDeclarationExists() )
102 + return new LetExpression(pos, var, layer, e, Body());
103 + else
104 + return new LetExpression(pos, var, layer, e, new VarExpression(varpos, var));
105 + }
106 +
107 + AST TopLevelExpression()
108 + {
109 + /// TopLevelExpression ::= Expression ([";"|"in"] Body?)?
110 +
111 + auto pos = currentPosition();
112 + auto e = E(0);
113 + if( moreDeclarationExists() )
114 + return new LetExpression(pos, "_", "", e, Body());
89 115 else
90 - {
91 - asExpression:
92 - auto e = E(0);
93 - if( tryEat(";") && !lex.empty && (lex.front.quoted || (lex.front.str!="}" && lex.front.str!=")")) )
94 - return new LetExpression(pos, "_", "", e, Body());
95 - else
96 - return e;
97 - }
116 + return e;
117 + }
118 +
119 + private bool moreDeclarationExists()
120 + {
121 + return (tryEat(";") || tryEat("in")) && !closingBracket();
122 + }
123 +
124 + private bool closingBracket()
125 + {
126 + return lex.empty || !lex.front.quoted && ["}",")","]"].canFind(lex.front.str);
98 127 }
99 128
100 - // [TODO] make customizable from program
101 - static immutable string[][] operator_perferences = [
129 + // [TODO] make this customizable from program
130 + private static string[][] operator_perferences = [
102 131 ["||"],
103 132 ["&&"],
104 133 ["!="],
105 134 ["=="],
106 135 ["<","<=",">",">="],
107 136 ["|"],
108 137 ["^"],
109 138 ["&"],
110 139 ["<<", ">>"],
111 140 ["+","-"],
112 141 ["~"],
113 142 ["*","/","%"],
114 - ["^^"]
143 + ["^^","**"]
115 144 ];
116 145
117 - AST E(int level)
146 + AST E(size_t level)
118 147 {
148 + /// Expression ::= (Binary left-associative operators over) Funcall
149 +
150 + AST rec(AST lhs)
151 + {
152 + if( closingBracket() )
153 + return lhs;
154 +
155 + auto pos = currentPosition();
156 + foreach(op; operator_perferences[level])
157 + if( tryEat(op) )
158 + return rec(
159 + new FuncallExpression(lhs.pos, new VarExpression(pos, op), lhs, E(level+1)));
160 + return lhs;
161 + }
162 +
119 163 if( operator_perferences.length <= level )
120 164 return Funcall();
121 165 else
122 - {
123 - auto ops = operator_perferences[level];
124 - auto e = E(level+1);
125 - seq:
126 - while( !lex.empty )
127 - {
128 - auto pos = lex.front.pos;
129 - foreach(op; ops)
130 - if( tryEat(op) )
131 - {
132 - e = new FuncallExpression(e.pos, new VarExpression(pos, op), e, E(level+1));
133 - continue seq;
134 - }
135 - break;
136 - }
137 - return e;
138 - }
166 + return rec(E(level+1));
139 167 }
140 168
141 169 AST Funcall()
142 170 {
171 + /// Funcall ::= BaseExpression ["(" Expression%"," ")"]*
172 +
143 173 auto e = BaseExpression();
144 174 while( tryEat("(") )
145 175 {
146 176 auto pos = currentPosition();
147 177 AST[] args;
148 178 while( !tryEat(")") ) {
149 179 if( lex.empty )
150 - throw genex!UnexpectedEOF(pos,"Closing ')' for arguments not found");
180 + throw genex!UnexpectedEOF(pos, "closing ')' for arguments not found");
151 181 args ~= E(0);
152 182 if( !tryEat(",") ) {
153 183 eat(")", "after function parameters");
154 184 break;
155 185 }
156 186 }
157 187 e = new FuncallExpression(e.pos, e, args);
................................................................................
208 238 return new FuncallExpression(pos,
209 239 new VarExpression(pos, "if"),
210 240 cond,
211 241 new FunLiteral(thenPos, [], th),
212 242 new FunLiteral(elsePos, [], el)
213 243 );
214 244 }
215 - if( tryEat("fun") || tryEat("\u03BB") )
245 + if( tryEat("fun") || tryEat("\u03BB") ) // lambda!!
216 246 {
217 247 eat("(", "after fun");
218 248 return parseLambdaAfterOpenParen(pos);
219 249 }
220 250 scope(exit) lex.popFront;
221 251 return new VarExpression(pos, lex.front.str);
222 252 }
223 253
224 254 AST parseLambdaAfterOpenParen(immutable LexPosition pos)
225 255 {
226 256 Parameter[] params;
227 257 while( !tryEat(")") )
228 258 {
229 - params ~= new Parameter(eatId("for function parameter"), []);
259 + params ~= parseParam();
230 260 if( !tryEat(",") ) {
231 261 eat(")", "after function parameters");
232 262 break;
233 263 }
234 264 }
235 265 eat("{", "after function parameters");
236 266 auto funbody = Body();
237 267 eat("}", "after function body");
238 268 return new FunLiteral(pos, params, funbody);
239 269 }
270 +
271 + Parameter parseParam()
272 + {
273 + string var;
274 + string[] lay;
275 + while( !closingBracket() && !lex.empty && lex.front.str!="," )
276 + {
277 + auto pos = currentPosition();
278 + string p = eatId("for function parameter", AllowQuoted);
279 + if( p == "@" )
280 + lay ~= "@" ~ eatId("after @", AllowQuoted);
281 + else if( var.empty )
282 + var = p;
283 + else
284 + throw genex!ParseException(pos, "one parameter has two names");
285 + }
286 + return new Parameter(var, lay);
287 + }
240 288
241 289 private:
242 290 Lexer lex;
243 291 this(Lexer lex) { this.lex = lex; }
292 +
293 + bool isNumber(string s)
294 + {
295 + return find!(`a<'0' || '9'<a`)(s).empty;
296 + }
244 297
245 298 void eat(string kwd, lazy string msg)
246 299 {
247 300 if( !tryEat(kwd) )
248 301 if( lex.empty )
249 302 throw genex!UnexpectedEOF(
250 303 currentPosition(), sprintf!"%s is expected %s but not found"(kwd,msg));
................................................................................
257 310 {
258 311 if( lex.empty || lex.front.quoted || lex.front.str!=kwd )
259 312 return false;
260 313 lex.popFront;
261 314 return true;
262 315 }
263 316
264 - string eatId(lazy string msg, bool allowQuoted=false)
317 + enum {AllowQuoted=true, DisallowQuoted=false};
318 + string eatId(lazy string msg, bool aq=DisallowQuoted)
265 319 {
266 320 if( lex.empty )
267 321 throw genex!UnexpectedEOF(currentPosition(), "identifier is expected but not found "~msg);
268 - if( !allowQuoted && lex.front.quoted )
322 + if( !aq && lex.front.quoted )
269 323 throw genex!ParseException(currentPosition(), "identifier is expected but not found "~msg);
270 324 scope(exit) lex.popFront;
271 325 return lex.front.str;
272 326 }
273 327
274 - bool isNumber(string s)
275 - {
276 - return find!(`a<'0'||'9'<a`)(s).empty;
277 - }
278 -
279 328 AST doNothingExpression()
280 329 {
281 330 return new IntLiteral(currentPosition(), BigInt(178));
282 331 }
283 332
284 333 immutable(LexPosition) currentPosition()
285 334 {
................................................................................
295 344 assert_eq(parseString(`"foo"`), strl("foo"));
296 345 assert_eq(parseString(`fun(){1}`), fun([],intl(1)));
297 346 assert_eq(parseString(`fun(x){1}`), fun(["x"],intl(1)));
298 347 assert_eq(parseString("\u03BB(){1}"), fun([],intl(1)));
299 348 assert_eq(parseString("\u03BB(x){1}"), fun(["x"],intl(1)));
300 349 assert_eq(parseString(`1;2`), let("_","",intl(1),intl(2)));
301 350 assert_eq(parseString(`1;2;`), let("_","",intl(1),intl(2)));
302 - assert_eq(parseString(`let x=1;2`), let("x","",intl(1),intl(2)));
351 + assert_eq(parseString(`let x=1 in 2`), let("x","",intl(1),intl(2)));
303 352 assert_eq(parseString(`var x=1;2;`), let("x","",intl(1),intl(2)));
304 353 assert_eq(parseString(`def x=1`), let("x","",intl(1),var("x")));
305 354 assert_eq(parseString(`@val x=1;`), let("x","@val",intl(1),var("x")));
306 355 assert_eq(parseString(`@typ x="#int";`), let("x","@typ",strl("#int"),var("x")));
307 356 assert_eq(parseString(`f(1,2)`), call(var("f"),intl(1),intl(2)));
308 357 assert_eq(parseString(`if(1){2}`), call(var("if"),intl(1),fun([],intl(2)),fun([],intl(178))));
309 358 assert_eq(parseString(`if(1){2}else{3}`), call(var("if"),intl(1),fun([],intl(2)),fun([],intl(3))));
................................................................................
310 359 assert_eq(parseString(`if(1){}else{3}()()`),
311 360 call(call(call(var("if"),intl(1),fun([],intl(178)),fun([],intl(3))))));
312 361 assert_eq(parseString(`1+2*3`), call(var("+"),intl(1),call(var("*"),intl(2),intl(3))));
313 362 assert_eq(parseString(`(1+2)*3`), call(var("*"),call(var("+"),intl(1),intl(2)),intl(3)));
314 363 assert_eq(parseString(`1*(2+3)`), call(var("*"),intl(1),call(var("+"),intl(2),intl(3))));
315 364 assert_eq(parseString(`1*2+3`), call(var("+"),call(var("*"),intl(1),intl(2)),intl(3)));
316 365 assert_eq(parseString(`@x(1)`), lay("@x", intl(1)));
366 + assert_eq(parseString(`fun(x @v @t, y, z @t){}`),
367 + funp([param("x",["@v","@t"]), param("y",[]), param("z",["@t"])], intl(178)));
317 368
318 369 assert_eq(parseString(`
319 370 let x = 100; #comment
320 371 let y = 200; #comment!!!!!
321 372 x+y
322 373 `),
323 374 let("x", "", intl(100), let("y", "", intl(200), call(var("+"), var("x"), var("y"))))