Differences From Artifact [d3a7b70fa2f3b803]:
- File
polemy/parse.d
- 2010-11-11 15:22:55 - part of checkin [6f0ec5b7c9] on branch trunk - Custom Test Runner (user: kinaba) [annotate]
To Artifact [8212de2433d6e818]:
- File
polemy/parse.d
- 2010-11-12 04:40:33 - part of checkin [a7b5d1d95a] on branch trunk - refactored the parser, and added layerd params fun(x @t){...} (user: kinaba) [annotate]
5 * Parser for Polemy programming language 5 * Parser for Polemy programming language
6 */ 6 */
7 module polemy.parse; 7 module polemy.parse;
8 import polemy._common; 8 import polemy._common;
9 import polemy.lex; 9 import polemy.lex;
10 import polemy.ast; 10 import polemy.ast;
11 11
12 /// | 12 /// Thrown when encountered a syntax error
> 13
13 class ParseException : Exception 14 class ParseException : Exception
14 { 15 {
15 mixin ExceptionWithPosition; 16 mixin ExceptionWithPosition;
16 } 17 }
17 18
18 /// Entry points of this module | 19 /// Parse a string and return its AST
> 20 /// Throws: ParseException, LexException, UnexpectedEOF
19 21
20 AST parseString(S, T...)(S str, T fn_ln_cn) 22 AST parseString(S, T...)(S str, T fn_ln_cn)
> 23 {
21 { return parserFromString(str, fn_ln_cn).parse(); } | 24 return parserFromString(str, fn_ln_cn).parse();
> 25 }
22 26
23 /// Entry points of this module | 27 /// Parse the content of a file and return its AST
> 28 /// Throws: ParseException, LexException, UnexpectedEOF
24 29
25 AST parseFile(S, T...)(S filename, T ln_cn) 30 AST parseFile(S, T...)(S filename, T ln_cn)
> 31 {
26 { return parserFromFile(filename, ln_cn).parse(); } | 32 return parserFromFile(filename, ln_cn).parse();
> 33 }
27 34
28 // Named Constructors of Parser 35 // Named Constructors of Parser
29 36
30 private auto parserFromLexer(Lexer)(Lexer lex) 37 private auto parserFromLexer(Lexer)(Lexer lex)
31 { return new Parser!Lexer(lex); } 38 { return new Parser!Lexer(lex); }
32 39
33 private auto parserFromString(T...)(T params) 40 private auto parserFromString(T...)(T params)
34 { return parserFromLexer(polemy.lex.lexerFromString(params)); } | 41 { return parserFromLexer(lexerFromString(params)); }
35 42
36 private auto parserFromFile(T...)(T params) 43 private auto parserFromFile(T...)(T params)
37 { return parserFromLexer(polemy.lex.lexerFromFile(params)); } | 44 { return parserFromLexer(lexerFromFile(params)); }
38 45
39 // Parser 46 // Parser
40 47
41 private class Parser(Lexer) 48 private class Parser(Lexer)
42 if( isForwardRange!(Lexer) && is(ElementType!(Lexer) == Token) ) 49 if( isForwardRange!(Lexer) && is(ElementType!(Lexer) == Token) )
43 { 50 {
44 AST parse() 51 AST parse()
................................................................................................................................................................................
47 if( !lex.empty ) 54 if( !lex.empty )
48 throw genex!ParseException(currentPosition(), "parsing e 55 throw genex!ParseException(currentPosition(), "parsing e
49 return e; 56 return e;
50 } 57 }
51 58
52 AST Body() 59 AST Body()
53 { 60 {
54 if( lex.empty || !lex.front.quoted && ["}",")","]"].canFind(lex. | 61 /// Body ::= Declaration
> 62 /// | TopLevelExpression
> 63
> 64 if( closingBracket() )
55 return doNothingExpression(); 65 return doNothingExpression();
56 66
57 auto saved = lex.save; 67 auto saved = lex.save;
> 68 if( auto e = Declaration() )
> 69 return e;
> 70 lex = saved;
> 71 return TopLevelExpression();
> 72 }
> 73
> 74 AST Declaration() // returns null if it is not a declaration
> 75 {
> 76 /// Declaration ::=
> 77 /// ["@" Layer|"let"|"var"|"def"] Var "=" Expression ([";"|"i
> 78 /// | ["@" Layer|"let"|"var"|"def"] Var "(" Param%"," ")" "{" B
> 79
58 auto pos = lex.front.pos; | 80 auto pos = currentPosition();
59 string kwd = lex.front.str; <
60 if( tryEat("let") || tryEat("var") || tryEat("def") || tryEat("@ <
> 81 string layer = "";
> 82
> 83 if( tryEat("@") )
61 { 84 {
62 if( kwd == "@" ) { <
63 kwd ~= eatId("after @",true); | 85 layer = "@" ~ eatId("after @", AllowQuoted);
64 if( tryEat("(") ) { | 86 if( tryEat("(") )
65 lex = saved; <
66 goto asExpression; <
67 } <
68 } <
69 immutable LexPosition varpos = (lex.empty ? null : lex.f <
70 string var = eatId("after "~kwd,true); <
71 // [TODO] refactor. only auto e = ... differ <
72 if(tryEat("(")) { <
73 kwd = (kwd[0]=='@' ? kwd : ""); // "let, var, de <
74 auto e = parseLambdaAfterOpenParen(varpos); <
75 if( tryEat(";") && !lex.empty && (lex.front.quot <
76 return new LetExpression(pos, var, kwd, <
77 else <
78 return new LetExpression(pos, var, kwd, <
79 } else { <
80 eat("=", "after "~kwd); <
81 kwd = (kwd[0]=='@' ? kwd : ""); // "let, var, de <
82 auto e = E(0); <
83 if( tryEat(";") && !lex.empty && (lex.front.quot <
84 return new LetExpression(pos, var, kwd, <
85 else <
86 return new LetExpression(pos, var, kwd, <
87 } <
> 87 return null; // @lay(...) expression, not a decl
88 } 88 }
> 89
> 90 string kwd = layer;
> 91 if( layer.empty && !tryEat(kwd="let") && !tryEat(kwd="var") && !
> 92 return null; // none of {@lay, let, var, def} occurred,
> 93
> 94 auto varpos = currentPosition();
> 95 string var = eatId("after "~kwd, AllowQuoted); // name of the de
> 96
> 97 auto e = tryEat("(")
> 98 ? parseLambdaAfterOpenParen(varpos) // let var ( ...
> 99 : (eat("=", "after "~kwd), E(0)); // let var = ...
> 100
> 101 if( moreDeclarationExists() )
> 102 return new LetExpression(pos, var, layer, e, Body());
> 103 else
> 104 return new LetExpression(pos, var, layer, e, new VarExpr
> 105 }
> 106
> 107 AST TopLevelExpression()
> 108 {
> 109 /// TopLevelExpression ::= Expression ([";"|"in"] Body?)?
> 110
> 111 auto pos = currentPosition();
> 112 auto e = E(0);
> 113 if( moreDeclarationExists() )
> 114 return new LetExpression(pos, "_", "", e, Body());
89 else 115 else
> 116 return e;
> 117 }
90 { | 118
91 asExpression: <
92 auto e = E(0); <
93 if( tryEat(";") && !lex.empty && (lex.front.quoted || (l | 119 private bool moreDeclarationExists()
94 return new LetExpression(pos, "_", "", e, Body() <
> 120 {
95 else | 121 return (tryEat(";") || tryEat("in")) && !closingBracket();
96 return e; <
97 } | 122 }
> 123
> 124 private bool closingBracket()
> 125 {
> 126 return lex.empty || !lex.front.quoted && ["}",")","]"].canFind(l
98 } 127 }
99 128
100 // [TODO] make customizable from program | 129 // [TODO] make this customizable from program
101 static immutable string[][] operator_perferences = [ | 130 private static string[][] operator_perferences = [
102 ["||"], 131 ["||"],
103 ["&&"], 132 ["&&"],
104 ["!="], 133 ["!="],
105 ["=="], 134 ["=="],
106 ["<","<=",">",">="], 135 ["<","<=",">",">="],
107 ["|"], 136 ["|"],
108 ["^"], 137 ["^"],
109 ["&"], 138 ["&"],
110 ["<<", ">>"], 139 ["<<", ">>"],
111 ["+","-"], 140 ["+","-"],
112 ["~"], 141 ["~"],
113 ["*","/","%"], 142 ["*","/","%"],
114 ["^^"] | 143 ["^^","**"]
115 ]; 144 ];
116 145
117 AST E(int level) | 146 AST E(size_t level)
118 { 147 {
> 148 /// Expression ::= (Binary left-associative operators over) Func
> 149
> 150 AST rec(AST lhs)
> 151 {
> 152 if( closingBracket() )
> 153 return lhs;
> 154
> 155 auto pos = currentPosition();
> 156 foreach(op; operator_perferences[level])
> 157 if( tryEat(op) )
> 158 return rec(
> 159 new FuncallExpression(lhs.pos, n
> 160 return lhs;
> 161 }
> 162
119 if( operator_perferences.length <= level ) 163 if( operator_perferences.length <= level )
120 return Funcall(); 164 return Funcall();
121 else 165 else
122 { <
123 auto ops = operator_perferences[level]; <
124 auto e = E(level+1); | 166 return rec(E(level+1));
125 seq: <
126 while( !lex.empty ) <
127 { <
128 auto pos = lex.front.pos; <
129 foreach(op; ops) <
130 if( tryEat(op) ) <
131 { <
132 e = new FuncallExpression(e.pos, <
133 continue seq; <
134 } <
135 break; <
136 } <
137 return e; <
138 } <
139 } 167 }
140 168
141 AST Funcall() 169 AST Funcall()
142 { 170 {
> 171 /// Funcall ::= BaseExpression ["(" Expression%"," ")"]*
> 172
143 auto e = BaseExpression(); 173 auto e = BaseExpression();
144 while( tryEat("(") ) 174 while( tryEat("(") )
145 { 175 {
146 auto pos = currentPosition(); 176 auto pos = currentPosition();
147 AST[] args; 177 AST[] args;
148 while( !tryEat(")") ) { 178 while( !tryEat(")") ) {
149 if( lex.empty ) 179 if( lex.empty )
150 throw genex!UnexpectedEOF(pos,"Closing ' | 180 throw genex!UnexpectedEOF(pos, "closing
151 args ~= E(0); 181 args ~= E(0);
152 if( !tryEat(",") ) { 182 if( !tryEat(",") ) {
153 eat(")", "after function parameters"); 183 eat(")", "after function parameters");
154 break; 184 break;
155 } 185 }
156 } 186 }
157 e = new FuncallExpression(e.pos, e, args); 187 e = new FuncallExpression(e.pos, e, args);
................................................................................................................................................................................
208 return new FuncallExpression(pos, 238 return new FuncallExpression(pos,
209 new VarExpression(pos, "if"), 239 new VarExpression(pos, "if"),
210 cond, 240 cond,
211 new FunLiteral(thenPos, [], th), 241 new FunLiteral(thenPos, [], th),
212 new FunLiteral(elsePos, [], el) 242 new FunLiteral(elsePos, [], el)
213 ); 243 );
214 } 244 }
215 if( tryEat("fun") || tryEat("\u03BB") ) | 245 if( tryEat("fun") || tryEat("\u03BB") ) // lambda!!
216 { 246 {
217 eat("(", "after fun"); 247 eat("(", "after fun");
218 return parseLambdaAfterOpenParen(pos); 248 return parseLambdaAfterOpenParen(pos);
219 } 249 }
220 scope(exit) lex.popFront; 250 scope(exit) lex.popFront;
221 return new VarExpression(pos, lex.front.str); 251 return new VarExpression(pos, lex.front.str);
222 } 252 }
223 253
224 AST parseLambdaAfterOpenParen(immutable LexPosition pos) 254 AST parseLambdaAfterOpenParen(immutable LexPosition pos)
225 { 255 {
226 Parameter[] params; 256 Parameter[] params;
227 while( !tryEat(")") ) 257 while( !tryEat(")") )
228 { 258 {
229 params ~= new Parameter(eatId("for function parameter"), | 259 params ~= parseParam();
230 if( !tryEat(",") ) { 260 if( !tryEat(",") ) {
231 eat(")", "after function parameters"); 261 eat(")", "after function parameters");
232 break; 262 break;
233 } 263 }
234 } 264 }
235 eat("{", "after function parameters"); 265 eat("{", "after function parameters");
236 auto funbody = Body(); 266 auto funbody = Body();
237 eat("}", "after function body"); 267 eat("}", "after function body");
238 return new FunLiteral(pos, params, funbody); 268 return new FunLiteral(pos, params, funbody);
239 } 269 }
> 270
> 271 Parameter parseParam()
> 272 {
> 273 string var;
> 274 string[] lay;
> 275 while( !closingBracket() && !lex.empty && lex.front.str!="," )
> 276 {
> 277 auto pos = currentPosition();
> 278 string p = eatId("for function parameter", AllowQuoted);
> 279 if( p == "@" )
> 280 lay ~= "@" ~ eatId("after @", AllowQuoted);
> 281 else if( var.empty )
> 282 var = p;
> 283 else
> 284 throw genex!ParseException(pos, "one parameter h
> 285 }
> 286 return new Parameter(var, lay);
> 287 }
240 288
241 private: 289 private:
242 Lexer lex; 290 Lexer lex;
243 this(Lexer lex) { this.lex = lex; } 291 this(Lexer lex) { this.lex = lex; }
> 292
> 293 bool isNumber(string s)
> 294 {
> 295 return find!(`a<'0' || '9'<a`)(s).empty;
> 296 }
244 297
245 void eat(string kwd, lazy string msg) 298 void eat(string kwd, lazy string msg)
246 { 299 {
247 if( !tryEat(kwd) ) 300 if( !tryEat(kwd) )
248 if( lex.empty ) 301 if( lex.empty )
249 throw genex!UnexpectedEOF( 302 throw genex!UnexpectedEOF(
250 currentPosition(), sprintf!"%s is expect 303 currentPosition(), sprintf!"%s is expect
................................................................................................................................................................................
257 { 310 {
258 if( lex.empty || lex.front.quoted || lex.front.str!=kwd ) 311 if( lex.empty || lex.front.quoted || lex.front.str!=kwd )
259 return false; 312 return false;
260 lex.popFront; 313 lex.popFront;
261 return true; 314 return true;
262 } 315 }
263 316
> 317 enum {AllowQuoted=true, DisallowQuoted=false};
264 string eatId(lazy string msg, bool allowQuoted=false) | 318 string eatId(lazy string msg, bool aq=DisallowQuoted)
265 { 319 {
266 if( lex.empty ) 320 if( lex.empty )
267 throw genex!UnexpectedEOF(currentPosition(), "identifier 321 throw genex!UnexpectedEOF(currentPosition(), "identifier
268 if( !allowQuoted && lex.front.quoted ) | 322 if( !aq && lex.front.quoted )
269 throw genex!ParseException(currentPosition(), "identifie 323 throw genex!ParseException(currentPosition(), "identifie
270 scope(exit) lex.popFront; 324 scope(exit) lex.popFront;
271 return lex.front.str; 325 return lex.front.str;
272 } 326 }
273 327
274 bool isNumber(string s) <
275 { <
276 return find!(`a<'0'||'9'<a`)(s).empty; <
277 } <
278 <
279 AST doNothingExpression() 328 AST doNothingExpression()
280 { 329 {
281 return new IntLiteral(currentPosition(), BigInt(178)); 330 return new IntLiteral(currentPosition(), BigInt(178));
282 } 331 }
283 332
284 immutable(LexPosition) currentPosition() 333 immutable(LexPosition) currentPosition()
285 { 334 {
................................................................................................................................................................................
295 assert_eq(parseString(`"foo"`), strl("foo")); 344 assert_eq(parseString(`"foo"`), strl("foo"));
296 assert_eq(parseString(`fun(){1}`), fun([],intl(1))); 345 assert_eq(parseString(`fun(){1}`), fun([],intl(1)));
297 assert_eq(parseString(`fun(x){1}`), fun(["x"],intl(1))); 346 assert_eq(parseString(`fun(x){1}`), fun(["x"],intl(1)));
298 assert_eq(parseString("\u03BB(){1}"), fun([],intl(1))); 347 assert_eq(parseString("\u03BB(){1}"), fun([],intl(1)));
299 assert_eq(parseString("\u03BB(x){1}"), fun(["x"],intl(1))); 348 assert_eq(parseString("\u03BB(x){1}"), fun(["x"],intl(1)));
300 assert_eq(parseString(`1;2`), let("_","",intl(1),intl(2))); 349 assert_eq(parseString(`1;2`), let("_","",intl(1),intl(2)));
301 assert_eq(parseString(`1;2;`), let("_","",intl(1),intl(2))); 350 assert_eq(parseString(`1;2;`), let("_","",intl(1),intl(2)));
302 assert_eq(parseString(`let x=1;2`), let("x","",intl(1),intl(2))); | 351 assert_eq(parseString(`let x=1 in 2`), let("x","",intl(1),intl(2)));
303 assert_eq(parseString(`var x=1;2;`), let("x","",intl(1),intl(2))); 352 assert_eq(parseString(`var x=1;2;`), let("x","",intl(1),intl(2)));
304 assert_eq(parseString(`def x=1`), let("x","",intl(1),var("x"))); 353 assert_eq(parseString(`def x=1`), let("x","",intl(1),var("x")));
305 assert_eq(parseString(`@val x=1;`), let("x","@val",intl(1),var("x"))); 354 assert_eq(parseString(`@val x=1;`), let("x","@val",intl(1),var("x")));
306 assert_eq(parseString(`@typ x="#int";`), let("x","@typ",strl("#int"),var 355 assert_eq(parseString(`@typ x="#int";`), let("x","@typ",strl("#int"),var
307 assert_eq(parseString(`f(1,2)`), call(var("f"),intl(1),intl(2))); 356 assert_eq(parseString(`f(1,2)`), call(var("f"),intl(1),intl(2)));
308 assert_eq(parseString(`if(1){2}`), call(var("if"),intl(1),fun([],intl(2) 357 assert_eq(parseString(`if(1){2}`), call(var("if"),intl(1),fun([],intl(2)
309 assert_eq(parseString(`if(1){2}else{3}`), call(var("if"),intl(1),fun([], 358 assert_eq(parseString(`if(1){2}else{3}`), call(var("if"),intl(1),fun([],
................................................................................................................................................................................
310 assert_eq(parseString(`if(1){}else{3}()()`), 359 assert_eq(parseString(`if(1){}else{3}()()`),
311 call(call(call(var("if"),intl(1),fun([],intl(178)),fun([],intl(3 360 call(call(call(var("if"),intl(1),fun([],intl(178)),fun([],intl(3
312 assert_eq(parseString(`1+2*3`), call(var("+"),intl(1),call(var("*"),intl 361 assert_eq(parseString(`1+2*3`), call(var("+"),intl(1),call(var("*"),intl
313 assert_eq(parseString(`(1+2)*3`), call(var("*"),call(var("+"),intl(1),in 362 assert_eq(parseString(`(1+2)*3`), call(var("*"),call(var("+"),intl(1),in
314 assert_eq(parseString(`1*(2+3)`), call(var("*"),intl(1),call(var("+"),in 363 assert_eq(parseString(`1*(2+3)`), call(var("*"),intl(1),call(var("+"),in
315 assert_eq(parseString(`1*2+3`), call(var("+"),call(var("*"),intl(1),intl 364 assert_eq(parseString(`1*2+3`), call(var("+"),call(var("*"),intl(1),intl
316 assert_eq(parseString(`@x(1)`), lay("@x", intl(1))); 365 assert_eq(parseString(`@x(1)`), lay("@x", intl(1)));
> 366 assert_eq(parseString(`fun(x @v @t, y, z @t){}`),
> 367 funp([param("x",["@v","@t"]), param("y",[]), param("z",["@t"])],
317 368
318 assert_eq(parseString(` 369 assert_eq(parseString(`
319 let x = 100; #comment 370 let x = 100; #comment
320 let y = 200; #comment!!!!! 371 let y = 200; #comment!!!!!
321 x+y 372 x+y
322 `), 373 `),
323 let("x", "", intl(100), let("y", "", intl(200), call(var("+"), v 374 let("x", "", intl(100), let("y", "", intl(200), call(var("+"), v