/**
* Authors: k.inaba
* License: NYSL 0.9982 http://www.kmonos.net/nysl/
*
* Parser for Polemy programming language
*/
module polemy.parse;
import polemy._common;
import polemy.failure;
import polemy.lex;
import polemy.ast;
import polemy.layer;
import polemy.fresh;
/// Parse a string and return its AST
AST parseString(S, T...)(S str, T fn_ln_cn)
{
return parserFromString(str, fn_ln_cn).parse();
}
/// Parse the content of a file and return its AST
AST parseFile(S, T...)(S filename, T ln_cn)
{
return parserFromFile(filename, ln_cn).parse();
}
// Named Constructors of Parser
private auto parserFromLexer(Lexer)(Lexer lex)
{ return new Parser!Lexer(lex); }
private auto parserFromString(T...)(T params)
{ return parserFromLexer(lexerFromString(params)); }
private auto parserFromFile(T...)(T params)
{ return parserFromLexer(lexerFromFile(params)); }
// Parser
private class Parser(Lexer)
if( isForwardRange!(Lexer) && is(ElementType!(Lexer) == Token) )
{
AST parse()
{
auto e = Body();
if( !lex.empty )
throw genex!ParseException(currentPosition(), "parsing ended but some tokens left");
return e;
}
AST Body()
{
/// Body ::= Declaration
/// | TopLevelExpression
if( closingBracket() )
return doNothingExpression();
auto saved = lex.save;
if( auto e = Declaration() )
return e;
lex = saved;
return TopLevelExpression();
}
AST Declaration() // returns null if it is not a declaration
{
/// Declaration ::=
/// ["@" Layer|"let"|"var"|"def"] Var "=" Expression ([";"|"in"] Body?)?
/// | ["@" Layer|"let"|"var"|"def"] Var "(" Param%"," ")" "{" Body "}" ([";"|"in"] Body?)?
/// | ["@" "@" Layer "=" Expression ([";"|"in"] Body?)?
/// | ["@" "@" Layer "(" Param%"," ")" "{" Body "}" ([";"|"in"] Body?)?
auto pos = currentPosition();
Layer layer = "";
bool layerLiftDecl = false;
if( tryEat("@") )
{
layer = "@" ~ eatId("after @", AllowQuoted);
if( layer == "@@" )
{
layer = "@" ~ eatId("after @@", AllowQuoted);
layerLiftDecl = true;
}
else
{
if( tryEat("(") )
return null; // @lay(...) expression, not a declaration
}
}
// [TODO] Refactor
if( layerLiftDecl )
{
string kwd = "@" ~ layer;
string var = layer;
auto e = tryEat("(")
? parseLambdaAfterOpenParen(pos) // let var ( ...
: (eat("=", "after "~kwd), E(0)); // let var = ...
if( moreDeclarationExists() )
return new Let(pos, var, LiftLayer, e, Body());
else
return new Let(pos, var, LiftLayer, e,
new Lay(pos, LiftLayer, new Var(pos, var))
);
}
else
{
string kwd = layer;
if( layer.empty && !tryEat(kwd="let") && !tryEat(kwd="var") && !tryEat(kwd="def") )
return null; // none of {@lay, let, var, def} occurred, it's not a declaration
auto varpos = currentPosition();
string var = eatId("after "~kwd, AllowQuoted); // name of the declared variable
auto e = tryEat("(")
? parseLambdaAfterOpenParen(pos) // let var ( ...
: (eat("=", "after "~kwd), E(0)); // let var = ...
if( moreDeclarationExists() )
return new Let(pos, var, layer, e, Body());
else {
if( layer.empty )
return new Let(pos, var, layer, e, new Var(varpos, var));
else if( isMacroLayer(layer) )
return new Let(pos, var, layer, e, new Str(varpos, "(macro definition)"));
else
return new Let(pos, var, layer, e, new Lay(varpos, layer, new Var(varpos, var)));
}
}
}
AST TopLevelExpression()
{
/// TopLevelExpression ::= Expression ([";"|"in"] Body?)?
auto pos = currentPosition();
auto e = E(0);
if( moreDeclarationExists() )
return new Let(pos, "_", "", e, Body());
else
return e;
}
private bool moreDeclarationExists()
{
return (tryEat(";") || tryEat("in")) && !closingBracket();
}
private bool closingBracket()
{
return lex.empty || !lex.front.quoted && ["}",")","]",","].canFind(lex.front.str);
}
// [TODO] make this customizable from program
private static string[][] operator_perferences = [
["||"],
["&&"],
["!="],
["=="],
["<","<=",">",">="],
["|"],
["^"],
["&"],
["<<", ">>", "<<<", ">>>"],
["+","-"],
["~"],
["*","/","%"],
["^^","**"],
[".",".?"]
];
AST E(size_t level)
{
/// Expression ::= (Binary left-associative operators over) Funcall
AST rec(AST lhs)
{
if( closingBracket() )
return lhs;
auto pos = currentPosition();
foreach(op; operator_perferences[level])
if( tryEat(op) )
if( op[0]=='.' )
return rec(
new App(lhs.pos, new Var(pos, op), lhs, parseId()));
else
return rec(
new App(lhs.pos, new Var(pos, op), lhs, E(level+1)));
return lhs;
}
if( operator_perferences.length <= level )
return Funcall();
else
return rec(E(level+1));
}
AST Funcall()
{
/// Funcall ::= BaseExpression ["(" Expression%"," ")" | "{" ENTRIES "}"]*
auto e = BaseExpression();
for(;;)
if( tryEat("(") )
{
auto pos = currentPosition();
AST[] args;
while( !tryEat(")") ) {
if( lex.empty )
throw genex!UnexpectedEOF(pos, "closing ')' for arguments not found");
args ~= E(0);
if( !tryEat(",") ) {
eat(")", "after function parameters");
break;
}
}
e = new App(e.pos, e, args);
}
else if( tryEat("{") )
{
e = parseTableSetAfterBrace(e);
}
else
break;
return e;
}
AST parseTableSetAfterBrace(AST e)
{
/// TableSet ::= "{" (ID ":" E) % "," "}"
if( tryEat("}") )
return e;
auto pos = currentPosition();
for(;;)
{
string key = eatId("for table key", AllowQuoted);
eat(":", "after table key");
AST val = E(0);
e = new App(pos, new Var(pos,".="),
e, new Str(pos,key), val);
if( !tryEat(",") )
{
eat("}", "for the end of table literal");
break;
}
}
return e;
}
AST BaseExpression()
{
if( lex.empty )
throw genex!UnexpectedEOF(currentPosition(), "Reached EOF when tried to parse an expression");
auto pos = currentPosition();
if( lex.front.quoted )
{
scope(exit) lex.popFront;
return new Str(pos, lex.front.str);
}
if( isNumber(lex.front.str) )
{
scope(exit) lex.popFront;
return new Int(pos, BigInt(cast(string)lex.front.str));
}
if( tryEat("...") )
{
return new Die(pos);
}
if( tryEat("@") )
{
auto lay = "@"~eatId("for layer ID");
eat("(", "for layered execution");
auto e = Body();
eat(")", "after "~lay~"(...");
return new Lay(pos, lay, e);
}
if( tryEat("(") )
{
auto e = Body();
eat(")", "after parenthesized expression");
return e;
}
if( tryEat("{") )
{
AST e = new App(pos, new Var(pos,"{}"));
return parseTableSetAfterBrace(e);
}
if( tryEat("if") )
{
return parseIfAfterIf(pos);
}
if( tryEat("case") )
{
return parsePatternMatch(pos);
}
if( tryEat("fun") || tryEat("\u03BB") ) // lambda!!
{
eat("(", "after fun");
return parseLambdaAfterOpenParen(pos);
}
scope(exit) lex.popFront;
return new Var(pos, lex.front.str);
}
AST parseIfAfterIf(LexPosition pos)
{
auto cond = E(0);
auto thenPos = currentPosition();
if(!tryEat(":")) {
eat("then", "after if condition");
tryEat(":");
}
AST th = Body();
auto el = doNothingExpression();
auto elsePos = currentPosition();
if( tryEat("else") ) {
tryEat(":");
el = Body();
}
return new App(pos, new Var(pos,"if"), cond, new Fun(thenPos,[],th), new Fun(elsePos,[],el));
}
AST parsePatternMatch(LexPosition pos)
{
// case pmExpr CASES
//==>
// let pmVar = pmExpr in (... let pmTryFirst = ... in pmTryFirst())
AST pmExpr = E(0);
string pmVar = freshVarName();
string pmTryFirst = freshVarName();
AST pmBody = parsePatternMatchCases(pos, pmVar, pmTryFirst,
new App(pos, new Var(pos, pmTryFirst)));
return new Let(pos, pmVar, [], pmExpr, pmBody);
}
AST parsePatternMatchCases(LexPosition casePos, string pmVar, string tryThisBranchVar, AST thenDoThis)
{
// when pat: cBody
//==>
// ... let failBranchVar = ... in
// let tryThisBranchVar = fun(){ if(test){cBody}else{failBranchVar()} } in thenDoThis
if( tryEat("when") )
{
auto pos = currentPosition();
string failBranchVar = freshVarName();
auto pr = parsePattern();
eat(":", "after when pattern");
AST cBody = Body();
AST judgement = new App(pos, new Var(pos, "if"),
ppTest(pmVar, pr), new Fun(pos,[],ppBind(pmVar, pr, cBody)),
new Var(pos, failBranchVar));
return parsePatternMatchCases(casePos, pmVar, failBranchVar,
new Let(pos, tryThisBranchVar, [],
new Fun(pos,[],judgement), thenDoThis)
);
}
else
{
AST doNothing = new Fun(casePos,[], new Die(casePos));
return new Let(casePos, tryThisBranchVar, [], doNothing, thenDoThis);
}
}
// hageshiku tenuki
abstract class SinglePattern
{
string[] path;
mixin SimpleClass;
private AST access(string pmVar, string[] path) {
auto pos = currentPosition();
AST e = new Var(pos, pmVar);
foreach(p; path)
e = new App(pos, new Var(pos, "."), e, new Str(pos, p));
return e;
}
private AST has(AST e, string k) {
auto pos = currentPosition();
return opAndAnd(
new App(pos, new Var(pos, "_istbl"), e),
new App(pos, new Var(pos, ".?"), e, new Str(pos, k))
);
}
private AST opAndAnd(AST a, AST b) {
if( a is null ) return b;
if( b is null ) return a;
auto pos = currentPosition();
return new App(pos,
new Var(pos, "if"),
a,
new Fun(pos, [], b),
new Fun(pos, [], new Int(pos, 0))
);
}
AST ppTest(string pmVar) {
AST c = null;
for(int i=0; i<path.length; ++i)
c = opAndAnd(c, has(access(pmVar,path[0..i]), path[i]));
return c;
}
AST ppBind(string pmVar, AST thenDoThis) { return thenDoThis; }
}
class WildPattern : SinglePattern
{
mixin SimpleClass;
}
class VarPattern : SinglePattern
{
string name;
mixin SimpleClass;
AST ppBind(string pmVar, AST thenDoThis) {
auto pos = currentPosition();
return new Let(pos, name, [], access(pmVar,path), thenDoThis);
}
}
class ConstantPattern : SinglePattern
{
AST e;
mixin SimpleClass;
AST ppTest(string pmVar) {
auto pos = currentPosition();
return opAndAnd( super.ppTest(pmVar),
new App(pos, new Var(pos,"=="), access(pmVar,path), e)
);
}
}
SinglePattern[] parsePattern(string[] path = null)
{
SinglePattern[] result;
if( tryEat("{") )
{
if( !tryEat("}") ) {
do {
string key = eatId("in table pattern", AllowQuoted);
eat(":", "after field-id in table pattern");
result ~= parsePattern(path ~ key);
} while( tryEat(",") );
eat("}", "at the end of table pattern");
}
}
else
{
AST e = E(0);
if(auto ev = cast(Var)e)
if(ev.name == "_")
result ~= new WildPattern(path);
else
result ~= new VarPattern(path, ev.name);
else
result ~= new ConstantPattern(path, e);
}
return result;
}
AST ppTest(string pmVar, SinglePattern[] pats)
{
auto pos = currentPosition();
AST cond = null;
foreach(p; pats) {
AST c2 = p.ppTest(pmVar);
if( c2 !is null )
cond = cond is null ? c2
: new App(pos, new Var(pos,"&&"), cond, c2);
}
return cond is null ? new Int(currentPosition(), 1) : cond;
}
AST ppBind(string pmVar, SinglePattern[] pats, AST thenDoThis)
{
foreach(p; pats)
thenDoThis = p.ppBind(pmVar, thenDoThis);
return thenDoThis;
}
AST parseId()
{
scope(exit) lex.popFront;
return new Str(currentPosition(), lex.front.str);
}
AST parseLambdaAfterOpenParen(LexPosition pos)
{
Parameter[] params;
while( !tryEat(")") )
{
params ~= parseParam();
if( !tryEat(",") ) {
eat(")", "after function parameters");
break;
}
}
eat("{", "after function parameters");
auto funbody = Body();
eat("}", "after function body");
return new Fun(pos, params, funbody);
}
Parameter parseParam()
{
string var;
string[] lay;
while( !closingBracket() && !lex.empty && lex.front.str!="," )
{
auto pos = currentPosition();
string p = eatId("for function parameter", AllowQuoted);
if( p == "@" )
lay ~= "@" ~ eatId("after @", AllowQuoted);
else if( var.empty )
var = p;
else
throw genex!ParseException(pos, "one parameter has two names");
}
return new Parameter(var, lay);
}
private:
Lexer lex;
this(Lexer lex) { this.lex = lex; }
bool isNumber(string s)
{
return find!(`a<'0' || '9'<a`)(s).empty;
}
void eat(string kwd, lazy string msg)
{
if( !tryEat(kwd) )
if( lex.empty )
throw genex!UnexpectedEOF(
currentPosition(), sprintf!"%s is expected %s but not found"(kwd,msg));
else
throw genex!ParseException(
currentPosition(), sprintf!"%s is expected for %s but not found"(kwd,msg));
}
bool tryEat(string kwd)
{
if( lex.empty || lex.front.quoted || lex.front.str!=kwd )
return false;
lex.popFront;
return true;
}
enum {AllowQuoted=true, DisallowQuoted=false};
string eatId(lazy string msg, bool aq=DisallowQuoted)
{
if( lex.empty )
throw genex!UnexpectedEOF(currentPosition(), "identifier is expected but not found "~msg);
if( !aq && lex.front.quoted )
throw genex!ParseException(currentPosition(), "identifier is expected but not found "~msg);
scope(exit) lex.popFront;
return lex.front.str;
}
AST doNothingExpression()
{
return new Str(currentPosition(), "(empty function body)");
}
LexPosition currentPosition()
{
return lex.empty ? new LexPosition("EOF",0,0) : lex.front.pos;
}
}
unittest
{
mixin EasyAST;
assert_eq(parseString(`123`), intl(123));
assert_eq(parseString(`"foo"`), strl("foo"));
assert_eq(parseString(`fun(){1}`), fun([],intl(1)));
assert_eq(parseString(`fun(x){1}`), fun(["x"],intl(1)));
assert_eq(parseString("\u03BB(){1}"), fun([],intl(1)));
assert_eq(parseString("\u03BB(x,y){1}"), fun(["x","y"],intl(1)));
assert_eq(parseString(`1;2`), let("_","",intl(1),intl(2)));
assert_eq(parseString(`1;2;`), let("_","",intl(1),intl(2)));
assert_eq(parseString(`let x=1 in 2`), let("x","",intl(1),intl(2)));
assert_eq(parseString(`var x=1;2;`), let("x","",intl(1),intl(2)));
assert_eq(parseString(`def x=1`), let("x","",intl(1),var("x")));
assert_eq(parseString(`@val x=1;`), let("x","@val",intl(1),lay("@val",var("x"))));
assert_eq(parseString(`@typ x="#int";`), let("x","@typ",strl("#int"),lay("@typ",var("x"))));
assert_eq(parseString(`@macro x=1`), let("x","@macro",intl(1),strl("(macro definition)")));
assert_eq(parseString(`f(1,2)`), call(var("f"),intl(1),intl(2)));
assert_eq(parseString(`if 1 then 2`), call(var("if"),intl(1),fun([],intl(2)),fun([],strl("(empty function body)"))));
assert_eq(parseString(`if 1 then: 2 else(3)`), call(var("if"),intl(1),fun([],intl(2)),fun([],intl(3))));
assert_eq(parseString(`(if 1 then () else 3)()()`),
call(call(call(var("if"),intl(1),fun([],strl("(empty function body)")),fun([],intl(3))))));
assert_eq(parseString(`1+2*3`), call(var("+"),intl(1),call(var("*"),intl(2),intl(3))));
assert_eq(parseString(`(1+2)*3`), call(var("*"),call(var("+"),intl(1),intl(2)),intl(3)));
assert_eq(parseString(`1*(2+3)`), call(var("*"),intl(1),call(var("+"),intl(2),intl(3))));
assert_eq(parseString(`1*2+3`), call(var("+"),call(var("*"),intl(1),intl(2)),intl(3)));
assert_eq(parseString(`@x(1)`), lay("@x", intl(1)));
assert_eq(parseString(`fun(x @v @t, y, z @t){}`),
funp([param("x",["@v","@t"]), param("y",[]), param("z",["@t"])], strl("(empty function body)")));
assert_eq(parseString(`
let x = 100; #comment
let y = 200; #comment!!!!!
x+y
`),
let("x", "", intl(100), let("y", "", intl(200), call(var("+"), var("x"), var("y"))))
);
assert_eq(parseString(`
var fac = fun(x){ if(x <= 1) then 1 else x*fac(x-1) };
fac(10)
`),
let("fac", "", fun(["x"],
call(var("if"),
call(var("<="), var("x"), intl(1)),
fun([], intl(1)),
fun([], call(var("*"), var("x"), call(var("fac"),call(var("-"),var("x"),intl(1)))))
)),
call(var("fac"),intl(10))
)
);
}
unittest
{
assert_throw!UnexpectedEOF(parseString(`1+`));
assert_throw!ParseException(parseString(`1+2}`));
assert_throw!UnexpectedEOF(parseString(`let "x"`));
assert_throw!UnexpectedEOF(parseString(`var`));
assert_throw!ParseException(parseString(`@val x ==`));
assert_throw!ParseException(parseString(`if(){1}`));
assert_throw!UnexpectedEOF(parseString(`f(`));
assert_throw!ParseException(parseString(`fun(x y){}`));
}
unittest
{
mixin EasyAST;
assert_eq(parseString(`def foo(x) { x+1 }; foo`),
let("foo", "",
fun(["x"], call(var("+"), var("x"), intl(1))),
var("foo"))
);
assert_eq(parseString(`@@type ( x ) { x }`),
let("@type", LiftLayer, fun(["x"], var("x")), lay(LiftLayer, var("@type"))) );
assert_eq(parseString(`{}`), call(var("{}")));
assert_eq(parseString(`{foo:1,"bar":2}`),
call(var(".="), call(var(".="), call(var("{}")), strl("foo"), intl(1)), strl("bar"), intl(2)));
assert_eq(parseString(`{}.foo`), call(var("."),call(var("{}")),strl("foo")));
assert_eq(parseString(`{}.?foo`), call(var(".?"),call(var("{}")),strl("foo")));
assert_eq(parseString(`x{y:1}`), call(var(".="),var("x"),strl("y"),intl(1)));
}
unittest
{
assert_nothrow(parseString(`
case( 1 )
when(x): 1
`));
assert_nothrow(parseString(`
case 1
when {aaaa:_}: 1
`));
assert_nothrow(parseString(`
case 1
when {aaaa:@value(x)}: 1
when {aaaa:{bbb:_}, ccc:123}: 1
`));
}