Artifact Content
Not logged in

Artifact 25ec86a59533b939ec72435f0a8e4b97b6209f4e


/**
 * Authors: k.inaba
 * License: NYSL 0.9982 http://www.kmonos.net/nysl/
 *
 * Parser for Polemy programming language
 */
module polemy.parse;
import polemy._common;
import polemy.failure;
import polemy.lex;
import polemy.ast;

/// Build a parser over the given source string and run it to completion.
/// Any extra arguments (fn_ln_cn) are forwarded untouched to the lexer factory.
/// Returns: the AST produced by Parser.parse
/// Throws: ParseException, LexException, UnexpectedEOF

AST parseString(S, T...)(S str, T fn_ln_cn)
{
	auto parser = parserFromString(str, fn_ln_cn);
	return parser.parse();
}

/// Build a parser over the content of the named file and run it to completion.
/// Any extra arguments (ln_cn) are forwarded untouched to the lexer factory.
/// Returns: the AST produced by Parser.parse
/// Throws: ParseException, LexException, UnexpectedEOF

AST parseFile(S, T...)(S filename, T ln_cn)
{
	auto parser = parserFromFile(filename, ln_cn);
	return parser.parse();
}

// Named Constructors of Parser

/// Wrap an already constructed lexer in a Parser instantiated for its type.
private auto parserFromLexer(Lexer)(Lexer lex)
{
	auto parser = new Parser!(Lexer)(lex);
	return parser;
}

/// Create a Parser reading from lexerFromString(params).
private auto parserFromString(T...)(T params)
{
	auto lexer = lexerFromString(params);
	return parserFromLexer(lexer);
}

/// Create a Parser reading from lexerFromFile(params).
private auto parserFromFile(T...)(T params)
{
	auto lexer = lexerFromFile(params);
	return parserFromLexer(lexer);
}

// Parser

/// Recursive-descent parser over a forward range of Tokens.
/// Each grammar rule is a method; the rule it implements is quoted at the top of its body.
private class Parser(Lexer)
	if( isForwardRange!(Lexer) && is(ElementType!(Lexer) == Token) )
{
	/// Parse the whole token stream as a Body.
	/// Throws: ParseException if tokens are left over once a complete Body was read.
	AST parse()
	{
		auto e = Body();
		if( !lex.empty )
			throw genex!ParseException(currentPosition(), "parsing ended but some tokens left");
		return e;
	}

	/// Parse a Body. An empty body (EOF or a closing bracket right away) yields
	/// the do-nothing expression.
	AST Body()
	{
		/// Body ::= Declaration
		///        | TopLevelExpression

		if( closingBracket() )
			return doNothingExpression();

		// Declaration() may consume tokens before deciding it is not a
		// declaration, so remember where we were and rewind on failure.
		auto saved = lex.save;
		if( auto e = Declaration() )
			return e;
		lex = saved;
		return TopLevelExpression();
	}

	/// Try to parse a declaration.
	/// Returns: a LetExpression, or null if the input is not a declaration.
	///   In the null case tokens may already have been consumed; the caller rewinds.
	AST Declaration() // returns null if it is not a declaration
	{
		/// Declaration ::=
		///    ["@" Layer|"let"|"var"|"def"] Var "=" Expression ([";"|"in"] Body?)?
		///  | ["@" Layer|"let"|"var"|"def"] Var "(" Param%"," ")" "{" Body "}" ([";"|"in"] Body?)?
		///  | "@" "@" Layer "=" Expression ([";"|"in"] Body?)?
		///  | "@" "@" Layer "(" Param%"," ")" "{" Body "}" ([";"|"in"] Body?)?

		auto pos = currentPosition();
		string layer = "";
		bool layerRiseDecl = false;

		if( tryEat("@") )
		{
			layer = "@" ~ eatId("after @", AllowQuoted);
			if( layer == "@@" )
			{
				// "@" "@" Layer : the declared name is the layer itself
				layer = "@" ~ eatId("after @@", AllowQuoted);
				layerRiseDecl = true;
			}
			else if( tryEat("(") )
			{
				return null; // @lay(...) expression, not a declaration
			}
		}

		if( layerRiseDecl )
		{
			string kwd = "@" ~ layer; // spelled "@@name", used only in error messages
			string var = layer;

			auto e = parseDeclaredValue(pos, kwd);
			auto rest = parseDeclarationRest(pos, var);
			return new LetExpression(pos, var, "(system)", e, rest);
		}
		else
		{
			string kwd = layer;
			// The assignments inside tryEat(...) record which keyword matched,
			// so that later error messages can mention it.
			if( layer.empty && !tryEat(kwd="let") && !tryEat(kwd="var") && !tryEat(kwd="def") )
				return null; // none of {@lay, let, var, def} occurred, it's not a declaration

			auto varpos = currentPosition();
			string var = eatId("after "~kwd, AllowQuoted); // name of the declared variable

			auto e = parseDeclaredValue(varpos, kwd);
			auto rest = parseDeclarationRest(varpos, var);
			return new LetExpression(pos, var, layer, e, rest);
		}
	}

	/// Parse the right-hand side of a declaration: either the function sugar
	/// `( params ) { body }` or `= Expression`.
	/// Params:
	///   lambdaPos = position given to the FunLiteral when the function sugar is used
	///   kwd       = declaring keyword, used only in the error message
	private AST parseDeclaredValue(immutable LexPosition lambdaPos, string kwd)
	{
		if( tryEat("(") )
			return parseLambdaAfterOpenParen(lambdaPos); // let var ( ...
		eat("=", "after "~kwd);                          // let var = ...
		return E(0);
	}

	/// Parse what follows a declared value: the continuation Body if one exists,
	/// otherwise a reference to the variable that was just declared.
	private AST parseDeclarationRest(immutable LexPosition varpos, string var)
	{
		if( moreDeclarationExists() )
			return Body();
		return new VarExpression(varpos, var);
	}

	/// Parse an expression optionally followed by more body.
	/// `e; rest` is represented as a let binding of `e` to the variable "_".
	AST TopLevelExpression()
	{
		/// TopLevelExpression ::= Expression ([";"|"in"] Body?)?

		auto pos = currentPosition();
		auto e = E(0);
		if( moreDeclarationExists() )
			return new LetExpression(pos, "_", "", e, Body());
		else
			return e;
	}

	/// Consume a ";" or "in" separator if present.
	/// Returns: true only if a separator was eaten AND something other than EOF
	///   or a closing bracket follows it.
	private bool moreDeclarationExists()
	{
		return (tryEat(";") || tryEat("in")) && !closingBracket();
	}

	/// True at EOF or when the next token is an unquoted "}", ")" or "]".
	private bool closingBracket()
	{
		return lex.empty || !lex.front.quoted && ["}",")","]"].canFind(lex.front.str);
	}

	// [TODO] make this customizable from program
	// Binary operators grouped by level; E(level) handles the group at that index
	// and delegates to E(level+1), so later groups bind tighter.
	private static string[][] operator_perferences = [
		["||"],
		["&&"],
		["!="],
		["=="],
		["<","<=",">",">="],
		["|"],
		["^"],
		["&"],
		["<<", ">>"],
		["+","-"],
		["~"],
		["*","/","%"],
		["^^","**"],
		[".",".?"]
	];

	/// Parse an expression made of the operators at the given level and above.
	/// Params: level = index into operator_perferences; past the end it parses a Funcall
	AST E(size_t level)
	{
		/// Expression ::= (Binary left-associative operators over) Funcall

		// Fold "lhs op rhs op rhs ..." to the left for the operators of this level.
		AST rec(AST lhs)
		{
			if( closingBracket() )
				return lhs;

			auto pos = currentPosition();
			foreach(op; operator_perferences[level])
			{
				if( !tryEat(op) )
					continue;
				// "." and ".?" take a bare identifier (turned into a string literal)
				// on the right; every other operator takes a tighter-binding expression.
				AST rhs = (op[0]=='.') ? parseId() : E(level+1);
				return rec(
					new FuncallExpression(lhs.pos, new VarExpression(pos, op), lhs, rhs));
			}
			return lhs;
		}

		if( operator_perferences.length <= level )
			return Funcall();
		else
			return rec(E(level+1));
	}

	/// Parse a base expression followed by any number of call argument lists
	/// `( ... )` and table-update suffixes `{ ... }`.
	AST Funcall()
	{
		/// Funcall ::= BaseExpression ["(" Expression%"," ")"]*

		auto e = BaseExpression();
		for(;;)
		{
			if( tryEat("(") )
			{
				auto pos = currentPosition();
				AST[] args;
				while( !tryEat(")") ) {
					if( lex.empty )
						throw genex!UnexpectedEOF(pos, "closing ')' for arguments not found");
					args ~= E(0);
					if( !tryEat(",") ) {
						eat(")", "after function parameters");
						break;
					}
				}
				e = new FuncallExpression(e.pos, e, args);
			}
			else if( tryEat("{") )
			{
				e = parseTableSetAfterBrace(e);
			}
			else
			{
				break;
			}
		}
		return e;
	}

	/// Parse `key: value, ... }` after an already consumed "{".
	/// Each entry wraps the expression so far in a call to ".=" with the key
	/// (as a string literal) and the value.
	/// Params: e = the table expression being updated
	AST parseTableSetAfterBrace(AST e)
	{
		if( tryEat("}") )
			return e;
		auto pos = currentPosition();
		for(;;)
		{
			string key = eatId("for table key", AllowQuoted);
			eat(":", "after table key");
			AST val = E(0);
			e = new FuncallExpression(pos, new VarExpression(pos,".="),
					e, new StrLiteral(pos,key), val);
			if( !tryEat(",") )
			{
				eat("}", "for the end of table literal");
				break;
			}
		}
		return e;
	}

	/// Parse a primary expression: string or integer literal, layered expression,
	/// parenthesized body, table literal, if-expression, function literal or variable.
	/// Throws: UnexpectedEOF if there is no token left.
	AST BaseExpression()
	{
		if( lex.empty )
			throw genex!UnexpectedEOF(currentPosition(), "Reached EOF when tried to parse an expression");

		auto pos = lex.front.pos;
		if( lex.front.quoted )
		{
			scope(exit) lex.popFront;
			return new StrLiteral(pos, lex.front.str);
		}
		if( isNumber(lex.front.str) )
		{
			scope(exit) lex.popFront;
			return new IntLiteral(pos, BigInt(cast(string)lex.front.str));
		}
		if( tryEat("@") )
		{
			auto lay = "@"~eatId("for layer ID");
			eat("(", "for layered execution");
			auto e = Body();
			eat(")", "after "~lay~"(...");
			return new LayeredExpression(pos, lay, e);
		}
		if( tryEat("(") )
		{
			auto e = Body();
			eat(")", "after parenthesized expression");
			return e;
		}
		if( tryEat("{") )
		{
			AST e = new FuncallExpression(pos, new VarExpression(pos,"{}"));
			return parseTableSetAfterBrace(e);
		}
		if( tryEat("if") )
		{
			eat("(", "after if");
			auto cond = E(0);
			eat(")", "after if condition");
			// Guard against EOF here so that the following eat("{") reports
			// UnexpectedEOF instead of front being read from an empty range.
			auto thenPos = (lex.empty ? LexPosition.dummy : lex.front.pos);
			eat("{", "after if condition");
			auto th = Body();
			eat("}", "after if-then body");
			auto el = doNothingExpression(); // a missing else branch does nothing
			auto elsePos = (lex.empty ? LexPosition.dummy : lex.front.pos);
			if( tryEat("else") ) {
				eat("{", "after else");
				el = Body();
				eat("}", "after else body");
			}
			// if(c){t}else{e} becomes a call of "if" with the condition and two thunks
			return new FuncallExpression(pos,
				new VarExpression(pos, "if"),
				cond,
				new FunLiteral(thenPos, [], th),
				new FunLiteral(elsePos, [], el)
			);
		}
		if( tryEat("fun") || tryEat("\u03BB") ) // lambda!!
		{
			eat("(", "after fun");
			return parseLambdaAfterOpenParen(pos);
		}
		// anything else is taken as a variable reference
		scope(exit) lex.popFront;
		return new VarExpression(pos, lex.front.str);
	}

	/// Consume the next token (quoted or not) and return it as a string literal.
	/// Used for the right-hand side of "." and ".?".
	/// Throws: UnexpectedEOF if there is no token left.
	AST parseId()
	{
		auto pos = currentPosition(); // must be taken before the token is consumed
		return new StrLiteral(pos, eatId("after member access operator", AllowQuoted));
	}

	/// Parse `params ) { body }` after an already consumed "(".
	/// Params: pos = position recorded on the resulting FunLiteral
	AST parseLambdaAfterOpenParen(immutable LexPosition pos)
	{
		Parameter[] params;
		while( !tryEat(")") )
		{
			params ~= parseParam();
			if( !tryEat(",") ) {
				eat(")", "after function parameters");
				break;
			}
		}
		eat("{", "after function parameters");
		auto funbody = Body();
		eat("}", "after function body");
		return new FunLiteral(pos, params, funbody);
	}

	/// Parse one parameter: a name plus any number of "@" layer annotations,
	/// stopping at ",", a closing bracket or EOF.
	/// Throws: ParseException if a second name appears in the same parameter.
	Parameter parseParam()
	{
		string var;
		string[] lay;
		// closingBracket() is also true at EOF, so front is safe to read here
		while( !closingBracket() && lex.front.str!="," )
		{
			auto pos = currentPosition();
			string p = eatId("for function parameter", AllowQuoted);
			if( p == "@" )
				lay ~= "@" ~ eatId("after @", AllowQuoted);
			else if( var.empty )
				var = p;
			else
				throw genex!ParseException(pos, "one parameter has two names");
		}
		return new Parameter(var, lay);
	}

private:
	Lexer lex;
	this(Lexer lex) { this.lex = lex; }

	/// True if s contains no character outside '0'..'9'.
	bool isNumber(string s)
	{
		return find!(`a<'0' || '9'<a`)(s).empty;
	}

	/// Consume the unquoted token kwd or fail.
	/// Params:
	///   kwd = token text that must come next
	///   msg = context such as "after if"; evaluated only when building the error
	/// Throws: UnexpectedEOF at end of input, ParseException on any other token.
	void eat(string kwd, lazy string msg)
	{
		if( tryEat(kwd) )
			return;
		// msg already starts with its own preposition ("after ...", "for ..."),
		// so both errors use the same wording.
		if( lex.empty )
			throw genex!UnexpectedEOF(
				currentPosition(), sprintf!"%s is expected %s but not found"(kwd,msg));
		else
			throw genex!ParseException(
				currentPosition(), sprintf!"%s is expected %s but not found"(kwd,msg));
	}

	/// Consume the next token if it is unquoted and its text equals kwd.
	/// Returns: whether a token was consumed.
	bool tryEat(string kwd)
	{
		if( lex.empty || lex.front.quoted || lex.front.str!=kwd )
			return false;
		lex.popFront;
		return true;
	}

	enum {AllowQuoted=true, DisallowQuoted=false};

	/// Consume the next token and return its text.
	/// Params:
	///   msg = context appended to the error message
	///   aq  = AllowQuoted to accept quoted tokens too
	/// Throws: UnexpectedEOF at end of input; ParseException on a quoted token
	///   when aq is DisallowQuoted.
	string eatId(lazy string msg, bool aq=DisallowQuoted)
	{
		if( lex.empty )
			throw genex!UnexpectedEOF(currentPosition(), "identifier is expected but not found "~msg);
		if( !aq && lex.front.quoted )
			throw genex!ParseException(currentPosition(), "identifier is expected but not found "~msg);
		scope(exit) lex.popFront;
		return lex.front.str;
	}

	/// Placeholder expression for an empty body: the integer literal 178.
	AST doNothingExpression()
	{
		return new IntLiteral(currentPosition(), BigInt(178));
	}

	/// Position of the next token, or null at end of input.
	immutable(LexPosition) currentPosition()
	{
		return lex.empty ? null : lex.front.pos;
	}
}

// Positive cases: each source snippet must parse to the AST built with the
// EasyAST helper functions on the right-hand side.
unittest
{
	mixin EasyAST;

	// literals and function literals ("\u03BB" is accepted in place of "fun")
	assert_eq(parseString(`123`), intl(123));
	assert_eq(parseString(`"foo"`), strl("foo"));
	assert_eq(parseString(`fun(){1}`), fun([],intl(1)));
	assert_eq(parseString(`fun(x){1}`), fun(["x"],intl(1)));
	assert_eq(parseString("\u03BB(){1}"), fun([],intl(1)));
	assert_eq(parseString("\u03BB(x){1}"), fun(["x"],intl(1)));
	// sequencing and declarations; a trailing ";" adds nothing
	assert_eq(parseString(`1;2`), let("_","",intl(1),intl(2)));
	assert_eq(parseString(`1;2;`), let("_","",intl(1),intl(2)));
	assert_eq(parseString(`let x=1 in 2`), let("x","",intl(1),intl(2)));
	assert_eq(parseString(`var x=1;2;`), let("x","",intl(1),intl(2)));
	assert_eq(parseString(`def x=1`), let("x","",intl(1),var("x")));
	assert_eq(parseString(`@val x=1;`), let("x","@val",intl(1),var("x")));
	assert_eq(parseString(`@typ x="#int";`), let("x","@typ",strl("#int"),var("x")));
	// calls and if-expressions (a missing or empty branch becomes intl(178))
	assert_eq(parseString(`f(1,2)`), call(var("f"),intl(1),intl(2)));
	assert_eq(parseString(`if(1){2}`), call(var("if"),intl(1),fun([],intl(2)),fun([],intl(178))));
	assert_eq(parseString(`if(1){2}else{3}`), call(var("if"),intl(1),fun([],intl(2)),fun([],intl(3))));
	assert_eq(parseString(`if(1){}else{3}()()`),
		call(call(call(var("if"),intl(1),fun([],intl(178)),fun([],intl(3))))));
	// operator precedence and parentheses
	assert_eq(parseString(`1+2*3`), call(var("+"),intl(1),call(var("*"),intl(2),intl(3))));
	assert_eq(parseString(`(1+2)*3`), call(var("*"),call(var("+"),intl(1),intl(2)),intl(3)));
	assert_eq(parseString(`1*(2+3)`), call(var("*"),intl(1),call(var("+"),intl(2),intl(3))));
	assert_eq(parseString(`1*2+3`), call(var("+"),call(var("*"),intl(1),intl(2)),intl(3)));
	// layered expression and layer-annotated parameters
	assert_eq(parseString(`@x(1)`), lay("@x", intl(1)));
	assert_eq(parseString(`fun(x @v @t, y, z @t){}`),
		funp([param("x",["@v","@t"]), param("y",[]), param("z",["@t"])], intl(178)));

	// multi-line source with declarations chained by ";"
	assert_eq(parseString(`
		let x = 100; #comment
		let y = 200; #comment!!!!!
			x+y
	`),
		let("x", "", intl(100), let("y", "", intl(200), call(var("+"), var("x"), var("y"))))
	);

	// recursive function definition followed by a call
	assert_eq(parseString(`
		var fac = fun(x){ if(x <= 1) {1} else {x*fac(x-1)} };
		fac(10)
	`),
		let("fac", "", fun(["x"],
			call(var("if"),
				call(var("<="), var("x"), intl(1)),
				fun([], intl(1)),
				fun([], call(var("*"), var("x"), call(var("fac"),call(var("-"),var("x"),intl(1)))))
			)),
			call(var("fac"),intl(10))
		)
	);
}

// Negative cases: malformed input must raise the stated exception type.
// UnexpectedEOF is expected where the input simply stops too early,
// ParseException where a wrong token is present.
unittest
{
	assert_throw!UnexpectedEOF(parseString(`1+`));
	assert_throw!ParseException(parseString(`1+2}`));
	assert_throw!UnexpectedEOF(parseString(`let "x"`));
	assert_throw!UnexpectedEOF(parseString(`var`));
	assert_throw!ParseException(parseString(`@val x ==`));
	assert_throw!ParseException(parseString(`if(){1}`));
	assert_throw!UnexpectedEOF(parseString(`f(`));
}

// Function-definition sugar, "@@" layer declarations, table literals and
// member access.
unittest
{
	mixin EasyAST;
	// def name(params){body} declares a function literal under that name
	assert_eq(parseString(`def foo(x) { x+1 }; foo`),
		let("foo", "",
			fun(["x"], call(var("+"), var("x"), intl(1))),
			var("foo"))
	);

	// "@@type" declares the name "@type" in the "(system)" layer
	assert_eq(parseString(`@@type ( x ) { x }`),
		let("@type", "(system)", fun(["x"], var("x")), var("@type")) );

	// table literals are a call of "{}" wrapped in one ".=" call per entry
	assert_eq(parseString(`{}`), call(var("{}")));
	assert_eq(parseString(`{foo:1,"bar":2}`),
		call(var(".="), call(var(".="), call(var("{}")), strl("foo"), intl(1)), strl("bar"), intl(2)));
	// "." and ".?" take the following identifier as a string literal
	assert_eq(parseString(`{}.foo`), call(var("."),call(var("{}")),strl("foo")));
	assert_eq(parseString(`{}.?foo`), call(var(".?"),call(var("{}")),strl("foo")));
	// a "{...}" suffix updates an existing expression instead of a fresh table
	assert_eq(parseString(`x{y:1}`), call(var(".="),var("x"),strl("y"),intl(1)));
}