package muesli import ( _ "bufio" _ "bytes" _ "errors" "fmt" _ "io" _ "os" _ "reflect" _ "runtime" _ "strings" _ "unicode" // "gitlab.com/beoran/woe/graphviz" // _ "gitlab.com/beoran/woe/monolog" ) /* Grammar: Desrired syntax (verified LL(1) on smlweb.cpsc.ucalgary.ca) In Muesli, the program is first tokenized by a lexer. The lexer uses the first character, and only the first character of every token to determine it's type. Then, the tokens are parsed with a recursive descent parser with a structured syntax, which has been verified to be LL(1) on smlweb.cpsc.ucalgary.ca. This means the parser only needs one token of lookahead to be able to parse the syntax. The syntax can be described as follows: PROGRAM -> STATEMENTS. STATEMENTS -> STATEMENT eos STATEMENTS|. STATEMENT -> EXPRESSION OPERATION |. OPERATION -> operator STATEMENT |. EXPRESSION -> COMMAND | EVALUATION. COMMAND -> FIELD PARAMETERS. FIELD -> NAME SELECTOR. SELECTOR -> selector PARAMETER |. PARAMETERS -> PARAMETER PARAMETERS |. PARAMETER -> FIELD | EVALUATION. EVALUATION -> LITERAL | BLOCK | GETTER | SETTER | LIST | PARENTHESIS. PARENTHESIS -> closeparen STATEMENT openparen. BLOCK -> openblock STATEMENTS closeblock. LIST -> openlist PARAMETERS closelist. LITERAL -> string | int | float. NAME -> word | symbol | type. SETTER -> set PARAMETER PARAMETER. GETTER -> get PARAMETER. The semantics of a muesli program are based on this syntax and can be explained as follows: - A muesli program consists of statements, separated by end of statement or eos, which are executed from top to bottom. The period . and the unescaped newline characters are eos. - A statement an expression followed by an optional operation, or the empty statement. A statement evaluates to the value of it's expression to which the consecutive operations are applied from right to left. - An expression is either a command or an evaluation. - An operation starts with an operator followed by a statement. That statement may in turn contain another operation. - An operator begins with a token with one of the following characters: -+/*^%~|&><@ Next is a statement. The effect of an operator is to evaluate the links before it and after it as a substitution, and then look up the callable with the name of the operator and execute that. - A command consists of a field and parameters . - A field is a name optionally followed by selectors . - An evaluation is a block, parenthesis, list, getter, setter, or literal. - A parameter is an evaluation or a field. - A selector starts with a token with one of the following characters: ,;' Next is a parameter. The effect of a selector is to evaluate the parameter, and then use it as a string to look up the value of the field thus named in named object. - A block consist of statements between braces { }. The statements of a block are not executed but compiled to a block which can be called to execute it later. - A () parenthesis gets substituted inline by the value returned by executing it anywhere it occurs, also in the beginning of a command. - A bracketed list [ elem1 elem2 ... ] is syntactic sugar for (list elem1 elem 2) - A dollar getter $varname is syntactic sugar for (get varname) - A equals setter =varname value is syntactic sugar for (set varname value) - Therefore, parenthesis, lists, getters and setters are allowed anywhere, also in the beginning of the command with substitution semantics. - A literal evaluates to itself. - A name also evaluates as itself but is specific for commands. or, yet again: # Type a grammar here: PROGRAM -> STATEMENTS. STATEMENTS -> STATEMENT eos STATEMENTS|. STATEMENT -> EXPRESSION OPERATION |. OPERATION -> operator STATEMENT |. EXPRESSION -> COMMAND | EVALUATION . COMMAND -> NAME PARAMETERS. PARAMETERS -> PARAMETER PARAMETERS |. PARAMETER -> EVALUATION | NAME . EVALUATION -> LITERAL | BLOCK | GETTER | SETTER | LIST | PARENTHESIS | TAKE | MAKE | METHOD . PARENTHESIS -> closeparen STATEMENT openparen. BLOCK -> openblock STATEMENTS closeblock. LIST -> openlist PARAMETERS closelist. SETTER -> set PARAMETER PARAMETER. GETTER -> get PARAMETER. MAKE -> make FIELD PARAMETER . TAKE -> take FIELD . METHOD -> call FIELD . FIELD -> NAME SELECTORS. SELECTORS -> selector NAME SELECTORS |. LITERAL -> string | int | float. NAME -> word | symbol | type. */ type Parser struct { Lexer *Lexer current Token LoggerWrapper Errors []ParserError } // panic with this type on errors that would prevent the parser // from making progress. type ParserError struct { *Parser *Token Chain error } func (pe ParserError) Error() string { return fmt.Sprintf("%s %s", pe.Token.String(), pe.Chain.Error()) } func (parser *Parser) SetLogger(logger Logger) { parser.LoggerWrapper = LoggerWrapper{logger} } func (parser *Parser) SetDefaultLogger() { lg := &testLogger{} parser.LoggerWrapper = LoggerWrapper{lg} } func (parser *Parser) SetVmLogger(vm * VM) { parser.LoggerWrapper = vm.Console.LoggerWrapper } func (parser *Parser) Errorf(message string, args ...interface{}) ParserError { err := fmt.Errorf(message, args...) pe := ParserError { Parser: parser, Token:&parser.current, Chain: err } parser.Errors = append(parser.Errors, pe) return pe } func (parser *Parser) Panicf(message string, args ...interface{}) { pe := parser.Errorf(message, args...) panic(pe) } func (parser *Parser) Advance() { token := parser.Lexer.Lex() parser.current = token parser.LogDebug("Next token: %s\n", token.String()) } /* Skips tokens until a closer, namely ), }, ], EOX or EOF is found, * to skip parse errors, and to be able to continue parsing despite errors. */ func (parser *Parser) SkipError() Token { parser.Advance() for { parser.Advance() if parser.NextIsErrorSkipped() { return parser.current } if parser.NextIs(TokenKindError, TokenKindNone) { parser.Panicf("Cannot recover from parse error. Parse ended.") } } return parser.current } /* Looks at the current token and advances the lexer if the token is of any of the token kinds given in kinds. In this case it will return the accepted token and advance the parser. Otherwise, it will call parser.Panicf with a * "Syntax error unexpected ".*/ func (parser *Parser) Require(kinds ...TokenKind) Token { if parser.current.IsNone() { parser.Advance() } expected := "" sep := "" for _, kind := range kinds { if kind == parser.current.TokenKind { accepted := parser.current parser.Advance() parser.LogDebug("Require: Accepted token: %s\n", accepted.String()) return accepted } expected = fmt.Sprintf("%s%s%s", expected, sep, kind.String()) } parser.Panicf("error: expected one of the following: %s", expected) return Token{} } func (parser *Parser) NewAstError(message string, args ...interface{}) *Ast { sv := StringValue(fmt.Sprintf(message+" at token "+parser.current.String(), args)) pos := parser.current.Position tok := NewToken(TokenKindError, sv, pos) parser.Errorf(message, args...) return NewAst(AstKindError, nil, EmptyAstArray(), tok) } // Also handles none or error vales of the token func (parser *Parser) NewAst(kind AstKind, parent * Ast, children []*Ast, value Token) *Ast{ if value.IsNone() { return NewAstNone() } if value.IsError() { return NewAst(AstKindError, parent, children, value) } return NewAst(kind, parent, children, value) } func (parser *Parser) ParseValue() *Ast { parser.LogDebug("ParseValue: %s\n", parser.current.String()) value := parser.Require(TokenKindInteger, TokenKindString, TokenKindBoolean, TokenKindNil, TokenKindFloat, TokenKindSymbol) return parser.NewAst(AstKindValue, nil, EmptyAstArray(), value) } func AstKindForToken(token Token) AstKind { switch token.TokenKind { case TokenKindInteger, TokenKindString, TokenKindBoolean, TokenKindNil, TokenKindFloat, TokenKindSymbol: return AstKindValue case TokenKindWord: return AstKindWord case TokenKindType: return AstKindType default: return AstKindError } } func (parser *Parser) ParseLiteral() *Ast { parser.LogDebug("ParseLiteral: %s\n", parser.current.String()) value := parser.Require(TokenKindInteger, TokenKindString, TokenKindBoolean, TokenKindNil, TokenKindFloat) astKind := AstKindForToken(value) return parser.NewAst(astKind, nil, EmptyAstArray(), value) } func (parser *Parser) ParseName() *Ast { parser.LogDebug("ParseName: %s\n", parser.current.String()) value := parser.Require(TokenKindWord, TokenKindType, TokenKindSymbol) astKind := AstKindForToken(value) return parser.NewAst(astKind, nil, EmptyAstArray(), value) } func (parser *Parser) ParseArgument() *Ast { parser.LogDebug("ParseArgument: %s\n", parser.current.String()) switch { case parser.NextIsName(): return parser.ParseField() case parser.NextIsBlock(): return parser.ParseBlock() case parser.NextIsSubstitution(): return parser.ParseSubstitution() case parser.NextIsLiteral(): return parser.ParseLiteral() default: parser.Panicf("error: in argument: expected $, =, }, word or value") return nil } } func (parser *Parser) ParseArguments(extra ... *Ast) *Ast { parser.LogDebug("ParseArguments: %s\n", parser.current.String()) ast := NewAstWithToken(AstKindArguments, parser.current) var children []*Ast for _, arg := range extra { children = append(children, arg) } for parser.NextIsArgument() { child := parser.ParseArgument() children = append(children, child) } ast.AppendChildren(children...) return ast } func (parser *Parser) ParseList() *Ast { parser.LogDebug("ParseList: %s\n", parser.current.String()) op := parser.Require(TokenKindOpenList) if op.IsError() { return parser.NewAstError("Unexpected value.") } ast := NewAstWithToken(AstKindList, op) args := parser.ParseArguments() if AstIsError(args) { return args } if cp := parser.Require(TokenKindCloseList); cp.IsError() { return parser.NewAstError("expected closing brackets") } ast.AppendChild(args) return ast } func (parser *Parser) ParseParenthesis() *Ast { parser.LogDebug("ParseParenthesis: %s\n", parser.current.String()) op := parser.Require(TokenKindOpenParen) if op.IsError() { return parser.NewAstError("Unexpected value.") } ast := NewAstWithToken(AstKindParenthesis, op) expr := parser.ParseChain() if expr.IsNone() { return parser.NewAstError("expected chain") } if AstIsError(expr) { return expr } if cp := parser.Require(TokenKindCloseParen); cp.IsError() { return parser.NewAstError("expected closing parenthesis") } ast.AppendChild(expr) return ast } func (parser *Parser) ParseBlock() *Ast { parser.LogDebug("ParseBlock: %s\n", parser.current.String()) op := parser.Require(TokenKindOpenBlock) if op.IsError() { return parser.NewAstError("Unexpected value.") } ast := NewAstWithToken(AstKindBlock, op) stats := parser.ParseStatements() if stats.IsNone() { return parser.NewAstError("expected expression") } if AstIsError(stats) { return stats } if cp := parser.Require(TokenKindCloseBlock); cp.IsError() { return parser.NewAstError("expected closing block") } ast.AppendChild(stats) return ast } func (parser *Parser) RequireName() Token { return parser.Require(TokenKindWord, TokenKindType, TokenKindSymbol) } /* Parses the target of a set expression */ func (parser *Parser) ParseTarget() *Ast { parser.LogDebug("ParseTarget: %s\n", parser.current.String()) var target *Ast switch { case parser.NextIsName(): // Direct target to a set token := parser.RequireName() target = NewAstWithToken(AstKindTarget, token) case parser.NextIsSubstitution(): // Indirect target target = NewAstWithToken(AstKindTarget, parser.current) target.AppendChild(parser.ParseSubstitution()) default: parser.Panicf("Malformed setter.") return nil } argument := parser.ParseArgument() if argument.IsNone() { return parser.NewAstError("Expected argument to set") } target.AppendChild(argument) return target } /* Parses the origin of a get expression */ func (parser *Parser) ParseOrigin() *Ast { parser.LogDebug("ParseOrigin: %s\n", parser.current.String()) var target *Ast switch { case parser.NextIsName(): // Direct target to a set token := parser.RequireName() target = NewAstWithToken(AstKindTarget, token) case parser.NextIsSubstitution(): target = NewAstWithToken(AstKindTarget, parser.current) target.AppendChild(parser.ParseSubstitution()) default: parser.Panicf("Malformed getter") return nil } return target } func (parser *Parser) ParseSelector() *Ast { parser.LogDebug("ParseSelector %s\n", parser.current.String()) selector := parser.Require(TokenKindSelector) parameter := parser.ParseArgument() ast := NewAstWithToken(AstKindSelector, selector) ast.AppendChild(parameter) return ast } // Parses a Field with it's optional selectors func (parser *Parser) ParseField() *Ast { // FIELD -> NAME SELECTOR. name := parser.ParseName() if parser.NextIsSelector() { selector := parser.ParseSelector() // prepend name before selector selector.children = append([]*Ast{name}, selector.children...) } // No selector, just return the name return name } func (parser *Parser) ParseSet() *Ast { parser.LogDebug("ParseSet: %s\n", parser.current.String()) set := parser.Require(TokenKindSet) ast := NewAstWithToken(AstKindSet, set) target := parser.ParseTarget() ast.AppendChild(target) return ast } func (parser *Parser) ParseGet() *Ast { parser.LogDebug("ParseGet: %s\n", parser.current.String()) get := parser.Require(TokenKindGet) ast := NewAstWithToken(AstKindGet, get) target := parser.ParseOrigin() ast.AppendChild(target) return ast } func (parser *Parser) ParseCommand() *Ast { parser.LogDebug("ParseCommand: %s\n", parser.current.String()) field := parser.ParseField() arguments := parser.ParseArguments() /// this is wrong... command := NewAstWithToken(AstKindCommand, field.Token()) command.AppendChild(arguments) return command } func (parser *Parser) ParseSubstitution() *Ast { parser.LogDebug("ParseSubstitution: %s\n", parser.current.String()) switch { case parser.NextIs(TokenKindOpenParen): return parser.ParseParenthesis() case parser.NextIs(TokenKindOpenList): return parser.ParseList() case parser.NextIs(TokenKindGet): return parser.ParseGet() case parser.NextIs(TokenKindSet): return parser.ParseSet() default: parser.Panicf("Syntax error in substitution, expected $, =, (, [") return nil } } func (parser *Parser) ParseExpression() *Ast { parser.LogDebug("ParseExpression: %s\n", parser.current.String()) switch { case parser.NextIsWord(): return parser.ParseCommand() case parser.NextIsSubstitution(): return parser.ParseSubstitution() case parser.NextIsLiteral(): return parser.ParseLiteral() default: parser.Panicf("Syntax error in expression, expected word, $, =, value") return nil } } func (parser *Parser) ParseExpressionStatement() *Ast { parser.LogDebug("ParseExpressionStatement: %s\n", parser.current.String()) expr := parser.ParseExpression() if expr.IsNone() { return NewAstNone() } // Expression statements must end on EOX or EOF if eox := parser.Require(TokenKindEOX, TokenKindEOF); eox.IsError() { return parser.NewAstError("expected end of statement") } return expr } func (parser *Parser) ParseEmptyStatement() *Ast { parser.LogDebug("ParseEmptyStatement: %s\n", parser.current.String()) if eox := parser.Require(TokenKindEOX, TokenKindEOF); eox.IsError() { return parser.NewAstError("expected end of statement") } return NewAstWithToken(AstKindStatement, parser.current) } func (parser Parser) NextIs(kinds ...TokenKind) bool { if (parser.current.TokenKind == TokenKindNone) { parser.Advance() } for _, kind := range kinds { if kind == parser.current.TokenKind { return true } } return false } func (parser Parser) NextIsBlock() bool { return parser.NextIs(TokenKindOpenBlock) } func (parser Parser) NextIsSubstitution() bool { return parser.NextIs(TokenKindOpenList, TokenKindOpenParen, TokenKindSet, TokenKindGet) } func (parser Parser) NextIsWord() bool { return parser.NextIs(TokenKindWord) } func (parser Parser) NextIsGet() bool { return parser.NextIs(TokenKindGet) } func (parser Parser) NextIsSet() bool { return parser.NextIs(TokenKindSet) } func (parser Parser) NextIsLiteral() bool { return parser.NextIs(TokenKindString, TokenKindInteger, TokenKindFloat, TokenKindBoolean, TokenKindNil) } func (parser Parser) NextIsName() bool { return parser.NextIs(TokenKindWord, TokenKindType, TokenKindSymbol) } func (parser Parser) NextIsOperator() bool { return parser.NextIs(TokenKindOperator) } func (parser Parser) NextIsSelector() bool { return parser.NextIs(TokenKindSelector) } func (parser Parser) NextIsArgument() bool { return parser.NextIsName() || parser.NextIsLiteral() || parser.NextIsSubstitution() || parser.NextIsBlock() } func (parser Parser) NextIsCommand() bool { return parser.NextIsName() } func (parser Parser) NextIsErrorSkipped() bool { return parser.NextIs(TokenKindCloseBlock, TokenKindCloseList, TokenKindCloseParen, TokenKindEOX, TokenKindEOF) } func (parser Parser) NextIsEOX() bool { return parser.NextIs(TokenKindEOX) } func (parser Parser) NextIsEOF() bool { return parser.NextIs(TokenKindEOF) } func (parser Parser) NextIsExpression() bool { return parser.NextIsCommand() || parser.NextIsSubstitution() || parser.NextIsLiteral() } func (parser Parser) NextIsChain() bool { return parser.NextIsExpression() } func (parser Parser) NextIsStatement() bool { return parser.NextIsChain() || parser.NextIsBlock() || parser.NextIsEOX() } func (parser *Parser) newMethodChain(oper Token, expr1, expr2 *Ast) *Ast { chain := NewAstWithToken(AstKindOperation, oper) chain.AppendChildren(expr1) chain.AppendChild(NewAstWithToken(AstKindForToken(expr2.Token()), expr2.Token())) chain.AppendChildren(expr2.Children()...) parser.LogDebug("New method chain: %v", chain) return chain } func (parser *Parser) newChain(oper Token, expr1, expr2 *Ast) * Ast { var astkind AstKind = AstKindParenthesis subst1 := NewAstWithToken(astkind, oper) subst1.AppendChildren(expr1) subst2 := NewAstWithToken(astkind, oper) subst2.AppendChildren(expr2) chain := NewAstWithToken(AstKindOperation, oper) chain.AppendChildren(subst1, subst2) return chain } func (parser *Parser) composeChain(oper Token, chain, nextExpr *Ast) * Ast { var astkind AstKind = AstKindParenthesis parser.LogDebug("Composed chain: %v", chain) subst := NewAstWithToken(astkind, oper) subst.AppendChildren(nextExpr) newChain := NewAstWithToken(AstKindOperation, oper) newChain.AppendChildren(chain, subst) return newChain } func (parser *Parser) ParseChain() *Ast { expression := parser.ParseExpression() if !parser.NextIsOperator() { return expression } var expressions = []*Ast{ expression } var operators = []Token {} for parser.NextIsOperator() { oper := parser.Require(TokenKindOperator) expression := parser.ParseExpression() if expression == nil { parser.Panicf("Expected expression after operator.") } expressions = append(expressions, expression) operators = append(operators, oper) } // Now there are N expressions and N - 1 operators, iterate // for easy composition var chain *Ast parser.LogDebug("Working on operator chain: %v %v", operators, expressions) for i, j := 0, 0 ; i < len(expressions) && j < len(operators) ; i, j = i + 1 , j + 1 { expression := expressions[i] oper := operators[j] if chain == nil { expression2 := expressions[i+1] chain = parser.newChain(oper, expression, expression2) i++ } else { chain = parser.composeChain(oper, chain, expression) } } return chain } func (parser *Parser) ParseStatement() *Ast { parser.LogDebug("ParseStatement: %s\n", parser.current.String()) switch { case parser.NextIsBlock(): return parser.ParseBlock() case parser.NextIsExpression(): return parser.ParseChain() case parser.NextIsEOX(): return parser.ParseEmptyStatement() default: parser.Panicf("Expected block, command, or EOX in statement") return parser.NewAstError("Expected closed, expression, or EOX in statement") } } func (parser *Parser) ParseStatements() *Ast { parser.LogDebug("ParseStatements: %s\n", parser.current.String()) ast := NewAstWithToken(AstKindStatements, parser.current) var children []*Ast for parser.NextIsStatement() { child := parser.ParseStatement() children = append(children, child) } ast.AppendChildren(children...) return ast } func (parser *Parser) ParseProgram() *Ast { parser.LogDebug("ParseProgram: %s\n", parser.current.String()) stats := parser.ParseStatements() eof := parser.Require(TokenKindEOF) aeof := NewAstWithToken(AstKindEnd, eof) if eof.IsNone() { aeof = parser.NewAstError("Expected EOF, have: %s", parser.current.String()) } children := []*Ast{stats, aeof} return NewAst(AstKindProgram, nil, children, NoToken()) } func (parser *Parser) Parse() (ast *Ast) { defer func() { if err := recover() ; err != nil { if perr, ok := err.(ParserError) ; ok{ ast = parser.NewAstError("Parse error, recovered: %s\n", perr) } else { panic(err) } } } () parser.Advance() ast = parser.ParseProgram() return ast } func NewParser(lexer *Lexer) *Parser { parser := &Parser{Lexer: lexer, current: NoToken(), LoggerWrapper: LoggerWrapper{nil}} return parser } func NewParserFromString(input string) *Parser { lexer := NewLexerFromString(input) return NewParser(lexer) } func NewParserFromFilename(filename string) (*Parser, error) { lexer, err := NewLexerFromFilename(filename) if err != nil { return nil, err } return NewParser(lexer), nil } func (parser * Parser) NewKeyword(name string, tk TokenKind, val Value) *Keyword { return parser.Lexer.NewKeyword(name, tk, val) } func (parser * Parser) AddKeyword(kw * Keyword) *Keyword { return parser.Lexer.AddKeyword(kw) } func (parser * Parser) AddKeywords(keywords []*Keyword) { for _, kw := range keywords { parser.AddKeyword(kw) } } func (parser * Parser) AddDefaultKeywords() { parser.AddKeywords(DefaultKeywords) }