
Try a fresh start and work on the lexer.

Beoran 2 years ago
parent
commit
7787c932de
13 changed files with 569 additions and 0 deletions
  1. go.mod (+3 -0)
  2. lexer.go (+566 -0)
  3. old/generate.go (+0 -0)
  4. old/grammar.go (+0 -0)
  5. old/ll1.debug.tpl (+0 -0)
  6. old/ll1.dot.tpl (+0 -0)
  7. old/ll1.go (+0 -0)
  8. old/ll1.parser.go.lined.tpl (+0 -0)
  9. old/ll1.parser.go.tpl (+0 -0)
  10. old/ll1_parser.go (+0 -0)
  11. old/main.go (+0 -0)
  12. old/parser.go (+0 -0)
  13. old/template_functions.go (+0 -0)

+ 3 - 0
go.mod

@@ -0,0 +1,3 @@
+module src.eruta.nl/ll1
+
+go 1.16

+ 566 - 0
lexer.go

@@ -0,0 +1,566 @@
+package main
+
+import (
+	"fmt"
+	"strings"
+	"unicode"
+)
+
+// Ucalgary syntax:
+//
+// Grammar Structure
+//
+// The format for a context free grammar is as follows:
+//
+// Any reference to "punctuation" when describing legal identifiers is restricted
+// to certain pieces of punctuation. In no particular order, here are the
+// punctuation characters that are legal: ~ ! @ # * ( ) _ + ' ; : / ?
+//
+// Terminals are strings which either start with a lowercase letter, a number,
+// or a piece of punctuation, followed by any amount of letters, numbers and
+// punctuation. i.e. a terminal matches the regular expression
+// [{punctuation}a-z0-9][{punctuation}a-zA-Z0-9]*. Examples are id + begin end
+// plus 9 8' 7+? '' ....
+//
+// Nonterminals start with uppercase letters, followed by any number of legal
+// characters. i.e. a nonterminal matches the regular expression
+// [A-Z][{punctuation}a-zA-Z0-9]*. Examples are S A EXPR TERM~3 Stop ....
+//
+// It is assumed that the start symbol is the first nonterminal whose productions
+// are given.
+//
+// The productions associated with a nonterminal are indicated as
+// head -> RHS1 | RHS2 | ... | RHSn.
+// where the alternative right-hand sides of the productions are separated by
+// a vertical bar, "|", and terminated by a period, ".".
+//
+// The RHS of a production is a sequence of terminals or nonterminals separated
+// by spaces. For example: Expr add Expr.
+//
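+// For illustration, a small grammar in this format might look like the
+// following sketch (the symbol names here are made up):
+//
+// Expr -> Term ExprRest .
+// ExprRest -> plus Term ExprRest | epsilon .
+// Term -> id | number .
+//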
+// C-style comments are allowed, e.g.
+// /* this is a comment     possibly spanning many lines */
+//
+// Empty grammars
+// The grammar epsilon or ε is empty.
+// For any nonterminal, epsilon is a valid grammar.
+// This represents a grammar with no productions.
+//
+// In contrast to the syntax above, ll1 only allows the _ punctuation
+// character in identifiers.
+//
+
+// Location is the location of the lexer in its input, or of a token.
+type Location struct {
+	// Name is the name of the file or input.
+	Name string
+	// Index is the current index in the input buffer of the lexer,
+	// or of the token.
+	Index int
+	// Start is the index in the input buffer at which the token begins.
+	Start int
+	// Line is the line the lexer is at, or on which a token begins.
+	Line int
+	// Col is the column in the line on which the token begins.
+	Col int
+}
+
+func (l Location) String() string {
+	return fmt.Sprintf("%s:%d:%d", l.Name, l.Line, l.Col)
+}
+
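+// TokenKind indicates the kind of a lexed token.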
+type TokenKind rune
+
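+// TokenKindSkip marks a token that the lexer should discard rather than
+// include in its output.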
+const TokenKindSkip = TokenKind(-2)
+
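+// Token is a single lexed token, with its kind and its location in the input.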
+type Token struct {
+	Location
+	TokenKind
+	Text string
+}
+
+// Advance moves the location to the next rune in input, updating the
+// location's Index, Line and Col.
+// Returns the rune found, or -1 if the end of the buffer has been reached.
+func (l *Location) Advance(input []rune) rune {
+	if l.Index >= len(input) {
+		return -1
+	}
+	r := input[l.Index]
+	// Treat CR, LF and CRLF each as a single line ending: an LF only
+	// counts as a new line if it does not directly follow a CR.
+	if (r == '\r') ||
+		(r == '\n' && (l.Index == 0 || input[l.Index-1] != '\r')) {
+		l.Line++
+	}
+	if (r == '\n') || (r == '\r') {
+		l.Col = 0
+	} else {
+		l.Col++
+	}
+	l.Index++
+	return r
+}
+
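+// Lexer lexes an input buffer of runes by applying its Rules, keeping track
+// of where it is in the embedded Location.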
+type Lexer struct {
+	Location
+	Input []rune
+	Rules []LexerFunc
+}
+
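+// LexerFuncs maps names to lexer functions.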
+type LexerFuncs map[string]LexerFunc
+
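+// Advance moves the lexer to the next rune in its input, updating its
+// Location, and returns that rune, or -1 at the end of the input.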
+func (l *Lexer) Advance() rune {
+	return l.Location.Advance(l.Input)
+}
+
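+// IsEof returns true if the lexer has reached the end of its input.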
+func (l Lexer) IsEof() bool {
+	return l.Index >= len(l.Input)
+}
+
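+// Peek returns the rune at the current position without advancing,
+// or -1 if the end of the input has been reached.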
+func (l Lexer) Peek() rune {
+	if l.IsEof() {
+		return -1
+	}
+	return l.Input[l.Location.Index]
+}
+
+// LexerFunc is a lexer function.
+// It lexes the input buffer starting from lex.Index, which the caller must
+// guarantee to be non negative.
+// A LexerFunc must make progress if it lexes a token or returns an error,
+// and must NOT make progress if it does not match.
+// It should return as follows:
+// * If the LexerFunc matched what it is intended to lex,
+//   it should return the lexed token(s) and nil errors; lex.Start points to
+//   the start of the token, and lex.Index must be moved to point right after
+//   the lexed part of the input. To indicate that the lexer should skip the
+//   lexed token, set its token kind to TokenKindSkip.
+// * If the LexerFunc did not match what it is intended to lex,
+//   it should return nil, nil, and lex.Index must be left unchanged.
+// * If the LexerFunc did match what it is intended to lex, but there is a
+//   lexing error, it should return an empty token slice and a slice of
+//   errors, and lex.Index should be set to the error's location.
+type LexerFunc func(lex *Lexer) ([]Token, []error)
+
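+// For illustration, a hypothetical rule that lexes and skips runs of
+// whitespace could follow the contract above like this (LexSpace is only a
+// sketch, not part of the lexer itself):
+//
+//	func LexSpace(lex *Lexer) ([]Token, []error) {
+//		for !lex.IsEof() && unicode.IsSpace(lex.Peek()) {
+//			lex.Advance()
+//		}
+//		if lex.Index == lex.Start {
+//			return nil, nil // no match: no progress, no tokens, no errors
+//		}
+//		// mark the lexed whitespace as skippable so Lex drops it
+//		return lex.Tokenize(TokenKindSkip)
+//	}
+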
+var Debug = false
+
+func debug(msg string) {
+	if Debug {
+		print(msg)
+	}
+}
+
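+// WhileOk lexes runes from the current position for as long as ok returns
+// true, returning the text lexed up to and including the first rune for
+// which ok returned false, the empty string if ok fails on the first rune,
+// or an error if the end of the input is reached first.
+// WhileOk has a value receiver, so the caller's lexer state is not changed.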
+func (l Lexer) WhileOk(ok func(r rune, l Lexer) bool) (string, error) {
+	l.Start = l.Index
+	now := 0
+	for !l.IsEof() {
+		r := l.Advance()
+		if !ok(r, l) {
+			if now == 0 {
+				return "", nil
+			}
+			return string(l.Input[l.Start:l.Index]), nil
+		}
+		now++
+	}
+	return "", fmt.Errorf("unexpected EOF: >%s<", string(l.Input[l.Start:l.Index]))
+}
+
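+// NewLexerFromString returns a Lexer that will lex the given input string,
+// under the given name, using funcs as its rules.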
+func NewLexerFromString(input, name string, funcs []LexerFunc) *Lexer {
+	loc := Location{}
+	loc.Name = name
+	return &Lexer{loc, []rune(input), funcs}
+}
+
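+// Lex lexes the whole input into tokens using the lexer's rules.
+// A panic that carries an error value is recovered and reported as an error.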
+func (l *Lexer) Lex() (result []Token, rerr []error) {
+	defer func() {
+		val := recover()
+		err, ok := val.(error)
+		if ok {
+			rerr = append(rerr, err)
+		}
+	}()
+	return l.lex()
+}
+
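+// lex is the main lexer loop: it applies the rules to the remaining input,
+// collecting tokens and errors, until the input is exhausted or no rule
+// makes progress.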
+func (l *Lexer) lex() (result []Token, rerr []error) {
+	for !l.IsEof() {
+		progress := false
+		for _, lf := range l.Rules {
+			tokens, errs := lf(l)
+			if len(errs) > 0 {
+				rerr = append(rerr, errs...)
+				// skip until the next whitespace to resynchronize
+				for !l.IsEof() {
+					r2 := l.Advance()
+					if unicode.IsSpace(r2) {
+						break
+					}
+				}
+			}
+			for _, token := range tokens {
+				if token.TokenKind != TokenKindSkip {
+					result = append(result, token)
+				}
+			}
+			if l.Index != l.Start {
+				// this rule made progress: advance Start and try the
+				// rules again from the first on the remaining input
+				l.Start = l.Index
+				progress = true
+				break
+			}
+		}
+		// no rule made any progress, which indicates a fatal lex error
+		if !progress {
+			rerr = append(rerr, fmt.Errorf("Lex error: %s", l.Location))
+			return result, rerr
+		}
+	}
+	return result, rerr
+}
+
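+// Tokenize turns the input between l.Start and l.Index into a single token
+// of the given kind, or returns an error if the lexer has not advanced
+// since l.Start.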
+func (l *Lexer) Tokenize(kind TokenKind) (toks []Token, errs []error) {
+	if l.Index == l.Start {
+		err := fmt.Errorf("Lex error: %s, expected %c", l.Location, kind)
+		errs = append(errs, err)
+		return toks, errs
+	}
+	str := string(l.Input[l.Start:l.Index])
+	return []Token{Token{l.Location, kind, str}}, errs
+}
+
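+// LexContains lexes a run of runes that are all contained in chars into a
+// single token of the given kind. If the rune at the current position is
+// not in chars, it does not match and returns no tokens and no errors.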
+func (l *Lexer) LexContains(chars string, kind TokenKind) (toks []Token, errs []error) {
+	if !strings.ContainsRune(chars, l.Peek()) {
+		return toks, errs
+	}
+
+	for !l.IsEof() {
+		// re-peek on every iteration so the run stops at the first
+		// rune that is not in chars
+		if strings.ContainsRune(chars, l.Peek()) {
+			l.Advance()
+		} else {
+			return l.Tokenize(kind)
+		}
+	}
+	return l.Tokenize(kind)
+}
+
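+// For illustration, a hypothetical lexer built from these pieces, with one
+// rule for runs of digits and one that skips whitespace (the rule names and
+// the 'N' kind are made up for this sketch):
+//
+//	digits := func(lex *Lexer) ([]Token, []error) {
+//		return lex.LexContains("0123456789", TokenKind('N'))
+//	}
+//	spaces := func(lex *Lexer) ([]Token, []error) {
+//		return lex.LexContains(" \t\r\n", TokenKindSkip)
+//	}
+//	lexer := NewLexerFromString("7 42 9", "example", []LexerFunc{digits, spaces})
+//	tokens, errs := lexer.Lex()
+//	// tokens should now hold one token per run of digits; errs stays empty.
+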
+/*
+func LexerRs(input []rune, index *int) (Token, error) {
+	debug("LexerRs")
+	SkipWs(input, index)
+	return LexerWhileRuneOk(input, index, func(r rune) bool {
+		return r == '\n' || r == '\r' || r == ';'
+	})
+}
+
+func LexerWs(input []rune, index *int) (Token, error) {
+	debug("LexerWs")
+	return LexerWhileRuneOk(input, index, func(r rune) bool {
+		return r == ' ' || r == '\t'
+	})
+}
+
+func LexerWsRs(input []rune, index *int) (Token, error) {
+	debug("LexerRs")
+	SkipWs(input, index)
+	return LexerWhileRuneOk(input, index, func(r rune) bool {
+		return r == '\n' || r == '\r' || r == ';' || r == ' ' || r == '\t'
+	})
+}
+
+func SkipWs(input []rune, index *int) {
+	LexerWs(input, index)
+}
+
+func SkipRs(input []rune, index *int) {
+	LexerRs(input, index)
+}
+
+func SkipWsRs(input []rune, index *int) {
+	LexerWsRs(input, index)
+}
+
+func LexerComment(input []rune, index *int) (Token, error) {
+	debug("LexerComment")
+	start := *index
+	if !RequireRune(input, index, '#') {
+		return nil, nil
+	}
+	for ; *index < len(input); *index++ {
+		r := input[*index]
+		if r == '\n' || r == '\r' {
+			end := *index
+			return Comment(string(input[start:end])), nil
+		}
+	}
+	return nil, ErrorFromString("unexpected EOF in comment")
+}
+
+func LexerStatement(input []rune, index *int) (Token, error) {
+	debug("LexerStatement")
+	SkipWs(input, index)
+	return LexerAlternative(input, index, LexerCommand, LexerBlock, LexerComment)
+}
+
+func LexerParameters(input []rune, index *int) (Token, error) {
+	debug("LexerParameters")
+	params := List{}
+	for {
+		sep, err := LexerWs(input, index)
+		if err != nil {
+			return nil, err
+		}
+		if sep == nil {
+			return params, nil
+		}
+		val, err := LexerParameter(input, index)
+		if err != nil {
+			return nil, err
+		}
+		if val == nil {
+			return params, nil
+		}
+		params = append(params, val)
+	}
+}
+
+func LexerParameter(input []rune, index *int) (Token, error) {
+	debug("LexerParameter")
+	funcs := []LexerFunc{LexerLiteral, LexerEvaluation, LexerBlock, LexerGetter}
+	return LexerAlternative(input, index, funcs...)
+}
+
+func LexerOrder(input []rune, index *int) (Token, error) {
+	debug("LexerOrder")
+	return LexerAlternative(input, index, LexerLiteral, LexerEvaluation)
+}
+
+func LexerCommand(input []rune, index *int) (Token, error) {
+	debug("LexerCommand")
+	order, err := LexerOrder(input, index)
+	if err != nil || order == nil {
+		return order, err
+	}
+	params, err := LexerParameters(input, index)
+	if err != nil {
+		return params, err
+	}
+	if params == nil {
+		params = List{}
+	}
+	return Command{order, params.(List)}, nil
+}
+
+// RequireRune requires a single rune to be present and skips it;
+// the rune itself is discarded.
+// Returns true if the rune was found, false if not.
+func RequireRune(input []rune, index *int, req rune) bool {
+	if input[*index] == req {
+		*index++
+		return true
+	}
+	return false
+}
+
+func LexerEvaluation(input []rune, index *int) (Token, error) {
+	debug("LexerEvaluation")
+	if !RequireRune(input, index, '[') {
+		return nil, nil
+	}
+	res, err := LexerCommand(input, index)
+	if err != nil {
+		return nil, err
+	}
+	if !RequireRune(input, index, ']') {
+		print(input[*index])
+		return nil, ErrorFromString("Expected end of evaluation ]")
+	}
+	if res != nil {
+		res = Evaluation{Command: res.(Command)}
+	}
+	return res, nil
+}
+
+func LexerBlock(input []rune, index *int) (Token, error) {
+	debug("LexerBlock")
+	if !RequireRune(input, index, '{') {
+		return nil, nil
+	}
+	res, err := LexerStatements(input, index)
+	if err != nil {
+		return nil, err
+	}
+	SkipWsRs(input, index)
+	if !RequireRune(input, index, '}') {
+		return nil, ErrorFromString("Expected end of block }")
+	}
+	return Block{Statements: res.(List)}, nil
+}
+
+func LexerGetter(input []rune, index *int) (Token, error) {
+	debug("LexerGetter")
+	if RequireRune(input, index, '$') {
+		if input[*index] == '$' { // recursively lex double getters
+			val, err := LexerGetter(input, index)
+			if err == nil { // Getter with a getter inside.
+				return Getter{val}, err
+			} else {
+				return nil, err
+			}
+		} else { // integer, string or getter name
+			key, err := LexerLiteral(input, index)
+			if key == nil {
+				return nil, ErrorFromString("Expected literal after getter $")
+			}
+			if err == nil {
+				return Getter{key}, nil
+			}
+			return nil, err
+		}
+	}
+	return nil, nil
+}
+
+func LexerLiteral(input []rune, index *int) (Token, error) {
+	debug("LexerLiteral")
+	return LexerAlternative(input, index, LexerWord, LexerString, LexerInteger,
+		LexerRawString)
+}
+
+func IsLetter(r rune) bool {
+	return (r >= 'a' && r <= 'z') || (r >= 'A' && r <= 'Z') || (r > rune(128)) ||
+		r == '_' || r == '/'
+}
+
+func IsNumber(r rune) bool {
+	return (r >= '0' && r <= '9')
+}
+
+func LexerWord(input []rune, index *int) (Token, error) {
+	debug("LexerWord")
+	// a word consists of an ASCII letter, a non-ASCII character, or an underscore,
+	// followed by any number of ASCII letters or digits, non-ASCII characters, or underscores
+	start := *index
+	r := input[*index]
+	if !IsLetter(r) {
+		return nil, nil
+	}
+	for *index++; *index < len(input); *index++ {
+		r := input[*index]
+		if !(IsLetter(r) || IsNumber(r)) {
+			return Word(string(input[start:*index])), nil
+		}
+	}
+	return nil, ErrorFromString("unexpected EOF in word")
+}
+
+func next(input []rune, index *int) {
+	*index++
+	if *index >= len(input) {
+		panic(ErrorFromString("Unexpected end of input."))
+	}
+}
+
+func LexerEscape(input []rune, index *int) (Token, error) {
+	res := ""
+	if input[*index] != '\\' {
+		return nil, nil
+	}
+	next(input, index)
+	switch input[*index] {
+	case 'a':
+		res += "\a"
+	case 'b':
+		res += "\b"
+	case 'e':
+		res += "\033"
+	case 'f':
+		res += "\f"
+	case 'n':
+		res += "\n"
+	case 'r':
+		res += "\r"
+	case 't':
+		res += "\t"
+	case '\\':
+		res += "\\"
+	case '"':
+		res += "\""
+	default:
+		return nil, ErrorFromString("Unknown escape sequence character")
+	}
+
+	return String(res), nil
+}
+
+func LexerString(input []rune, index *int) (Token, error) {
+	debug("LexerString")
+	res := ""
+	ch := input[*index]
+	if ch != '"' {
+		return nil, nil
+	}
+	*index++
+	for *index < len(input) {
+		ch = input[*index]
+		esc, err := LexerEscape(input, index)
+		if err != nil {
+			return nil, err
+		}
+		if esc != nil {
+			res += string(esc.(String))
+		} else if ch == '"' {
+			*index++
+			return String(res), nil
+		} else {
+			res += string(ch)
+		}
+		*index++
+	}
+	return nil, ErrorFromString("Unexpected end of input.")
+}
+
+func LexerRawString(input []rune, index *int) (Token, error) {
+	debug("LexerRawString")
+	res := ""
+	ch := input[*index]
+	if ch != '`' {
+		return nil, nil
+	}
+	*index++
+	for *index < len(input) {
+		ch = input[*index]
+		if ch == '`' {
+			*index++
+			return String(res), nil
+		} else {
+			res += string(ch)
+		}
+		*index++
+	}
+	return nil, ErrorFromString("Unexpected end of input.")
+}
+
+func LexerInteger(input []rune, index *int) (Token, error) {
+	debug("LexerInteger")
+	ch := input[*index]
+	neg := 1
+	res := 0
+	if ch == '-' {
+		neg = -1
+	} else if ch == '+' {
+		// do nothing, ignore + as an integer prefix
+	} else {
+		res = int(ch - '0')
+		if res < 0 || res > 9 { // Not a digit, no integer
+			return nil, nil
+		}
+	}
+	*index++
+	for *index < len(input) {
+		ch = input[*index]
+		ch -= '0'
+		if ch < 0 || ch > 9 { // Not a digit, finished
+			return Int(neg * res), nil
+		}
+		res = res * 10
+		res = res + int(ch)
+		*index++
+	}
+	return nil, ErrorFromString("unexpected EOF in number")
+}
+*/

+ 0 - 0
generate.go → old/generate.go


+ 0 - 0
grammar.go → old/grammar.go


+ 0 - 0
ll1.debug.tpl → old/ll1.debug.tpl


+ 0 - 0
ll1.dot.tpl → old/ll1.dot.tpl


+ 0 - 0
ll1.go → old/ll1.go


+ 0 - 0
ll1.parser.go.lined.tpl → old/ll1.parser.go.lined.tpl


+ 0 - 0
ll1.parser.go.tpl → old/ll1.parser.go.tpl


+ 0 - 0
ll1_parser.go → old/ll1_parser.go


+ 0 - 0
main.go → old/main.go


+ 0 - 0
parser.go → old/parser.go


+ 0 - 0
template_functions.go → old/template_functions.go