beoran
/
ll1


			
				
					
						
						
							123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566
							package main

import (
	"fmt"
	"strings"
	"unicode"
)

// Ucalgary stntax:
//
// Grammar Structure
//
// The format for a context free grammar is as follows:
//
// Any reference to "punctuation" when describing legal identifiers is restricted
// to certain pieces of punctuation. In no particular order, here are the
// punctuation characters that are legal: ~ ! @ # * ( ) _ + ' ; : / ?
//
// Terminals are strings which either start with a lowercase letter, a number,
// or a piece of punctuation, followed by any amount of letters, numbers and
// punctuation. i.e. a terminal matches the regular expression
// [{punctuation}a-z0-9][{punctuation}a-zA-Z0-9]*. Examples are id + begin end
// plus 9 8' 7+? '' ....
//
// Nonterminals start with uppercase letters, followed by any number of legal
// characters. i.e. a nonterminal matches the regular expression
// [A-Z][{punctuation}a-zA-Z0-9]*. Examples are S A EXPR TERM~3 Stop ....
//
// It is assumed that the start symbol is the first nonterminal whose productions
// are given.
//
// The productions associated with a nonterminal are indicated as
// head -> RHS1          | RHS2          ....          | RHSn.
// where the alternative righthand sides of the productions are separated by
// a slash, "|", and terminated by a period, ".".
//
// The RHS of a production is a sequence of terminals or nonterminals separated
// by spaces. For example: Expr add Expr.
//
// C-style comments are allowed, e.g.
// /* this is a comment     possibly spanning many lines */
//
// Empty grammars
// The grammar epsilon or ε IS EMPTY.
// For any nonterminal epsilon is a valid grammar.
// This represents a grammar with no productions.
//
// Contrary to this, ll1 only allows the _ punctuation in identifiers.
//

// Location of the lexer or of a token
type Location struct {
	//Name is the name of the file or input.
	Name string
	//Current index in the input buffer of the token, or of the lex step.
	Index int
	//Start index in the input buffer at which the token begins.
	Start int
	//Line the lexer is at or on which a token begins.
	Line int
	//Col is the column in the line on which the token begins.
	Col int
	//Index in the input buffer from which the token begins.
}

func (l Location) String() string {
	return fmt.Sprintf("%s:%d:%d", l.Name, l.Line, l.Col)
}

type TokenKind rune

const TokenKindSkip = TokenKind(-2)

type Token struct {
	Location
	TokenKind
	Text string
}

// Advance moves to the next rune in input location, updating the
// location's Index, Line and Col.
// Returns the rune found, or -1 if the end of the buffer has been reached.
func (l *Location) Advance(input []rune) rune {
	if l.Index >= len(input) {
		return -1
	}
	r := input[l.Index]
	if (r == '\n' && l.Index > 0 && input[l.Index] != '\r') ||
		(r == '\r') {
		l.Line++
	}
	if (r == '\n') || (r == '\r') {
		l.Col = 0
	}
	l.Col++
	l.Index++
	return r
}

type Lexer struct {
	Location
	Input []rune
	Rules []LexerFunc
}

type LexerFuncs map[string]LexerFunc

func (l *Lexer) Advance() rune {
	return l.Location.Advance(l.Input)
}

func (l Lexer) IsEof() bool {
	return l.Index >= len(l.Input)
}

func (l Lexer) Peek() rune {
	if l.IsEof() {
		return -1
	}
	return l.Input[l.Location.Index]
}

// LexerFunc is a Lexer function.
// It Lexes the input buffer starting from lex.Index, which must be
// guaranteed by the caller to be non negative.
// The lexerfunc must make progress if it parses a token or returns an error.
// It should NOT make progress if it does not match.
// It should return as follows:
// * If the Lexer function matched what it is intended to lex
//   it should return the lexed token(s), nil, and lex.Start points to the start
//   of the token, and lex.Index should be moved to point right after the lexed
//   part of the string. To indicate that the lexer should skip the parsed token,
//   set it's token kind to TokenKindSkip
// * If the Lexer function did not match what it is intended to lex
//   it should return nil, nil, and lex.Index should be unchanged.
// * If the Lexer function did match what it is intended to lex
//   but there is a Lexer error, it should return empty slice, error slice,
//   and l.Index should be set to the error's location.
type LexerFunc func(lex *Lexer) ([]Token, []error)

var Debug = false

func debug(msg string) {
	if Debug {
		print(msg)
	}
}

func (l Lexer) WhileOk(ok func(r rune, l Lexer) bool) (string, error) {
	l.Start = l.Index
	now := 0
	for !l.IsEof() {
		r := l.Advance()
		if !ok(r, l) {
			if now == 0 {
				return "", nil
			}
			return string(l.Input[l.Start:l.Index]), nil
		}
		now++
	}
	return "", fmt.Errorf("unexpected EOF: >" + string(l.Input[l.Start:l.Index]) + "<")
}

func NewLexerFromString(input, name string, funcs []LexerFunc) *Lexer {
	loc := Location{}
	loc.Name = name
	return &Lexer{loc, []rune(input), funcs}
}

func (l *Lexer) Lex() (result []Token, rerr []error) {
	defer func() {
		val := recover()
		err, ok := val.(error)
		if ok {
			rerr = append(rerr, err)
		}
	}()
	return l.lex()
}

func (l *Lexer) lex() (result []Token, rerr []error) {
	for !l.IsEof() {
		for _, lf := range l.Rules {
			tokens, errs := lf(l)
			if len(errs) > 0 {
				rerr = append(rerr, errs...)
				// skip until next whitespace
				for !l.IsEof() {
					r2 := l.Advance()
					if unicode.IsSpace(r2) {
						break
					}
				}
			}
			for _, token := range tokens {
				if token.TokenKind != TokenKindSkip {
					result = append(result, token)
				}
			}
			// no progress made, indicates fatal lex error
			if l.Index == l.Start {
				err := fmt.Errorf("Lex error: %s", l.Location)
				rerr = append(rerr, err)
				return result, rerr
			}
			// advance start based on progress
			l.Start = l.Index
		}
	}
	return result, rerr
}

func (l *Lexer) Tokenize(kind TokenKind) (toks []Token, errs []error) {
	if l.Index == l.Start {
		err := fmt.Errorf("Lex error: %s, expected %c", l.Location, kind)
		errs = append(errs, err)
		return toks, errs
	}
	str := string(l.Input[l.Start:l.Index])
	return []Token{Token{l.Location, kind, str}}, errs
}

func (l *Lexer) LexContains(chars string, kind TokenKind) (toks []Token, errs []error) {
	r := l.Peek()
	if !strings.ContainsRune(chars, r) {
		return toks, errs
	}

	for !l.IsEof() {
		if strings.ContainsRune(chars, r) {
			l.Advance()
		} else {
			return l.Tokenize(kind)
		}
	}
	return l.Tokenize(kind)
}

/*
func LexerRs(input []rune, index *int) (Token, error) {
	debug("LexerRs")
	SkipWs(input, index)
	return LexerWhileRuneOk(input, index, func(r rune) bool {
		return r == '\n' || r == '\r' || r == ';'
	})
}

func LexerWs(input []rune, index *int) (Token, error) {
	debug("LexerWs")
	return LexerWhileRuneOk(input, index, func(r rune) bool {
		return r == ' ' || r == '\t'
	})
}

func LexerWsRs(input []rune, index *int) (Token, error) {
	debug("LexerRs")
	SkipWs(input, index)
	return LexerWhileRuneOk(input, index, func(r rune) bool {
		return r == '\n' || r == '\r' || r == ';' || r == ' ' || r == '\t'
	})
}

func SkipWs(input []rune, index *int) {
	LexerWs(input, index)
}

func SkipRs(input []rune, index *int) {
	LexerRs(input, index)
}

func SkipWsRs(input []rune, index *int) {
	LexerWsRs(input, index)
}

func LexerComment(input []rune, index *int) (Token, error) {
	debug("LexerComment")
	start := *index
	if !RequireRune(input, index, '#') {
		return nil, nil
	}
	for ; *index < len(input); *index++ {
		r := input[*index]
		if r == '\n' || r == '\r' {
			end := *index
			return Comment(string(input[start:end])), nil
		}
	}
	return nil, ErrorFromString("unexpected EOF in comment")
}

func LexerStatement(input []rune, index *int) (Token, error) {
	debug("LexerStatement")
	SkipWs(input, index)
	return LexerAlternative(input, index, LexerCommand, LexerBlock, LexerComment)
}

func LexerParameters(input []rune, index *int) (Token, error) {
	debug("LexerParameters")
	params := List{}
	for {
		sep, err := LexerWs(input, index)
		if err != nil {
			return nil, err
		}
		if sep == nil {
			return params, nil
		}
		val, err := LexerParameter(input, index)
		if err != nil {
			return nil, err
		}
		if val == nil {
			return params, nil
		}
		params = append(params, val)
	}
}

func LexerParameter(input []rune, index *int) (Token, error) {
	debug("LexerParameter")
	funcs := []LexerFunc{LexerLiteral, LexerEvaluation, LexerBlock, LexerGetter}
	return LexerAlternative(input, index, funcs...)
}

func LexerOrder(input []rune, index *int) (Token, error) {
	debug("LexerOrder")
	return LexerAlternative(input, index, LexerLiteral, LexerEvaluation)
}

func LexerCommand(input []rune, index *int) (Token, error) {
	debug("LexerCommand")
	order, err := LexerOrder(input, index)
	if err != nil || order == nil {
		return order, err
	}
	params, err := LexerParameters(input, index)
	if err != nil {
		return params, err
	}
	if params == nil {
		params = List{}
	}
	return Command{order, params.(List)}, nil
}

// RequireRune requires a single rune to be present,
// and skips it, however that rune is discared.
// Returns true if the rune was found, false if not
func RequireRune(input []rune, index *int, req rune) bool {
	if input[*index] == req {
		*index++
		return true
	}
	return false
}

func LexerEvaluation(input []rune, index *int) (Token, error) {
	debug("LexerEvaluation")
	if !RequireRune(input, index, '[') {
		return nil, nil
	}
	res, err := LexerCommand(input, index)
	if err != nil {
		return nil, err
	}
	if !RequireRune(input, index, ']') {
		print(input[*index])
		return nil, ErrorFromString("Expected end of evaluation ]")
	}
	if res != nil {
		res = Evaluation{Command: res.(Command)}
	}
	return res, nil
}

func LexerBlock(input []rune, index *int) (Token, error) {
	debug("LexerBlock")
	if !RequireRune(input, index, '{') {
		return nil, nil
	}
	res, err := LexerStatements(input, index)
	if err != nil {
		return nil, err
	}
	SkipWsRs(input, index)
	if !RequireRune(input, index, '}') {
		return nil, ErrorFromString("Expected end of block }")
	}
	return Block{Statements: res.(List)}, nil
	return nil, nil
}

func LexerGetter(input []rune, index *int) (Token, error) {
	debug("LexerGetter")
	if RequireRune(input, index, '$') {
		if input[*index] == '$' { // recusively Lexer double getters
			val, err := LexerGetter(input, index)
			if err == nil { // Getter with a getter inside.
				return Getter{val}, err
			} else {
				return nil, err
			}
		} else { // integer, sring or getter name
			key, err := LexerLiteral(input, index)
			if key == nil {
				return nil, ErrorFromString("Expected literal after getter $")
			}
			if err == nil {
				return Getter{key}, nil
			}
			return nil, err
		}
	}
	return nil, nil
}

func LexerLiteral(input []rune, index *int) (Token, error) {
	debug("LexerLiteral")
	return LexerAlternative(input, index, LexerWord, LexerString, LexerInteger,
		LexerRawString)
}

func IsLetter(r rune) bool {
	return (r >= 'a' && r <= 'z') || (r >= 'A' && r <= 'Z') || (r > rune(128)) ||
		r == '_' || r == '/'
}

func IsNumber(r rune) bool {
	return (r >= '0' && r <= '9')
}

func LexerWord(input []rune, index *int) (Token, error) {
	debug("LexerWord")
	// a word consists of an ascii letter or non asci characters, or underscore
	// followed by an ascii letter or number, or non ascii characters, or underscore
	start := *index
	r := input[*index]
	if !IsLetter(r) {
		return nil, nil
	}
	for *index++; *index < len(input); *index++ {
		r := input[*index]
		if !(IsLetter(r) || IsNumber(r)) {
			return Word(string(input[start:*index])), nil
		}
	}
	return nil, ErrorFromString("unexpected EOF in string")
}

func next(input []rune, index *int) {
	*index++
	if *index >= len(input) {
		panic(ErrorFromString("Unexpected end of input."))
	}
}

func LexerEscape(input []rune, index *int) (Token, error) {
	res := ""
	if input[*index] != '\\' {
		return nil, nil
	}
	next(input, index)
	switch input[*index] {
	case 'a':
		res += "\a"
	case 'b':
		res += "\b"
	case 'e':
		res += "\033"
	case 'f':
		res += "\f"
	case 'n':
		res += "\n"
	case 'r':
		res += "\r"
	case 't':
		res += "\t"
	case '\\':
		res += "\\"
	case '"':
		res += "\""
	default:
		return nil, ErrorFromString("Unknown escape sequence character")
	}

	return String(res), nil
}

func LexerString(input []rune, index *int) (Token, error) {
	debug("LexerString")
	res := ""
	ch := input[*index]
	if ch != '"' {
		return nil, nil
	}
	*index++
	for *index < len(input) {
		ch = input[*index]
		esc, err := LexerEscape(input, index)
		if err != nil {
			return nil, err
		}
		if esc != nil {
			res += string(esc.(String))
		} else if ch == '"' {
			*index++
			return String(res), nil
		} else {
			res += string(ch)
		}
		*index++
	}
	return nil, ErrorFromString("Unexpected end of input.")
}

func LexerRawString(input []rune, index *int) (Token, error) {
	debug("LexerRawString")
	res := ""
	ch := input[*index]
	if ch != '`' {
		return nil, nil
	}
	*index++
	for *index < len(input) {
		ch = input[*index]
		if ch == '`' {
			*index++
			return String(res), nil
		} else {
			res += string(ch)
		}
		*index++
	}
	return nil, ErrorFromString("Unexpected end of input.")
}

func LexerInteger(input []rune, index *int) (Token, error) {
	debug("LexerInteger")
	ch := input[*index]
	neg := 1
	res := 0
	if ch == '-' {
		neg = -1
	} else if ch == '+' {
		// do nothing, ignore + as an integer prefix
	} else {
		res = int(ch - '0')
		if res < 0 || res > 9 { // Not a digit, no integer
			return nil, nil
		}
	}
	*index++
	for *index < len(input) {
		ch = input[*index]
		ch -= '0'
		if ch < 0 || ch > 9 { // Not a digit, finished
			return Int(neg * res), nil
		}
		res = res * 10
		res = res + int(ch)
		*index++
	}
	return nil, ErrorFromString("unexpected EOF in number")
}
*/