|
@@ -0,0 +1,566 @@
|
|
|
+package main
|
|
|
+
|
|
|
+import (
|
|
|
+ "fmt"
|
|
|
+ "strings"
|
|
|
+ "unicode"
|
|
|
+)
|
|
|
+
|
|
|
+// Ucalgary stntax:
|
|
|
+//
|
|
|
+// Grammar Structure
|
|
|
+//
|
|
|
+// The format for a context free grammar is as follows:
|
|
|
+//
|
|
|
+// Any reference to "punctuation" when describing legal identifiers is restricted
|
|
|
+// to certain pieces of punctuation. In no particular order, here are the
|
|
|
+// punctuation characters that are legal: ~ ! @ # * ( ) _ + ' ; : / ?
|
|
|
+//
|
|
|
+// Terminals are strings which either start with a lowercase letter, a number,
|
|
|
+// or a piece of punctuation, followed by any amount of letters, numbers and
|
|
|
+// punctuation. i.e. a terminal matches the regular expression
|
|
|
+// [{punctuation}a-z0-9][{punctuation}a-zA-Z0-9]*. Examples are id + begin end
|
|
|
+// plus 9 8' 7+? '' ....
|
|
|
+//
|
|
|
+// Nonterminals start with uppercase letters, followed by any number of legal
|
|
|
+// characters. i.e. a nonterminal matches the regular expression
|
|
|
+// [A-Z][{punctuation}a-zA-Z0-9]*. Examples are S A EXPR TERM~3 Stop ....
|
|
|
+//
|
|
|
+// It is assumed that the start symbol is the first nonterminal whose productions
|
|
|
+// are given.
|
|
|
+//
|
|
|
+// The productions associated with a nonterminal are indicated as
|
|
|
+// head -> RHS1 | RHS2 .... | RHSn.
|
|
|
+// where the alternative righthand sides of the productions are separated by
|
|
|
+// a slash, "|", and terminated by a period, ".".
|
|
|
+//
|
|
|
+// The RHS of a production is a sequence of terminals or nonterminals separated
|
|
|
+// by spaces. For example: Expr add Expr.
|
|
|
+//
|
|
|
+// C-style comments are allowed, e.g.
|
|
|
+// /* this is a comment possibly spanning many lines */
|
|
|
+//
|
|
|
+// Empty grammars
|
|
|
+// The grammar epsilon or ε IS EMPTY.
|
|
|
+// For any nonterminal epsilon is a valid grammar.
|
|
|
+// This represents a grammar with no productions.
|
|
|
+//
|
|
|
+// Contrary to this, ll1 only allows the _ punctuation in identifiers.
|
|
|
+//
|
|
|
+
|
|
|
+// Location of the lexer or of a token
|
|
|
+type Location struct {
|
|
|
+ //Name is the name of the file or input.
|
|
|
+ Name string
|
|
|
+ //Current index in the input buffer of the token, or of the lex step.
|
|
|
+ Index int
|
|
|
+ //Start index in the input buffer at which the token begins.
|
|
|
+ Start int
|
|
|
+ //Line the lexer is at or on which a token begins.
|
|
|
+ Line int
|
|
|
+ //Col is the column in the line on which the token begins.
|
|
|
+ Col int
|
|
|
+ //Index in the input buffer from which the token begins.
|
|
|
+}
|
|
|
+
|
|
|
+func (l Location) String() string {
|
|
|
+ return fmt.Sprintf("%s:%d:%d", l.Name, l.Line, l.Col)
|
|
|
+}
|
|
|
+
|
|
|
+type TokenKind rune
|
|
|
+
|
|
|
+const TokenKindSkip = TokenKind(-2)
|
|
|
+
|
|
|
+type Token struct {
|
|
|
+ Location
|
|
|
+ TokenKind
|
|
|
+ Text string
|
|
|
+}
|
|
|
+
|
|
|
+// Advance moves to the next rune in input location, updating the
|
|
|
+// location's Index, Line and Col.
|
|
|
+// Returns the rune found, or -1 if the end of the buffer has been reached.
|
|
|
+func (l *Location) Advance(input []rune) rune {
|
|
|
+ if l.Index >= len(input) {
|
|
|
+ return -1
|
|
|
+ }
|
|
|
+ r := input[l.Index]
|
|
|
+ if (r == '\n' && l.Index > 0 && input[l.Index] != '\r') ||
|
|
|
+ (r == '\r') {
|
|
|
+ l.Line++
|
|
|
+ }
|
|
|
+ if (r == '\n') || (r == '\r') {
|
|
|
+ l.Col = 0
|
|
|
+ }
|
|
|
+ l.Col++
|
|
|
+ l.Index++
|
|
|
+ return r
|
|
|
+}
|
|
|
+
|
|
|
+type Lexer struct {
|
|
|
+ Location
|
|
|
+ Input []rune
|
|
|
+ Rules []LexerFunc
|
|
|
+}
|
|
|
+
|
|
|
+type LexerFuncs map[string]LexerFunc
|
|
|
+
|
|
|
+func (l *Lexer) Advance() rune {
|
|
|
+ return l.Location.Advance(l.Input)
|
|
|
+}
|
|
|
+
|
|
|
+func (l Lexer) IsEof() bool {
|
|
|
+ return l.Index >= len(l.Input)
|
|
|
+}
|
|
|
+
|
|
|
+func (l Lexer) Peek() rune {
|
|
|
+ if l.IsEof() {
|
|
|
+ return -1
|
|
|
+ }
|
|
|
+ return l.Input[l.Location.Index]
|
|
|
+}
|
|
|
+
|
|
|
+// LexerFunc is a Lexer function.
|
|
|
+// It Lexes the input buffer starting from lex.Index, which must be
|
|
|
+// guaranteed by the caller to be non negative.
|
|
|
+// The lexerfunc must make progress if it parses a token or returns an error.
|
|
|
+// It should NOT make progress if it does not match.
|
|
|
+// It should return as follows:
|
|
|
+// * If the Lexer function matched what it is intended to lex
|
|
|
+// it should return the lexed token(s), nil, and lex.Start points to the start
|
|
|
+// of the token, and lex.Index should be moved to point right after the lexed
|
|
|
+// part of the string. To indicate that the lexer should skip the parsed token,
|
|
|
+// set it's token kind to TokenKindSkip
|
|
|
+// * If the Lexer function did not match what it is intended to lex
|
|
|
+// it should return nil, nil, and lex.Index should be unchanged.
|
|
|
+// * If the Lexer function did match what it is intended to lex
|
|
|
+// but there is a Lexer error, it should return empty slice, error slice,
|
|
|
+// and l.Index should be set to the error's location.
|
|
|
+type LexerFunc func(lex *Lexer) ([]Token, []error)
|
|
|
+
|
|
|
+var Debug = false
|
|
|
+
|
|
|
+func debug(msg string) {
|
|
|
+ if Debug {
|
|
|
+ print(msg)
|
|
|
+ }
|
|
|
+}
|
|
|
+
|
|
|
+func (l Lexer) WhileOk(ok func(r rune, l Lexer) bool) (string, error) {
|
|
|
+ l.Start = l.Index
|
|
|
+ now := 0
|
|
|
+ for !l.IsEof() {
|
|
|
+ r := l.Advance()
|
|
|
+ if !ok(r, l) {
|
|
|
+ if now == 0 {
|
|
|
+ return "", nil
|
|
|
+ }
|
|
|
+ return string(l.Input[l.Start:l.Index]), nil
|
|
|
+ }
|
|
|
+ now++
|
|
|
+ }
|
|
|
+ return "", fmt.Errorf("unexpected EOF: >" + string(l.Input[l.Start:l.Index]) + "<")
|
|
|
+}
|
|
|
+
|
|
|
+func NewLexerFromString(input, name string, funcs []LexerFunc) *Lexer {
|
|
|
+ loc := Location{}
|
|
|
+ loc.Name = name
|
|
|
+ return &Lexer{loc, []rune(input), funcs}
|
|
|
+}
|
|
|
+
|
|
|
+func (l *Lexer) Lex() (result []Token, rerr []error) {
|
|
|
+ defer func() {
|
|
|
+ val := recover()
|
|
|
+ err, ok := val.(error)
|
|
|
+ if ok {
|
|
|
+ rerr = append(rerr, err)
|
|
|
+ }
|
|
|
+ }()
|
|
|
+ return l.lex()
|
|
|
+}
|
|
|
+
|
|
|
+func (l *Lexer) lex() (result []Token, rerr []error) {
|
|
|
+ for !l.IsEof() {
|
|
|
+ for _, lf := range l.Rules {
|
|
|
+ tokens, errs := lf(l)
|
|
|
+ if len(errs) > 0 {
|
|
|
+ rerr = append(rerr, errs...)
|
|
|
+ // skip until next whitespace
|
|
|
+ for !l.IsEof() {
|
|
|
+ r2 := l.Advance()
|
|
|
+ if unicode.IsSpace(r2) {
|
|
|
+ break
|
|
|
+ }
|
|
|
+ }
|
|
|
+ }
|
|
|
+ for _, token := range tokens {
|
|
|
+ if token.TokenKind != TokenKindSkip {
|
|
|
+ result = append(result, token)
|
|
|
+ }
|
|
|
+ }
|
|
|
+ // no progress made, indicates fatal lex error
|
|
|
+ if l.Index == l.Start {
|
|
|
+ err := fmt.Errorf("Lex error: %s", l.Location)
|
|
|
+ rerr = append(rerr, err)
|
|
|
+ return result, rerr
|
|
|
+ }
|
|
|
+ // advance start based on progress
|
|
|
+ l.Start = l.Index
|
|
|
+ }
|
|
|
+ }
|
|
|
+ return result, rerr
|
|
|
+}
|
|
|
+
|
|
|
+func (l *Lexer) Tokenize(kind TokenKind) (toks []Token, errs []error) {
|
|
|
+ if l.Index == l.Start {
|
|
|
+ err := fmt.Errorf("Lex error: %s, expected %c", l.Location, kind)
|
|
|
+ errs = append(errs, err)
|
|
|
+ return toks, errs
|
|
|
+ }
|
|
|
+ str := string(l.Input[l.Start:l.Index])
|
|
|
+ return []Token{Token{l.Location, kind, str}}, errs
|
|
|
+}
|
|
|
+
|
|
|
+func (l *Lexer) LexContains(chars string, kind TokenKind) (toks []Token, errs []error) {
|
|
|
+ r := l.Peek()
|
|
|
+ if !strings.ContainsRune(chars, r) {
|
|
|
+ return toks, errs
|
|
|
+ }
|
|
|
+
|
|
|
+ for !l.IsEof() {
|
|
|
+ if strings.ContainsRune(chars, r) {
|
|
|
+ l.Advance()
|
|
|
+ } else {
|
|
|
+ return l.Tokenize(kind)
|
|
|
+ }
|
|
|
+ }
|
|
|
+ return l.Tokenize(kind)
|
|
|
+}
|
|
|
+
|
|
|
+/*
|
|
|
+func LexerRs(input []rune, index *int) (Token, error) {
|
|
|
+ debug("LexerRs")
|
|
|
+ SkipWs(input, index)
|
|
|
+ return LexerWhileRuneOk(input, index, func(r rune) bool {
|
|
|
+ return r == '\n' || r == '\r' || r == ';'
|
|
|
+ })
|
|
|
+}
|
|
|
+
|
|
|
+func LexerWs(input []rune, index *int) (Token, error) {
|
|
|
+ debug("LexerWs")
|
|
|
+ return LexerWhileRuneOk(input, index, func(r rune) bool {
|
|
|
+ return r == ' ' || r == '\t'
|
|
|
+ })
|
|
|
+}
|
|
|
+
|
|
|
+func LexerWsRs(input []rune, index *int) (Token, error) {
|
|
|
+ debug("LexerRs")
|
|
|
+ SkipWs(input, index)
|
|
|
+ return LexerWhileRuneOk(input, index, func(r rune) bool {
|
|
|
+ return r == '\n' || r == '\r' || r == ';' || r == ' ' || r == '\t'
|
|
|
+ })
|
|
|
+}
|
|
|
+
|
|
|
+func SkipWs(input []rune, index *int) {
|
|
|
+ LexerWs(input, index)
|
|
|
+}
|
|
|
+
|
|
|
+func SkipRs(input []rune, index *int) {
|
|
|
+ LexerRs(input, index)
|
|
|
+}
|
|
|
+
|
|
|
+func SkipWsRs(input []rune, index *int) {
|
|
|
+ LexerWsRs(input, index)
|
|
|
+}
|
|
|
+
|
|
|
+func LexerComment(input []rune, index *int) (Token, error) {
|
|
|
+ debug("LexerComment")
|
|
|
+ start := *index
|
|
|
+ if !RequireRune(input, index, '#') {
|
|
|
+ return nil, nil
|
|
|
+ }
|
|
|
+ for ; *index < len(input); *index++ {
|
|
|
+ r := input[*index]
|
|
|
+ if r == '\n' || r == '\r' {
|
|
|
+ end := *index
|
|
|
+ return Comment(string(input[start:end])), nil
|
|
|
+ }
|
|
|
+ }
|
|
|
+ return nil, ErrorFromString("unexpected EOF in comment")
|
|
|
+}
|
|
|
+
|
|
|
+func LexerStatement(input []rune, index *int) (Token, error) {
|
|
|
+ debug("LexerStatement")
|
|
|
+ SkipWs(input, index)
|
|
|
+ return LexerAlternative(input, index, LexerCommand, LexerBlock, LexerComment)
|
|
|
+}
|
|
|
+
|
|
|
+func LexerParameters(input []rune, index *int) (Token, error) {
|
|
|
+ debug("LexerParameters")
|
|
|
+ params := List{}
|
|
|
+ for {
|
|
|
+ sep, err := LexerWs(input, index)
|
|
|
+ if err != nil {
|
|
|
+ return nil, err
|
|
|
+ }
|
|
|
+ if sep == nil {
|
|
|
+ return params, nil
|
|
|
+ }
|
|
|
+ val, err := LexerParameter(input, index)
|
|
|
+ if err != nil {
|
|
|
+ return nil, err
|
|
|
+ }
|
|
|
+ if val == nil {
|
|
|
+ return params, nil
|
|
|
+ }
|
|
|
+ params = append(params, val)
|
|
|
+ }
|
|
|
+}
|
|
|
+
|
|
|
+func LexerParameter(input []rune, index *int) (Token, error) {
|
|
|
+ debug("LexerParameter")
|
|
|
+ funcs := []LexerFunc{LexerLiteral, LexerEvaluation, LexerBlock, LexerGetter}
|
|
|
+ return LexerAlternative(input, index, funcs...)
|
|
|
+}
|
|
|
+
|
|
|
+func LexerOrder(input []rune, index *int) (Token, error) {
|
|
|
+ debug("LexerOrder")
|
|
|
+ return LexerAlternative(input, index, LexerLiteral, LexerEvaluation)
|
|
|
+}
|
|
|
+
|
|
|
+func LexerCommand(input []rune, index *int) (Token, error) {
|
|
|
+ debug("LexerCommand")
|
|
|
+ order, err := LexerOrder(input, index)
|
|
|
+ if err != nil || order == nil {
|
|
|
+ return order, err
|
|
|
+ }
|
|
|
+ params, err := LexerParameters(input, index)
|
|
|
+ if err != nil {
|
|
|
+ return params, err
|
|
|
+ }
|
|
|
+ if params == nil {
|
|
|
+ params = List{}
|
|
|
+ }
|
|
|
+ return Command{order, params.(List)}, nil
|
|
|
+}
|
|
|
+
|
|
|
+// RequireRune requires a single rune to be present,
|
|
|
+// and skips it, however that rune is discared.
|
|
|
+// Returns true if the rune was found, false if not
|
|
|
+func RequireRune(input []rune, index *int, req rune) bool {
|
|
|
+ if input[*index] == req {
|
|
|
+ *index++
|
|
|
+ return true
|
|
|
+ }
|
|
|
+ return false
|
|
|
+}
|
|
|
+
|
|
|
+func LexerEvaluation(input []rune, index *int) (Token, error) {
|
|
|
+ debug("LexerEvaluation")
|
|
|
+ if !RequireRune(input, index, '[') {
|
|
|
+ return nil, nil
|
|
|
+ }
|
|
|
+ res, err := LexerCommand(input, index)
|
|
|
+ if err != nil {
|
|
|
+ return nil, err
|
|
|
+ }
|
|
|
+ if !RequireRune(input, index, ']') {
|
|
|
+ print(input[*index])
|
|
|
+ return nil, ErrorFromString("Expected end of evaluation ]")
|
|
|
+ }
|
|
|
+ if res != nil {
|
|
|
+ res = Evaluation{Command: res.(Command)}
|
|
|
+ }
|
|
|
+ return res, nil
|
|
|
+}
|
|
|
+
|
|
|
+func LexerBlock(input []rune, index *int) (Token, error) {
|
|
|
+ debug("LexerBlock")
|
|
|
+ if !RequireRune(input, index, '{') {
|
|
|
+ return nil, nil
|
|
|
+ }
|
|
|
+ res, err := LexerStatements(input, index)
|
|
|
+ if err != nil {
|
|
|
+ return nil, err
|
|
|
+ }
|
|
|
+ SkipWsRs(input, index)
|
|
|
+ if !RequireRune(input, index, '}') {
|
|
|
+ return nil, ErrorFromString("Expected end of block }")
|
|
|
+ }
|
|
|
+ return Block{Statements: res.(List)}, nil
|
|
|
+ return nil, nil
|
|
|
+}
|
|
|
+
|
|
|
+func LexerGetter(input []rune, index *int) (Token, error) {
|
|
|
+ debug("LexerGetter")
|
|
|
+ if RequireRune(input, index, '$') {
|
|
|
+ if input[*index] == '$' { // recusively Lexer double getters
|
|
|
+ val, err := LexerGetter(input, index)
|
|
|
+ if err == nil { // Getter with a getter inside.
|
|
|
+ return Getter{val}, err
|
|
|
+ } else {
|
|
|
+ return nil, err
|
|
|
+ }
|
|
|
+ } else { // integer, sring or getter name
|
|
|
+ key, err := LexerLiteral(input, index)
|
|
|
+ if key == nil {
|
|
|
+ return nil, ErrorFromString("Expected literal after getter $")
|
|
|
+ }
|
|
|
+ if err == nil {
|
|
|
+ return Getter{key}, nil
|
|
|
+ }
|
|
|
+ return nil, err
|
|
|
+ }
|
|
|
+ }
|
|
|
+ return nil, nil
|
|
|
+}
|
|
|
+
|
|
|
+func LexerLiteral(input []rune, index *int) (Token, error) {
|
|
|
+ debug("LexerLiteral")
|
|
|
+ return LexerAlternative(input, index, LexerWord, LexerString, LexerInteger,
|
|
|
+ LexerRawString)
|
|
|
+}
|
|
|
+
|
|
|
+func IsLetter(r rune) bool {
|
|
|
+ return (r >= 'a' && r <= 'z') || (r >= 'A' && r <= 'Z') || (r > rune(128)) ||
|
|
|
+ r == '_' || r == '/'
|
|
|
+}
|
|
|
+
|
|
|
+func IsNumber(r rune) bool {
|
|
|
+ return (r >= '0' && r <= '9')
|
|
|
+}
|
|
|
+
|
|
|
+func LexerWord(input []rune, index *int) (Token, error) {
|
|
|
+ debug("LexerWord")
|
|
|
+ // a word consists of an ascii letter or non asci characters, or underscore
|
|
|
+ // followed by an ascii letter or number, or non ascii characters, or underscore
|
|
|
+ start := *index
|
|
|
+ r := input[*index]
|
|
|
+ if !IsLetter(r) {
|
|
|
+ return nil, nil
|
|
|
+ }
|
|
|
+ for *index++; *index < len(input); *index++ {
|
|
|
+ r := input[*index]
|
|
|
+ if !(IsLetter(r) || IsNumber(r)) {
|
|
|
+ return Word(string(input[start:*index])), nil
|
|
|
+ }
|
|
|
+ }
|
|
|
+ return nil, ErrorFromString("unexpected EOF in string")
|
|
|
+}
|
|
|
+
|
|
|
+func next(input []rune, index *int) {
|
|
|
+ *index++
|
|
|
+ if *index >= len(input) {
|
|
|
+ panic(ErrorFromString("Unexpected end of input."))
|
|
|
+ }
|
|
|
+}
|
|
|
+
|
|
|
+func LexerEscape(input []rune, index *int) (Token, error) {
|
|
|
+ res := ""
|
|
|
+ if input[*index] != '\\' {
|
|
|
+ return nil, nil
|
|
|
+ }
|
|
|
+ next(input, index)
|
|
|
+ switch input[*index] {
|
|
|
+ case 'a':
|
|
|
+ res += "\a"
|
|
|
+ case 'b':
|
|
|
+ res += "\b"
|
|
|
+ case 'e':
|
|
|
+ res += "\033"
|
|
|
+ case 'f':
|
|
|
+ res += "\f"
|
|
|
+ case 'n':
|
|
|
+ res += "\n"
|
|
|
+ case 'r':
|
|
|
+ res += "\r"
|
|
|
+ case 't':
|
|
|
+ res += "\t"
|
|
|
+ case '\\':
|
|
|
+ res += "\\"
|
|
|
+ case '"':
|
|
|
+ res += "\""
|
|
|
+ default:
|
|
|
+ return nil, ErrorFromString("Unknown escape sequence character")
|
|
|
+ }
|
|
|
+
|
|
|
+ return String(res), nil
|
|
|
+}
|
|
|
+
|
|
|
+func LexerString(input []rune, index *int) (Token, error) {
|
|
|
+ debug("LexerString")
|
|
|
+ res := ""
|
|
|
+ ch := input[*index]
|
|
|
+ if ch != '"' {
|
|
|
+ return nil, nil
|
|
|
+ }
|
|
|
+ *index++
|
|
|
+ for *index < len(input) {
|
|
|
+ ch = input[*index]
|
|
|
+ esc, err := LexerEscape(input, index)
|
|
|
+ if err != nil {
|
|
|
+ return nil, err
|
|
|
+ }
|
|
|
+ if esc != nil {
|
|
|
+ res += string(esc.(String))
|
|
|
+ } else if ch == '"' {
|
|
|
+ *index++
|
|
|
+ return String(res), nil
|
|
|
+ } else {
|
|
|
+ res += string(ch)
|
|
|
+ }
|
|
|
+ *index++
|
|
|
+ }
|
|
|
+ return nil, ErrorFromString("Unexpected end of input.")
|
|
|
+}
|
|
|
+
|
|
|
+func LexerRawString(input []rune, index *int) (Token, error) {
|
|
|
+ debug("LexerRawString")
|
|
|
+ res := ""
|
|
|
+ ch := input[*index]
|
|
|
+ if ch != '`' {
|
|
|
+ return nil, nil
|
|
|
+ }
|
|
|
+ *index++
|
|
|
+ for *index < len(input) {
|
|
|
+ ch = input[*index]
|
|
|
+ if ch == '`' {
|
|
|
+ *index++
|
|
|
+ return String(res), nil
|
|
|
+ } else {
|
|
|
+ res += string(ch)
|
|
|
+ }
|
|
|
+ *index++
|
|
|
+ }
|
|
|
+ return nil, ErrorFromString("Unexpected end of input.")
|
|
|
+}
|
|
|
+
|
|
|
+func LexerInteger(input []rune, index *int) (Token, error) {
|
|
|
+ debug("LexerInteger")
|
|
|
+ ch := input[*index]
|
|
|
+ neg := 1
|
|
|
+ res := 0
|
|
|
+ if ch == '-' {
|
|
|
+ neg = -1
|
|
|
+ } else if ch == '+' {
|
|
|
+ // do nothing, ignore + as an integer prefix
|
|
|
+ } else {
|
|
|
+ res = int(ch - '0')
|
|
|
+ if res < 0 || res > 9 { // Not a digit, no integer
|
|
|
+ return nil, nil
|
|
|
+ }
|
|
|
+ }
|
|
|
+ *index++
|
|
|
+ for *index < len(input) {
|
|
|
+ ch = input[*index]
|
|
|
+ ch -= '0'
|
|
|
+ if ch < 0 || ch > 9 { // Not a digit, finished
|
|
|
+ return Int(neg * res), nil
|
|
|
+ }
|
|
|
+ res = res * 10
|
|
|
+ res = res + int(ch)
|
|
|
+ *index++
|
|
|
+ }
|
|
|
+ return nil, ErrorFromString("unexpected EOF in number")
|
|
|
+}
|
|
|
+*/
|