123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566 |
- package main
- import (
- "fmt"
- "strings"
- "unicode"
- )
- // Ucalgary stntax:
- //
- // Grammar Structure
- //
- // The format for a context free grammar is as follows:
- //
- // Any reference to "punctuation" when describing legal identifiers is restricted
- // to certain pieces of punctuation. In no particular order, here are the
- // punctuation characters that are legal: ~ ! @ # * ( ) _ + ' ; : / ?
- //
- // Terminals are strings which either start with a lowercase letter, a number,
- // or a piece of punctuation, followed by any amount of letters, numbers and
- // punctuation. i.e. a terminal matches the regular expression
- // [{punctuation}a-z0-9][{punctuation}a-zA-Z0-9]*. Examples are id + begin end
- // plus 9 8' 7+? '' ....
- //
- // Nonterminals start with uppercase letters, followed by any number of legal
- // characters. i.e. a nonterminal matches the regular expression
- // [A-Z][{punctuation}a-zA-Z0-9]*. Examples are S A EXPR TERM~3 Stop ....
- //
- // It is assumed that the start symbol is the first nonterminal whose productions
- // are given.
- //
- // The productions associated with a nonterminal are indicated as
- // head -> RHS1 | RHS2 .... | RHSn.
- // where the alternative righthand sides of the productions are separated by
- // a slash, "|", and terminated by a period, ".".
- //
- // The RHS of a production is a sequence of terminals or nonterminals separated
- // by spaces. For example: Expr add Expr.
- //
- // C-style comments are allowed, e.g.
- // /* this is a comment possibly spanning many lines */
- //
- // Empty grammars
- // The grammar epsilon or ε IS EMPTY.
- // For any nonterminal epsilon is a valid grammar.
- // This represents a grammar with no productions.
- //
- // Contrary to this, ll1 only allows the _ punctuation in identifiers.
- //
- // Location of the lexer or of a token
- type Location struct {
- //Name is the name of the file or input.
- Name string
- //Current index in the input buffer of the token, or of the lex step.
- Index int
- //Start index in the input buffer at which the token begins.
- Start int
- //Line the lexer is at or on which a token begins.
- Line int
- //Col is the column in the line on which the token begins.
- Col int
- //Index in the input buffer from which the token begins.
- }
- func (l Location) String() string {
- return fmt.Sprintf("%s:%d:%d", l.Name, l.Line, l.Col)
- }
- type TokenKind rune
- const TokenKindSkip = TokenKind(-2)
- type Token struct {
- Location
- TokenKind
- Text string
- }
- // Advance moves to the next rune in input location, updating the
- // location's Index, Line and Col.
- // Returns the rune found, or -1 if the end of the buffer has been reached.
- func (l *Location) Advance(input []rune) rune {
- if l.Index >= len(input) {
- return -1
- }
- r := input[l.Index]
- if (r == '\n' && l.Index > 0 && input[l.Index] != '\r') ||
- (r == '\r') {
- l.Line++
- }
- if (r == '\n') || (r == '\r') {
- l.Col = 0
- }
- l.Col++
- l.Index++
- return r
- }
- type Lexer struct {
- Location
- Input []rune
- Rules []LexerFunc
- }
- type LexerFuncs map[string]LexerFunc
- func (l *Lexer) Advance() rune {
- return l.Location.Advance(l.Input)
- }
- func (l Lexer) IsEof() bool {
- return l.Index >= len(l.Input)
- }
- func (l Lexer) Peek() rune {
- if l.IsEof() {
- return -1
- }
- return l.Input[l.Location.Index]
- }
- // LexerFunc is a Lexer function.
- // It Lexes the input buffer starting from lex.Index, which must be
- // guaranteed by the caller to be non negative.
- // The lexerfunc must make progress if it parses a token or returns an error.
- // It should NOT make progress if it does not match.
- // It should return as follows:
- // * If the Lexer function matched what it is intended to lex
- // it should return the lexed token(s), nil, and lex.Start points to the start
- // of the token, and lex.Index should be moved to point right after the lexed
- // part of the string. To indicate that the lexer should skip the parsed token,
- // set it's token kind to TokenKindSkip
- // * If the Lexer function did not match what it is intended to lex
- // it should return nil, nil, and lex.Index should be unchanged.
- // * If the Lexer function did match what it is intended to lex
- // but there is a Lexer error, it should return empty slice, error slice,
- // and l.Index should be set to the error's location.
- type LexerFunc func(lex *Lexer) ([]Token, []error)
- var Debug = false
- func debug(msg string) {
- if Debug {
- print(msg)
- }
- }
- func (l Lexer) WhileOk(ok func(r rune, l Lexer) bool) (string, error) {
- l.Start = l.Index
- now := 0
- for !l.IsEof() {
- r := l.Advance()
- if !ok(r, l) {
- if now == 0 {
- return "", nil
- }
- return string(l.Input[l.Start:l.Index]), nil
- }
- now++
- }
- return "", fmt.Errorf("unexpected EOF: >" + string(l.Input[l.Start:l.Index]) + "<")
- }
- func NewLexerFromString(input, name string, funcs []LexerFunc) *Lexer {
- loc := Location{}
- loc.Name = name
- return &Lexer{loc, []rune(input), funcs}
- }
- func (l *Lexer) Lex() (result []Token, rerr []error) {
- defer func() {
- val := recover()
- err, ok := val.(error)
- if ok {
- rerr = append(rerr, err)
- }
- }()
- return l.lex()
- }
- func (l *Lexer) lex() (result []Token, rerr []error) {
- for !l.IsEof() {
- for _, lf := range l.Rules {
- tokens, errs := lf(l)
- if len(errs) > 0 {
- rerr = append(rerr, errs...)
- // skip until next whitespace
- for !l.IsEof() {
- r2 := l.Advance()
- if unicode.IsSpace(r2) {
- break
- }
- }
- }
- for _, token := range tokens {
- if token.TokenKind != TokenKindSkip {
- result = append(result, token)
- }
- }
- // no progress made, indicates fatal lex error
- if l.Index == l.Start {
- err := fmt.Errorf("Lex error: %s", l.Location)
- rerr = append(rerr, err)
- return result, rerr
- }
- // advance start based on progress
- l.Start = l.Index
- }
- }
- return result, rerr
- }
- func (l *Lexer) Tokenize(kind TokenKind) (toks []Token, errs []error) {
- if l.Index == l.Start {
- err := fmt.Errorf("Lex error: %s, expected %c", l.Location, kind)
- errs = append(errs, err)
- return toks, errs
- }
- str := string(l.Input[l.Start:l.Index])
- return []Token{Token{l.Location, kind, str}}, errs
- }
- func (l *Lexer) LexContains(chars string, kind TokenKind) (toks []Token, errs []error) {
- r := l.Peek()
- if !strings.ContainsRune(chars, r) {
- return toks, errs
- }
- for !l.IsEof() {
- if strings.ContainsRune(chars, r) {
- l.Advance()
- } else {
- return l.Tokenize(kind)
- }
- }
- return l.Tokenize(kind)
- }
- /*
- func LexerRs(input []rune, index *int) (Token, error) {
- debug("LexerRs")
- SkipWs(input, index)
- return LexerWhileRuneOk(input, index, func(r rune) bool {
- return r == '\n' || r == '\r' || r == ';'
- })
- }
- func LexerWs(input []rune, index *int) (Token, error) {
- debug("LexerWs")
- return LexerWhileRuneOk(input, index, func(r rune) bool {
- return r == ' ' || r == '\t'
- })
- }
- func LexerWsRs(input []rune, index *int) (Token, error) {
- debug("LexerRs")
- SkipWs(input, index)
- return LexerWhileRuneOk(input, index, func(r rune) bool {
- return r == '\n' || r == '\r' || r == ';' || r == ' ' || r == '\t'
- })
- }
- func SkipWs(input []rune, index *int) {
- LexerWs(input, index)
- }
- func SkipRs(input []rune, index *int) {
- LexerRs(input, index)
- }
- func SkipWsRs(input []rune, index *int) {
- LexerWsRs(input, index)
- }
- func LexerComment(input []rune, index *int) (Token, error) {
- debug("LexerComment")
- start := *index
- if !RequireRune(input, index, '#') {
- return nil, nil
- }
- for ; *index < len(input); *index++ {
- r := input[*index]
- if r == '\n' || r == '\r' {
- end := *index
- return Comment(string(input[start:end])), nil
- }
- }
- return nil, ErrorFromString("unexpected EOF in comment")
- }
- func LexerStatement(input []rune, index *int) (Token, error) {
- debug("LexerStatement")
- SkipWs(input, index)
- return LexerAlternative(input, index, LexerCommand, LexerBlock, LexerComment)
- }
- func LexerParameters(input []rune, index *int) (Token, error) {
- debug("LexerParameters")
- params := List{}
- for {
- sep, err := LexerWs(input, index)
- if err != nil {
- return nil, err
- }
- if sep == nil {
- return params, nil
- }
- val, err := LexerParameter(input, index)
- if err != nil {
- return nil, err
- }
- if val == nil {
- return params, nil
- }
- params = append(params, val)
- }
- }
- func LexerParameter(input []rune, index *int) (Token, error) {
- debug("LexerParameter")
- funcs := []LexerFunc{LexerLiteral, LexerEvaluation, LexerBlock, LexerGetter}
- return LexerAlternative(input, index, funcs...)
- }
- func LexerOrder(input []rune, index *int) (Token, error) {
- debug("LexerOrder")
- return LexerAlternative(input, index, LexerLiteral, LexerEvaluation)
- }
- func LexerCommand(input []rune, index *int) (Token, error) {
- debug("LexerCommand")
- order, err := LexerOrder(input, index)
- if err != nil || order == nil {
- return order, err
- }
- params, err := LexerParameters(input, index)
- if err != nil {
- return params, err
- }
- if params == nil {
- params = List{}
- }
- return Command{order, params.(List)}, nil
- }
- // RequireRune requires a single rune to be present,
- // and skips it, however that rune is discared.
- // Returns true if the rune was found, false if not
- func RequireRune(input []rune, index *int, req rune) bool {
- if input[*index] == req {
- *index++
- return true
- }
- return false
- }
- func LexerEvaluation(input []rune, index *int) (Token, error) {
- debug("LexerEvaluation")
- if !RequireRune(input, index, '[') {
- return nil, nil
- }
- res, err := LexerCommand(input, index)
- if err != nil {
- return nil, err
- }
- if !RequireRune(input, index, ']') {
- print(input[*index])
- return nil, ErrorFromString("Expected end of evaluation ]")
- }
- if res != nil {
- res = Evaluation{Command: res.(Command)}
- }
- return res, nil
- }
- func LexerBlock(input []rune, index *int) (Token, error) {
- debug("LexerBlock")
- if !RequireRune(input, index, '{') {
- return nil, nil
- }
- res, err := LexerStatements(input, index)
- if err != nil {
- return nil, err
- }
- SkipWsRs(input, index)
- if !RequireRune(input, index, '}') {
- return nil, ErrorFromString("Expected end of block }")
- }
- return Block{Statements: res.(List)}, nil
- return nil, nil
- }
- func LexerGetter(input []rune, index *int) (Token, error) {
- debug("LexerGetter")
- if RequireRune(input, index, '$') {
- if input[*index] == '$' { // recusively Lexer double getters
- val, err := LexerGetter(input, index)
- if err == nil { // Getter with a getter inside.
- return Getter{val}, err
- } else {
- return nil, err
- }
- } else { // integer, sring or getter name
- key, err := LexerLiteral(input, index)
- if key == nil {
- return nil, ErrorFromString("Expected literal after getter $")
- }
- if err == nil {
- return Getter{key}, nil
- }
- return nil, err
- }
- }
- return nil, nil
- }
- func LexerLiteral(input []rune, index *int) (Token, error) {
- debug("LexerLiteral")
- return LexerAlternative(input, index, LexerWord, LexerString, LexerInteger,
- LexerRawString)
- }
- func IsLetter(r rune) bool {
- return (r >= 'a' && r <= 'z') || (r >= 'A' && r <= 'Z') || (r > rune(128)) ||
- r == '_' || r == '/'
- }
- func IsNumber(r rune) bool {
- return (r >= '0' && r <= '9')
- }
- func LexerWord(input []rune, index *int) (Token, error) {
- debug("LexerWord")
- // a word consists of an ascii letter or non asci characters, or underscore
- // followed by an ascii letter or number, or non ascii characters, or underscore
- start := *index
- r := input[*index]
- if !IsLetter(r) {
- return nil, nil
- }
- for *index++; *index < len(input); *index++ {
- r := input[*index]
- if !(IsLetter(r) || IsNumber(r)) {
- return Word(string(input[start:*index])), nil
- }
- }
- return nil, ErrorFromString("unexpected EOF in string")
- }
- func next(input []rune, index *int) {
- *index++
- if *index >= len(input) {
- panic(ErrorFromString("Unexpected end of input."))
- }
- }
- func LexerEscape(input []rune, index *int) (Token, error) {
- res := ""
- if input[*index] != '\\' {
- return nil, nil
- }
- next(input, index)
- switch input[*index] {
- case 'a':
- res += "\a"
- case 'b':
- res += "\b"
- case 'e':
- res += "\033"
- case 'f':
- res += "\f"
- case 'n':
- res += "\n"
- case 'r':
- res += "\r"
- case 't':
- res += "\t"
- case '\\':
- res += "\\"
- case '"':
- res += "\""
- default:
- return nil, ErrorFromString("Unknown escape sequence character")
- }
- return String(res), nil
- }
- func LexerString(input []rune, index *int) (Token, error) {
- debug("LexerString")
- res := ""
- ch := input[*index]
- if ch != '"' {
- return nil, nil
- }
- *index++
- for *index < len(input) {
- ch = input[*index]
- esc, err := LexerEscape(input, index)
- if err != nil {
- return nil, err
- }
- if esc != nil {
- res += string(esc.(String))
- } else if ch == '"' {
- *index++
- return String(res), nil
- } else {
- res += string(ch)
- }
- *index++
- }
- return nil, ErrorFromString("Unexpected end of input.")
- }
- func LexerRawString(input []rune, index *int) (Token, error) {
- debug("LexerRawString")
- res := ""
- ch := input[*index]
- if ch != '`' {
- return nil, nil
- }
- *index++
- for *index < len(input) {
- ch = input[*index]
- if ch == '`' {
- *index++
- return String(res), nil
- } else {
- res += string(ch)
- }
- *index++
- }
- return nil, ErrorFromString("Unexpected end of input.")
- }
- func LexerInteger(input []rune, index *int) (Token, error) {
- debug("LexerInteger")
- ch := input[*index]
- neg := 1
- res := 0
- if ch == '-' {
- neg = -1
- } else if ch == '+' {
- // do nothing, ignore + as an integer prefix
- } else {
- res = int(ch - '0')
- if res < 0 || res > 9 { // Not a digit, no integer
- return nil, nil
- }
- }
- *index++
- for *index < len(input) {
- ch = input[*index]
- ch -= '0'
- if ch < 0 || ch > 9 { // Not a digit, finished
- return Int(neg * res), nil
- }
- res = res * 10
- res = res + int(ch)
- *index++
- }
- return nil, ErrorFromString("unexpected EOF in number")
- }
- */
|