package muesli import ( "bufio" _ "bytes" _ "errors" "fmt" "io" _ "io" "os" _ "reflect" _ "runtime" "strconv" "strings" "unicode" _ "unicode" // "gitlab.com/beoran/woe/graphviz" // _ "gitlab.com/beoran/woe/monolog" ) /* A Lexer splits scanned input into tokens. */ type Lexer struct { Position Index int Start int io.RuneScanner buffer []rune Current rune Keywords map[string]*Keyword LoggerWrapper } func (lexer *Lexer) SetLogger(logger Logger) { lexer.LoggerWrapper = LoggerWrapper{logger} } func (lexer *Lexer) ClearBuffer() { lexer.buffer = make([]rune, 0) } func (lexer *Lexer) MakeIntegerToken() Token { var sbuffer = string(lexer.buffer) i, err := strconv.ParseInt(sbuffer, 0, 64) if err == nil { lexer.ClearBuffer() return NewToken(TokenKindInteger, IntValue(i), lexer.Position) } else { lexer.ClearBuffer() return lexer.MakeErrorToken(err) } } func (lexer *Lexer) MakeTokenFromKeyword(kw * Keyword) Token { lexer.ClearBuffer() return NewToken(kw.TokenKind, kw.Value, lexer.Position) } func (lexer *Lexer) MakeFloatToken() Token { var sbuffer = string(lexer.buffer) f, err := strconv.ParseFloat(sbuffer, 64) if err == nil { lexer.ClearBuffer() return NewToken(TokenKindFloat, FloatValue(f), lexer.Position) } else { lexer.ClearBuffer() return lexer.MakeErrorToken(err) } } func (lexer *Lexer) MakeBooleanToken(b bool) Token { lexer.ClearBuffer() if b { return NewToken(TokenKindBoolean, TrueValue, lexer.Position) } else { return NewToken(TokenKindBoolean, FalseValue, lexer.Position) } } func (lexer *Lexer) MakeNilToken() Token { lexer.ClearBuffer() return NewToken(TokenKindNil, NilValue, lexer.Position) } func (lexer *Lexer) MakeBuiltinToken() Token { var sbuffer = string(lexer.buffer) lexer.ClearBuffer() if sbuffer == "true" { lexer.ClearBuffer() return NewToken(TokenKindBoolean, TrueValue, lexer.Position) } else if sbuffer == "false" { return NewToken(TokenKindBoolean, FalseValue, lexer.Position) } else if sbuffer == "nil" { return NewToken(TokenKindNil, NilValue, lexer.Position) } else { return lexer.MakeErrorfToken("Not a builtin: %s", sbuffer) } } func (lexer *Lexer) MakeStringValueToken(kind TokenKind) Token { var sbuffer = string(lexer.buffer) return NewToken(kind, StringValue(sbuffer), lexer.Position) } func (lexer *Lexer) MakeTypeValueToken(kind TokenKind) Token { var sbuffer = string(lexer.buffer) return NewToken(kind, TypeValue(sbuffer), lexer.Position) } func (lexer *Lexer) MakeErrorValueToken(kind TokenKind) Token { var sbuffer = string(lexer.buffer) return NewToken(kind, NewErrorValuef("%s", sbuffer), lexer.Position) } func (lexer *Lexer) MakeWordValueToken(kind TokenKind) Token { var sbuffer = string(lexer.buffer) return NewToken(kind, WordValue(sbuffer), lexer.Position) } func (lexer *Lexer) MakeToken(kind TokenKind) Token { switch kind { case TokenKindInteger: return lexer.MakeIntegerToken() case TokenKindFloat: return lexer.MakeFloatToken() case TokenKindString: return lexer.MakeStringValueToken(kind) case TokenKindSymbol: return lexer.MakeWordValueToken(kind) case TokenKindType: return lexer.MakeTypeValueToken(kind) case TokenKindError: return lexer.MakeErrorValueToken(kind) case TokenKindWord: return lexer.MakeWordValueToken(kind) case TokenKindOperator: fallthrough case TokenKindSelector: return lexer.MakeWordValueToken(kind) case TokenKindNil: fallthrough case TokenKindBoolean: return lexer.MakeBuiltinToken() case TokenKindGet: fallthrough case TokenKindSet: fallthrough case TokenKindOpenBlock: fallthrough case TokenKindCloseBlock: fallthrough case TokenKindOpenList: fallthrough case TokenKindCloseList: fallthrough case TokenKindOpenParen: fallthrough case TokenKindCloseParen: fallthrough case TokenKindEOX: fallthrough case TokenKindEOF: val := StringValue(string(lexer.buffer)) lexer.ClearBuffer() return NewToken(kind, val, lexer.Position) default: return lexer.MakeErrorfToken("Internal error on token type %s", kind) } } func (lexer Lexer) MakeErrorToken(err error) Token { return NewToken(TokenKindError, ErrorValue{err}, lexer.Position) } func (lexer Lexer) MakeErrorfToken(format string, va ...interface{}) Token { err := fmt.Errorf(format, va...) return lexer.MakeErrorToken(err) } func (lexer Lexer) MakeEOFToken() Token { return NewToken(TokenKindEOF, &EmptyValue{}, lexer.Position) } func (lexer *Lexer) Peek() (rune, error) { r, _, err := lexer.RuneScanner.ReadRune() err2 := lexer.RuneScanner.UnreadRune() if err == nil { err = err2 } return r, err } /* Advances the lexer's position based on the rune r read. */ func (lexer *Lexer) advance(r rune) { lexer.Current = r lexer.Index++ lexer.Position.Column++ if r == '\n' { lexer.Position.Column = 1 lexer.Position.Line++ } } /* Append a rune to the lexer's buffer. */ func (lexer *Lexer) appendRune(r rune) { lexer.buffer = append(lexer.buffer, r) } /* Advances the lexer's input buffer but does not store the rune read, * but just returns it. */ func (lexer *Lexer) Skip() (rune, error) { r, _, err := lexer.RuneScanner.ReadRune() if err != nil { return 0, err } lexer.advance(r) return r, nil } /* Actually reads the next rune from the lexer's input source and stores * them in the lexer's token buffer. * Shorthand for r, err := lexer.Skip() ; lexer.appendRune(r) */ func (lexer *Lexer) Next() (rune, error) { r, err := lexer.Skip() if err == nil { lexer.appendRune(r) } return r, nil } func (lexer *Lexer) DoIf(predicate func(rune) bool, todo func(*Lexer) (rune, error)) (bool, error) { r, err := lexer.Peek() if err != nil { return false, err } if predicate(r) { r, err = todo(lexer) if err != nil { return true, err } return true, nil } return false, nil } func (lexer *Lexer) NextIf(predicate func(rune) bool) (bool, error) { return lexer.DoIf(predicate, (*Lexer).Next) } func (lexer *Lexer) SkipIf(predicate func(rune) bool) (bool, error) { return lexer.DoIf(predicate, (*Lexer).Skip) } func (lexer *Lexer) NextWhile(predicate func(rune) bool) (bool, error) { result := true ok, err := lexer.NextIf(predicate) result = result || ok for ; ok && (err == nil); ok, err = lexer.NextIf(predicate) { result = result || ok } return result, err } func (lexer *Lexer) SkipWhile(predicate func(rune) bool) (bool, error) { result := true ok, err := lexer.SkipIf(predicate) result = result || ok for ; ok && (err == nil); ok, err = lexer.SkipIf(predicate) { result = result || ok } return result, err } func isEOX(r rune) bool { return r == '\n' || r == '.' } func isSpace(r rune) bool { return r == ' ' || r == '\t' || r == '\v' || r == '\r' } func isSpaceOrEOX(r rune) bool { return r == ' ' || r == '\t' || r == '\v' || r == '\r' || r == '\n' || r == '.' } func isComment(r rune) bool { return r == '#' } func isOperator(r rune) bool { return r == '+' || r == '-' || r == '*' || r == '/' || r == '^' || r == '%' || r == '~' || r == '|' || r == '&' || r == '>' || r == '<' || r == '@' } func isSelector(r rune) bool { return r == ',' || r == ';' || r == '\'' } func (lexer *Lexer) SkipSpace() error { _, err := lexer.SkipWhile(isSpace) return err } func (lexer *Lexer) SkipBlockComment() error { var err error var r rune lexer.LogDebug("Skipping block comment.") for block := 1; block > 0 && err == nil; { _, err = lexer.Skip() if err != nil { return err } r, err = lexer.Peek() if r == '{' { block++ } else if r == '}' { block-- } lexer.LogDebug("Skipping block comment: %d", block) } _, err = lexer.Skip() return err } func (lexer *Lexer) SkipComment() error { r, err := lexer.Skip() lexer.LogDebug("Skipping %c.", r) if err != nil { return err } r, err = lexer.Peek() if r == '{' { return lexer.SkipBlockComment() } for r != '\n' && err == nil { lexer.LogDebug("Skipping comment %c.", r) _, err = lexer.Skip() if err != nil { return err } r, err = lexer.Peek() } if err != nil { return err } _, err = lexer.Skip() return err } /* Handles errors including EOF by either returning an error token or an * EOF token. */ func (lexer *Lexer) handleError(err error) Token { if err == io.EOF { return lexer.MakeEOFToken() } else { return lexer.MakeErrorToken(err) } } func (lexer *Lexer) LexOperator() Token { _, err := lexer.NextWhile(isOperator) if err != nil { return lexer.MakeErrorfToken("when parsing operator: %s", err) } return lexer.MakeToken(TokenKindOperator) } func (lexer *Lexer) LexSelector() Token { _, err := lexer.NextWhile(isSelector) if err != nil { return lexer.MakeErrorfToken("when parsing selector: %s", err) } _, err = lexer.NextWhile(unicode.IsLetter) if err != nil { return lexer.MakeErrorfToken("when parsing selector extension: %s", err) } return lexer.MakeToken(TokenKindSelector) } func (lexer *Lexer) LexNumber() Token { isFloat := false maybeOperator := false // skip any first - or + _, err := lexer.NextIf(func(r rune) bool { maybeOperator = (r == '-' || r == '+') // it might also be an operator in stead. return r == '-' || r == '+' }) if err != nil { return lexer.MakeErrorfToken("Error during parsing of number prefix: %s", err) } for { r, err := lexer.Peek() if err != nil { return lexer.MakeErrorfToken("Error during parsing of number: %s", err) } if unicode.IsDigit(r) { maybeOperator = false lexer.Next() } else if r == '.' { if isFloat { lexer.Next() return lexer.MakeErrorfToken("two points in floating point number") } else { isFloat = true lexer.Next() } } else if maybeOperator { return lexer.LexOperator() } else { break } } if isFloat { return lexer.MakeToken(TokenKindFloat) } else { return lexer.MakeToken(TokenKindInteger) } } func isDoubleQuote(r rune) bool { return r == '"' } func (lexer *Lexer) handleEscapeHexChars(amount int) error { buffer := make([]byte, 0) r, err := lexer.Skip() for index := 0; err == nil && index < amount; { if unicode.Is(unicode.ASCII_Hex_Digit, r) { buffer = append(buffer, byte(r)) } else { return fmt.Errorf("Not a hexadecimal digit: %c", r) } index++ if index < amount { r, err = lexer.Skip() } } if err != nil { return err } i, err := strconv.ParseInt(string(buffer), 16, 32) if err != nil { return err } lexer.appendRune(rune(i)) _, err = lexer.Peek() return err } func (lexer *Lexer) handleEscape() error { r, err := lexer.Skip() if err != nil { return err } switch r { case 'a': lexer.appendRune('\a') case 'b': lexer.appendRune('\b') case 'e': lexer.appendRune('\033') case 'f': lexer.appendRune('\f') case 'n': lexer.appendRune('\n') case 'r': lexer.appendRune('\r') case 't': lexer.appendRune('\t') case '\\': lexer.appendRune('\\') case '"': lexer.appendRune('"') // case 'o': fallthrough // No octals, for now. case 'x': err = lexer.handleEscapeHexChars(2) case 'u': err = lexer.handleEscapeHexChars(4) case 'U': err = lexer.handleEscapeHexChars(6) default: return fmt.Errorf("Unknown escape sequence character %c: %d", r, r) } return err } func (lexer *Lexer) LexString() Token { var err error var r rune _, err = lexer.Skip() // Skip first " if err != nil { return lexer.handleError(err) } r, err = lexer.Skip() for r != '"' && err == nil { if r == '\\' { err = lexer.handleEscape() if err != nil { return lexer.handleError(err) } } else { lexer.appendRune(r) // still inside the string } r, err = lexer.Skip() } if err != nil { return lexer.MakeErrorfToken("when parsing string: %s", err) } if err != nil { return lexer.handleError(err) } return lexer.MakeToken(TokenKindString) } func (lexer *Lexer) LexLongString() Token { var err error _, err = lexer.Skip() if err != nil { return lexer.handleError(err) } _, err = lexer.NextWhile(func(r rune) bool { return r != '`' }) if err != nil { return lexer.MakeErrorfToken("when parsing long string: %s", err) } _, err = lexer.Skip() if err != nil { return lexer.handleError(err) } return lexer.MakeToken(TokenKindString) } func (lexer *Lexer) LexWordOrType(kind TokenKind) Token { var err error first := true _, err = lexer.Next() if err != nil { return lexer.handleError(err) } _, err = lexer.NextWhile(func(r rune) bool { if first { first = false return unicode.IsLetter(r) || r == '_' } else { return unicode.IsLetter(r) || unicode.IsNumber(r) || r == '_' } }) if err != nil { return lexer.handleError(err) } sbuffer := string(lexer.buffer) // handle keywords if kw, ok := lexer.Keywords[sbuffer] ; ok { return lexer.MakeTokenFromKeyword(kw) } return lexer.MakeToken(kind) } func (lexer *Lexer) LexWord() Token { return lexer.LexWordOrType(TokenKindWord) } func (lexer *Lexer) LexType() Token { return lexer.LexWordOrType(TokenKindType) } func (lexer *Lexer) LexSymbol() Token { var err error _, err = lexer.Skip() if err != nil { return lexer.handleError(err) } _, err = lexer.NextWhile(func(r rune) bool { return !isSpaceOrEOX(r) }) if err != nil { return lexer.handleError(err) } return lexer.MakeToken(TokenKindSymbol) } func (lexer *Lexer) LexBuiltin() Token { var err error _, err = lexer.Skip() if err != nil { return lexer.handleError(err) } _, err = lexer.NextWhile(func(r rune) bool { return !isSpaceOrEOX(r) }) if err != nil { return lexer.handleError(err) } return lexer.MakeBuiltinToken() } func (lexer *Lexer) skipSpaceAndCommentAndPeek() (rune, error) { r, err := lexer.Peek() if err != nil { return r, err } i := 0 for isSpace(r) || isComment(r) { if isSpace(r) { err = lexer.SkipSpace() } else if isComment(r) { err = lexer.SkipComment() } if err != nil { return r, err } i++ r, err = lexer.Peek() lexer.LogDebug("Peeked again: >%c< %v %v %d", r, isSpace(r), isComment(r), i) if err != nil { return r, err } } return r, err } func (lexer *Lexer) LexEOX() Token { lexer.Next() _, err := lexer.skipSpaceAndCommentAndPeek() if err != nil { return lexer.handleError(err) } _, err = lexer.NextWhile(func(r rune) bool { return isSpaceOrEOX(r) || r == '\n' || r == '.' }) if err != nil { return lexer.handleError(err) } return lexer.MakeToken(TokenKindEOX) } func (lexer *Lexer) lex() Token { r, err := lexer.skipSpaceAndCommentAndPeek() lexer.LogDebug(" After skip: >%c< >%v<\n", r, err) if err != nil { return lexer.handleError(err) } if unicode.IsDigit(r) || r == '-' || r == '+' { return lexer.LexNumber() } if r == '\n' || r == '.' { return lexer.LexEOX() } if r == '"' { return lexer.LexString() } if r == '`' { return lexer.LexLongString() } if r == '!' { return lexer.LexBuiltin() } if r == ':' { return lexer.LexSymbol() } if isOperator(r) { return lexer.LexOperator() } if isSelector(r) { return lexer.LexSelector() } switch TokenKind(r) { case TokenKindGet: fallthrough case TokenKindSet: fallthrough case TokenKindOpenBlock: fallthrough case TokenKindCloseBlock: fallthrough case TokenKindOpenList: fallthrough case TokenKindCloseList: fallthrough case TokenKindOpenParen: fallthrough case TokenKindCloseParen: lexer.Next() return lexer.MakeToken(TokenKind(r)) default: } if unicode.IsLetter(r) || r == '_' { if unicode.IsUpper(r) { return lexer.LexType() } else { return lexer.LexWord() } } // EOF character if r == 0x7f { return lexer.MakeEOFToken() } return lexer.MakeErrorfToken("Unknown character: %c", r) } func (lexer *Lexer) Lex() Token { res := lexer.lex() lexer.ClearBuffer() // ensure buffer is cleared after lexing, always. return res } func (lexer *Lexer) LexAll() []Token { var token Token res := make([]Token, 0) for token = lexer.Lex(); !token.IsLast(); token = lexer.Lex() { res = append(res, token) } res = append(res, token) return res } func NewLexer(scanner io.RuneScanner, filename string) *Lexer { lexer := &Lexer{} lexer.RuneScanner = scanner lexer.Position.FileName = filename lexer.Position.Column = 1 lexer.Position.Line = 1 lexer.LoggerWrapper = LoggerWrapper{nil} lexer.Keywords = make(map[string]*Keyword) return lexer } func (lexer * Lexer) Report() { if lexer == nil { fmt.Printf("Lexer: is nil\n") } else { fmt.Printf("Lexer: %s:%d:%d\n", lexer.Position.FileName, lexer.Position.Column, lexer.Position.Line) } } func (lexer *Lexer) AddKeyword(kw * Keyword) *Keyword { if kw != nil { lexer.Keywords[kw.Name] = kw } return kw } func (lexer *Lexer) NewKeyword(name string, kind TokenKind, value Value) *Keyword { kw := &Keyword{Name: name, TokenKind: kind, Value: value} return lexer.AddKeyword(kw) } func NewLexerFromString(input string) *Lexer { reader := strings.NewReader(input) return NewLexer(reader, "") } func NewLexerFromFilename(filename string) (*Lexer, error) { read, err := os.Open(filename) if err == nil { bread := bufio.NewReader(read) lex := NewLexer(bread, filename) return lex, nil } return nil, err }