package muesli

import (
	"bufio"
	_ "bytes"
	_ "errors"
	"fmt"
	"io"
	"os"
	_ "reflect"
	_ "runtime"
	"strconv"
	"strings"
	"unicode"
	// "gitlab.com/beoran/woe/graphviz"
	// _ "gitlab.com/beoran/woe/monolog"
)

/* A Lexer splits scanned input into tokens. */
type Lexer struct {
	Position       // Current position (file name, line, column) in the input.
	Index    int   // Count of runes read so far.
	Start    int
	io.RuneScanner // Input source the lexer reads from.
	buffer  []rune // Runes of the token currently being accumulated.
	Current rune   // Most recently read rune.
	LoggerWrapper
}

// SetLogger installs the logger used for the lexer's debug output.
func (lexer *Lexer) SetLogger(logger Logger) {
	lexer.LoggerWrapper = LoggerWrapper{logger}
}

// ClearBuffer discards the accumulated token text.
func (lexer *Lexer) ClearBuffer() {
	lexer.buffer = make([]rune, 0)
}

// MakeIntegerToken parses the buffer as an integer literal and returns the
// resulting token, or an error token if parsing fails. The buffer is cleared
// in either case.
func (lexer *Lexer) MakeIntegerToken() Token {
	var sbuffer = string(lexer.buffer)
	i, err := strconv.ParseInt(sbuffer, 0, 64)
	if err == nil {
		lexer.ClearBuffer()
		return NewToken(TokenKindInteger, IntValue(i), lexer.Position)
	} else {
		lexer.ClearBuffer()
		return lexer.MakeErrorToken(err)
	}
}

// MakeFloatToken parses the buffer as a floating point literal and returns
// the resulting token, or an error token if parsing fails. The buffer is
// cleared in either case.
func (lexer *Lexer) MakeFloatToken() Token {
	var sbuffer = string(lexer.buffer)
	f, err := strconv.ParseFloat(sbuffer, 64)
	if err == nil {
		lexer.ClearBuffer()
		return NewToken(TokenKindFloat, FloatValue(f), lexer.Position)
	} else {
		lexer.ClearBuffer()
		return lexer.MakeErrorToken(err)
	}
}

// MakeBooleanToken returns a boolean token with the given value and clears
// the buffer.
func (lexer *Lexer) MakeBooleanToken(b bool) Token {
	lexer.ClearBuffer()
	if b {
		return NewToken(TokenKindBoolean, TrueValue, lexer.Position)
	} else {
		return NewToken(TokenKindBoolean, FalseValue, lexer.Position)
	}
}

// MakeNilToken returns a nil token and clears the buffer.
func (lexer *Lexer) MakeNilToken() Token {
	lexer.ClearBuffer()
	return NewToken(TokenKindNil, NilValue, lexer.Position)
}

// MakeBuiltinToken converts the buffered word into one of the builtin
// tokens (true, false, nil), or an error token for anything else.
// The buffer is always cleared.
func (lexer *Lexer) MakeBuiltinToken() Token {
	var sbuffer = string(lexer.buffer)
	lexer.ClearBuffer()
	if sbuffer == "true" {
		return NewToken(TokenKindBoolean, TrueValue, lexer.Position)
	} else if sbuffer == "false" {
		return NewToken(TokenKindBoolean, FalseValue, lexer.Position)
	} else if sbuffer == "nil" {
		return NewToken(TokenKindNil, NilValue, lexer.Position)
	} else {
		return lexer.MakeErrorfToken("Not a builtin: %s", sbuffer)
	}
}

// MakeStringValueToken wraps the buffered text in a token of the given kind.
// Note: the buffer is NOT cleared here; Lex() clears it after every token.
func (lexer *Lexer) MakeStringValueToken(kind TokenKind) Token {
	var sbuffer = string(lexer.buffer)
	return NewToken(kind, StringValue(sbuffer), lexer.Position)
}

// MakeToken converts the buffered text into a token of the requested kind,
// dispatching to the specialized constructors where needed.
func (lexer *Lexer) MakeToken(kind TokenKind) Token {
	switch kind {
	case TokenKindInteger:
		return lexer.MakeIntegerToken()
	case TokenKindFloat:
		return lexer.MakeFloatToken()
	case TokenKindString:
		fallthrough
	case TokenKindSymbol:
		fallthrough
	case TokenKindType:
		fallthrough
	case TokenKindError:
		fallthrough
	case TokenKindWord:
		return lexer.MakeStringValueToken(kind)
	case TokenKindNil:
		fallthrough
	case TokenKindBoolean:
		return lexer.MakeBuiltinToken()
	case TokenKindGet:
		fallthrough
	case TokenKindSet:
		fallthrough
	case TokenKindOpenBlock:
		fallthrough
	case TokenKindCloseBlock:
		fallthrough
	case TokenKindOpenList:
		fallthrough
	case TokenKindCloseList:
		fallthrough
	case TokenKindOpenParen:
		fallthrough
	case TokenKindCloseParen:
		fallthrough
	case TokenKindEOX:
		fallthrough
	case TokenKindEOF:
		val := StringValue(string(lexer.buffer))
		lexer.ClearBuffer()
		return NewToken(kind, val, lexer.Position)
	default:
		return lexer.MakeErrorfToken("Internal error on token type %s", kind)
	}
}

// MakeErrorToken wraps err in an error token at the current position.
func (lexer Lexer) MakeErrorToken(err error) Token {
	return NewToken(TokenKindError, ErrorValue{err}, lexer.Position)
}

// MakeErrorfToken formats an error message and wraps it in an error token.
func (lexer Lexer) MakeErrorfToken(format string, va ...interface{}) Token {
	err := fmt.Errorf(format, va...)
	return lexer.MakeErrorToken(err)
}

// MakeEOFToken returns an end-of-file token at the current position.
func (lexer Lexer) MakeEOFToken() Token {
	return NewToken(TokenKindEOF, &EmptyValue{}, lexer.Position)
}

// Peek returns the next rune without consuming it. If the read succeeds but
// the unread fails, the unread error is reported instead.
func (lexer *Lexer) Peek() (rune, error) {
	r, _, err := lexer.RuneScanner.ReadRune()
	err2 := lexer.RuneScanner.UnreadRune()
	if err == nil {
		err = err2
	}
	return r, err
}

/* Advances the lexer's position based on the rune r read. */
func (lexer *Lexer) advance(r rune) {
	lexer.Current = r
	lexer.Index++
	lexer.Position.Column++
	if r == '\n' {
		lexer.Position.Column = 1
		lexer.Position.Line++
	}
}

/* Append a rune to the lexer's buffer. */
func (lexer *Lexer) appendRune(r rune) {
	lexer.buffer = append(lexer.buffer, r)
}

/* Advances the lexer's input buffer but does not store the rune read,
 * but just returns it. */
func (lexer *Lexer) Skip() (rune, error) {
	r, _, err := lexer.RuneScanner.ReadRune()
	if err != nil {
		return 0, err
	}
	lexer.advance(r)
	return r, nil
}

/* Actually reads the next rune from the lexer's input source and stores
 * them in the lexer's token buffer.
 * Shorthand for r, err := lexer.Skip() ; lexer.appendRune(r) */
func (lexer *Lexer) Next() (rune, error) {
	r, err := lexer.Skip()
	if err == nil {
		lexer.appendRune(r)
	}
	// BUGFIX: previously returned a nil error unconditionally, hiding
	// read failures from callers.
	return r, err
}

// DoIf peeks at the next rune and, when predicate accepts it, applies todo
// (Next or Skip). It reports whether the predicate matched, plus any error
// from peeking or from todo.
func (lexer *Lexer) DoIf(predicate func(rune) bool,
	todo func(*Lexer) (rune, error)) (bool, error) {
	r, err := lexer.Peek()
	if err != nil {
		return false, err
	}
	if predicate(r) {
		r, err = todo(lexer)
		if err != nil {
			return true, err
		}
		return true, nil
	}
	return false, nil
}

// NextIf consumes and buffers the next rune if predicate accepts it.
func (lexer *Lexer) NextIf(predicate func(rune) bool) (bool, error) {
	return lexer.DoIf(predicate, (*Lexer).Next)
}

// SkipIf consumes the next rune without buffering it if predicate accepts it.
func (lexer *Lexer) SkipIf(predicate func(rune) bool) (bool, error) {
	return lexer.DoIf(predicate, (*Lexer).Skip)
}

// NextWhile consumes and buffers runes while predicate accepts them.
// It reports whether at least one rune was consumed.
func (lexer *Lexer) NextWhile(predicate func(rune) bool) (bool, error) {
	// BUGFIX: result previously started at true and was only OR-ed, so the
	// function always reported true. Start at false so the bool means
	// "matched at least once". (All callers in this file discard the bool.)
	result := false
	ok, err := lexer.NextIf(predicate)
	result = result || ok
	for ; ok && (err == nil); ok, err = lexer.NextIf(predicate) {
		result = result || ok
	}
	return result, err
}

// SkipWhile consumes runes, without buffering, while predicate accepts them.
// It reports whether at least one rune was consumed.
func (lexer *Lexer) SkipWhile(predicate func(rune) bool) (bool, error) {
	// BUGFIX: same always-true result defect as NextWhile; see above.
	result := false
	ok, err := lexer.SkipIf(predicate)
	result = result || ok
	for ; ok && (err == nil); ok, err = lexer.SkipIf(predicate) {
		result = result || ok
	}
	return result, err
}

// isSpace reports whether r is horizontal whitespace. Newline is excluded
// because it terminates expressions (EOX).
func isSpace(r rune) bool {
	return r == ' ' || r == '\t' || r == '\v' || r == '\r'
}

// isComment reports whether r starts a comment.
func isComment(r rune) bool {
	return r == '#'
}

// SkipSpace consumes a run of horizontal whitespace.
func (lexer *Lexer) SkipSpace() error {
	_, err := lexer.SkipWhile(isSpace)
	return err
}

// SkipBlockComment consumes a nestable #{ ... } block comment; the opening
// '{' has been peeked but not yet consumed by the caller.
func (lexer *Lexer) SkipBlockComment() error {
	var err error
	var r rune
	lexer.LogDebug("Skipping block comment.")
	for block := 1; block > 0 && err == nil; {
		_, err = lexer.Skip()
		if err != nil {
			return err
		}
		r, err = lexer.Peek()
		if err != nil {
			// BUGFIX: don't inspect r after a failed Peek.
			return err
		}
		if r == '{' {
			block++
		} else if r == '}' {
			block--
		}
		lexer.LogDebug("Skipping block comment: %d", block)
	}
	// Consume the final '}'.
	_, err = lexer.Skip()
	return err
}

// SkipComment consumes a comment: either a #{ ... } block comment or a
// line comment running to the end of the line.
func (lexer *Lexer) SkipComment() error {
	r, err := lexer.Skip()
	lexer.LogDebug("Skipping %c.", r)
	if err != nil {
		return err
	}
	r, err = lexer.Peek()
	if r == '{' {
		return lexer.SkipBlockComment()
	}
	for r != '\n' && err == nil {
		lexer.LogDebug("Skipping comment %c.", r)
		_, err = lexer.Skip()
		if err != nil {
			return err
		}
		r, err = lexer.Peek()
	}
	if err != nil {
		return err
	}
	// Consume the terminating newline.
	_, err = lexer.Skip()
	return err
}

/* Handles errors including EOF by either returning an error token or an
 * EOF token. */
func (lexer *Lexer) handleError(err error) Token {
	if err == io.EOF {
		return lexer.MakeEOFToken()
	} else {
		return lexer.MakeErrorToken(err)
	}
}

// LexNumber lexes an optionally signed integer or floating point literal.
func (lexer *Lexer) LexNumber() Token {
	isFloat := false
	// skip any first - or +
	_, err := lexer.NextIf(func(r rune) bool {
		return r == '-' || r == '+'
	})
	_, err = lexer.NextWhile(func(r rune) bool {
		if unicode.IsDigit(r) {
			return true
		} else if r == '.' {
			if isFloat {
				return false // double point in floating point
			} else {
				isFloat = true
				return true
			}
		} else {
			return false
		}
	})
	// BUGFIX: io.EOF simply ends the number; previously a number that was
	// the last thing in the input turned into an error token.
	if err != nil && err != io.EOF {
		return lexer.MakeErrorfToken("when parsing number: %s", err)
	}
	if isFloat {
		return lexer.MakeToken(TokenKindFloat)
	} else {
		return lexer.MakeToken(TokenKindInteger)
	}
}

// isDoubleQuote reports whether r is a double quote.
func isDoubleQuote(r rune) bool {
	return r == '"'
}

// handleEscapeHexChars reads amount hexadecimal digits and appends the rune
// they encode to the buffer. Used for \x, \u and \U escapes.
func (lexer *Lexer) handleEscapeHexChars(amount int) error {
	buffer := make([]byte, 0)
	r, err := lexer.Skip()
	for index := 0; err == nil && index < amount; {
		if unicode.Is(unicode.ASCII_Hex_Digit, r) {
			buffer = append(buffer, byte(r))
		} else {
			return fmt.Errorf("Not a hexadecimal digit: %c", r)
		}
		index++
		if index < amount {
			r, err = lexer.Skip()
		}
	}
	if err != nil {
		return err
	}
	i, err := strconv.ParseInt(string(buffer), 16, 32)
	if err != nil {
		return err
	}
	lexer.appendRune(rune(i))
	_, err = lexer.Peek()
	return err
}

// handleEscape consumes one escape sequence (after the backslash) inside a
// string literal and appends the decoded rune(s) to the buffer.
func (lexer *Lexer) handleEscape() error {
	r, err := lexer.Skip()
	if err != nil {
		return err
	}
	switch r {
	case 'a':
		lexer.appendRune('\a')
	case 'b':
		lexer.appendRune('\b')
	case 'e':
		lexer.appendRune('\033')
	case 'f':
		lexer.appendRune('\f')
	case 'n':
		lexer.appendRune('\n')
	case 'r':
		lexer.appendRune('\r')
	case 't':
		lexer.appendRune('\t')
	case '\\':
		lexer.appendRune('\\')
	case '"':
		lexer.appendRune('"')
	// case 'o': fallthrough // No octals, for now.
	case 'x':
		err = lexer.handleEscapeHexChars(2)
	case 'u':
		err = lexer.handleEscapeHexChars(4)
	case 'U':
		err = lexer.handleEscapeHexChars(6)
	default:
		return fmt.Errorf("Unknown escape sequence character %c: %d", r, r)
	}
	return err
}

// LexString lexes a double-quoted string literal with escape sequences.
func (lexer *Lexer) LexString() Token {
	var err error
	var r rune
	_, err = lexer.Skip() // Skip first "
	if err != nil {
		return lexer.handleError(err)
	}
	r, err = lexer.Skip()
	for r != '"' && err == nil {
		if r == '\\' {
			err = lexer.handleEscape()
			if err != nil {
				return lexer.handleError(err)
			}
		} else {
			lexer.appendRune(r) // still inside the string
		}
		r, err = lexer.Skip()
	}
	// EOF here means the string was never closed, so it stays an error.
	if err != nil {
		return lexer.MakeErrorfToken("when parsing string: %s", err)
	}
	return lexer.MakeToken(TokenKindString)
}

// LexLongString lexes a backquote-delimited raw string literal.
func (lexer *Lexer) LexLongString() Token {
	var err error
	_, err = lexer.Skip()
	if err != nil {
		return lexer.handleError(err)
	}
	_, err = lexer.NextWhile(func(r rune) bool {
		return r != '`'
	})
	// EOF here means the long string was never closed: an error.
	if err != nil {
		return lexer.MakeErrorfToken("when parsing long string: %s", err)
	}
	_, err = lexer.Skip()
	if err != nil {
		return lexer.handleError(err)
	}
	return lexer.MakeToken(TokenKindString)
}

// LexWordOrType lexes an identifier (a letter or underscore followed by
// letters, digits and underscores) and tags it with the given kind, except
// for the keywords true, false and nil which become builtin tokens.
func (lexer *Lexer) LexWordOrType(kind TokenKind) Token {
	var err error
	first := true
	_, err = lexer.Next()
	if err != nil {
		return lexer.handleError(err)
	}
	_, err = lexer.NextWhile(func(r rune) bool {
		if first {
			first = false
			return unicode.IsLetter(r) || r == '_'
		} else {
			return unicode.IsLetter(r) || unicode.IsNumber(r) || r == '_'
		}
	})
	// BUGFIX: io.EOF simply ends the word; previously an identifier at the
	// very end of the input was silently replaced by an EOF token.
	if err != nil && err != io.EOF {
		return lexer.handleError(err)
	}
	sbuffer := string(lexer.buffer)
	// handle key words
	switch sbuffer {
	case "true":
		return lexer.MakeBooleanToken(true)
	case "false":
		return lexer.MakeBooleanToken(false)
	case "nil":
		return lexer.MakeNilToken()
	default:
	}
	return lexer.MakeToken(kind)
}

// LexWord lexes a lowercase-initial identifier as a word token.
func (lexer *Lexer) LexWord() Token {
	return lexer.LexWordOrType(TokenKindWord)
}

// LexType lexes an uppercase-initial identifier as a type token.
func (lexer *Lexer) LexType() Token {
	return lexer.LexWordOrType(TokenKindType)
}

// LexSymbol lexes a :symbol, taking everything up to the next whitespace.
func (lexer *Lexer) LexSymbol() Token {
	var err error
	_, err = lexer.Skip()
	if err != nil {
		return lexer.handleError(err)
	}
	_, err = lexer.NextWhile(func(r rune) bool {
		return !unicode.IsSpace(r)
	})
	// BUGFIX: io.EOF simply ends the symbol (see LexWordOrType).
	if err != nil && err != io.EOF {
		return lexer.handleError(err)
	}
	return lexer.MakeToken(TokenKindSymbol)
}

// LexBuiltin lexes a !builtin, taking everything up to the next whitespace.
func (lexer *Lexer) LexBuiltin() Token {
	var err error
	_, err = lexer.Skip()
	if err != nil {
		return lexer.handleError(err)
	}
	_, err = lexer.NextWhile(func(r rune) bool {
		return !unicode.IsSpace(r)
	})
	// BUGFIX: io.EOF simply ends the builtin (see LexWordOrType).
	if err != nil && err != io.EOF {
		return lexer.handleError(err)
	}
	return lexer.MakeBuiltinToken()
}

// skipSpaceAndCommentAndPeek consumes any run of whitespace and comments,
// then returns the first significant rune without consuming it.
func (lexer *Lexer) skipSpaceAndCommentAndPeek() (rune, error) {
	r, err := lexer.Peek()
	if err != nil {
		return r, err
	}
	i := 0
	for isSpace(r) || isComment(r) {
		if isSpace(r) {
			err = lexer.SkipSpace()
		} else if isComment(r) {
			err = lexer.SkipComment()
		}
		if err != nil {
			return r, err
		}
		i++
		r, err = lexer.Peek()
		lexer.LogDebug("Peeked again: >%c< %v %v %d", r, isSpace(r), isComment(r), i)
		if err != nil {
			return r, err
		}
	}
	return r, err
}

// lex scans and returns the next token, dispatching on the first
// significant rune. The buffer is cleaned up by the Lex wrapper.
func (lexer *Lexer) lex() Token {
	r, err := lexer.skipSpaceAndCommentAndPeek()
	lexer.LogDebug(" After skip: >%c< >%v<\n", r, err)
	if err != nil {
		return lexer.handleError(err)
	}
	if unicode.IsDigit(r) || r == '-' || r == '+' {
		return lexer.LexNumber()
	}
	if r == '\n' || r == '.' {
		lexer.Next() // error intentionally ignored: the EOX is still valid
		return lexer.MakeToken(TokenKindEOX)
	}
	if r == '"' {
		return lexer.LexString()
	}
	if r == '`' {
		return lexer.LexLongString()
	}
	if r == '!' {
		return lexer.LexBuiltin()
	}
	if r == ':' {
		return lexer.LexSymbol()
	}
	switch TokenKind(r) {
	case TokenKindGet:
		fallthrough
	case TokenKindSet:
		fallthrough
	case TokenKindOpenBlock:
		fallthrough
	case TokenKindCloseBlock:
		fallthrough
	case TokenKindOpenList:
		fallthrough
	case TokenKindCloseList:
		fallthrough
	case TokenKindOpenParen:
		fallthrough
	case TokenKindCloseParen:
		lexer.Next() // error intentionally ignored: kind is already known
		return lexer.MakeToken(TokenKind(r))
	default:
	}
	if unicode.IsLetter(r) {
		if unicode.IsUpper(r) {
			return lexer.LexType()
		} else {
			return lexer.LexWord()
		}
	}
	return lexer.MakeErrorfToken("Unknown character: %c", r)
}

// Lex scans and returns the next token from the input.
func (lexer *Lexer) Lex() Token {
	res := lexer.lex()
	lexer.ClearBuffer() // ensure buffer is cleared after lexing, always.
	return res
}

// LexAll scans the whole input and returns all tokens, including the
// terminating EOF or error token.
func (lexer *Lexer) LexAll() []Token {
	var token Token
	res := make([]Token, 0)
	for token = lexer.Lex(); !token.IsLast(); token = lexer.Lex() {
		res = append(res, token)
	}
	res = append(res, token)
	return res
}

// NewLexer returns a lexer reading from scanner, reporting positions
// against the given file name, starting at line 1, column 1.
func NewLexer(scanner io.RuneScanner, filename string) *Lexer {
	lexer := &Lexer{}
	lexer.RuneScanner = scanner
	lexer.Position.FileName = filename
	lexer.Position.Column = 1
	lexer.Position.Line = 1
	lexer.LoggerWrapper = LoggerWrapper{nil}
	return lexer
}

// Report prints the lexer's current position for debugging. Safe on a nil
// receiver.
func (lexer *Lexer) Report() {
	if lexer == nil {
		fmt.Printf("Lexer: is nil\n")
	} else {
		// BUGFIX: was printing column before line; file:line:column is the
		// order the format implies.
		fmt.Printf("Lexer: %s:%d:%d\n", lexer.Position.FileName,
			lexer.Position.Line, lexer.Position.Column)
	}
}

// NewLexerFromString returns a lexer over the given input string with an
// empty file name.
func NewLexerFromString(input string) *Lexer {
	reader := strings.NewReader(input)
	return NewLexer(reader, "")
}

// NewLexerFromFilename opens filename and returns a lexer reading from it.
// NOTE(review): the opened file is never closed and the Lexer offers no way
// to close it — the *os.File is wrapped and dropped. Confirm whether callers
// rely on process exit to release it.
func NewLexerFromFilename(filename string) (*Lexer, error) {
	read, err := os.Open(filename)
	if err == nil {
		bread := bufio.NewReader(read)
		lex := NewLexer(bread, filename)
		return lex, nil
	}
	return nil, err
}