package muesli

import (
	"bufio"
	"fmt"
	"io"
	"os"
	"strings"
	"unicode"
	// "gitlab.com/beoran/woe/graphviz"
	// _ "gitlab.com/beoran/woe/monolog"
)

/* A Lexer splits scanned input into tokens. */
type Lexer struct {
	Position       // position of the token being lexed; embedded, defined elsewhere in this package
	Index   int    // index of the current rune in the input
	Start   int    // start of the current token in the input
	io.RuneScanner // the input being scanned
	buffer  []rune // runes consumed so far for the current token
	Current rune   // the rune that was consumed most recently
}

func (lexer *Lexer) ClearBuffer() {
	lexer.buffer = make([]rune, 0)
}

// MakeToken builds a token of the given kind from the contents of the buffer,
// then clears the buffer.
func (lexer *Lexer) MakeToken(kind TokenKind) Token {
	val := StringValue(string(lexer.buffer))
	lexer.ClearBuffer()
	return NewToken(kind, val, lexer.Position)
}

func (lexer Lexer) MakeErrorToken(err error) Token {
	return NewToken(TokenKindError, err.Error(), lexer.Position)
}

func (lexer Lexer) MakeErrorfToken(format string, va ...interface{}) Token {
	err := fmt.Errorf(format, va...)
	return lexer.MakeErrorToken(err)
}

func (lexer Lexer) MakeEOFToken() Token {
	return NewToken(TokenKindEOF, "", lexer.Position)
}

// Peek returns the next rune without consuming it or adding it to the buffer.
func (lexer *Lexer) Peek() (rune, error) {
	r, _, err := lexer.RuneScanner.ReadRune()
	err2 := lexer.RuneScanner.UnreadRune()
	if err == nil {
		err = err2
	}
	return r, err
}

// Next consumes the next rune, appends it to the buffer and advances the
// position, starting a new line when a newline is consumed.
func (lexer *Lexer) Next() (rune, error) {
	r, _, err := lexer.RuneScanner.ReadRune()
	if err != nil {
		return 0, err
	}
	lexer.Current = r
	lexer.buffer = append(lexer.buffer, r)
	lexer.Index++
	lexer.Position.Column++
	if r == '\n' {
		lexer.Position.Column = 1
		lexer.Position.Line++
	}
	return lexer.buffer[len(lexer.buffer)-1], nil
}

// oldPrevious steps back one rune; it is not used by the lexer below.
func (lexer *Lexer) oldPrevious() error {
	fmt.Printf("Previous: now %c \n", lexer.Current)
	err := lexer.RuneScanner.UnreadRune()
	if err != nil {
		return err
	}
	lexer.Index--
	lexer.Position.Column--
	if len(lexer.buffer) > 0 {
		r := lexer.buffer[len(lexer.buffer)-1]
		lexer.buffer = lexer.buffer[0 : len(lexer.buffer)-1]
		if r == '\n' {
			lexer.Position.Column = 1 // XXX wrong
			lexer.Position.Line--
		}
		lexer.Current = r
	}
	return nil
}

// NextIf consumes the next rune if it satisfies the predicate, and reports
// whether it did so.
func (lexer *Lexer) NextIf(predicate func(rune) bool) (bool, error) {
	r, err := lexer.Peek()
	if err != nil {
		return false, err
	}
	if predicate(r) {
		_, err = lexer.Next()
		if err != nil {
			return true, err
		}
		return true, nil
	}
	return false, nil
}

// NextWhile consumes runes for as long as the predicate holds, and reports
// whether at least one rune was consumed.
func (lexer *Lexer) NextWhile(predicate func(rune) bool) (bool, error) {
	result := false
	ok, err := lexer.NextIf(predicate)
	result = result || ok
	for ; ok && (err == nil); ok, err = lexer.NextIf(predicate) {
		result = result || ok
	}
	return result, err
}

func isSpace(r rune) bool {
	return r == ' ' || r == '\t'
}

// SkipSpace consumes spaces and tabs and discards them from the buffer.
func (lexer *Lexer) SkipSpace() error {
	_, err := lexer.NextWhile(isSpace)
	if err == nil {
		lexer.ClearBuffer()
	}
	return err
}

/* Handles errors including EOF by either returning an error token or an
 * EOF token. */
func (lexer *Lexer) handleError(err error) Token {
	if err == io.EOF {
		return lexer.MakeEOFToken()
	}
	return lexer.MakeErrorToken(err)
}
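// The following is a small, purely illustrative sketch (not used by the lexer
// itself) of the contract between Peek and Next above: Peek looks at the
// upcoming rune without consuming it or touching the buffer, while Next
// consumes it, appends it to the token buffer and advances the position.
// The name peekThenNextSketch is an assumption made for this illustration.
func (lexer *Lexer) peekThenNextSketch() (rune, error) {
	r, err := lexer.Peek()
	if err != nil {
		return 0, err
	}
	// Peek left the input untouched, so Next must now return the same rune.
	consumed, err := lexer.Next()
	if err != nil {
		return 0, err
	}
	if consumed != r {
		return 0, fmt.Errorf("peek/next mismatch: %c != %c", consumed, r)
	}
	return r, nil
}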
func (lexer *Lexer) LexNumber() Token {
	isFloat := false
	_, err := lexer.NextWhile(func(r rune) bool {
		if unicode.IsDigit(r) {
			return true
		} else if r == '.' {
			if isFloat {
				return false // double point in floating point
			} else {
				isFloat = true
				return true
			}
		} else {
			return false
		}
	})
	if err != nil {
		return lexer.MakeErrorfToken("when parsing number: %s", err)
	}
	if isFloat {
		return lexer.MakeToken(TokenKindFloat)
	}
	return lexer.MakeToken(TokenKindInteger)
}

func isDoubleQuote(r rune) bool {
	return r == '"'
}

func (lexer *Lexer) LexString() Token {
	inEscape := false
	var err error
	_, err = lexer.Next()
	if err != nil {
		return lexer.handleError(err)
	}
	_, err = lexer.NextWhile(func(r rune) bool {
		if r == '"' && !inEscape {
			return false
		}
		if r == '\\' {
			// TODO escape parsing, now just a single character after it
			if inEscape { // double backslash
				inEscape = false
			} else {
				inEscape = true
			}
		} else {
			inEscape = false
		}
		return true // still inside the string
	})
	if err != nil {
		return lexer.MakeErrorfToken("when parsing string: %s", err)
	}
	_, err = lexer.Next()
	if err != nil {
		return lexer.handleError(err)
	}
	return lexer.MakeToken(TokenKindString)
}

func (lexer *Lexer) LexLongString() Token {
	var err error
	_, err = lexer.Next()
	if err != nil {
		return lexer.handleError(err)
	}
	_, err = lexer.NextWhile(func(r rune) bool {
		return r != '`'
	})
	if err != nil {
		return lexer.MakeErrorfToken("when parsing long string: %s", err)
	}
	_, err = lexer.Next()
	if err != nil {
		return lexer.handleError(err)
	}
	return lexer.MakeToken(TokenKindString)
}

func (lexer *Lexer) LexWord() Token {
	var err error
	_, err = lexer.Next()
	if err != nil {
		return lexer.handleError(err)
	}
	lexer.NextWhile(func(r rune) bool {
		return unicode.IsLetter(r)
	})
	return lexer.MakeToken(TokenKindWord)
}

func (lexer *Lexer) lex() Token {
	r, err := lexer.Peek()
	if err != nil {
		return lexer.handleError(err)
	}
	if isSpace(r) {
		err = lexer.SkipSpace()
		if err != nil {
			return lexer.handleError(err)
		}
		r, err = lexer.Peek()
		if err != nil {
			return lexer.handleError(err)
		}
	}
	if unicode.IsDigit(r) {
		return lexer.LexNumber()
	}
	if r == '\n' || r == '.' {
		lexer.Next()
		return lexer.MakeToken(TokenKindEOX)
	}
	if r == '"' {
		return lexer.LexString()
	}
	if r == '`' {
		return lexer.LexLongString()
	}
	switch TokenKind(r) {
	case TokenKindGet, TokenKindSet,
		TokenKindOpenBlock, TokenKindCloseBlock,
		TokenKindOpenList, TokenKindCloseList,
		TokenKindOpenParen, TokenKindCloseParen:
		lexer.Next()
		return lexer.MakeToken(TokenKind(r))
	default:
	}
	if unicode.IsLetter(r) {
		return lexer.LexWord()
	}
	return lexer.MakeErrorfToken("Unknown character: %c", r)
}

func (lexer *Lexer) Lex() Token {
	res := lexer.lex()
	lexer.ClearBuffer() // ensure buffer is cleared after lexing, always.
	return res
}
// LexAll lexes the whole input and returns all tokens, including the final
// EOF or error token.
func (lexer *Lexer) LexAll() []Token {
	var token Token
	res := make([]Token, 0)
	for token = lexer.Lex(); !token.IsLast(); token = lexer.Lex() {
		fmt.Printf("token: %s %v\n", token.String(), token.IsLast())
		res = append(res, token)
	}
	fmt.Printf("Last token: %s %v\n", token.String(), token.IsLast())
	res = append(res, token)
	return res
}

// NewLexer returns a lexer that reads from the given scanner, reporting
// positions against the given file name.
func NewLexer(scanner io.RuneScanner, filename string) Lexer {
	lexer := Lexer{}
	lexer.RuneScanner = scanner
	lexer.Position.FileName = filename
	lexer.Position.Column = 1
	lexer.Position.Line = 1
	return lexer
}

func NewLexerFromInputString(input string) Lexer {
	reader := strings.NewReader(input)
	return NewLexer(reader, "")
}

// NewLexerFromFileName opens the named file and returns a lexer that reads
// from it, or an error if the file could not be opened.
func NewLexerFromFileName(filename string) (*Lexer, error) {
	read, err := os.Open(filename)
	if err != nil {
		return nil, err
	}
	bread := bufio.NewReader(read)
	lex := NewLexer(bread, filename)
	return &lex, nil
}
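// A minimal usage sketch of the lexer in this file, assuming the rest of the
// muesli package (Token, TokenKind, Position, ...) is available: build a
// lexer from an input string and collect every token, including the final
// EOF or error token that LexAll appends. The name lexAllFromStringSketch is
// an assumption made for this illustration; it is not part of the package's
// API.
func lexAllFromStringSketch(input string) []Token {
	lexer := NewLexerFromInputString(input)
	tokens := lexer.LexAll()
	for _, token := range tokens {
		fmt.Printf("lexed: %s\n", token.String())
	}
	return tokens
}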