12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832 |
- package muesli
- import (
- "bufio"
- _ "bytes"
- _ "errors"
- "fmt"
- "io"
- _ "io"
- "os"
- _ "reflect"
- _ "runtime"
- "strconv"
- "strings"
- "unicode"
- _ "unicode"
- // "gitlab.com/beoran/woe/graphviz"
- // _ "gitlab.com/beoran/woe/monolog"
- )
-
/* A Lexer splits scanned input into tokens.
 */
type Lexer struct {
	Position                     // Current location (file/line/column) used when emitting tokens.
	Index          int           // Count of runes consumed from the input so far.
	Start          int           // NOTE(review): not referenced anywhere in this file — confirm before removing.
	io.RuneScanner               // Input source; must support unreading one rune (used by Peek).
	buffer         []rune        // Runes accumulated for the token currently being lexed.
	Current        rune          // Most recently consumed rune.
	Keywords       map[string]*Keyword // Registered keywords, looked up by name in LexWordOrType.
	LoggerWrapper                // Debug logging; a nil logger disables output.
}
- func (lexer *Lexer) SetLogger(logger Logger) {
- lexer.LoggerWrapper = LoggerWrapper{logger}
- }
- func (lexer *Lexer) ClearBuffer() {
- lexer.buffer = make([]rune, 0)
- }
- func (lexer *Lexer) MakeIntegerToken() Token {
- var sbuffer = string(lexer.buffer)
- i, err := strconv.ParseInt(sbuffer, 0, 64)
- if err == nil {
- lexer.ClearBuffer()
- return NewToken(TokenKindInteger, IntValue(i), lexer.Position)
- } else {
- lexer.ClearBuffer()
- return lexer.MakeErrorToken(err)
- }
- }
- func (lexer *Lexer) MakeTokenFromKeyword(kw * Keyword) Token {
- lexer.ClearBuffer()
- return NewToken(kw.TokenKind, kw.Value, lexer.Position)
- }
- func (lexer *Lexer) MakeFloatToken() Token {
- var sbuffer = string(lexer.buffer)
- f, err := strconv.ParseFloat(sbuffer, 64)
- if err == nil {
- lexer.ClearBuffer()
- return NewToken(TokenKindFloat, FloatValue(f), lexer.Position)
- } else {
- lexer.ClearBuffer()
- return lexer.MakeErrorToken(err)
- }
- }
- func (lexer *Lexer) MakeBooleanToken(b bool) Token {
- lexer.ClearBuffer()
- if b {
- return NewToken(TokenKindBoolean, TrueValue, lexer.Position)
- } else {
- return NewToken(TokenKindBoolean, FalseValue, lexer.Position)
- }
- }
- func (lexer *Lexer) MakeNilToken() Token {
- lexer.ClearBuffer()
- return NewToken(TokenKindNil, NilValue, lexer.Position)
- }
- func (lexer *Lexer) MakeBuiltinToken() Token {
- var sbuffer = string(lexer.buffer)
- lexer.ClearBuffer()
- if sbuffer == "true" {
- lexer.ClearBuffer()
- return NewToken(TokenKindBoolean, TrueValue, lexer.Position)
- } else if sbuffer == "false" {
- return NewToken(TokenKindBoolean, FalseValue, lexer.Position)
- } else if sbuffer == "nil" {
- return NewToken(TokenKindNil, NilValue, lexer.Position)
- } else {
- return lexer.MakeErrorfToken("Not a builtin: %s", sbuffer)
- }
- }
- func (lexer *Lexer) MakeStringValueToken(kind TokenKind) Token {
- var sbuffer = string(lexer.buffer)
- return NewToken(kind, StringValue(sbuffer), lexer.Position)
- }
- func (lexer *Lexer) MakeTypeValueToken(kind TokenKind) Token {
- var sbuffer = string(lexer.buffer)
- return NewToken(kind, TypeValue(sbuffer), lexer.Position)
- }
- func (lexer *Lexer) MakeErrorValueToken(kind TokenKind) Token {
- var sbuffer = string(lexer.buffer)
- return NewToken(kind, NewErrorValuef("%s", sbuffer), lexer.Position)
- }
- func (lexer *Lexer) MakeWordValueToken(kind TokenKind) Token {
- var sbuffer = string(lexer.buffer)
- return NewToken(kind, WordValue(sbuffer), lexer.Position)
- }
- func (lexer *Lexer) MakeToken(kind TokenKind) Token {
- switch kind {
- case TokenKindInteger:
- return lexer.MakeIntegerToken()
- case TokenKindFloat:
- return lexer.MakeFloatToken()
- case TokenKindString:
- return lexer.MakeStringValueToken(kind)
- case TokenKindSymbol:
- return lexer.MakeWordValueToken(kind)
- case TokenKindType:
- return lexer.MakeTypeValueToken(kind)
- case TokenKindError:
- return lexer.MakeErrorValueToken(kind)
- case TokenKindWord:
- return lexer.MakeWordValueToken(kind)
- case TokenKindOperator:
- fallthrough
- case TokenKindRedirect:
- fallthrough
- case TokenKindMethod:
- return lexer.MakeWordValueToken(kind)
- case TokenKindNil:
- fallthrough
- case TokenKindBoolean:
- return lexer.MakeBuiltinToken()
- case TokenKindGet:
- fallthrough
- case TokenKindSet:
- fallthrough
- case TokenKindOpenBlock:
- fallthrough
- case TokenKindCloseBlock:
- fallthrough
- case TokenKindOpenList:
- fallthrough
- case TokenKindCloseList:
- fallthrough
- case TokenKindOpenParen:
- fallthrough
- case TokenKindCloseParen:
- fallthrough
- case TokenKindEOX:
- fallthrough
- case TokenKindEOF:
- val := StringValue(string(lexer.buffer))
- lexer.ClearBuffer()
- return NewToken(kind, val, lexer.Position)
- default:
- return lexer.MakeErrorfToken("Internal error on token type %s", kind)
- }
- }
- func (lexer Lexer) MakeErrorToken(err error) Token {
- return NewToken(TokenKindError, ErrorValue{err}, lexer.Position)
- }
- func (lexer Lexer) MakeErrorfToken(format string, va ...interface{}) Token {
- err := fmt.Errorf(format, va...)
- return lexer.MakeErrorToken(err)
- }
- func (lexer Lexer) MakeEOFToken() Token {
- return NewToken(TokenKindEOF, &EmptyValue{}, lexer.Position)
- }
- func (lexer *Lexer) Peek() (rune, error) {
- r, _, err := lexer.RuneScanner.ReadRune()
- err2 := lexer.RuneScanner.UnreadRune()
- if err == nil {
- err = err2
- }
- return r, err
- }
- /* Advances the lexer's position based on the rune r read. */
- func (lexer *Lexer) advance(r rune) {
- lexer.Current = r
- lexer.Index++
- lexer.Position.Column++
- if r == '\n' {
- lexer.Position.Column = 1
- lexer.Position.Line++
- }
- }
- /* Append a rune to the lexer's buffer. */
- func (lexer *Lexer) appendRune(r rune) {
- lexer.buffer = append(lexer.buffer, r)
- }
- /* Advances the lexer's input buffer but does not store the rune read,
- * but just returns it. */
- func (lexer *Lexer) Skip() (rune, error) {
- r, _, err := lexer.RuneScanner.ReadRune()
- if err != nil {
- return 0, err
- }
- lexer.advance(r)
- return r, nil
- }
- /* Actually reads the next rune from the lexer's input source and stores
- * them in the lexer's token buffer.
- * Shorthand for r, err := lexer.Skip() ; lexer.appendRune(r) */
- func (lexer *Lexer) Next() (rune, error) {
- r, err := lexer.Skip()
- if err == nil {
- lexer.appendRune(r)
- }
- return r, nil
- }
- func (lexer *Lexer) DoIf(predicate func(rune) bool,
- todo func(*Lexer) (rune, error)) (bool, error) {
- r, err := lexer.Peek()
- if err != nil {
- return false, err
- }
- if predicate(r) {
- r, err = todo(lexer)
- if err != nil {
- return true, err
- }
- return true, nil
- }
- return false, nil
- }
- func (lexer *Lexer) NextIf(predicate func(rune) bool) (bool, error) {
- return lexer.DoIf(predicate, (*Lexer).Next)
- }
- func (lexer *Lexer) SkipIf(predicate func(rune) bool) (bool, error) {
- return lexer.DoIf(predicate, (*Lexer).Skip)
- }
- func (lexer *Lexer) NextWhile(predicate func(rune) bool) (bool, error) {
- result := true
- ok, err := lexer.NextIf(predicate)
- result = result || ok
- for ; ok && (err == nil); ok, err = lexer.NextIf(predicate) {
- result = result || ok
- }
- return result, err
- }
- func (lexer *Lexer) SkipWhile(predicate func(rune) bool) (bool, error) {
- result := true
- ok, err := lexer.SkipIf(predicate)
- result = result || ok
- for ; ok && (err == nil); ok, err = lexer.SkipIf(predicate) {
- result = result || ok
- }
- return result, err
- }
// isEOX reports whether r terminates an expression: newline or period.
func isEOX(r rune) bool {
	switch r {
	case '\n', '.':
		return true
	}
	return false
}
// isSpace reports whether r is blank, non-newline whitespace.
func isSpace(r rune) bool {
	switch r {
	case ' ', '\t', '\v', '\r':
		return true
	}
	return false
}
// isSpaceOrEOX reports whether r is whitespace or an end-of-expression
// character (newline or period).
func isSpaceOrEOX(r rune) bool {
	switch r {
	case ' ', '\t', '\v', '\r', '\n', '.':
		return true
	}
	return false
}
// isComment reports whether r starts a comment ('#').
func isComment(r rune) bool {
	return '#' == r
}
- func isOperator(r rune) bool {
- return isPureOperator(r) || isRedirect(r) || isMethod(r)
- }
// isPureOperator reports whether r is an arithmetic-style operator rune.
func isPureOperator(r rune) bool {
	switch r {
	case '+', '-', '*', '/', '^', '%', '~':
		return true
	}
	return false
}
// isRedirect reports whether r is a redirection operator rune.
func isRedirect(r rune) bool {
	switch r {
	case '|', '&', '>', '<', '@':
		return true
	}
	return false
}
// isMethod reports whether r is a method-separator rune.
func isMethod(r rune) bool {
	switch r {
	case ',', ';':
		return true
	}
	return false
}
- func (lexer *Lexer) SkipSpace() error {
- _, err := lexer.SkipWhile(isSpace)
- return err
- }
// SkipBlockComment skips a brace-delimited block comment, honoring
// nesting: each '{' increases and each '}' decreases the depth until
// the depth for the triggering brace reaches zero.
func (lexer *Lexer) SkipBlockComment() error {
	var err error
	var r rune
	lexer.LogDebug("Skipping block comment.")
	// Depth starts at 1 for the '{' that led the caller here.
	for block := 1; block > 0 && err == nil; {
		_, err = lexer.Skip()
		if err != nil {
			return err
		}
		// NOTE(review): a Peek error is only honored by the loop test
		// above; r may be stale when Peek failed — confirm intended.
		r, err = lexer.Peek()
		if r == '{' {
			block++
		} else if r == '}' {
			block--
		}
		lexer.LogDebug("Skipping block comment: %d", block)
	}
	// Consume the final '}' that was only peeked at inside the loop.
	_, err = lexer.Skip()
	return err
}
// SkipComment skips a comment introduced by '#': either a nested block
// comment when '{' follows directly, or a line comment running up to
// and including the terminating newline.
func (lexer *Lexer) SkipComment() error {
	r, err := lexer.Skip()
	lexer.LogDebug("Skipping %c.", r)
	if err != nil {
		return err
	}
	r, err = lexer.Peek()
	// A '{' immediately after '#' starts a block comment.
	if r == '{' {
		return lexer.SkipBlockComment()
	}
	// Line comment: consume up to (but not including) the newline.
	for r != '\n' && err == nil {
		lexer.LogDebug("Skipping comment %c.", r)
		_, err = lexer.Skip()
		if err != nil {
			return err
		}
		r, err = lexer.Peek()
	}
	if err != nil {
		return err
	}
	// Consume the terminating newline as part of the comment.
	_, err = lexer.Skip()
	return err
}
- /* Handles errors including EOF by either returning an error token or an
- * EOF token.
- */
- func (lexer *Lexer) handleError(err error) Token {
- if err == io.EOF {
- return lexer.MakeEOFToken()
- } else {
- return lexer.MakeErrorToken(err)
- }
- }
- func (lexer *Lexer) LexOperator() Token {
- _, err := lexer.NextWhile(isOperator)
- if err != nil {
- return lexer.MakeErrorfToken("when parsing operator: %s", err)
- }
- oper := lexer.buffer[0]
- switch {
- case isPureOperator(oper): return lexer.MakeToken(TokenKindOperator)
- case isRedirect(oper): return lexer.MakeToken(TokenKindRedirect)
- case isMethod(oper): return lexer.MakeToken(TokenKindMethod)
- }
- return lexer.MakeToken(TokenKindOperator)
- }
// LexNumber lexes an integer or floating point literal. A leading '-'
// or '+' not followed by a digit makes it fall back to LexOperator.
// A second '.' inside a float is an error.
func (lexer *Lexer) LexNumber() Token {
	isFloat := false
	maybeOperator := false
	// skip any first - or +
	_, err := lexer.NextIf(func(r rune) bool {
		maybeOperator = (r == '-' || r == '+') // it might also be an operator in stead.
		return r == '-' || r == '+'
	})

	if err != nil {
		return lexer.MakeErrorfToken("Error during parsing of number prefix: %s", err)
	}

	for {
		r, err := lexer.Peek()
		if err != nil {
			// NOTE(review): this includes io.EOF, so a number that ends
			// exactly at end of input becomes an error token — confirm.
			return lexer.MakeErrorfToken("Error during parsing of number: %s", err)
		}

		if unicode.IsDigit(r) {
			maybeOperator = false
			// NOTE(review): Next's error is ignored here and below; the
			// Peek at the top of the loop would surface it — confirm.
			lexer.Next()
		} else if r == '.' {
			if isFloat {
				lexer.Next()
				return lexer.MakeErrorfToken("two points in floating point number")
			} else {
				isFloat = true
				lexer.Next()
			}
		} else if maybeOperator {
			// Only a sign was read so far: lex it as an operator instead.
			return lexer.LexOperator()
		} else {
			break
		}
	}

	if isFloat {
		return lexer.MakeToken(TokenKindFloat)
	} else {
		return lexer.MakeToken(TokenKindInteger)
	}
}
// isDoubleQuote reports whether r is the double-quote character.
func isDoubleQuote(r rune) bool {
	return '"' == r
}
// handleEscapeHexChars reads exactly amount hexadecimal digits,
// decodes them as a code point, and appends the resulting rune to the
// token buffer. Used for \xHH, \uHHHH and \UHHHHHH escapes.
func (lexer *Lexer) handleEscapeHexChars(amount int) error {
	buffer := make([]byte, 0)
	r, err := lexer.Skip()
	for index := 0; err == nil && index < amount; {
		if unicode.Is(unicode.ASCII_Hex_Digit, r) {
			buffer = append(buffer, byte(r))
		} else {
			return fmt.Errorf("Not a hexadecimal digit: %c", r)
		}
		index++
		// Only read ahead when more digits are still expected.
		if index < amount {
			r, err = lexer.Skip()
		}
	}
	if err != nil {
		return err
	}
	// Hex digits collected above guarantee a valid base-16 parse input.
	i, err := strconv.ParseInt(string(buffer), 16, 32)
	if err != nil {
		return err
	}
	lexer.appendRune(rune(i))
	// NOTE(review): this Peek only serves to surface a pending read
	// error (e.g. EOF right after the escape) — confirm intended.
	_, err = lexer.Peek()
	return err
}
- func (lexer *Lexer) handleEscape() error {
- r, err := lexer.Skip()
- if err != nil {
- return err
- }
- switch r {
- case 'a':
- lexer.appendRune('\a')
- case 'b':
- lexer.appendRune('\b')
- case 'e':
- lexer.appendRune('\033')
- case 'f':
- lexer.appendRune('\f')
- case 'n':
- lexer.appendRune('\n')
- case 'r':
- lexer.appendRune('\r')
- case 't':
- lexer.appendRune('\t')
- case '\\':
- lexer.appendRune('\\')
- case '"':
- lexer.appendRune('"')
- // case 'o': fallthrough // No octals, for now.
- case 'x':
- err = lexer.handleEscapeHexChars(2)
- case 'u':
- err = lexer.handleEscapeHexChars(4)
- case 'U':
- err = lexer.handleEscapeHexChars(6)
- default:
- return fmt.Errorf("Unknown escape sequence character %c: %d", r, r)
- }
- return err
- }
- func (lexer *Lexer) LexString() Token {
- var err error
- var r rune
- _, err = lexer.Skip() // Skip first "
- if err != nil {
- return lexer.handleError(err)
- }
- r, err = lexer.Skip()
- for r != '"' && err == nil {
- if r == '\\' {
- err = lexer.handleEscape()
- if err != nil {
- return lexer.handleError(err)
- }
- } else {
- lexer.appendRune(r)
- // still inside the string
- }
- r, err = lexer.Skip()
- }
- if err != nil {
- return lexer.MakeErrorfToken("when parsing string: %s", err)
- }
-
- if err != nil {
- return lexer.handleError(err)
- }
- return lexer.MakeToken(TokenKindString)
- }
- func (lexer *Lexer) LexLongString() Token {
- var err error
- _, err = lexer.Skip()
- if err != nil {
- return lexer.handleError(err)
- }
- _, err = lexer.NextWhile(func(r rune) bool {
- return r != '`'
- })
- if err != nil {
- return lexer.MakeErrorfToken("when parsing long string: %s", err)
- }
- _, err = lexer.Skip()
- if err != nil {
- return lexer.handleError(err)
- }
- return lexer.MakeToken(TokenKindString)
- }
// LexWordOrType lexes an identifier-like token of the given kind (word
// or type). If the resulting text is a registered keyword, that
// keyword's token is returned instead.
func (lexer *Lexer) LexWordOrType(kind TokenKind) Token {
	var err error
	first := true
	// The triggering rune was validated by the caller (lex); consume it.
	_, err = lexer.Next()
	if err != nil {
		return lexer.handleError(err)
	}
	_, err = lexer.NextWhile(func(r rune) bool {
		if first {
			// NOTE(review): "first" here is actually the identifier's
			// SECOND rune (the real first was consumed above), so a
			// digit in position two ends the word — confirm intended.
			first = false
			return unicode.IsLetter(r) || r == '_'
		} else {
			return unicode.IsLetter(r) || unicode.IsNumber(r) || r == '_'
		}
	})
	if err != nil {
		return lexer.handleError(err)
	}
	sbuffer := string(lexer.buffer)
	// handle keywords
	if kw, ok := lexer.Keywords[sbuffer]; ok {
		return lexer.MakeTokenFromKeyword(kw)
	}

	return lexer.MakeToken(kind)
}
- func (lexer *Lexer) LexWord() Token {
- return lexer.LexWordOrType(TokenKindWord)
- }
- func (lexer *Lexer) LexType() Token {
- return lexer.LexWordOrType(TokenKindType)
- }
- func (lexer *Lexer) LexSymbol() Token {
- var err error
- _, err = lexer.Skip()
- if err != nil {
- return lexer.handleError(err)
- }
- _, err = lexer.NextWhile(func(r rune) bool {
- return !isSpaceOrEOX(r)
- })
- if err != nil {
- return lexer.handleError(err)
- }
- return lexer.MakeToken(TokenKindSymbol)
- }
- func (lexer *Lexer) LexBuiltin() Token {
- var err error
- _, err = lexer.Skip()
- if err != nil {
- return lexer.handleError(err)
- }
- _, err = lexer.NextWhile(func(r rune) bool {
- return !isSpaceOrEOX(r)
- })
- if err != nil {
- return lexer.handleError(err)
- }
-
- return lexer.MakeBuiltinToken()
- }
// skipSpaceAndCommentAndPeek skips any interleaved run of whitespace
// and comments, then returns the first meaningful rune without
// consuming it.
func (lexer *Lexer) skipSpaceAndCommentAndPeek() (rune, error) {
	r, err := lexer.Peek()
	if err != nil {
		return r, err
	}
	i := 0 // iteration counter, only used for debug logging
	for isSpace(r) || isComment(r) {
		if isSpace(r) {
			err = lexer.SkipSpace()
		} else if isComment(r) {
			err = lexer.SkipComment()
		}
		if err != nil {
			return r, err
		}
		i++
		r, err = lexer.Peek()
		lexer.LogDebug("Peeked again: >%c< %v %v %d", r, isSpace(r), isComment(r), i)
		if err != nil {
			return r, err
		}
	}
	return r, err
}
- func (lexer *Lexer) LexEOX() Token {
- lexer.Next()
- _, err := lexer.skipSpaceAndCommentAndPeek()
- if err != nil {
- return lexer.handleError(err)
- }
-
- _, err = lexer.NextWhile(func(r rune) bool {
- return isSpaceOrEOX(r) || r == '\n' || r == '.'
- })
- if err != nil {
- return lexer.handleError(err)
- }
-
- return lexer.MakeToken(TokenKindEOX)
- }
// lex dispatches on the first meaningful rune to the matching
// specialized lexer method and returns the token produced. Read errors
// and EOF are converted to error/EOF tokens.
func (lexer *Lexer) lex() Token {
	r, err := lexer.skipSpaceAndCommentAndPeek()
	lexer.LogDebug(" After skip: >%c< >%v<\n", r, err)
	if err != nil {
		return lexer.handleError(err)
	}
	// Digits or a sign start a number; LexNumber falls back to an
	// operator when the sign is not followed by a digit.
	if unicode.IsDigit(r) || r == '-' || r == '+' {
		return lexer.LexNumber()
	}
	if r == '\n' || r == '.' {
		return lexer.LexEOX()
	}
	if r == '"' {
		return lexer.LexString()
	}
	if r == '`' {
		return lexer.LexLongString()
	}

	if r == '!' {
		return lexer.LexBuiltin()
	}
	if r == ':' {
		return lexer.LexSymbol()
	}

	if isOperator(r) {
		return lexer.LexOperator()
	}
	// Single-rune structural tokens: the rune value doubles as the
	// token kind for these cases.
	switch TokenKind(r) {
	case TokenKindGet:
		fallthrough
	case TokenKindSet:
		fallthrough
	case TokenKindOpenBlock:
		fallthrough
	case TokenKindCloseBlock:
		fallthrough
	case TokenKindOpenList:
		fallthrough
	case TokenKindCloseList:
		fallthrough
	case TokenKindOpenParen:
		fallthrough
	case TokenKindCloseParen:
		lexer.Next()
		return lexer.MakeToken(TokenKind(r))
	default:
	}
	// Identifiers: capitalized ones are types, others are words.
	if unicode.IsLetter(r) || r == '_' {
		if unicode.IsUpper(r) {
			return lexer.LexType()
		} else {
			return lexer.LexWord()
		}
	}

	// EOF character
	if r == 0x7f {
		return lexer.MakeEOFToken()
	}
	return lexer.MakeErrorfToken("Unknown character: %c", r)
}
- func (lexer *Lexer) Lex() Token {
- res := lexer.lex()
- lexer.ClearBuffer() // ensure buffer is cleared after lexing, always.
- return res
- }
- func (lexer *Lexer) LexAll() []Token {
- var token Token
- res := make([]Token, 0)
- for token = lexer.Lex(); !token.IsLast(); token = lexer.Lex() {
- res = append(res, token)
- }
- res = append(res, token)
- return res
- }
- func NewLexer(scanner io.RuneScanner, filename string) *Lexer {
- lexer := &Lexer{}
- lexer.RuneScanner = scanner
- lexer.Position.FileName = filename
- lexer.Position.Column = 1
- lexer.Position.Line = 1
- lexer.LoggerWrapper = LoggerWrapper{nil}
- lexer.Keywords = make(map[string]*Keyword)
- return lexer
- }
- func (lexer * Lexer) Report() {
- if lexer == nil {
- fmt.Printf("Lexer: is nil\n")
- } else {
- fmt.Printf("Lexer: %s:%d:%d\n",
- lexer.Position.FileName,
- lexer.Position.Column,
- lexer.Position.Line)
- }
- }
- func (lexer *Lexer) AddKeyword(kw * Keyword) *Keyword {
- if kw != nil {
- lexer.Keywords[kw.Name] = kw
- }
- return kw
- }
- func (lexer *Lexer) NewKeyword(name string, kind TokenKind, value Value) *Keyword {
- kw := &Keyword{Name: name, TokenKind: kind, Value: value}
- return lexer.AddKeyword(kw)
- }
- func NewLexerFromString(input string) *Lexer {
- reader := strings.NewReader(input)
- return NewLexer(reader, "<input>")
- }
- func NewLexerFromFilename(filename string) (*Lexer, error) {
- read, err := os.Open(filename)
- if err == nil {
- bread := bufio.NewReader(read)
- lex := NewLexer(bread, filename)
- return lex, nil
- }
- return nil, err
- }
|