|
@@ -19,12 +19,10 @@ import (
|
|
)
|
|
)
|
|
|
|
|
|
|
|
|
|
-type Position struct {
|
|
|
|
- FileName string
|
|
|
|
- Line int
|
|
|
|
- Column int
|
|
|
|
-}
|
|
|
|
|
|
|
|
|
|
+
|
|
|
|
+/* A Lexer splits scanned input into tokens.
|
|
|
|
+ */
|
|
type Lexer struct {
|
|
type Lexer struct {
|
|
Position
|
|
Position
|
|
Index int
|
|
Index int
|
|
@@ -35,41 +33,42 @@ type Lexer struct {
|
|
}
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
+func (lexer * Lexer) ClearBuffer() {
|
|
|
|
+ lexer.buffer = make([]rune, 0)
|
|
|
|
+}
|
|
|
|
|
|
-/** Token Kind. Uses a rune to easily handle single character tokens. */
|
|
|
|
-type TokenKind rune
|
|
|
|
-
|
|
|
|
-const (
|
|
|
|
- TokenKindInteger = TokenKind('i')
|
|
|
|
- TokenKindFloat = TokenKind('f')
|
|
|
|
- TokenKindString = TokenKind('s')
|
|
|
|
- TokenKindBoolean = TokenKind('b')
|
|
|
|
- TokenKindWord = TokenKind('w')
|
|
|
|
- TokenKindType = TokenKind('t')
|
|
|
|
- TokenKindGet = TokenKind('$')
|
|
|
|
- TokenKindSet = TokenKind('=')
|
|
|
|
- TokenKindOpenBlock = TokenKind('{')
|
|
|
|
- TokenKindCloseBlock = TokenKind('}')
|
|
|
|
- TokenKindOpenList = TokenKind('[')
|
|
|
|
- TokenKindCloseList = TokenKind(']')
|
|
|
|
- TokenKindOpenParen = TokenKind('(')
|
|
|
|
- TokenKindCloseParen = TokenKind(')')
|
|
|
|
- TokenKindError = TokenKind('!')
|
|
|
|
- TokenKindEOX = TokenKind('\n')
|
|
|
|
-)
|
|
|
|
|
|
+func (lexer * Lexer) MakeToken(kind TokenKind) Token {
|
|
|
|
+ val := StringValue(string(lexer.buffer))
|
|
|
|
+ lexer.ClearBuffer()
|
|
|
|
+ return NewToken(kind, val, lexer.Position)
|
|
|
|
+}
|
|
|
|
+
|
|
|
|
+func (lexer Lexer) MakeErrorToken(err error) Token {
|
|
|
|
+ return NewToken(TokenKindError, err.Error(), lexer.Position)
|
|
|
|
+}
|
|
|
|
|
|
|
|
+func (lexer Lexer) MakeErrorfToken(format string, va ... interface{}) Token {
|
|
|
|
+ err := fmt.Errorf(format, va...)
|
|
|
|
+ return lexer.MakeErrorToken(err)
|
|
|
|
+}
|
|
|
|
|
|
|
|
|
|
-func NewToken(kind TokenKind, val Value, pos Position) Token {
|
|
|
|
- return Token{kind, val, pos}
|
|
|
|
|
|
+func (lexer Lexer) MakeEOFToken() Token {
|
|
|
|
+ return NewToken(TokenKindEOF, "", lexer.Position)
|
|
}
|
|
}
|
|
|
|
|
|
-func (lexer Lexer) MakeToken(kind TokenKind) Token {
|
|
|
|
- val := StringValue(string(lexer.buffer))
|
|
|
|
- return NewToken(kind, val, lexer.Position)
|
|
|
|
|
|
+
|
|
|
|
+func (lexer * Lexer) Peek() (rune, error) {
|
|
|
|
+ r, _, err := lexer.RuneScanner.ReadRune()
|
|
|
|
+ err2 := lexer.RuneScanner.UnreadRune()
|
|
|
|
+
|
|
|
|
+ if err == nil {
|
|
|
|
+ err = err2
|
|
|
|
+ }
|
|
|
|
+ return r, err
|
|
}
|
|
}
|
|
|
|
|
|
-func (lexer * Lexer) Next() (rune, error) {
|
|
|
|
|
|
+func (lexer * Lexer) Next() (rune, error) {
|
|
r, _, err := lexer.RuneScanner.ReadRune()
|
|
r, _, err := lexer.RuneScanner.ReadRune()
|
|
if err != nil {
|
|
if err != nil {
|
|
return 0, err
|
|
return 0, err
|
|
@@ -85,7 +84,8 @@ func (lexer * Lexer) Next() (rune, error) {
|
|
return lexer.buffer[len(lexer.buffer) - 1], nil
|
|
return lexer.buffer[len(lexer.buffer) - 1], nil
|
|
}
|
|
}
|
|
|
|
|
|
-func (lexer * Lexer) Previous() error {
|
|
|
|
|
|
+func (lexer * Lexer) oldPrevious() error {
|
|
|
|
+ fmt.Printf("Previous: now %c \n", lexer.Current)
|
|
err := lexer.RuneScanner.UnreadRune()
|
|
err := lexer.RuneScanner.UnreadRune()
|
|
if err != nil {
|
|
if err != nil {
|
|
return err
|
|
return err
|
|
@@ -99,8 +99,8 @@ func (lexer * Lexer) Previous() error {
|
|
lexer.buffer = lexer.buffer[0: len(lexer.buffer) - 1];
|
|
lexer.buffer = lexer.buffer[0: len(lexer.buffer) - 1];
|
|
|
|
|
|
if r == '\n' {
|
|
if r == '\n' {
|
|
- lexer.Position.Column = 1
|
|
|
|
- lexer.Position.Line++
|
|
|
|
|
|
+ lexer.Position.Column = 1 // XXX wrong
|
|
|
|
+ lexer.Position.Line--
|
|
}
|
|
}
|
|
|
|
|
|
lexer.Current = r
|
|
lexer.Current = r
|
|
@@ -109,141 +109,192 @@ func (lexer * Lexer) Previous() error {
|
|
}
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
+func (lexer * Lexer) NextIf(predicate func(rune) bool) (bool, error) {
|
|
|
|
+ r, err := lexer.Peek()
|
|
|
|
+ if err != nil {
|
|
|
|
+ return false, err
|
|
|
|
+ }
|
|
|
|
+ if (predicate(r)) {
|
|
|
|
+ r, err = lexer.Next()
|
|
|
|
+ if err != nil {
|
|
|
|
+ return true, err
|
|
|
|
+ }
|
|
|
|
+ return true, nil
|
|
|
|
+ }
|
|
|
|
+ return false, nil
|
|
|
|
+}
|
|
|
|
+
|
|
|
|
+func (lexer * Lexer) NextWhile(predicate func(rune) bool) (bool, error) {
|
|
|
|
+ result := true
|
|
|
|
+ ok, err := lexer.NextIf(predicate)
|
|
|
|
+ result = result || ok
|
|
|
|
+ for ; ok && (err == nil) ; ok, err = lexer.NextIf(predicate) {
|
|
|
|
+ result = result || ok
|
|
|
|
+ }
|
|
|
|
+ return result, err
|
|
|
|
+}
|
|
|
|
+
|
|
|
|
+
|
|
|
|
// isSpace reports whether r is a blank or a horizontal tab. Newlines are
// deliberately not matched by this predicate.
func isSpace(r rune) bool {
	switch r {
	case ' ', '\t':
		return true
	default:
		return false
	}
}
|
|
|
|
+
|
|
|
|
|
|
-
|
|
|
|
func (lexer * Lexer) SkipSpace() (error) {
|
|
func (lexer * Lexer) SkipSpace() (error) {
|
|
- var r rune
|
|
|
|
- var err error
|
|
|
|
- r = lexer.Current
|
|
|
|
-
|
|
|
|
- for unicode.IsSpace(r) {
|
|
|
|
- r, err = lexer.Next()
|
|
|
|
- if err != nil {
|
|
|
|
- return err
|
|
|
|
- }
|
|
|
|
|
|
+ _, err := lexer.NextWhile(isSpace)
|
|
|
|
+ if err == nil {
|
|
|
|
+ lexer.ClearBuffer()
|
|
|
|
+ }
|
|
|
|
+ return err
|
|
|
|
+ }
|
|
|
|
+
|
|
|
|
+/* Handles errors including EOF by either returning an error token or an
|
|
|
|
+ * EOF token.
|
|
|
|
+ */
|
|
|
|
+func (lexer * Lexer) handleError(err error) Token {
|
|
|
|
+ if err == io.EOF {
|
|
|
|
+ return lexer.MakeEOFToken()
|
|
|
|
+ } else {
|
|
|
|
+ return lexer.MakeErrorToken(err)
|
|
}
|
|
}
|
|
- lexer.Previous()
|
|
|
|
- return nil
|
|
|
|
}
|
|
}
|
|
|
|
|
|
|
|
|
|
-func (lexer * Lexer) LexNumber() (Token, error) {
|
|
|
|
|
|
+func (lexer * Lexer) LexNumber() Token {
|
|
isFloat := false
|
|
isFloat := false
|
|
- var r rune
|
|
|
|
- var err error
|
|
|
|
|
|
|
|
- r = lexer.Current
|
|
|
|
-
|
|
|
|
- for unicode.IsDigit(r) || r == '.' {
|
|
|
|
- if r == '.' {
|
|
|
|
- if isFloat { // double . in floating point is an error
|
|
|
|
- tok := lexer.MakeToken(TokenKindError)
|
|
|
|
- err = fmt.Errorf("Double period . in floating point constant.")
|
|
|
|
- return tok, err
|
|
|
|
|
|
+ _, err := lexer.NextWhile(func (r rune) bool {
|
|
|
|
+ if unicode.IsDigit(r) {
|
|
|
|
+ return true
|
|
|
|
+ } else if r == '.' {
|
|
|
|
+ if isFloat {
|
|
|
|
+ return false // double point in floating point
|
|
} else {
|
|
} else {
|
|
- isFloat = true
|
|
|
|
|
|
+ isFloat = true
|
|
|
|
+ return true
|
|
}
|
|
}
|
|
|
|
+ } else {
|
|
|
|
+ return false
|
|
}
|
|
}
|
|
- r, err = lexer.Next()
|
|
|
|
- if err != nil {
|
|
|
|
- return lexer.MakeToken(TokenKindError), err
|
|
|
|
- }
|
|
|
|
- }
|
|
|
|
|
|
+ })
|
|
|
|
+
|
|
|
|
|
|
- lexer.Previous()
|
|
|
|
|
|
+ if err != nil {
|
|
|
|
+ return lexer.MakeErrorfToken("when parsing number: %s", err)
|
|
|
|
+ }
|
|
if isFloat {
|
|
if isFloat {
|
|
- return lexer.MakeToken(TokenKindFloat), nil
|
|
|
|
|
|
+ return lexer.MakeToken(TokenKindFloat)
|
|
} else {
|
|
} else {
|
|
- return lexer.MakeToken(TokenKindInteger), nil
|
|
|
|
- }
|
|
|
|
-
|
|
|
|
|
|
+ return lexer.MakeToken(TokenKindInteger)
|
|
|
|
+ }
|
|
|
|
+}
|
|
|
|
+
|
|
|
|
// isDoubleQuote reports whether r is the double quote character.
func isDoubleQuote(r rune) bool {
	const quote = '"'
	return r == quote
}
|
|
|
|
|
|
-func (lexer * Lexer) LexString() (Token, error) {
|
|
|
|
|
|
+func (lexer * Lexer) LexString() Token {
|
|
inEscape := false
|
|
inEscape := false
|
|
- var r rune
|
|
|
|
var err error
|
|
var err error
|
|
|
|
|
|
- r, err = lexer.Next()
|
|
|
|
|
|
+ _, err = lexer.Next()
|
|
if err != nil {
|
|
if err != nil {
|
|
- return lexer.MakeToken(TokenKindError), err
|
|
|
|
|
|
+ return lexer.handleError(err)
|
|
}
|
|
}
|
|
-
|
|
|
|
-
|
|
|
|
- for r != '"' || inEscape {
|
|
|
|
- if r == '\\' {
|
|
|
|
- // TODO escape parsing, now just a single character after it
|
|
|
|
- if inEscape { // double backslash
|
|
|
|
|
|
+
|
|
|
|
+ _, err = lexer.NextWhile(func (r rune) bool {
|
|
|
|
+ if r == '"' && !inEscape {
|
|
|
|
+ return false
|
|
|
|
+ }
|
|
|
|
+ if r == '\\' {
|
|
|
|
+ // TODO escape parsing, now just a single character after it
|
|
|
|
+ if inEscape { // double backslash
|
|
|
|
+ inEscape = false
|
|
|
|
+ } else {
|
|
|
|
+ inEscape = true
|
|
|
|
+ }
|
|
} else {
|
|
} else {
|
|
- inEscape = true
|
|
|
|
|
|
+ inEscape = false
|
|
}
|
|
}
|
|
- } else {
|
|
|
|
- inEscape = false
|
|
|
|
- }
|
|
|
|
- r, err = lexer.Next()
|
|
|
|
- if err != nil {
|
|
|
|
- return lexer.MakeToken(TokenKindError), err
|
|
|
|
- }
|
|
|
|
|
|
+ return true // still inside the string
|
|
|
|
+ })
|
|
|
|
+ if err != nil {
|
|
|
|
+ return lexer.MakeErrorfToken("when parsing string: %s", err)
|
|
}
|
|
}
|
|
|
|
|
|
- return lexer.MakeToken(TokenKindString), nil
|
|
|
|
|
|
+ _, err = lexer.Next()
|
|
|
|
+ if err != nil {
|
|
|
|
+ return lexer.handleError(err)
|
|
|
|
+ }
|
|
|
|
+
|
|
|
|
+ return lexer.MakeToken(TokenKindString)
|
|
}
|
|
}
|
|
|
|
|
|
-func (lexer * Lexer) LexLongString() (Token, error) {
|
|
|
|
- var r rune
|
|
|
|
|
|
+func (lexer * Lexer) LexLongString() Token {
|
|
var err error
|
|
var err error
|
|
|
|
|
|
- r, err = lexer.Next()
|
|
|
|
|
|
+ _, err = lexer.Next()
|
|
if err != nil {
|
|
if err != nil {
|
|
- return lexer.MakeToken(TokenKindError), err
|
|
|
|
|
|
+ return lexer.handleError(err)
|
|
}
|
|
}
|
|
|
|
|
|
- for r != '`' {
|
|
|
|
- r, err = lexer.Next()
|
|
|
|
- if err != nil {
|
|
|
|
- return lexer.MakeToken(TokenKindError), err
|
|
|
|
- }
|
|
|
|
|
|
+ _, err = lexer.NextWhile(func (r rune) bool {
|
|
|
|
+ return r != '`'
|
|
|
|
+ })
|
|
|
|
+
|
|
|
|
+ if err != nil {
|
|
|
|
+ return lexer.MakeErrorfToken("when parsing long string: %s", err)
|
|
|
|
+ }
|
|
|
|
+
|
|
|
|
+ _, err = lexer.Next()
|
|
|
|
+ if err != nil {
|
|
|
|
+ return lexer.handleError(err)
|
|
}
|
|
}
|
|
|
|
|
|
- return lexer.MakeToken(TokenKindString), nil
|
|
|
|
|
|
+ return lexer.MakeToken(TokenKindString)
|
|
}
|
|
}
|
|
|
|
|
|
-func (lexer * Lexer) LexWord() (Token, error) {
|
|
|
|
- var r rune
|
|
|
|
|
|
+func (lexer * Lexer) LexWord() Token {
|
|
var err error
|
|
var err error
|
|
|
|
|
|
- r, err = lexer.Next()
|
|
|
|
|
|
+ _, err = lexer.Next()
|
|
if err != nil {
|
|
if err != nil {
|
|
- return lexer.MakeToken(TokenKindError), err
|
|
|
|
|
|
+ return lexer.handleError(err)
|
|
}
|
|
}
|
|
|
|
|
|
- for r != '`' {
|
|
|
|
- r, err = lexer.Next()
|
|
|
|
- if err != nil {
|
|
|
|
- return lexer.MakeToken(TokenKindError), err
|
|
|
|
- }
|
|
|
|
- }
|
|
|
|
|
|
+ lexer.NextWhile(func(r rune) bool {
|
|
|
|
+ return unicode.IsLetter(r)
|
|
|
|
+ })
|
|
|
|
|
|
- return lexer.MakeToken(TokenKindString), nil
|
|
|
|
|
|
+ return lexer.MakeToken(TokenKindWord)
|
|
}
|
|
}
|
|
|
|
|
|
-func (lexer * Lexer) Lex() (Token, error) {
|
|
|
|
- r, err := lexer.Next()
|
|
|
|
|
|
+func (lexer * Lexer) lex() Token {
|
|
|
|
+ r, err := lexer.Peek()
|
|
|
|
+
|
|
if err != nil {
|
|
if err != nil {
|
|
- return lexer.MakeToken(TokenKindError), err
|
|
|
|
|
|
+ return lexer.handleError(err)
|
|
}
|
|
}
|
|
|
|
|
|
- if unicode.IsSpace(r) {
|
|
|
|
- lexer.SkipSpace()
|
|
|
|
|
|
+ if isSpace(r) {
|
|
|
|
+ err = lexer.SkipSpace()
|
|
|
|
+ if err != nil {
|
|
|
|
+ return lexer.handleError(err)
|
|
|
|
+ }
|
|
|
|
+ r, err = lexer.Peek()
|
|
|
|
+ if err != nil {
|
|
|
|
+ return lexer.handleError(err)
|
|
|
|
+ }
|
|
}
|
|
}
|
|
-
|
|
|
|
|
|
|
|
|
|
+
|
|
if unicode.IsDigit(r) {
|
|
if unicode.IsDigit(r) {
|
|
return lexer.LexNumber()
|
|
return lexer.LexNumber()
|
|
}
|
|
}
|
|
|
|
|
|
if r == '\n' || r == '.' {
|
|
if r == '\n' || r == '.' {
|
|
- return lexer.MakeToken(TokenKindEOX), nil
|
|
|
|
|
|
+ lexer.Next()
|
|
|
|
+ return lexer.MakeToken(TokenKindEOX)
|
|
}
|
|
}
|
|
|
|
|
|
if r == '"' {
|
|
if r == '"' {
|
|
@@ -264,7 +315,8 @@ func (lexer * Lexer) Lex() (Token, error) {
|
|
case TokenKindCloseList : fallthrough
|
|
case TokenKindCloseList : fallthrough
|
|
case TokenKindOpenParen : fallthrough
|
|
case TokenKindOpenParen : fallthrough
|
|
case TokenKindCloseParen:
|
|
case TokenKindCloseParen:
|
|
- return lexer.MakeToken(TokenKind(r)), nil
|
|
|
|
|
|
+ lexer.Next()
|
|
|
|
+ return lexer.MakeToken(TokenKind(r))
|
|
default:
|
|
default:
|
|
}
|
|
}
|
|
|
|
|
|
@@ -272,9 +324,32 @@ func (lexer * Lexer) Lex() (Token, error) {
|
|
return lexer.LexWord()
|
|
return lexer.LexWord()
|
|
}
|
|
}
|
|
|
|
|
|
- return lexer.MakeToken(TokenKindError), fmt.Errorf("Unknown character")
|
|
|
|
|
|
+ return lexer.MakeErrorfToken("Unknown character: %c", r)
|
|
|
|
+}
|
|
|
|
+
|
|
|
|
+func (lexer * Lexer) Lex() Token {
|
|
|
|
+ res := lexer.lex()
|
|
|
|
+ lexer.ClearBuffer() // ensure buffer is cleared after lexing, always.
|
|
|
|
+ return res
|
|
}
|
|
}
|
|
|
|
|
|
|
|
+func (lexer * Lexer) LexAll() []Token {
|
|
|
|
+ var token Token
|
|
|
|
+
|
|
|
|
+ res := make([]Token, 0)
|
|
|
|
+
|
|
|
|
+ for token = lexer.Lex() ; ! token.IsLast() ; token = lexer.Lex() {
|
|
|
|
+ fmt.Printf("token: %s %v\n", token.String(), token.IsLast())
|
|
|
|
+ res = append(res, token)
|
|
|
|
+ }
|
|
|
|
+
|
|
|
|
+ fmt.Printf("Last token: %s %v\n", token.String(), token.IsLast())
|
|
|
|
+ res = append(res, token)
|
|
|
|
+
|
|
|
|
+ return res
|
|
|
|
+}
|
|
|
|
+
|
|
|
|
+
|
|
func NewLexer(scanner io.RuneScanner, filename string) Lexer {
|
|
func NewLexer(scanner io.RuneScanner, filename string) Lexer {
|
|
lexer := Lexer{}
|
|
lexer := Lexer{}
|
|
lexer.RuneScanner = scanner
|
|
lexer.RuneScanner = scanner
|