@@ -23,8 +23,11 @@ Lexer:
 package raku
 
 import (
+	"bytes"
 	"fmt"
 	"io"
+	"strings"
+	"unicode"
 )
 
 type Value string
@@ -37,8 +40,13 @@ type Position struct {
 }
 
 const (
-	TokenError TokenType = iota
-	TokenEOF
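+	// Single-rune tokens reuse their own rune as the type ('.', ','),
+	// while synthetic kinds get negative values so the ranges never clash.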
+	TokenEOS    TokenType = TokenType('.')
+	TokenComma  TokenType = TokenType(',')
+	TokenError  TokenType = -1
+	TokenWord   TokenType = -2
+	TokenEOL    TokenType = -3
+	TokenEOF    TokenType = -4
+	TokenNumber TokenType = -5
 )
 
 type Token struct {
@@ -61,27 +69,82 @@ type Lexer struct {
 	rule   LexerRule
 	Output TokenChannel
 	buffer []byte
+	runes  []rune
 }
 
 type LexerRule func(lexer *Lexer) LexerRule
 
-func (lexer *Lexer) Emit(t TokenType, v Value) {
-	tok := Token{t, v, lexer.Current}
-	lexer.Output <- tok
+func (me *Lexer) Emit(t TokenType, v Value) {
+	tok := Token{t, v, me.Current}
+	me.Output <- tok
 }
 
-func (lexer *Lexer) Error(message string, args ...interface{}) {
+func (me *Lexer) Error(message string, args ...interface{}) {
 	value := fmt.Sprintf(message, args...)
-	lexer.Emit(TokenError, Value(value))
+	me.Emit(TokenError, Value(value))
 }
 
-func LexError(lexer *Lexer) LexerRule {
-	lexer.Error("Error")
+func LexError(me *Lexer) LexerRule {
+	me.Error("Error")
 	return nil
 }
 
-func LexNormal(lexer *Lexer) LexerRule {
-	return LexError
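+// SkipComment skips a block comment #( ... ) or a line comment running
+// to the end of the line; it reports false if input ends first.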
+func (me *Lexer) SkipComment() bool {
+	if me.Peek() == '#' {
+		if me.Next() == '(' {
+			if !me.SkipNotIn(")") {
+				return false
+			}
+			me.SkipRune() // step over the closing ')'
+			return true
+		}
+		return me.SkipNotIn("\r\n")
+	}
+	return true
+}
+
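+// Each Lex* function is a LexerRule: it consumes the runes of one token,
+// emits it, and returns the rule to run next.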
+func LexWord(me *Lexer) LexerRule {
+	me.SkipWhile(unicode.IsLetter) // consume the whole word before emitting
+	me.Found(TokenWord)
+	return LexNormal
+}
+
+func LexNumber(me *Lexer) LexerRule {
+	me.SkipWhile(unicode.IsDigit) // consume the whole number before emitting
+	me.Found(TokenNumber)
+	return LexNormal
+}
+
+func LexComment(me *Lexer) LexerRule {
+	if !me.SkipComment() {
+		me.Error("Unterminated comment")
+		return LexError
+	}
+	me.Advance()
+	return LexNormal
+}
+
+func LexEOS(me *Lexer) LexerRule {
+	me.SkipRune()
+	me.Found(TokenEOS)
+	return LexNormal
+}
+
+func LexEOL(me *Lexer) LexerRule {
+	me.SkipRune()
+	me.Found(TokenEOL)
+	return LexNormal
+}
+
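+// LexNormal is the entry rule: skip blanks, then dispatch on the next
+// rune. Returning nil stops the lexer.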
+func LexNormal(me *Lexer) LexerRule {
+	me.SkipWhitespace()
+	me.Advance() // don't carry skipped blanks into the next token
+	peek := me.Peek()
+	if peek == '#' {
+		return LexComment
+	} else if peek == '.' {
+		return LexEOS
+	} else if peek == '\n' || peek == '\r' {
+		return LexEOL
+	} else if unicode.IsLetter(peek) {
+		return LexWord
+	} else if unicode.IsDigit(peek) {
+		return LexNumber
+	}
+
+	return nil
 }
 
 func OpenLexer(reader io.Reader) *Lexer {
@@ -92,13 +155,16 @@ func OpenLexer(reader io.Reader) *Lexer {
 	return lexer
 }
 
-func (me *Lexer) ReadReader() (bool, error) {
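+// ReadReaderOnce pulls one chunk from the reader into the buffer and
+// reports whether more input may follow.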
+func (me *Lexer) ReadReaderOnce() (bool, error) {
 	buffer := make([]byte, 1024)
 	n, err := me.Reader.Read(buffer)
+	fmt.Printf("read %v %d %v\n", buffer[:n], n, err)
 	if n > 0 {
-		me.buffer = append(me.buffer, buffer...)
+		me.buffer = append(me.buffer, buffer[:n]...)
+		fmt.Printf("append %s\n", me.buffer)
 	}
+
 	if err == io.EOF {
-		me.Emit(TokenEOF, "")
-		return true, nil
+		// Reporting "more" here would make the caller read past EOF forever;
+		// EOF just means there is nothing left, and Next() emits TokenEOF.
+		return false, nil
@@ -109,35 +175,122 @@ func (me *Lexer) ReadReader() (bool, error) {
 	return false, nil
 }
 
-func (me *Lexer) Start() {
-	more, err := me.ReadReader()
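+// ReadReader slurps the whole reader into memory and decodes it to runes;
+// it reports whether reading succeeded (a clean EOF counts as success).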
+func (me *Lexer) ReadReader() bool {
+	me.buffer = make([]byte, 0)
+	more, err := me.ReadReaderOnce()
 	for err == nil && more {
-		more, err = me.ReadReader()
+		more, err = me.ReadReaderOnce()
+	}
+	me.runes = bytes.Runes(me.buffer)
+
+	return err == nil || err == io.EOF
+}
+
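+// Peek returns the rune under the cursor, or a '\000' sentinel once the
+// cursor is past the end of input; the Skip helpers rely on that sentinel.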
+func (me *Lexer) Peek() rune {
+	if me.Current.Index >= len(me.runes) {
+		return '\000'
+	}
+	return me.runes[me.Current.Index]
+}
+
+func (me *Lexer) PeekNext() rune {
+	if me.Current.Index+1 >= len(me.runes) {
+		return '\000'
+	}
+	return me.runes[me.Current.Index+1]
+}
+
+func (me *Lexer) Next() rune {
+	if me.Peek() == '\n' {
+		me.Current.Column = 0
+		me.Current.Row++
+	} else {
+		me.Current.Column++
+	}
+	me.Current.Index++
+	if me.Current.Index >= len(me.runes) {
+		me.Emit(TokenEOF, "")
+	}
+	return me.Peek()
+}
+
+func (me *Lexer) Previous() rune {
+	if me.Current.Index > 0 {
+		me.Current.Index--
+
+		if me.Peek() == '\n' {
+			me.Current.Column = 0
+			me.Current.Row--
+		}
 	}
 	return me.Peek()
+}
+
+func (me *Lexer) SkipRune() {
+	me.Next()
+}
+
-	if err != nil {
-		return
+func (me *Lexer) SkipIn(set string) bool {
+	for strings.ContainsRune(set, me.Peek()) {
+		if me.Next() == '\000' {
+			return false
+		}
 	}
+	return true
+}
+
-	rule := LexNormal
-	for rule != nil {
-		rule = rule(me)
+func (me *Lexer) SkipNotIn(set string) bool {
+	for !strings.ContainsRune(set, me.Peek()) {
+		if me.Next() == '\000' {
+			return false
+		}
 	}
+	return true
+}
+
+func (me *Lexer) SkipWhile(shouldSkip func(r rune) bool) bool {
+	for shouldSkip(me.Peek()) {
+		if me.Next() == '\000' {
+			return false
+		}
+	}
+	return true
+}
+
+func (me *Lexer) SkipWhitespace() {
+	me.SkipIn(" \t")
+}
+
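+// Advance and Retry manage the token span: Last marks where the current
+// token started, Current is the cursor. Found emits the runes between
+// them as one token and moves Last forward.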
+func (me *Lexer) Advance() {
+	me.Last = me.Current
+}
+
+func (me *Lexer) Retry() {
+	me.Current = me.Last
+}
+
+func (me *Lexer) Found(kind TokenType) {
+	value := me.runes[me.Last.Index:me.Current.Index]
+	svalue := string(value)
+	me.Emit(kind, Value(svalue))
+	me.Advance()
+}
+
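+// Start runs the lexer: read all input, then apply rules until one
+// returns nil, and close the output channel when done.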
+func (me *Lexer) Start() {
+	if me.ReadReader() {
+		rule := LexNormal
+		for rule != nil {
+			rule = rule(me)
+		}
+	}
 	close(me.Output)
 }
 
-/*
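+// TryLexing drives the lexer from a goroutine and prints every token;
+// it exists for manual testing.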
 func (me *Lexer) TryLexing() {
-	go {
-		me.Start()
-	}
+	go me.Start()
 	for token := range me.Output {
-		fmt.Println("Token %s", token)
+		fmt.Printf("Token %s\n", token)
 	}
 }
-*/
 
 type Parser struct {
 	Lexer