|
@@ -4,102 +4,70 @@ import "fmt"
|
|
|
import "regexp"
|
|
|
import "strings"
|
|
|
import "strconv"
|
|
|
+import . "src.eruta.nl/beoran/ll1/common"
|
|
|
|
|
|
-/* Flexer is a flexible regexp and rule based
|
|
|
+/* Flexer is a flexible regexp and lexeme based
|
|
|
lexer that can be used as an implementation for
|
|
|
generated code.
|
|
|
*/
|
|
|
|
|
|
-type Position struct {
|
|
|
- Name *string
|
|
|
- Line int
|
|
|
- Col int
|
|
|
-}
|
|
|
-
|
|
|
-type Kind int
|
|
|
-
|
|
|
-const (
|
|
|
- SkipKind Kind = -30000
|
|
|
- ErrorKind Kind = -31000
|
|
|
-)
|
|
|
-
|
|
|
-type Token interface {
|
|
|
- Position() Position
|
|
|
- Kind() Kind
|
|
|
- Text() string
|
|
|
-}
|
|
|
-
|
|
|
-type Lexer interface {
|
|
|
- // Accept will accept a regexp and advance, returning the matches.
|
|
|
- // Returns nil if no matches were found.
|
|
|
- Accept(re *regexp.Regexp) []string
|
|
|
- // Returns the current lexer position.
|
|
|
- Position() Position
|
|
|
- // Returns if the lexer is at the end or not.
|
|
|
- EOF() bool
|
|
|
-
|
|
|
- // The lexer creates a token with the current lexer position and
|
|
|
- // the given kind and text.
|
|
|
- MakeToken(kind Kind, form string, args ...interface{}) Token
|
|
|
-
|
|
|
- // The lexer creates a token with the current lexer position and
|
|
|
- // the given kind. The text is taken from the lexer string builder and
|
|
|
- // that builser is reset.
|
|
|
- MakeBuilderToken(kind Kind) Token
|
|
|
-
|
|
|
- // The lexer has a string builder, which can be used to append
|
|
|
- // strings or runes to and which can be returned and cleared when the
|
|
|
- // token is complete.
|
|
|
- Builder() *strings.Builder
|
|
|
-
|
|
|
- // Adds a rule to the lexer.
|
|
|
- Rule(kind Kind, re, context string, act Action) error
|
|
|
- // Calls the lexer once.
|
|
|
- LexOnce() []Token
|
|
|
-
|
|
|
- // Returns the current lexer context
|
|
|
- Context() string
|
|
|
- // Pushes the named context on the lexer context stack
|
|
|
- PushContext(name string)
|
|
|
- // Pops the current context from the lexer context stack.
|
|
|
- PopContext()
|
|
|
-}
|
|
|
-
|
|
|
-type Action func(f Lexer, k Kind, matches ...string) []Token
|
|
|
-
|
|
|
type BasicToken struct {
|
|
|
- position Position
|
|
|
+ location Location
|
|
|
kind Kind
|
|
|
text string
|
|
|
+ value Value
|
|
|
}
|
|
|
|
|
|
func (bt BasicToken) Kind() Kind {
|
|
|
return bt.kind
|
|
|
}
|
|
|
|
|
|
-func (bt BasicToken) Position() Position {
|
|
|
- return bt.position
|
|
|
+func (bt BasicToken) Location() Location {
|
|
|
+ return bt.location
|
|
|
}
|
|
|
|
|
|
func (bt BasicToken) Text() string {
|
|
|
return bt.text
|
|
|
}
|
|
|
|
|
|
-func MakeToken(position Position, kind Kind, form string,
|
|
|
+// Value returns the token's value. When no explicit value was set,
+// the raw token text is returned wrapped as a StringValue.
+func (bt BasicToken) Value() Value {
|
|
|
+	// Check the field, not the method: calling bt.Value() here
+	// recursed forever and overflowed the stack.
+	if bt.value == nil {
|
|
|
+		return StringValue(bt.text)
|
|
|
+	}
|
|
|
+	return bt.value
|
|
|
+}
|
|
|
+
|
|
|
+// MakeToken creates a BasicToken at the given location with the given
+// kind. The token text is formatted from form and args, and the value
+// defaults to that text wrapped as a StringValue.
+func MakeToken(at Location, kind Kind, form string,
|
|
|
	args ...interface{}) BasicToken {
|
|
|
	text := fmt.Sprintf(form, args...)
|
|
|
-	return BasicToken{position, kind, text}
|
|
|
+	return BasicToken{at, kind, text, StringValue(text)}
|
|
|
+}
|
|
|
+
|
|
|
+// MakeValueToken creates a BasicToken at the given location with the
+// given kind and an explicit value; the token text is the value's
+// string form.
+func MakeValueToken(at Location, kind Kind, value Value) BasicToken {
|
|
|
+	text := value.String()
|
|
|
+	return BasicToken{at, kind, text, value}
|
|
|
}
|
|
|
|
|
|
type ErrorToken struct {
|
|
|
BasicToken
|
|
|
}
|
|
|
|
|
|
-/* A rule for Flexer is based on a regular expression.
|
|
|
-* While the rule may have submatches, the lexer will consume
|
|
|
+// MakeErrorToken creates an ErrorToken at the given location whose
+// value wraps the error formatted from form and args.
+func MakeErrorToken(at Location, form string, args ...interface{}) ErrorToken {
|
|
|
+	err := fmt.Errorf(form, args...)
|
|
|
+	tok := MakeValueToken(at, ErrorKind, ErrorValue{err})
|
|
|
+	return ErrorToken{tok}
|
|
|
+}
|
|
|
+
|
|
|
+// Error implements the error interface: it renders the token's
+// location followed by its message text.
+func (e ErrorToken) Error() string {
|
|
|
+	return fmt.Sprintf("%s%s", e.Location(), e.text)
|
|
|
+}
|
|
|
+
|
|
|
+/* Lexeme for Flexer is based on a regular expression.
|
|
|
+* While the lexeme may have submatches, the lexer will consume
|
|
|
* the whole match if it matches at the beginning of the current input.
|
|
|
*/
|
|
|
-type Rule struct {
|
|
|
+type Lexeme struct {
|
|
|
Kind
|
|
|
*regexp.Regexp
|
|
|
Context string
|
|
@@ -170,9 +138,9 @@ func EscapeAction(quote byte) func(lex Lexer, k Kind, matches ...string) []Token
|
|
|
}
|
|
|
}
|
|
|
|
|
|
-// Try tries to apply a rule.
|
|
|
+// Try tries to apply a lexeme.
|
|
|
// Returns nil on no match.
|
|
|
-func (r Rule) Try(lex Lexer) []Token {
|
|
|
+func (r Lexeme) Try(lex Lexer) []Token {
|
|
|
matches := lex.Accept(r.Regexp)
|
|
|
if matches == nil || len(matches) == 0 {
|
|
|
return nil
|
|
@@ -186,8 +154,8 @@ func (r Rule) Try(lex Lexer) []Token {
|
|
|
|
|
|
type Flexer struct {
|
|
|
index int
|
|
|
- position Position
|
|
|
- rules []Rule
|
|
|
+ location Location
|
|
|
+ lexemes []Lexeme
|
|
|
input string
|
|
|
name string
|
|
|
contexts []string
|
|
@@ -195,7 +163,7 @@ type Flexer struct {
|
|
|
}
|
|
|
|
|
|
func (f Flexer) MakeToken(kind Kind, form string, args ...interface{}) Token {
|
|
|
- return MakeToken(f.position, kind, form, args...)
|
|
|
+ return MakeToken(f.location, kind, form, args...)
|
|
|
}
|
|
|
|
|
|
func (f *Flexer) MakeBuilderToken(kind Kind) Token {
|
|
@@ -205,7 +173,7 @@ func (f *Flexer) MakeBuilderToken(kind Kind) Token {
|
|
|
}
|
|
|
|
|
|
// Advances the flexer to the given index,
|
|
|
-// updating the position.
|
|
|
+// updating the Location.
|
|
|
func (f *Flexer) advanceTo(index int) {
|
|
|
start := f.index
|
|
|
end := index
|
|
@@ -217,10 +185,10 @@ func (f *Flexer) advanceTo(index int) {
|
|
|
i++
|
|
|
}
|
|
|
}
|
|
|
- f.position.Line++
|
|
|
- f.position.Col = 1
|
|
|
+ f.location.Line++
|
|
|
+ f.location.Col = 1
|
|
|
} else {
|
|
|
- f.position.Col++
|
|
|
+ f.location.Col++
|
|
|
}
|
|
|
}
|
|
|
f.index = end
|
|
@@ -242,27 +210,27 @@ func (f *Flexer) Accept(re *regexp.Regexp) []string {
|
|
|
return matches
|
|
|
}
|
|
|
|
|
|
-func (f *Flexer) Rule(kind Kind, expr, context string, act Action) error {
|
|
|
+func (f *Flexer) Lexeme(kind Kind, expr, context string, act Action) error {
|
|
|
re, err := regexp.Compile(`\A` + expr)
|
|
|
if err != nil {
|
|
|
return err
|
|
|
}
|
|
|
- rule := Rule{kind, re, context, act}
|
|
|
- f.rules = append(f.rules, rule)
|
|
|
+ lexeme := Lexeme{kind, re, context, act}
|
|
|
+ f.lexemes = append(f.lexemes, lexeme)
|
|
|
return nil
|
|
|
}
|
|
|
|
|
|
-func (f *Flexer) EscapedStringRule(kind Kind, first, last, context string) {
|
|
|
- f.Rule(SkipKind, first, "", ContextAction(context))
|
|
|
- f.Rule(kind, last, context, PopAction(kind))
|
|
|
- f.Rule(SkipKind, `\\[etnru][0-9a-f]*`, context, EscapeAction(last[0]))
|
|
|
- f.Rule(SkipKind, `.`, context, StoreAction())
|
|
|
+func (f *Flexer) EscapedStringLexeme(kind Kind, first, last, context string) {
|
|
|
+ f.Lexeme(SkipKind, first, "", ContextAction(context))
|
|
|
+ f.Lexeme(kind, last, context, PopAction(kind))
|
|
|
+ f.Lexeme(SkipKind, `\\[etnru][0-9a-f]*`, context, EscapeAction(last[0]))
|
|
|
+ f.Lexeme(SkipKind, `.`, context, StoreAction())
|
|
|
}
|
|
|
|
|
|
-func (f *Flexer) RawStringRule(kind Kind, first, last, context string) {
|
|
|
- f.Rule(SkipKind, first, "", ContextAction(context))
|
|
|
- f.Rule(kind, last, context, PopAction(kind))
|
|
|
- f.Rule(SkipKind, `.`, context, StoreAction())
|
|
|
+func (f *Flexer) RawStringLexeme(kind Kind, first, last, context string) {
|
|
|
+ f.Lexeme(SkipKind, first, "", ContextAction(context))
|
|
|
+ f.Lexeme(kind, last, context, PopAction(kind))
|
|
|
+ f.Lexeme(SkipKind, `.`, context, StoreAction())
|
|
|
}
|
|
|
|
|
|
func (f *Flexer) PushContext(context string) {
|
|
@@ -292,11 +260,11 @@ func (f *Flexer) Builder() *strings.Builder {
|
|
|
// Runs the lexer once.
|
|
|
// Return nil if no more progress can be made
|
|
|
func (f *Flexer) LexOnce() []Token {
|
|
|
- for _, rule := range f.rules {
|
|
|
- if rule.Context != f.Context() {
|
|
|
+ for _, lexeme := range f.lexemes {
|
|
|
+ if lexeme.Context != f.Context() {
|
|
|
continue
|
|
|
}
|
|
|
- tokens := rule.Try(f)
|
|
|
+ tokens := lexeme.Try(f)
|
|
|
if tokens != nil {
|
|
|
return tokens
|
|
|
}
|
|
@@ -304,8 +272,8 @@ func (f *Flexer) LexOnce() []Token {
|
|
|
return nil
|
|
|
}
|
|
|
|
|
|
-func (f Flexer) Position() Position {
|
|
|
- return f.position
|
|
|
+func (f Flexer) Location() Location {
|
|
|
+ return f.location
|
|
|
}
|
|
|
|
|
|
func (f Flexer) EOF() bool {
|
|
@@ -314,9 +282,9 @@ func (f Flexer) EOF() bool {
|
|
|
|
|
|
func NewFlexer(name, text string) *Flexer {
|
|
|
res := &Flexer{}
|
|
|
- res.position.Line = 1
|
|
|
- res.position.Col = 1
|
|
|
- res.position.Name = &name
|
|
|
+ res.location.Line = 1
|
|
|
+ res.location.Col = 1
|
|
|
+ res.location.Name = &name
|
|
|
res.input = text
|
|
|
return res
|
|
|
}
|
|
@@ -341,7 +309,7 @@ func LexAll(lex Lexer, skips ...Kind) []Token {
|
|
|
for !lex.EOF() {
|
|
|
toks := lex.LexOnce()
|
|
|
if toks == nil {
|
|
|
- err := lex.MakeToken(ErrorKind, " Lexer error: no rule matches. Context:%s.", lex.Context())
|
|
|
+ err := lex.MakeToken(ErrorKind, " Lexer error: no lexeme matches. Context:%s.", lex.Context())
|
|
|
res = append(res, err)
|
|
|
return res
|
|
|
}
|