|
@@ -0,0 +1,327 @@
|
|
|
+package flexer
|
|
|
+
|
|
|
+import "fmt"
|
|
|
+import "regexp"
|
|
|
+import "strings"
|
|
|
+import "strconv"
|
|
|
+
|
|
|
+/* Flexer is a flexible regexp and rule based
|
|
|
+lexer that can be used as an implementation for
|
|
|
+generated code.
|
|
|
+*/
|
|
|
+
|
|
|
// Position describes a location in the lexer input:
// the (optional) input name, plus line and column.
// NewFlexer starts both Line and Col at 1.
type Position struct {
	Name *string // name of the input, e.g. a file name; may be nil
	Line int     // 1-based line number
	Col  int     // 1-based column number
}
|
|
|
+
|
|
|
// Kind identifies the type of a lexed token.
type Kind int
|
|
|
+
|
|
|
const (
	// SkipKind marks tokens that are meant to be skipped from the
	// results (see LexAll's documentation).
	SkipKind Kind = -30000
	// ErrorKind marks tokens that report a lexing error; the error
	// message is carried in the token text.
	ErrorKind Kind = -31000
)
|
|
|
+
|
|
|
// Token is a single lexed token: a position, a kind and its text.
type Token interface {
	// Position returns the position at which the token was lexed.
	Position() Position
	// Kind returns the kind of the token.
	Kind() Kind
	// Text returns the text of the token.
	Text() string
}
|
|
|
+
|
|
|
// Lexer is the interface of the flexible, rule-based lexer.
type Lexer interface {
	// Accept will accept a regexp and advance, returning the matches.
	// Returns nil if no matches were found.
	Accept(re *regexp.Regexp) []string
	// Position returns the current lexer position.
	Position() Position
	// EOF reports whether the lexer is at the end of its input.
	EOF() bool

	// MakeToken creates a token with the current lexer position and
	// the given kind; the text is formatted from form and args in
	// fmt.Sprintf style.
	MakeToken(kind Kind, form string, args ...interface{}) Token

	// MakeBuilderToken creates a token with the current lexer position
	// and the given kind. The text is taken from the lexer string
	// builder and that builder is reset.
	MakeBuilderToken(kind Kind) Token

	// Builder returns the lexer's string builder, which can be used to
	// append strings or runes and whose contents can be returned and
	// cleared when the token is complete.
	Builder() *strings.Builder

	// Rule adds a rule to the lexer.
	Rule(kind Kind, re, context string, act Action) error
	// LexOnce calls the lexer once.
	LexOnce() []Token

	// Context returns the current lexer context.
	Context() string
	// PushContext pushes the named context on the lexer context stack.
	PushContext(name string)
	// PopContext pops the current context from the lexer context stack.
	PopContext()
}
|
|
|
+
|
|
|
// Action is the callback run when a rule matches. It receives the
// lexer, the matching rule's kind, and the regexp matches, and returns
// the resulting tokens (possibly none).
type Action func(f Lexer, k Kind, matches ...string) []Token
|
|
|
+
|
|
|
// BasicToken is a simple value implementation of the Token interface.
type BasicToken struct {
	position Position // where the token was lexed
	kind     Kind     // kind of the token
	text     string   // text of the token
}
|
|
|
+
|
|
|
// Kind returns the kind of the token.
func (bt BasicToken) Kind() Kind {
	return bt.kind
}
|
|
|
+
|
|
|
// Position returns the position at which the token was lexed.
func (bt BasicToken) Position() Position {
	return bt.position
}
|
|
|
+
|
|
|
// Text returns the text of the token.
func (bt BasicToken) Text() string {
	return bt.text
}
|
|
|
+
|
|
|
+func MakeToken(position Position, kind Kind, form string,
|
|
|
+ args ...interface{}) BasicToken {
|
|
|
+ text := fmt.Sprintf(form, args...)
|
|
|
+ return BasicToken{position, kind, text}
|
|
|
+}
|
|
|
+
|
|
|
// ErrorToken is a Token that marks a lexing error.
// NOTE(review): this type is not constructed anywhere in this file;
// errors are reported as ErrorKind tokens instead — confirm intended use.
type ErrorToken struct {
	BasicToken
}
|
|
|
+
|
|
|
// Rule is a single rule for Flexer, based on a regular expression.
// While the rule may have submatches, the lexer will consume
// the whole match if it matches at the beginning of the current input.
type Rule struct {
	Kind           // kind of the tokens this rule produces
	*regexp.Regexp // pattern the rule matches
	Context string // lexer context in which this rule applies
	Action         // action run on a match; nil means DefaultAction
}
|
|
|
+
|
|
|
+// DefaultAction is the default action on a match.
|
|
|
+// If there is only 1 match, then that is the token,
|
|
|
+// otherwise all sub-macthes excluding the first
|
|
|
+// whole string match are the tokens.
|
|
|
+func DefaultAction(lex Lexer, k Kind, matches ...string) []Token {
|
|
|
+ if len(matches) == 1 {
|
|
|
+ tok := lex.MakeToken(k, matches[0])
|
|
|
+ return []Token{tok}
|
|
|
+ }
|
|
|
+ res := []Token{}
|
|
|
+ for i := 1; 1 < len(matches); i++ {
|
|
|
+ tok := lex.MakeToken(k, matches[i])
|
|
|
+ res = append(res, tok)
|
|
|
+ }
|
|
|
+ return res
|
|
|
+}
|
|
|
+
|
|
|
+// ContextAction returns an action that returns
|
|
|
+// no tokens but switches the lexer context and
|
|
|
+// empties the buffer.
|
|
|
+func ContextAction(context string) func(lex Lexer, k Kind, matches ...string) []Token {
|
|
|
+ return func(lex Lexer, k Kind, matches ...string) []Token {
|
|
|
+ lex.PushContext(context)
|
|
|
+ lex.Builder().Reset()
|
|
|
+ return []Token{}
|
|
|
+ }
|
|
|
+}
|
|
|
+
|
|
|
+// Returns an action that pops the context and
|
|
|
+// returns the token in the buffer with the given kind
|
|
|
+func PopAction(kind Kind) func(lex Lexer, k Kind, matches ...string) []Token {
|
|
|
+ return func(lex Lexer, k Kind, matches ...string) []Token {
|
|
|
+ lex.PopContext()
|
|
|
+ tok := lex.MakeBuilderToken(kind)
|
|
|
+ return []Token{tok}
|
|
|
+ }
|
|
|
+}
|
|
|
+
|
|
|
+// Returns an action that stores the match in the lexer buffer.
|
|
|
+func StoreAction() func(lex Lexer, k Kind, matches ...string) []Token {
|
|
|
+ return func(lex Lexer, k Kind, matches ...string) []Token {
|
|
|
+ for _, m := range matches {
|
|
|
+ lex.Builder().WriteString(m)
|
|
|
+ }
|
|
|
+ return []Token{}
|
|
|
+ }
|
|
|
+}
|
|
|
+
|
|
|
+// Returns an action that stores the match in the lexer buffer after applying UnquoteChar to apply
|
|
|
+// an escape sequence.
|
|
|
+func EscapeAction(quote byte) func(lex Lexer, k Kind, matches ...string) []Token {
|
|
|
+ return func(lex Lexer, k Kind, matches ...string) []Token {
|
|
|
+ s, _, t, e := strconv.UnquoteChar(matches[0], quote)
|
|
|
+ print("escape", s, t, e)
|
|
|
+ if e != nil {
|
|
|
+ et := lex.MakeToken(ErrorKind, "%s", e)
|
|
|
+ return []Token{et}
|
|
|
+ }
|
|
|
+ lex.Builder().WriteRune(s)
|
|
|
+ lex.Builder().WriteString(t)
|
|
|
+ return []Token{}
|
|
|
+ }
|
|
|
+}
|
|
|
+
|
|
|
+// Try tries to apply a rule.
|
|
|
+// Returns nil on no match.
|
|
|
+func (r Rule) Try(lex Lexer) []Token {
|
|
|
+ matches := lex.Accept(r.Regexp)
|
|
|
+ if matches == nil || len(matches) == 0 {
|
|
|
+ return nil
|
|
|
+ }
|
|
|
+ if r.Action != nil {
|
|
|
+ return r.Action(lex, r.Kind, matches...)
|
|
|
+ }
|
|
|
+ // No action, use default action
|
|
|
+ return DefaultAction(lex, r.Kind, matches...)
|
|
|
+}
|
|
|
+
|
|
|
// Flexer is a regexp- and rule-based implementation of the Lexer
// interface.
type Flexer struct {
	index    int      // current byte offset into input
	position Position // current line/column position
	rules    []Rule   // rules tried in order; first matching rule wins
	input    string   // the text being lexed
	name     string   // NOTE(review): never assigned in this file; the input name lives in position.Name
	contexts []string // stack of context names; the top is the active context
	builder  strings.Builder // scratch builder used to accumulate token text
}
|
|
|
+
|
|
|
// MakeToken creates a token at the flexer's current position with the
// given kind; the text is formatted from form and args.
func (f Flexer) MakeToken(kind Kind, form string, args ...interface{}) Token {
	return MakeToken(f.position, kind, form, args...)
}
|
|
|
+
|
|
|
+func (f *Flexer) MakeBuilderToken(kind Kind) Token {
|
|
|
+ text := f.builder.String()
|
|
|
+ f.builder.Reset()
|
|
|
+ return f.MakeToken(kind, text)
|
|
|
+}
|
|
|
+
|
|
|
// advanceTo advances the flexer to the given byte index,
// updating the line/column position along the way.
func (f *Flexer) advanceTo(index int) {
	start := f.index
	end := index
	for i := start; i < end; i++ {
		c := f.input[i] // Byte scan works because newlines are ASCII.
		if c == '\r' || c == '\n' {
			// Treat "\r\n" as a single line terminator by also
			// consuming the '\n' that directly follows a '\r'.
			if c == '\r' && (i+1) < len(f.input) {
				if f.input[i+1] == '\n' {
					i++
				}
			}
			f.position.Line++
			f.position.Col = 1
		} else {
			// NOTE(review): Col counts bytes, not runes, so multi-byte
			// UTF-8 characters advance the column more than once — confirm
			// whether byte columns are intended.
			f.position.Col++
		}
	}
	f.index = end
}
|
|
|
+
|
|
|
+func (f *Flexer) Accept(re *regexp.Regexp) []string {
|
|
|
+ indexes := re.FindStringSubmatchIndex(f.input[f.index:len(f.input)])
|
|
|
+ if indexes == nil || len(indexes) < 1 {
|
|
|
+ return nil
|
|
|
+ }
|
|
|
+ _, end := f.index+indexes[0], f.index+indexes[1]
|
|
|
+ matches := []string{}
|
|
|
+ for i := 1; i < len(indexes); i += 2 {
|
|
|
+ subStart, subEnd := indexes[i-1]+f.index, indexes[i]+f.index
|
|
|
+ sub := f.input[subStart:subEnd]
|
|
|
+ matches = append(matches, sub)
|
|
|
+ }
|
|
|
+ f.advanceTo(end)
|
|
|
+ return matches
|
|
|
+}
|
|
|
+
|
|
|
+func (f *Flexer) Rule(kind Kind, expr, context string, act Action) error {
|
|
|
+ re, err := regexp.Compile(`\A` + expr)
|
|
|
+ if err != nil {
|
|
|
+ return err
|
|
|
+ }
|
|
|
+ rule := Rule{kind, re, context, act}
|
|
|
+ f.rules = append(f.rules, rule)
|
|
|
+ return nil
|
|
|
+}
|
|
|
+
|
|
|
// PushContext pushes the named context onto the lexer context stack.
func (f *Flexer) PushContext(context string) {
	f.contexts = append(f.contexts, context)
}
|
|
|
+
|
|
|
+func (f *Flexer) Context() string {
|
|
|
+ context := ""
|
|
|
+ clen := len(f.contexts)
|
|
|
+ if clen > 0 {
|
|
|
+ context = f.contexts[clen-1]
|
|
|
+ }
|
|
|
+ return context
|
|
|
+}
|
|
|
+
|
|
|
+func (f *Flexer) PopContext() {
|
|
|
+ clen := len(f.contexts)
|
|
|
+ if clen > 0 {
|
|
|
+ f.contexts = f.contexts[0 : clen-1]
|
|
|
+ }
|
|
|
+}
|
|
|
+
|
|
|
// Builder returns the flexer's string builder, which actions can use
// to accumulate token text across several matches.
func (f *Flexer) Builder() *strings.Builder {
	return &f.builder
}
|
|
|
+
|
|
|
+// Runs the lexer once.
|
|
|
+// Return nil if no more progress can be made
|
|
|
+func (f *Flexer) LexOnce() []Token {
|
|
|
+ for _, rule := range f.rules {
|
|
|
+ if rule.Context != f.Context() {
|
|
|
+ continue
|
|
|
+ }
|
|
|
+ tokens := rule.Try(f)
|
|
|
+ if tokens != nil {
|
|
|
+ return tokens
|
|
|
+ }
|
|
|
+ }
|
|
|
+ return nil
|
|
|
+}
|
|
|
+
|
|
|
// Position returns the current position of the flexer in its input.
func (f Flexer) Position() Position {
	return f.position
}
|
|
|
+
|
|
|
// EOF reports whether the flexer has consumed all of its input.
func (f Flexer) EOF() bool {
	return f.index >= len(f.input)
}
|
|
|
+
|
|
|
+func NewFlexer(name, text string) *Flexer {
|
|
|
+ res := &Flexer{}
|
|
|
+ res.position.Line = 1
|
|
|
+ res.position.Col = 1
|
|
|
+ res.position.Name = &name
|
|
|
+ res.input = text
|
|
|
+ return res
|
|
|
+}
|
|
|
+
|
|
|
+// Lexes all tokens from the lexer until it reaches
|
|
|
+// EOF, or until it cannot progress anymore.
|
|
|
+// All tokens of kind SkipKind will be skipped
|
|
|
+// from the results.
|
|
|
+func LexAll(lex Lexer) []Token {
|
|
|
+ res := []Token{}
|
|
|
+ for !lex.EOF() {
|
|
|
+ toks := lex.LexOnce()
|
|
|
+ if toks == nil {
|
|
|
+ err := lex.MakeToken(ErrorKind, " Lexer error: no rule matches. Context:%s.", lex.Context())
|
|
|
+ res = append(res, err)
|
|
|
+ return res
|
|
|
+ }
|
|
|
+ res = append(res, toks...)
|
|
|
+ }
|
|
|
+ return res
|
|
|
+}
|