123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326 |
- package flexer
- import "fmt"
- import "regexp"
- import "strings"
- import "strconv"
- import . "src.eruta.nl/beoran/ll1/common"
- /* Flexer is a flexible regexp and lexeme based
- lexer that can be used as an implementation for
- generated code.
- */
// BasicToken is the default implementation of a lexer token.
// It bundles a source location, a token kind, the matched text,
// and an optional parsed value.
type BasicToken struct {
	location Location // where in the input the token was found
	kind     Kind     // kind of token
	text     string   // literal matched text
	value    Value    // optional parsed value; may be nil (see Value())
}
// Kind returns the kind of the token.
func (bt BasicToken) Kind() Kind {
	return bt.kind
}
// Location returns the location in the input where the token was found.
func (bt BasicToken) Location() Location {
	return bt.location
}
// Text returns the literal text the token matched.
func (bt BasicToken) Text() string {
	return bt.text
}
- func (bt BasicToken) Value() Value {
- if bt.Value() == nil {
- return StringValue(bt.text)
- } else {
- return bt.value
- }
- }
- func MakeToken(Location Location, kind Kind, form string,
- args ...interface{}) BasicToken {
- text := fmt.Sprintf(form, args...)
- return BasicToken{Location, kind, text, StringValue(text)}
- }
- func MakeValueToken(Location Location, kind Kind, value Value) BasicToken {
- text := value.String()
- return BasicToken{Location, kind, text, value}
- }
// ErrorToken is a token that represents a lexer error.
// It embeds a BasicToken whose kind is ErrorKind and whose
// value is an ErrorValue.
type ErrorToken struct {
	BasicToken
}
- func MakeErrorToken(Location Location, form string, args ...interface{}) ErrorToken {
- err := fmt.Errorf(form, args...)
- tok := MakeValueToken(Location, ErrorKind, ErrorValue{err})
- return ErrorToken{tok}
- }
// Error implements the error interface for ErrorToken, formatting
// the token's location directly followed by the error text.
func (e ErrorToken) Error() string {
	return fmt.Sprintf("%s%s", e.Location(), e.text)
}
/* Lexeme for Flexer is based on a regular expression.
 * While the lexeme may have submatches, the lexer will consume
 * the whole match if it matches at the beginning of the current input.
 */
type Lexeme struct {
	Kind           // kind of token this lexeme produces
	*regexp.Regexp // expression this lexeme matches (anchored with \A by Flexer.Lexeme)
	Context string // lexer context in which this lexeme applies; "" is the default context
	Action         // action to run on a match; nil means DefaultAction is used
}
- // DefaultAction is the default action on a match.
- // If there is only 1 match, then that is the token,
- // otherwise all sub-macthes excluding the first
- // whole string match are the tokens.
- func DefaultAction(lex Lexer, k Kind, matches ...string) []Token {
- if len(matches) == 1 {
- tok := lex.MakeToken(k, matches[0])
- return []Token{tok}
- }
- res := []Token{}
- for i := 1; 1 < len(matches); i++ {
- tok := lex.MakeToken(k, matches[i])
- res = append(res, tok)
- }
- return res
- }
- // ContextAction returns an action that returns
- // no tokens but switches the lexer context and
- // empties the buffer.
- func ContextAction(context string) func(lex Lexer, k Kind, matches ...string) []Token {
- return func(lex Lexer, k Kind, matches ...string) []Token {
- lex.PushContext(context)
- lex.Builder().Reset()
- return []Token{}
- }
- }
- // Returns an action that pops the context and
- // returns the token in the buffer with the given kind
- func PopAction(kind Kind) func(lex Lexer, k Kind, matches ...string) []Token {
- return func(lex Lexer, k Kind, matches ...string) []Token {
- lex.PopContext()
- tok := lex.MakeBuilderToken(kind)
- return []Token{tok}
- }
- }
- // Returns an action that stores the match in the lexer buffer.
- func StoreAction() func(lex Lexer, k Kind, matches ...string) []Token {
- return func(lex Lexer, k Kind, matches ...string) []Token {
- for _, m := range matches {
- lex.Builder().WriteString(m)
- }
- return []Token{}
- }
- }
- // Returns an action that stores the match in the lexer buffer after applying UnquoteChar to apply
- // an escape sequence.
- func EscapeAction(quote byte) func(lex Lexer, k Kind, matches ...string) []Token {
- return func(lex Lexer, k Kind, matches ...string) []Token {
- s, _, t, e := strconv.UnquoteChar(matches[0], quote)
- print("escape", s, t, e)
- if e != nil {
- et := lex.MakeToken(ErrorKind, "%s", e)
- return []Token{et}
- }
- lex.Builder().WriteRune(s)
- lex.Builder().WriteString(t)
- return []Token{}
- }
- }
- // Try tries to apply a lexeme.
- // Returns nil on no match.
- func (r Lexeme) Try(lex Lexer) []Token {
- matches := lex.Accept(r.Regexp)
- if matches == nil || len(matches) == 0 {
- return nil
- }
- if r.Action != nil {
- return r.Action(lex, r.Kind, matches...)
- }
- // No action, use default action
- return DefaultAction(lex, r.Kind, matches...)
- }
// Flexer is a flexible, regexp-based lexer. It tries its registered
// lexemes in order against the remaining input, tracking the current
// location, a stack of contexts, and a string buffer that actions use
// to accumulate text (e.g. while lexing string literals).
type Flexer struct {
	index    int             // byte offset into input of the current position
	location Location        // current location (name, line, column)
	lexemes  []Lexeme        // registered lexemes, tried in registration order
	input    string          // the entire input being lexed
	name     string          // name of the input, e.g. a file name
	contexts []string        // stack of lexer contexts; top is the active one
	builder  strings.Builder // buffer for actions that accumulate text
}
// MakeToken creates a token of the given kind at the flexer's current
// location, with text formatted from form and args.
func (f Flexer) MakeToken(kind Kind, form string, args ...interface{}) Token {
	return MakeToken(f.location, kind, form, args...)
}
- func (f *Flexer) MakeBuilderToken(kind Kind) Token {
- text := f.builder.String()
- f.builder.Reset()
- return f.MakeToken(kind, text)
- }
// Advances the flexer to the given index,
// updating the Location.
// Both '\n', '\r' and '\r\n' sequences count as line endings,
// with '\r\n' counted as a single line ending.
func (f *Flexer) advanceTo(index int) {
	start := f.index
	end := index
	for i := start; i < end; i++ {
		c := f.input[i] // This works because newlines are ascii.
		if c == '\r' || c == '\n' {
			// Skip the LF of a CRLF pair so it is not counted as a
			// second line ending.
			if c == '\r' && (i+1) < len(f.input) {
				if f.input[i+1] == '\n' {
					i++
				}
			}
			f.location.Line++
			f.location.Col = 1
		} else {
			f.location.Col++
		}
	}
	f.index = end
}
- func (f *Flexer) Accept(re *regexp.Regexp) []string {
- indexes := re.FindStringSubmatchIndex(f.input[f.index:len(f.input)])
- if indexes == nil || len(indexes) < 1 {
- return nil
- }
- _, end := f.index+indexes[0], f.index+indexes[1]
- matches := []string{}
- for i := 1; i < len(indexes); i += 2 {
- subStart, subEnd := indexes[i-1]+f.index, indexes[i]+f.index
- sub := f.input[subStart:subEnd]
- matches = append(matches, sub)
- }
- f.advanceTo(end)
- return matches
- }
- func (f *Flexer) Lexeme(kind Kind, expr, context string, act Action) error {
- re, err := regexp.Compile(`\A` + expr)
- if err != nil {
- return err
- }
- lexeme := Lexeme{kind, re, context, act}
- f.lexemes = append(f.lexemes, lexeme)
- return nil
- }
// EscapedStringLexeme registers the lexemes needed to lex a string
// literal with escape sequences: first switches into the given
// context, last leaves it again and emits the buffered text as a
// token of the given kind, matched escape sequences are decoded via
// EscapeAction, and any other single character is buffered verbatim.
// NOTE(review): errors returned by f.Lexeme (bad regexp in first or
// last) are silently ignored here — confirm that is intended.
func (f *Flexer) EscapedStringLexeme(kind Kind, first, last, context string) {
	f.Lexeme(SkipKind, first, "", ContextAction(context))
	f.Lexeme(kind, last, context, PopAction(kind))
	f.Lexeme(SkipKind, `\\[etnru][0-9a-f]*`, context, EscapeAction(last[0]))
	f.Lexeme(SkipKind, `.`, context, StoreAction())
}
// RawStringLexeme registers the lexemes needed to lex a raw string
// literal without escape processing: first switches into the given
// context, last leaves it again and emits the buffered text as a
// token of the given kind, and any other single character is
// buffered verbatim.
// NOTE(review): errors returned by f.Lexeme are silently ignored
// here — confirm that is intended.
func (f *Flexer) RawStringLexeme(kind Kind, first, last, context string) {
	f.Lexeme(SkipKind, first, "", ContextAction(context))
	f.Lexeme(kind, last, context, PopAction(kind))
	f.Lexeme(SkipKind, `.`, context, StoreAction())
}
// PushContext pushes the given context onto the context stack,
// making it the active context.
func (f *Flexer) PushContext(context string) {
	f.contexts = append(f.contexts, context)
}
- func (f *Flexer) Context() string {
- context := ""
- clen := len(f.contexts)
- if clen > 0 {
- context = f.contexts[clen-1]
- }
- return context
- }
- func (f *Flexer) PopContext() {
- clen := len(f.contexts)
- if clen > 0 {
- f.contexts = f.contexts[0 : clen-1]
- }
- }
// Builder returns the flexer's internal string builder, which
// actions use to accumulate buffered text.
func (f *Flexer) Builder() *strings.Builder {
	return &f.builder
}
- // Runs the lexer once.
- // Return nil if no more progress can be made
- func (f *Flexer) LexOnce() []Token {
- for _, lexeme := range f.lexemes {
- if lexeme.Context != f.Context() {
- continue
- }
- tokens := lexeme.Try(f)
- if tokens != nil {
- return tokens
- }
- }
- return nil
- }
// Location returns the flexer's current location in its input.
func (f Flexer) Location() Location {
	return f.location
}
// EOF returns true when the flexer has consumed all of its input.
func (f Flexer) EOF() bool {
	return f.index >= len(f.input)
}
- func NewFlexer(name, text string) *Flexer {
- res := &Flexer{}
- res.location.Line = 1
- res.location.Col = 1
- res.location.Name = &name
- res.input = text
- return res
- }
- // KeepToken returns true if the token should be kept
- // and false if it is of the kind in the skip list
- func KeepToken(tok Token, skips ...Kind) bool {
- for _, skip := range skips {
- if skip == tok.Kind() {
- return false
- }
- }
- return true
- }
- // Lexes all tokens from the lexer until it reaches
- // EOF, or until it cannot progress anymore.
- // All tokens in the skip array will be skipped
- // from the results. If the lexer reachest he end of input,
- // a token with kind EndKind will be appended
- func LexAll(lex Lexer, skips ...Kind) []Token {
- res := []Token{}
- for !lex.EOF() {
- toks := lex.LexOnce()
- if toks == nil {
- err := lex.MakeToken(ErrorKind, " Lexer error: no lexeme matches. Context:%s.", lex.Context())
- res = append(res, err)
- return res
- }
- for _, tok := range toks {
- if KeepToken(tok, skips...) {
- res = append(res, tok)
- }
- }
- }
- // here we reached EOF
- res = append(res, lex.MakeToken(EndKind, "<end>"))
- return res
- }
|