// flexer.go
  1. package flexer
  2. import "fmt"
  3. import "regexp"
  4. import "strings"
  5. import "strconv"
  6. /* Flexer is a flexible regexp and rule based
  7. lexer that can be used as an implementation for
  8. generated code.
  9. */
  10. type Position struct {
  11. Name *string
  12. Line int
  13. Col int
  14. }
  15. type Kind int
  16. const (
  17. SkipKind Kind = -30000
  18. ErrorKind Kind = -31000
  19. )
  20. type Token interface {
  21. Position() Position
  22. Kind() Kind
  23. Text() string
  24. }
  25. type Lexer interface {
  26. // Accept will accept a regexp and advance, returning the matches.
  27. // Returns nil if no matches were found.
  28. Accept(re *regexp.Regexp) []string
  29. // Returns the current lexer position.
  30. Position() Position
  31. // Returns if the lexer is at the end or not.
  32. EOF() bool
  33. // The lexer creates a token with the current lexer position and
  34. // the given kind and text.
  35. MakeToken(kind Kind, form string, args ...interface{}) Token
  36. // The lexer creates a token with the current lexer position and
  37. // the given kind. The text is taken from the lexer string builder and
  38. // that builser is reset.
  39. MakeBuilderToken(kind Kind) Token
  40. // The lexer has a string builder, which can be used to append
  41. // strings or runes to and which can be returned and cleared when the
  42. // token is complete.
  43. Builder() *strings.Builder
  44. // Adds a rule to the lexer.
  45. Rule(kind Kind, re, context string, act Action) error
  46. // Calls the lexer once.
  47. LexOnce() []Token
  48. // Returns the current lexer context
  49. Context() string
  50. // Pushes the named context on the lexer context stack
  51. PushContext(name string)
  52. // Pops the current context from the lexer context stack.
  53. PopContext()
  54. }
  55. type Action func(f Lexer, k Kind, matches ...string) []Token
  56. type BasicToken struct {
  57. position Position
  58. kind Kind
  59. text string
  60. }
  61. func (bt BasicToken) Kind() Kind {
  62. return bt.kind
  63. }
  64. func (bt BasicToken) Position() Position {
  65. return bt.position
  66. }
  67. func (bt BasicToken) Text() string {
  68. return bt.text
  69. }
  70. func MakeToken(position Position, kind Kind, form string,
  71. args ...interface{}) BasicToken {
  72. text := fmt.Sprintf(form, args...)
  73. return BasicToken{position, kind, text}
  74. }
  75. type ErrorToken struct {
  76. BasicToken
  77. }
  78. /* A rule for Flexer is based on a regular expression.
  79. * While the rule may have submatches, the lexer will consume
  80. * the whole match if it matches at the beginning of the current input.
  81. */
  82. type Rule struct {
  83. Kind
  84. *regexp.Regexp
  85. Context string
  86. Action
  87. }
  88. // DefaultAction is the default action on a match.
  89. // If there is only 1 match, then that is the token,
  90. // otherwise all sub-macthes excluding the first
  91. // whole string match are the tokens.
  92. func DefaultAction(lex Lexer, k Kind, matches ...string) []Token {
  93. if len(matches) == 1 {
  94. tok := lex.MakeToken(k, matches[0])
  95. return []Token{tok}
  96. }
  97. res := []Token{}
  98. for i := 1; 1 < len(matches); i++ {
  99. tok := lex.MakeToken(k, matches[i])
  100. res = append(res, tok)
  101. }
  102. return res
  103. }
  104. // ContextAction returns an action that returns
  105. // no tokens but switches the lexer context and
  106. // empties the buffer.
  107. func ContextAction(context string) func(lex Lexer, k Kind, matches ...string) []Token {
  108. return func(lex Lexer, k Kind, matches ...string) []Token {
  109. lex.PushContext(context)
  110. lex.Builder().Reset()
  111. return []Token{}
  112. }
  113. }
  114. // Returns an action that pops the context and
  115. // returns the token in the buffer with the given kind
  116. func PopAction(kind Kind) func(lex Lexer, k Kind, matches ...string) []Token {
  117. return func(lex Lexer, k Kind, matches ...string) []Token {
  118. lex.PopContext()
  119. tok := lex.MakeBuilderToken(kind)
  120. return []Token{tok}
  121. }
  122. }
  123. // Returns an action that stores the match in the lexer buffer.
  124. func StoreAction() func(lex Lexer, k Kind, matches ...string) []Token {
  125. return func(lex Lexer, k Kind, matches ...string) []Token {
  126. for _, m := range matches {
  127. lex.Builder().WriteString(m)
  128. }
  129. return []Token{}
  130. }
  131. }
  132. // Returns an action that stores the match in the lexer buffer after applying UnquoteChar to apply
  133. // an escape sequence.
  134. func EscapeAction(quote byte) func(lex Lexer, k Kind, matches ...string) []Token {
  135. return func(lex Lexer, k Kind, matches ...string) []Token {
  136. s, _, t, e := strconv.UnquoteChar(matches[0], quote)
  137. print("escape", s, t, e)
  138. if e != nil {
  139. et := lex.MakeToken(ErrorKind, "%s", e)
  140. return []Token{et}
  141. }
  142. lex.Builder().WriteRune(s)
  143. lex.Builder().WriteString(t)
  144. return []Token{}
  145. }
  146. }
  147. // Try tries to apply a rule.
  148. // Returns nil on no match.
  149. func (r Rule) Try(lex Lexer) []Token {
  150. matches := lex.Accept(r.Regexp)
  151. if matches == nil || len(matches) == 0 {
  152. return nil
  153. }
  154. if r.Action != nil {
  155. return r.Action(lex, r.Kind, matches...)
  156. }
  157. // No action, use default action
  158. return DefaultAction(lex, r.Kind, matches...)
  159. }
  160. type Flexer struct {
  161. index int
  162. position Position
  163. rules []Rule
  164. input string
  165. name string
  166. contexts []string
  167. builder strings.Builder
  168. }
  169. func (f Flexer) MakeToken(kind Kind, form string, args ...interface{}) Token {
  170. return MakeToken(f.position, kind, form, args...)
  171. }
  172. func (f *Flexer) MakeBuilderToken(kind Kind) Token {
  173. text := f.builder.String()
  174. f.builder.Reset()
  175. return f.MakeToken(kind, text)
  176. }
// advanceTo advances the flexer to the given byte index,
// updating the line/column position along the way.
// "\n", "\r" and the "\r\n" pair each count as a single line break.
func (f *Flexer) advanceTo(index int) {
	start := f.index
	end := index
	for i := start; i < end; i++ {
		c := f.input[i] // This works because newlines are ascii.
		if c == '\r' || c == '\n' {
			// Treat a "\r\n" pair as one newline by also consuming the '\n'.
			if c == '\r' && (i+1) < len(f.input) {
				if f.input[i+1] == '\n' {
					i++
				}
			}
			f.position.Line++
			f.position.Col = 1
		} else {
			// NOTE(review): Col counts bytes, not runes — a multi-byte
			// UTF-8 character advances Col by more than one. Confirm this
			// is the intended column semantics.
			f.position.Col++
		}
	}
	f.index = end
}
  198. func (f *Flexer) Accept(re *regexp.Regexp) []string {
  199. indexes := re.FindStringSubmatchIndex(f.input[f.index:len(f.input)])
  200. if indexes == nil || len(indexes) < 1 {
  201. return nil
  202. }
  203. _, end := f.index+indexes[0], f.index+indexes[1]
  204. matches := []string{}
  205. for i := 1; i < len(indexes); i += 2 {
  206. subStart, subEnd := indexes[i-1]+f.index, indexes[i]+f.index
  207. sub := f.input[subStart:subEnd]
  208. matches = append(matches, sub)
  209. }
  210. f.advanceTo(end)
  211. return matches
  212. }
  213. func (f *Flexer) Rule(kind Kind, expr, context string, act Action) error {
  214. re, err := regexp.Compile(`\A` + expr)
  215. if err != nil {
  216. return err
  217. }
  218. rule := Rule{kind, re, context, act}
  219. f.rules = append(f.rules, rule)
  220. return nil
  221. }
  222. func (f *Flexer) PushContext(context string) {
  223. f.contexts = append(f.contexts, context)
  224. }
  225. func (f *Flexer) Context() string {
  226. context := ""
  227. clen := len(f.contexts)
  228. if clen > 0 {
  229. context = f.contexts[clen-1]
  230. }
  231. return context
  232. }
  233. func (f *Flexer) PopContext() {
  234. clen := len(f.contexts)
  235. if clen > 0 {
  236. f.contexts = f.contexts[0 : clen-1]
  237. }
  238. }
  239. func (f *Flexer) Builder() *strings.Builder {
  240. return &f.builder
  241. }
  242. // Runs the lexer once.
  243. // Return nil if no more progress can be made
  244. func (f *Flexer) LexOnce() []Token {
  245. for _, rule := range f.rules {
  246. if rule.Context != f.Context() {
  247. continue
  248. }
  249. tokens := rule.Try(f)
  250. if tokens != nil {
  251. return tokens
  252. }
  253. }
  254. return nil
  255. }
  256. func (f Flexer) Position() Position {
  257. return f.position
  258. }
  259. func (f Flexer) EOF() bool {
  260. return f.index >= len(f.input)
  261. }
  262. func NewFlexer(name, text string) *Flexer {
  263. res := &Flexer{}
  264. res.position.Line = 1
  265. res.position.Col = 1
  266. res.position.Name = &name
  267. res.input = text
  268. return res
  269. }
  270. // Lexes all tokens from the lexer until it reaches
  271. // EOF, or until it cannot progress anymore.
  272. // All tokens of kind SkipKind will be skipped
  273. // from the results.
  274. func LexAll(lex Lexer) []Token {
  275. res := []Token{}
  276. for !lex.EOF() {
  277. toks := lex.LexOnce()
  278. if toks == nil {
  279. err := lex.MakeToken(ErrorKind, " Lexer error: no rule matches. Context:%s.", lex.Context())
  280. res = append(res, err)
  281. return res
  282. }
  283. res = append(res, toks...)
  284. }
  285. return res
  286. }