// lexer.go: a Lexer that splits scanned input into tokens for muesli.
  1. package muesli
  2. import (
  3. _ "bytes"
  4. _ "errors"
  5. "fmt"
  6. _ "io"
  7. _ "reflect"
  8. _ "runtime"
  9. "strings"
  10. _ "unicode"
  11. "io"
  12. "os"
  13. "bufio"
  14. "unicode"
  15. // "gitlab.com/beoran/woe/graphviz"
  16. // _ "gitlab.com/beoran/woe/monolog"
  17. )
/* A Lexer splits scanned input into tokens.
*/
type Lexer struct {
	// Position is the current source location (file name, line, column)
	// stamped onto every token; embedded so the lexer exposes it directly.
	Position
	// Index counts every rune consumed since the start of the input.
	Index int
	// Start — NOTE(review): never written in this file; presumably the
	// buffer's starting index, kept for future use. Confirm before relying on it.
	Start int
	// RuneScanner is the input source; embedding provides ReadRune/UnreadRune.
	io.RuneScanner
	// buffer accumulates the runes of the token currently being lexed.
	buffer []rune
	// Current is the rune most recently consumed by advance.
	Current rune
}
  28. func (lexer * Lexer) ClearBuffer() {
  29. lexer.buffer = make([]rune, 0)
  30. }
  31. func (lexer * Lexer) MakeToken(kind TokenKind) Token {
  32. val := StringValue(string(lexer.buffer))
  33. lexer.ClearBuffer()
  34. return NewToken(kind, val, lexer.Position)
  35. }
  36. func (lexer Lexer) MakeErrorToken(err error) Token {
  37. return NewToken(TokenKindError, err.Error(), lexer.Position)
  38. }
  39. func (lexer Lexer) MakeErrorfToken(format string, va ... interface{}) Token {
  40. err := fmt.Errorf(format, va...)
  41. return lexer.MakeErrorToken(err)
  42. }
  43. func (lexer Lexer) MakeEOFToken() Token {
  44. return NewToken(TokenKindEOF, "", lexer.Position)
  45. }
  46. func (lexer * Lexer) Peek() (rune, error) {
  47. r, _, err := lexer.RuneScanner.ReadRune()
  48. err2 := lexer.RuneScanner.UnreadRune()
  49. if err == nil {
  50. err = err2
  51. }
  52. return r, err
  53. }
  54. /* Advances the lexer's position based on the rune r read. */
  55. func (lexer * Lexer) advance(r rune) {
  56. lexer.Current = r
  57. lexer.Index++
  58. lexer.Position.Column++
  59. if r == '\n' {
  60. lexer.Position.Column = 1
  61. lexer.Position.Line++
  62. }
  63. }
  64. /* Actually reads the next rune from the lexer's input source and stores
  65. * them in the lexer's token buffer. */
  66. func (lexer * Lexer) Next() (rune, error) {
  67. r, _, err := lexer.RuneScanner.ReadRune()
  68. if err != nil {
  69. return 0, err
  70. }
  71. lexer.advance(r)
  72. lexer.buffer = append(lexer.buffer, r)
  73. return r, nil
  74. }
  75. /* Advances the lexer's input buffer but does not store the read runes*/
  76. func (lexer * Lexer) Skip() (rune, error) {
  77. r, _, err := lexer.RuneScanner.ReadRune()
  78. if err != nil {
  79. return 0, err
  80. }
  81. lexer.advance(r)
  82. return r, nil
  83. }
  84. func (lexer * Lexer) NextIf(predicate func(rune) bool) (bool, error) {
  85. r, err := lexer.Peek()
  86. if err != nil {
  87. return false, err
  88. }
  89. if (predicate(r)) {
  90. r, err = lexer.Next()
  91. if err != nil {
  92. return true, err
  93. }
  94. return true, nil
  95. }
  96. return false, nil
  97. }
  98. func (lexer * Lexer) SkipIf(predicate func(rune) bool) (bool, error) {
  99. r, err := lexer.Peek()
  100. if err != nil {
  101. return false, err
  102. }
  103. if (predicate(r)) {
  104. r, err = lexer.Skip()
  105. if err != nil {
  106. return true, err
  107. }
  108. return true, nil
  109. }
  110. return false, nil
  111. }
  112. func (lexer * Lexer) NextWhile(predicate func(rune) bool) (bool, error) {
  113. result := true
  114. ok, err := lexer.NextIf(predicate)
  115. result = result || ok
  116. for ; ok && (err == nil) ; ok, err = lexer.NextIf(predicate) {
  117. result = result || ok
  118. }
  119. return result, err
  120. }
  121. func (lexer * Lexer) SkipWhile(predicate func(rune) bool) (bool, error) {
  122. result := true
  123. ok, err := lexer.SkipIf(predicate)
  124. result = result || ok
  125. for ; ok && (err == nil) ; ok, err = lexer.SkipIf(predicate) {
  126. result = result || ok
  127. }
  128. return result, err
  129. }
  130. func isSpace(r rune) bool {
  131. return r == ' ' || r == '\t'
  132. }
  133. func (lexer * Lexer) SkipSpace() (error) {
  134. _, err := lexer.SkipWhile(isSpace)
  135. return err
  136. }
  137. /* Handles errors including EOF by either returning an error token or an
  138. * EOF token.
  139. */
  140. func (lexer * Lexer) handleError(err error) Token {
  141. if err == io.EOF {
  142. return lexer.MakeEOFToken()
  143. } else {
  144. return lexer.MakeErrorToken(err)
  145. }
  146. }
  147. func (lexer * Lexer) LexNumber() Token {
  148. isFloat := false
  149. // skip any first -
  150. _, err := lexer.NextIf(func (r rune) bool {
  151. return r == '-'
  152. })
  153. _, err = lexer.NextWhile(func (r rune) bool {
  154. if unicode.IsDigit(r) {
  155. return true
  156. } else if r == '.' {
  157. if isFloat {
  158. return false // double point in floating point
  159. } else {
  160. isFloat = true
  161. return true
  162. }
  163. } else {
  164. return false
  165. }
  166. })
  167. if err != nil {
  168. return lexer.MakeErrorfToken("when parsing number: %s", err)
  169. }
  170. if isFloat {
  171. return lexer.MakeToken(TokenKindFloat)
  172. } else {
  173. return lexer.MakeToken(TokenKindInteger)
  174. }
  175. }
  176. func isDoubleQuote(r rune) bool {
  177. return r == '"'
  178. }
/* LexString lexes a double-quoted string literal. Both quotes are
 * skipped (not buffered), so the token's value is the raw text between
 * them. Escapes are not decoded yet: a backslash only prevents the
 * following quote from terminating the string, and the backslash itself
 * remains in the buffer. An unterminated string (EOF before the closing
 * quote) yields an error token. */
func (lexer *Lexer) LexString() Token {
	// inEscape tracks whether the previous rune was an unpaired backslash.
	inEscape := false
	var err error
	_, err = lexer.Skip() // Skip first "
	if err != nil {
		return lexer.handleError(err)
	}
	_, err = lexer.NextWhile(func(r rune) bool {
		// An unescaped quote terminates the string.
		if r == '"' && !inEscape {
			return false
		}
		if r == '\\' {
			// TODO escape parsing, now just a single character after it
			if inEscape { // double backslash
				inEscape = false
			} else {
				inEscape = true
			}
		} else {
			inEscape = false
		}
		return true // still inside the string
	})
	if err != nil {
		return lexer.MakeErrorfToken("when parsing string: %s", err)
	}
	_, err = lexer.Skip() // skip last "
	if err != nil {
		return lexer.handleError(err)
	}
	return lexer.MakeToken(TokenKindString)
}
  211. func (lexer * Lexer) LexLongString() Token {
  212. var err error
  213. _, err = lexer.Skip()
  214. if err != nil {
  215. return lexer.handleError(err)
  216. }
  217. _, err = lexer.NextWhile(func (r rune) bool {
  218. return r != '`'
  219. })
  220. if err != nil {
  221. return lexer.MakeErrorfToken("when parsing long string: %s", err)
  222. }
  223. _, err = lexer.Skip()
  224. if err != nil {
  225. return lexer.handleError(err)
  226. }
  227. return lexer.MakeToken(TokenKindString)
  228. }
  229. func (lexer * Lexer) LexWord() Token {
  230. var err error
  231. _, err = lexer.Next()
  232. if err != nil {
  233. return lexer.handleError(err)
  234. }
  235. lexer.NextWhile(func(r rune) bool {
  236. return unicode.IsLetter(r)
  237. })
  238. return lexer.MakeToken(TokenKindWord)
  239. }
/* lex scans and returns the next token, dispatching on the first peeked
 * rune:
 *   - blanks/tabs are skipped, then the peek is redone;
 *   - a digit or '-' starts a number;
 *   - '\n' or '.' is consumed as an end-of-expression (EOX) token;
 *   - '"' starts an ordinary string, '`' a long (raw) string;
 *   - runes whose value coincides with a single-character TokenKind
 *     (get, set, block/list/paren delimiters) become one-rune tokens;
 *   - a letter starts a word;
 *   - anything else yields an error token.
 * Read failures are mapped to EOF or error tokens via handleError. */
func (lexer * Lexer) lex() Token {
	r, err := lexer.Peek()
	if err != nil {
		return lexer.handleError(err)
	}
	if isSpace(r) {
		// Skip leading horizontal whitespace, then look again.
		err = lexer.SkipSpace()
		if err != nil {
			return lexer.handleError(err)
		}
		r, err = lexer.Peek()
		if err != nil {
			return lexer.handleError(err)
		}
	}
	if unicode.IsDigit(r) || r == '-' {
		return lexer.LexNumber()
	}
	if r == '\n' || r == '.' {
		lexer.Next()
		return lexer.MakeToken(TokenKindEOX)
	}
	if r == '"' {
		return lexer.LexString()
	}
	if r == '`' {
		return lexer.LexLongString()
	}
	// Single-rune tokens: the rune itself doubles as the token kind.
	switch (TokenKind(r)) {
	case TokenKindGet : fallthrough
	case TokenKindSet : fallthrough
	case TokenKindOpenBlock : fallthrough
	case TokenKindCloseBlock: fallthrough
	case TokenKindOpenList : fallthrough
	case TokenKindCloseList : fallthrough
	case TokenKindOpenParen : fallthrough
	case TokenKindCloseParen:
		lexer.Next()
		return lexer.MakeToken(TokenKind(r))
	default:
	}
	if unicode.IsLetter(r) {
		return lexer.LexWord()
	}
	return lexer.MakeErrorfToken("Unknown character: %c", r)
}
  286. func (lexer * Lexer) Lex() Token {
  287. res := lexer.lex()
  288. lexer.ClearBuffer() // ensure buffer is cleared after lexing, always.
  289. return res
  290. }
  291. func (lexer * Lexer) LexAll() []Token {
  292. var token Token
  293. res := make([]Token, 0)
  294. for token = lexer.Lex() ; ! token.IsLast() ; token = lexer.Lex() {
  295. fmt.Printf("token: %s %v\n", token.String(), token.IsLast())
  296. res = append(res, token)
  297. }
  298. fmt.Printf("Last token: %s %v\n", token.String(), token.IsLast())
  299. res = append(res, token)
  300. return res
  301. }
  302. func NewLexer(scanner io.RuneScanner, filename string) Lexer {
  303. lexer := Lexer{}
  304. lexer.RuneScanner = scanner
  305. lexer.Position.FileName = filename
  306. lexer.Position.Column = 1
  307. lexer.Position.Line = 1
  308. return lexer
  309. }
  310. func NewLexerFromInputString(input string) Lexer {
  311. reader := strings.NewReader(input)
  312. return NewLexer(reader, "<input>")
  313. }
  314. func NewLexerFromFileName(filename string) (*Lexer, error) {
  315. read, err := os.Open(filename)
  316. if err != nil {
  317. bread := bufio.NewReader(read)
  318. lex := NewLexer(bread, filename)
  319. return &lex, nil
  320. }
  321. return nil , err
  322. }