lexer.go 12 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636
  1. package muesli
  2. import (
  3. "bufio"
  4. _ "bytes"
  5. _ "errors"
  6. "fmt"
  7. "io"
  8. _ "io"
  9. "os"
  10. _ "reflect"
  11. _ "runtime"
  12. "strconv"
  13. "strings"
  14. "unicode"
  15. _ "unicode"
  16. // "gitlab.com/beoran/woe/graphviz"
  17. // _ "gitlab.com/beoran/woe/monolog"
  18. )
  19. /* A Lexer splits scanned input into tokens.
  20. */
  21. type Lexer struct {
  22. Position
  23. Index int
  24. Start int
  25. io.RuneScanner
  26. buffer []rune
  27. Current rune
  28. LoggerWrapper
  29. }
  30. func (lexer *Lexer) SetLogger(logger Logger) {
  31. lexer.LoggerWrapper = LoggerWrapper{logger}
  32. }
  33. func (lexer *Lexer) ClearBuffer() {
  34. lexer.buffer = make([]rune, 0)
  35. }
  36. func (lexer *Lexer) MakeIntegerToken() Token {
  37. var sbuffer = string(lexer.buffer)
  38. i, err := strconv.ParseInt(sbuffer, 0, 64)
  39. if err == nil {
  40. lexer.ClearBuffer()
  41. return NewToken(TokenKindInteger, IntValue(i), lexer.Position)
  42. } else {
  43. lexer.ClearBuffer()
  44. return lexer.MakeErrorToken(err)
  45. }
  46. }
  47. func (lexer *Lexer) MakeFloatToken() Token {
  48. var sbuffer = string(lexer.buffer)
  49. f, err := strconv.ParseFloat(sbuffer, 64)
  50. if err == nil {
  51. lexer.ClearBuffer()
  52. return NewToken(TokenKindFloat, FloatValue(f), lexer.Position)
  53. } else {
  54. lexer.ClearBuffer()
  55. return lexer.MakeErrorToken(err)
  56. }
  57. }
  58. func (lexer *Lexer) MakeBooleanToken(b bool) Token {
  59. lexer.ClearBuffer()
  60. return NewToken(TokenKindBoolean, BoolValue(b), lexer.Position)
  61. }
  62. func (lexer *Lexer) MakeStringValueToken(kind TokenKind) Token {
  63. var sbuffer = string(lexer.buffer)
  64. return NewToken(kind, StringValue(sbuffer), lexer.Position)
  65. }
  66. func (lexer *Lexer) MakeToken(kind TokenKind) Token {
  67. switch kind {
  68. case TokenKindInteger:
  69. return lexer.MakeIntegerToken()
  70. case TokenKindFloat:
  71. return lexer.MakeFloatToken()
  72. case TokenKindString:
  73. fallthrough
  74. case TokenKindSymbol:
  75. fallthrough
  76. case TokenKindType:
  77. fallthrough
  78. case TokenKindError:
  79. fallthrough
  80. case TokenKindWord:
  81. return lexer.MakeStringValueToken(kind)
  82. case TokenKindBoolean:
  83. fallthrough
  84. case TokenKindGet:
  85. fallthrough
  86. case TokenKindSet:
  87. fallthrough
  88. case TokenKindOpenBlock:
  89. fallthrough
  90. case TokenKindCloseBlock:
  91. fallthrough
  92. case TokenKindOpenList:
  93. fallthrough
  94. case TokenKindCloseList:
  95. fallthrough
  96. case TokenKindOpenParen:
  97. fallthrough
  98. case TokenKindCloseParen:
  99. fallthrough
  100. case TokenKindEOX:
  101. fallthrough
  102. case TokenKindEOF:
  103. val := StringValue(string(lexer.buffer))
  104. lexer.ClearBuffer()
  105. return NewToken(kind, val, lexer.Position)
  106. default:
  107. return lexer.MakeErrorfToken("Internal error on token type %s", kind)
  108. }
  109. }
  110. func (lexer Lexer) MakeErrorToken(err error) Token {
  111. return NewToken(TokenKindError, ErrorValue{err}, lexer.Position)
  112. }
  113. func (lexer Lexer) MakeErrorfToken(format string, va ...interface{}) Token {
  114. err := fmt.Errorf(format, va...)
  115. return lexer.MakeErrorToken(err)
  116. }
  117. func (lexer Lexer) MakeEOFToken() Token {
  118. return NewToken(TokenKindEOF, &EmptyValue{}, lexer.Position)
  119. }
  120. func (lexer *Lexer) Peek() (rune, error) {
  121. r, _, err := lexer.RuneScanner.ReadRune()
  122. err2 := lexer.RuneScanner.UnreadRune()
  123. if err == nil {
  124. err = err2
  125. }
  126. return r, err
  127. }
  128. /* Advances the lexer's position based on the rune r read. */
  129. func (lexer *Lexer) advance(r rune) {
  130. lexer.Current = r
  131. lexer.Index++
  132. lexer.Position.Column++
  133. if r == '\n' {
  134. lexer.Position.Column = 1
  135. lexer.Position.Line++
  136. }
  137. }
  138. /* Append a rune to the lexer's buffer. */
  139. func (lexer *Lexer) appendRune(r rune) {
  140. lexer.buffer = append(lexer.buffer, r)
  141. }
  142. /* Advances the lexer's input buffer but does not store the rune read,
  143. * but just returns it. */
  144. func (lexer *Lexer) Skip() (rune, error) {
  145. r, _, err := lexer.RuneScanner.ReadRune()
  146. if err != nil {
  147. return 0, err
  148. }
  149. lexer.advance(r)
  150. return r, nil
  151. }
  152. /* Actually reads the next rune from the lexer's input source and stores
  153. * them in the lexer's token buffer.
  154. * Shorthand for r, err := lexer.Skip() ; lexer.appendRune(r) */
  155. func (lexer *Lexer) Next() (rune, error) {
  156. r, err := lexer.Skip()
  157. if err == nil {
  158. lexer.appendRune(r)
  159. }
  160. return r, nil
  161. }
  162. func (lexer *Lexer) NextIf(predicate func(rune) bool) (bool, error) {
  163. r, err := lexer.Peek()
  164. if err != nil {
  165. return false, err
  166. }
  167. if predicate(r) {
  168. r, err = lexer.Next()
  169. if err != nil {
  170. return true, err
  171. }
  172. return true, nil
  173. }
  174. return false, nil
  175. }
  176. func (lexer *Lexer) SkipIf(predicate func(rune) bool) (bool, error) {
  177. r, err := lexer.Peek()
  178. if err != nil {
  179. return false, err
  180. }
  181. if predicate(r) {
  182. r, err = lexer.Skip()
  183. if err != nil {
  184. return true, err
  185. }
  186. return true, nil
  187. }
  188. return false, nil
  189. }
  190. func (lexer *Lexer) NextWhile(predicate func(rune) bool) (bool, error) {
  191. result := true
  192. ok, err := lexer.NextIf(predicate)
  193. result = result || ok
  194. for ; ok && (err == nil); ok, err = lexer.NextIf(predicate) {
  195. result = result || ok
  196. }
  197. return result, err
  198. }
  199. func (lexer *Lexer) SkipWhile(predicate func(rune) bool) (bool, error) {
  200. result := true
  201. ok, err := lexer.SkipIf(predicate)
  202. result = result || ok
  203. for ; ok && (err == nil); ok, err = lexer.SkipIf(predicate) {
  204. result = result || ok
  205. }
  206. return result, err
  207. }
  208. func isSpace(r rune) bool {
  209. return r == ' ' || r == '\t' || r == '\v' || r == '\r'
  210. }
  211. func isComment(r rune) bool {
  212. return r == '#'
  213. }
  214. func (lexer *Lexer) SkipSpace() error {
  215. r, err := lexer.Skip()
  216. lexer.LogDebug("Skipping %c.", r)
  217. if err != nil {
  218. return err
  219. }
  220. for ; isSpace(r) && err == nil; r, err = lexer.Skip() {
  221. }
  222. return err
  223. }
  224. func (lexer *Lexer) SkipBlockComment() error {
  225. var err error
  226. var r rune
  227. lexer.LogDebug("Skipping block comment.")
  228. for block := 1; block > 0; {
  229. r, err = lexer.Skip()
  230. if err != nil {
  231. return err
  232. }
  233. if r == '{' {
  234. block++
  235. } else if r == '}' {
  236. block--
  237. }
  238. lexer.LogDebug("Skipping block comment: %d", block)
  239. }
  240. return err
  241. }
  242. func (lexer *Lexer) SkipComment() error {
  243. r, err := lexer.Skip()
  244. lexer.LogDebug("Skipping %c.", r)
  245. if err != nil {
  246. return err
  247. }
  248. first := true
  249. for r, err = lexer.Skip(); r != '\n' && err == nil; r, err = lexer.Skip() {
  250. lexer.LogDebug("Skipping loop %c.", r)
  251. if first && r == '{' {
  252. first = false
  253. return lexer.SkipBlockComment()
  254. }
  255. }
  256. if err != nil {
  257. return err
  258. }
  259. return err
  260. }
  261. /* Handles errors including EOF by either returning an error token or an
  262. * EOF token.
  263. */
  264. func (lexer *Lexer) handleError(err error) Token {
  265. if err == io.EOF {
  266. return lexer.MakeEOFToken()
  267. } else {
  268. return lexer.MakeErrorToken(err)
  269. }
  270. }
  271. func (lexer *Lexer) LexNumber() Token {
  272. isFloat := false
  273. // skip any first -
  274. _, err := lexer.NextIf(func(r rune) bool {
  275. return r == '-'
  276. })
  277. _, err = lexer.NextWhile(func(r rune) bool {
  278. if unicode.IsDigit(r) {
  279. return true
  280. } else if r == '.' {
  281. if isFloat {
  282. return false // double point in floating point
  283. } else {
  284. isFloat = true
  285. return true
  286. }
  287. } else {
  288. return false
  289. }
  290. })
  291. if err != nil {
  292. return lexer.MakeErrorfToken("when parsing number: %s", err)
  293. }
  294. if isFloat {
  295. return lexer.MakeToken(TokenKindFloat)
  296. } else {
  297. return lexer.MakeToken(TokenKindInteger)
  298. }
  299. }
  300. func isDoubleQuote(r rune) bool {
  301. return r == '"'
  302. }
  303. func (lexer *Lexer) handleEscapeHexChars(amount int) error {
  304. buffer := make([]byte, 0)
  305. r, err := lexer.Skip()
  306. for index := 0; err == nil && index < amount; {
  307. if unicode.Is(unicode.ASCII_Hex_Digit, r) {
  308. buffer = append(buffer, byte(r))
  309. } else {
  310. return fmt.Errorf("Not a hexadecimal digit: %c", r)
  311. }
  312. index++
  313. if index < amount {
  314. r, err = lexer.Skip()
  315. }
  316. }
  317. if err != nil {
  318. return err
  319. }
  320. i, err := strconv.ParseInt(string(buffer), 16, 32)
  321. if err != nil {
  322. return err
  323. }
  324. lexer.appendRune(rune(i))
  325. _, err = lexer.Peek()
  326. return err
  327. }
  328. func (lexer *Lexer) handleEscape() error {
  329. r, err := lexer.Skip()
  330. if err != nil {
  331. return err
  332. }
  333. switch r {
  334. case 'a':
  335. lexer.appendRune('\a')
  336. case 'b':
  337. lexer.appendRune('\b')
  338. case 'e':
  339. lexer.appendRune('\033')
  340. case 'f':
  341. lexer.appendRune('\f')
  342. case 'n':
  343. lexer.appendRune('\n')
  344. case 'r':
  345. lexer.appendRune('\r')
  346. case 't':
  347. lexer.appendRune('\t')
  348. case '\\':
  349. lexer.appendRune('\\')
  350. case '"':
  351. lexer.appendRune('"')
  352. // case 'o': fallthrough // No octals, for now.
  353. case 'x':
  354. err = lexer.handleEscapeHexChars(2)
  355. case 'u':
  356. err = lexer.handleEscapeHexChars(4)
  357. case 'U':
  358. err = lexer.handleEscapeHexChars(6)
  359. default:
  360. return fmt.Errorf("Unknown escape sequence character %c: %d", r, r)
  361. }
  362. return err
  363. }
  364. func (lexer *Lexer) LexString() Token {
  365. var err error
  366. var r rune
  367. _, err = lexer.Skip() // Skip first "
  368. if err != nil {
  369. return lexer.handleError(err)
  370. }
  371. r, err = lexer.Skip()
  372. for r != '"' && err == nil {
  373. if r == '\\' {
  374. err = lexer.handleEscape()
  375. if err != nil {
  376. return lexer.handleError(err)
  377. }
  378. } else {
  379. lexer.appendRune(r)
  380. // still inside the string
  381. }
  382. r, err = lexer.Skip()
  383. }
  384. if err != nil {
  385. return lexer.MakeErrorfToken("when parsing string: %s", err)
  386. }
  387. _, err = lexer.Skip() // skip last "
  388. if err != nil {
  389. return lexer.handleError(err)
  390. }
  391. return lexer.MakeToken(TokenKindString)
  392. }
  393. func (lexer *Lexer) LexLongString() Token {
  394. var err error
  395. _, err = lexer.Skip()
  396. if err != nil {
  397. return lexer.handleError(err)
  398. }
  399. _, err = lexer.NextWhile(func(r rune) bool {
  400. return r != '`'
  401. })
  402. if err != nil {
  403. return lexer.MakeErrorfToken("when parsing long string: %s", err)
  404. }
  405. _, err = lexer.Skip()
  406. if err != nil {
  407. return lexer.handleError(err)
  408. }
  409. return lexer.MakeToken(TokenKindString)
  410. }
  411. func (lexer *Lexer) LexWord() Token {
  412. var err error
  413. first := true
  414. _, err = lexer.Next()
  415. if err != nil {
  416. return lexer.handleError(err)
  417. }
  418. _, err = lexer.NextWhile(func(r rune) bool {
  419. if first {
  420. first = false
  421. return unicode.IsLetter(r) || r == '_'
  422. } else {
  423. return unicode.IsLetter(r) || unicode.IsNumber(r) || r == '_'
  424. }
  425. })
  426. if err != nil {
  427. return lexer.handleError(err)
  428. }
  429. sbuffer := string(lexer.buffer)
  430. // handle key words
  431. switch sbuffer {
  432. case "true":
  433. return lexer.MakeBooleanToken(true)
  434. case "false":
  435. return lexer.MakeBooleanToken(false)
  436. default:
  437. return lexer.MakeToken(TokenKindWord)
  438. }
  439. }
  440. func (lexer *Lexer) LexSymbol() Token {
  441. var err error
  442. _, err = lexer.Skip()
  443. if err != nil {
  444. return lexer.handleError(err)
  445. }
  446. _, err = lexer.NextWhile(func(r rune) bool {
  447. return !unicode.IsSpace(r)
  448. })
  449. if err != nil {
  450. return lexer.handleError(err)
  451. }
  452. return lexer.MakeToken(TokenKindSymbol)
  453. }
  454. func (lexer *Lexer) skipSpaceAndCommentAndPeek() (rune, error) {
  455. r, err := lexer.Peek()
  456. if err != nil {
  457. return r, err
  458. }
  459. for isSpace(r) || isComment(r) {
  460. if isSpace(r) {
  461. err = lexer.SkipSpace()
  462. } else if isComment(r) {
  463. err = lexer.SkipComment()
  464. }
  465. if err != nil {
  466. return r, err
  467. }
  468. lexer.LogDebug("Peeked again: >%c<", r)
  469. r, err := lexer.Peek()
  470. if err != nil {
  471. return r, err
  472. }
  473. }
  474. return r, err
  475. }
  476. func (lexer *Lexer) lex() Token {
  477. r, err := lexer.skipSpaceAndCommentAndPeek()
  478. lexer.LogDebug(" After skip: >%c< >%v<\n", r, err)
  479. if err != nil {
  480. return lexer.handleError(err)
  481. }
  482. if unicode.IsDigit(r) || r == '-' {
  483. return lexer.LexNumber()
  484. }
  485. if r == '\n' || r == '.' {
  486. lexer.Next()
  487. return lexer.MakeToken(TokenKindEOX)
  488. }
  489. if r == '"' {
  490. return lexer.LexString()
  491. }
  492. if r == '`' {
  493. return lexer.LexLongString()
  494. }
  495. if r == ':' {
  496. return lexer.LexSymbol()
  497. }
  498. switch TokenKind(r) {
  499. case TokenKindGet:
  500. fallthrough
  501. case TokenKindSet:
  502. fallthrough
  503. case TokenKindOpenBlock:
  504. fallthrough
  505. case TokenKindCloseBlock:
  506. fallthrough
  507. case TokenKindOpenList:
  508. fallthrough
  509. case TokenKindCloseList:
  510. fallthrough
  511. case TokenKindOpenParen:
  512. fallthrough
  513. case TokenKindCloseParen:
  514. lexer.Next()
  515. return lexer.MakeToken(TokenKind(r))
  516. default:
  517. }
  518. if unicode.IsLetter(r) {
  519. return lexer.LexWord()
  520. }
  521. return lexer.MakeErrorfToken("Unknown character: %c", r)
  522. }
  523. func (lexer *Lexer) Lex() Token {
  524. res := lexer.lex()
  525. lexer.ClearBuffer() // ensure buffer is cleared after lexing, always.
  526. return res
  527. }
  528. func (lexer *Lexer) LexAll() []Token {
  529. var token Token
  530. res := make([]Token, 0)
  531. for token = lexer.Lex(); !token.IsLast(); token = lexer.Lex() {
  532. res = append(res, token)
  533. }
  534. res = append(res, token)
  535. return res
  536. }
  537. func NewLexer(scanner io.RuneScanner, filename string) *Lexer {
  538. lexer := &Lexer{}
  539. lexer.RuneScanner = scanner
  540. lexer.Position.FileName = filename
  541. lexer.Position.Column = 1
  542. lexer.Position.Line = 1
  543. lexer.LoggerWrapper = LoggerWrapper{nil}
  544. return lexer
  545. }
  546. func NewLexerFromString(input string) *Lexer {
  547. reader := strings.NewReader(input)
  548. return NewLexer(reader, "<input>")
  549. }
  550. func NewLexerFromFileName(filename string) (*Lexer, error) {
  551. read, err := os.Open(filename)
  552. if err != nil {
  553. bread := bufio.NewReader(read)
  554. lex := NewLexer(bread, filename)
  555. return lex, nil
  556. }
  557. return nil, err
  558. }