// lexer.go
  1. package muesli
  2. import (
  3. "bufio"
  4. _ "bytes"
  5. _ "errors"
  6. "fmt"
  7. "io"
  8. _ "io"
  9. "os"
  10. _ "reflect"
  11. _ "runtime"
  12. "strconv"
  13. "strings"
  14. "unicode"
  15. _ "unicode"
  16. // "gitlab.com/beoran/woe/graphviz"
  17. // _ "gitlab.com/beoran/woe/monolog"
  18. )
  19. /* A Lexer splits scanned input into tokens.
  20. */
  21. type Lexer struct {
  22. Position
  23. Index int
  24. Start int
  25. io.RuneScanner
  26. buffer []rune
  27. Current rune
  28. LoggerWrapper
  29. }
  30. func (lexer *Lexer) SetLogger(logger Logger) {
  31. lexer.LoggerWrapper = LoggerWrapper{logger}
  32. }
  33. func (lexer *Lexer) ClearBuffer() {
  34. lexer.buffer = make([]rune, 0)
  35. }
  36. func (lexer *Lexer) MakeIntegerToken() Token {
  37. var sbuffer = string(lexer.buffer)
  38. i, err := strconv.ParseInt(sbuffer, 0, 64)
  39. if err == nil {
  40. lexer.ClearBuffer()
  41. return NewToken(TokenKindInteger, IntValue(i), lexer.Position)
  42. } else {
  43. lexer.ClearBuffer()
  44. return lexer.MakeErrorToken(err)
  45. }
  46. }
  47. func (lexer *Lexer) MakeFloatToken() Token {
  48. var sbuffer = string(lexer.buffer)
  49. f, err := strconv.ParseFloat(sbuffer, 64)
  50. if err == nil {
  51. lexer.ClearBuffer()
  52. return NewToken(TokenKindFloat, FloatValue(f), lexer.Position)
  53. } else {
  54. lexer.ClearBuffer()
  55. return lexer.MakeErrorToken(err)
  56. }
  57. }
  58. func (lexer *Lexer) MakeBooleanToken(b bool) Token {
  59. lexer.ClearBuffer()
  60. if b {
  61. return NewToken(TokenKindBoolean, TrueValue, lexer.Position)
  62. } else {
  63. return NewToken(TokenKindBoolean, FalseValue, lexer.Position)
  64. }
  65. }
  66. func (lexer *Lexer) MakeNilToken() Token {
  67. lexer.ClearBuffer()
  68. return NewToken(TokenKindNil, NilValue, lexer.Position)
  69. }
  70. func (lexer *Lexer) MakeBuiltinToken() Token {
  71. var sbuffer = string(lexer.buffer)
  72. lexer.ClearBuffer()
  73. if sbuffer == "true" {
  74. lexer.ClearBuffer()
  75. return NewToken(TokenKindBoolean, TrueValue, lexer.Position)
  76. } else if sbuffer == "false" {
  77. return NewToken(TokenKindBoolean, FalseValue, lexer.Position)
  78. } else if sbuffer == "nil" {
  79. return NewToken(TokenKindNil, NilValue, lexer.Position)
  80. } else {
  81. return lexer.MakeErrorfToken("Not a builtin: %s", sbuffer)
  82. }
  83. }
  84. func (lexer *Lexer) MakeStringValueToken(kind TokenKind) Token {
  85. var sbuffer = string(lexer.buffer)
  86. return NewToken(kind, StringValue(sbuffer), lexer.Position)
  87. }
  88. func (lexer *Lexer) MakeTypeValueToken(kind TokenKind) Token {
  89. var sbuffer = string(lexer.buffer)
  90. return NewToken(kind, TypeValue(sbuffer), lexer.Position)
  91. }
  92. func (lexer *Lexer) MakeErrorValueToken(kind TokenKind) Token {
  93. var sbuffer = string(lexer.buffer)
  94. return NewToken(kind, NewErrorValuef("%s", sbuffer), lexer.Position)
  95. }
  96. func (lexer *Lexer) MakeWordValueToken(kind TokenKind) Token {
  97. var sbuffer = string(lexer.buffer)
  98. return NewToken(kind, WordValue(sbuffer), lexer.Position)
  99. }
  100. func (lexer *Lexer) MakeToken(kind TokenKind) Token {
  101. switch kind {
  102. case TokenKindInteger:
  103. return lexer.MakeIntegerToken()
  104. case TokenKindFloat:
  105. return lexer.MakeFloatToken()
  106. case TokenKindString:
  107. return lexer.MakeStringValueToken(kind)
  108. case TokenKindSymbol:
  109. return lexer.MakeWordValueToken(kind)
  110. case TokenKindType:
  111. return lexer.MakeTypeValueToken(kind)
  112. case TokenKindError:
  113. return lexer.MakeErrorValueToken(kind)
  114. case TokenKindWord:
  115. return lexer.MakeWordValueToken(kind)
  116. case TokenKindNil:
  117. fallthrough
  118. case TokenKindBoolean:
  119. return lexer.MakeBuiltinToken()
  120. case TokenKindGet:
  121. fallthrough
  122. case TokenKindSet:
  123. fallthrough
  124. case TokenKindOpenBlock:
  125. fallthrough
  126. case TokenKindCloseBlock:
  127. fallthrough
  128. case TokenKindOpenList:
  129. fallthrough
  130. case TokenKindCloseList:
  131. fallthrough
  132. case TokenKindOpenParen:
  133. fallthrough
  134. case TokenKindCloseParen:
  135. fallthrough
  136. case TokenKindEOX:
  137. fallthrough
  138. case TokenKindEOF:
  139. val := StringValue(string(lexer.buffer))
  140. lexer.ClearBuffer()
  141. return NewToken(kind, val, lexer.Position)
  142. default:
  143. return lexer.MakeErrorfToken("Internal error on token type %s", kind)
  144. }
  145. }
  146. func (lexer Lexer) MakeErrorToken(err error) Token {
  147. return NewToken(TokenKindError, ErrorValue{err}, lexer.Position)
  148. }
  149. func (lexer Lexer) MakeErrorfToken(format string, va ...interface{}) Token {
  150. err := fmt.Errorf(format, va...)
  151. return lexer.MakeErrorToken(err)
  152. }
  153. func (lexer Lexer) MakeEOFToken() Token {
  154. return NewToken(TokenKindEOF, &EmptyValue{}, lexer.Position)
  155. }
  156. func (lexer *Lexer) Peek() (rune, error) {
  157. r, _, err := lexer.RuneScanner.ReadRune()
  158. err2 := lexer.RuneScanner.UnreadRune()
  159. if err == nil {
  160. err = err2
  161. }
  162. return r, err
  163. }
  164. /* Advances the lexer's position based on the rune r read. */
  165. func (lexer *Lexer) advance(r rune) {
  166. lexer.Current = r
  167. lexer.Index++
  168. lexer.Position.Column++
  169. if r == '\n' {
  170. lexer.Position.Column = 1
  171. lexer.Position.Line++
  172. }
  173. }
  174. /* Append a rune to the lexer's buffer. */
  175. func (lexer *Lexer) appendRune(r rune) {
  176. lexer.buffer = append(lexer.buffer, r)
  177. }
  178. /* Advances the lexer's input buffer but does not store the rune read,
  179. * but just returns it. */
  180. func (lexer *Lexer) Skip() (rune, error) {
  181. r, _, err := lexer.RuneScanner.ReadRune()
  182. if err != nil {
  183. return 0, err
  184. }
  185. lexer.advance(r)
  186. return r, nil
  187. }
  188. /* Actually reads the next rune from the lexer's input source and stores
  189. * them in the lexer's token buffer.
  190. * Shorthand for r, err := lexer.Skip() ; lexer.appendRune(r) */
  191. func (lexer *Lexer) Next() (rune, error) {
  192. r, err := lexer.Skip()
  193. if err == nil {
  194. lexer.appendRune(r)
  195. }
  196. return r, nil
  197. }
  198. func (lexer *Lexer) DoIf(predicate func(rune) bool,
  199. todo func(*Lexer) (rune, error)) (bool, error) {
  200. r, err := lexer.Peek()
  201. if err != nil {
  202. return false, err
  203. }
  204. if predicate(r) {
  205. r, err = todo(lexer)
  206. if err != nil {
  207. return true, err
  208. }
  209. return true, nil
  210. }
  211. return false, nil
  212. }
  213. func (lexer *Lexer) NextIf(predicate func(rune) bool) (bool, error) {
  214. return lexer.DoIf(predicate, (*Lexer).Next)
  215. }
  216. func (lexer *Lexer) SkipIf(predicate func(rune) bool) (bool, error) {
  217. return lexer.DoIf(predicate, (*Lexer).Skip)
  218. }
  219. func (lexer *Lexer) NextWhile(predicate func(rune) bool) (bool, error) {
  220. result := true
  221. ok, err := lexer.NextIf(predicate)
  222. result = result || ok
  223. for ; ok && (err == nil); ok, err = lexer.NextIf(predicate) {
  224. result = result || ok
  225. }
  226. return result, err
  227. }
  228. func (lexer *Lexer) SkipWhile(predicate func(rune) bool) (bool, error) {
  229. result := true
  230. ok, err := lexer.SkipIf(predicate)
  231. result = result || ok
  232. for ; ok && (err == nil); ok, err = lexer.SkipIf(predicate) {
  233. result = result || ok
  234. }
  235. return result, err
  236. }
  237. func isEOX(r rune) bool {
  238. return r == '\n' || r == '.'
  239. }
  240. func isSpace(r rune) bool {
  241. return r == ' ' || r == '\t' || r == '\v' || r == '\r'
  242. }
  243. func isSpaceOrEOX(r rune) bool {
  244. return r == ' ' || r == '\t' || r == '\v' || r == '\r' || r == '\n' || r == '.'
  245. }
  246. func isComment(r rune) bool {
  247. return r == '#'
  248. }
  249. func (lexer *Lexer) SkipSpace() error {
  250. _, err := lexer.SkipWhile(isSpace)
  251. return err
  252. }
  253. func (lexer *Lexer) SkipBlockComment() error {
  254. var err error
  255. var r rune
  256. lexer.LogDebug("Skipping block comment.")
  257. for block := 1; block > 0 && err == nil; {
  258. _, err = lexer.Skip()
  259. if err != nil {
  260. return err
  261. }
  262. r, err = lexer.Peek()
  263. if r == '{' {
  264. block++
  265. } else if r == '}' {
  266. block--
  267. }
  268. lexer.LogDebug("Skipping block comment: %d", block)
  269. }
  270. _, err = lexer.Skip()
  271. return err
  272. }
  273. func (lexer *Lexer) SkipComment() error {
  274. r, err := lexer.Skip()
  275. lexer.LogDebug("Skipping %c.", r)
  276. if err != nil {
  277. return err
  278. }
  279. r, err = lexer.Peek()
  280. if r == '{' {
  281. return lexer.SkipBlockComment()
  282. }
  283. for r != '\n' && err == nil {
  284. lexer.LogDebug("Skipping comment %c.", r)
  285. _, err = lexer.Skip()
  286. if err != nil {
  287. return err
  288. }
  289. r, err = lexer.Peek()
  290. }
  291. if err != nil {
  292. return err
  293. }
  294. _, err = lexer.Skip()
  295. return err
  296. }
  297. /* Handles errors including EOF by either returning an error token or an
  298. * EOF token.
  299. */
  300. func (lexer *Lexer) handleError(err error) Token {
  301. if err == io.EOF {
  302. return lexer.MakeEOFToken()
  303. } else {
  304. return lexer.MakeErrorToken(err)
  305. }
  306. }
  307. func (lexer *Lexer) LexNumber() Token {
  308. isFloat := false
  309. // skip any first - or +
  310. _, err := lexer.NextIf(func(r rune) bool {
  311. return r == '-' || r == '+'
  312. })
  313. _, err = lexer.NextWhile(func(r rune) bool {
  314. if unicode.IsDigit(r) {
  315. return true
  316. } else if r == '.' {
  317. if isFloat {
  318. return false // double point in floating point
  319. } else {
  320. isFloat = true
  321. return true
  322. }
  323. } else {
  324. return false
  325. }
  326. })
  327. if err != nil {
  328. return lexer.MakeErrorfToken("when parsing number: %s", err)
  329. }
  330. if isFloat {
  331. return lexer.MakeToken(TokenKindFloat)
  332. } else {
  333. return lexer.MakeToken(TokenKindInteger)
  334. }
  335. }
  336. func isDoubleQuote(r rune) bool {
  337. return r == '"'
  338. }
  339. func (lexer *Lexer) handleEscapeHexChars(amount int) error {
  340. buffer := make([]byte, 0)
  341. r, err := lexer.Skip()
  342. for index := 0; err == nil && index < amount; {
  343. if unicode.Is(unicode.ASCII_Hex_Digit, r) {
  344. buffer = append(buffer, byte(r))
  345. } else {
  346. return fmt.Errorf("Not a hexadecimal digit: %c", r)
  347. }
  348. index++
  349. if index < amount {
  350. r, err = lexer.Skip()
  351. }
  352. }
  353. if err != nil {
  354. return err
  355. }
  356. i, err := strconv.ParseInt(string(buffer), 16, 32)
  357. if err != nil {
  358. return err
  359. }
  360. lexer.appendRune(rune(i))
  361. _, err = lexer.Peek()
  362. return err
  363. }
  364. func (lexer *Lexer) handleEscape() error {
  365. r, err := lexer.Skip()
  366. if err != nil {
  367. return err
  368. }
  369. switch r {
  370. case 'a':
  371. lexer.appendRune('\a')
  372. case 'b':
  373. lexer.appendRune('\b')
  374. case 'e':
  375. lexer.appendRune('\033')
  376. case 'f':
  377. lexer.appendRune('\f')
  378. case 'n':
  379. lexer.appendRune('\n')
  380. case 'r':
  381. lexer.appendRune('\r')
  382. case 't':
  383. lexer.appendRune('\t')
  384. case '\\':
  385. lexer.appendRune('\\')
  386. case '"':
  387. lexer.appendRune('"')
  388. // case 'o': fallthrough // No octals, for now.
  389. case 'x':
  390. err = lexer.handleEscapeHexChars(2)
  391. case 'u':
  392. err = lexer.handleEscapeHexChars(4)
  393. case 'U':
  394. err = lexer.handleEscapeHexChars(6)
  395. default:
  396. return fmt.Errorf("Unknown escape sequence character %c: %d", r, r)
  397. }
  398. return err
  399. }
  400. func (lexer *Lexer) LexString() Token {
  401. var err error
  402. var r rune
  403. _, err = lexer.Skip() // Skip first "
  404. if err != nil {
  405. return lexer.handleError(err)
  406. }
  407. r, err = lexer.Skip()
  408. for r != '"' && err == nil {
  409. if r == '\\' {
  410. err = lexer.handleEscape()
  411. if err != nil {
  412. return lexer.handleError(err)
  413. }
  414. } else {
  415. lexer.appendRune(r)
  416. // still inside the string
  417. }
  418. r, err = lexer.Skip()
  419. }
  420. if err != nil {
  421. return lexer.MakeErrorfToken("when parsing string: %s", err)
  422. }
  423. if err != nil {
  424. return lexer.handleError(err)
  425. }
  426. return lexer.MakeToken(TokenKindString)
  427. }
  428. func (lexer *Lexer) LexLongString() Token {
  429. var err error
  430. _, err = lexer.Skip()
  431. if err != nil {
  432. return lexer.handleError(err)
  433. }
  434. _, err = lexer.NextWhile(func(r rune) bool {
  435. return r != '`'
  436. })
  437. if err != nil {
  438. return lexer.MakeErrorfToken("when parsing long string: %s", err)
  439. }
  440. _, err = lexer.Skip()
  441. if err != nil {
  442. return lexer.handleError(err)
  443. }
  444. return lexer.MakeToken(TokenKindString)
  445. }
  446. func (lexer *Lexer) LexWordOrType(kind TokenKind) Token {
  447. var err error
  448. first := true
  449. _, err = lexer.Next()
  450. if err != nil {
  451. return lexer.handleError(err)
  452. }
  453. _, err = lexer.NextWhile(func(r rune) bool {
  454. if first {
  455. first = false
  456. return unicode.IsLetter(r) || r == '_'
  457. } else {
  458. return unicode.IsLetter(r) || unicode.IsNumber(r) || r == '_'
  459. }
  460. })
  461. if err != nil {
  462. return lexer.handleError(err)
  463. }
  464. sbuffer := string(lexer.buffer)
  465. // handle key words
  466. switch sbuffer {
  467. case "true":
  468. return lexer.MakeBooleanToken(true)
  469. case "false":
  470. return lexer.MakeBooleanToken(false)
  471. case "nil":
  472. return lexer.MakeNilToken()
  473. default:
  474. }
  475. return lexer.MakeToken(kind)
  476. }
  477. func (lexer *Lexer) LexWord() Token {
  478. return lexer.LexWordOrType(TokenKindWord)
  479. }
  480. func (lexer *Lexer) LexType() Token {
  481. return lexer.LexWordOrType(TokenKindType)
  482. }
  483. func (lexer *Lexer) LexSymbol() Token {
  484. var err error
  485. _, err = lexer.Skip()
  486. if err != nil {
  487. return lexer.handleError(err)
  488. }
  489. _, err = lexer.NextWhile(func(r rune) bool {
  490. return !isSpaceOrEOX(r)
  491. })
  492. if err != nil {
  493. return lexer.handleError(err)
  494. }
  495. return lexer.MakeToken(TokenKindSymbol)
  496. }
  497. func (lexer *Lexer) LexBuiltin() Token {
  498. var err error
  499. _, err = lexer.Skip()
  500. if err != nil {
  501. return lexer.handleError(err)
  502. }
  503. _, err = lexer.NextWhile(func(r rune) bool {
  504. return !isSpaceOrEOX(r)
  505. })
  506. if err != nil {
  507. return lexer.handleError(err)
  508. }
  509. return lexer.MakeBuiltinToken()
  510. }
  511. func (lexer *Lexer) skipSpaceAndCommentAndPeek() (rune, error) {
  512. r, err := lexer.Peek()
  513. if err != nil {
  514. return r, err
  515. }
  516. i := 0
  517. for isSpace(r) || isComment(r) {
  518. if isSpace(r) {
  519. err = lexer.SkipSpace()
  520. } else if isComment(r) {
  521. err = lexer.SkipComment()
  522. }
  523. if err != nil {
  524. return r, err
  525. }
  526. i++
  527. r, err = lexer.Peek()
  528. lexer.LogDebug("Peeked again: >%c< %v %v %d", r, isSpace(r), isComment(r), i)
  529. if err != nil {
  530. return r, err
  531. }
  532. }
  533. return r, err
  534. }
  535. func (lexer *Lexer) LexEOX() Token {
  536. lexer.Next()
  537. _, err := lexer.skipSpaceAndCommentAndPeek()
  538. if err != nil {
  539. return lexer.handleError(err)
  540. }
  541. _, err = lexer.NextWhile(func(r rune) bool {
  542. return isSpaceOrEOX(r) || r == '\n' || r == '.'
  543. })
  544. if err != nil {
  545. return lexer.handleError(err)
  546. }
  547. return lexer.MakeToken(TokenKindEOX)
  548. }
  549. func (lexer *Lexer) lex() Token {
  550. r, err := lexer.skipSpaceAndCommentAndPeek()
  551. lexer.LogDebug(" After skip: >%c< >%v<\n", r, err)
  552. if err != nil {
  553. return lexer.handleError(err)
  554. }
  555. if unicode.IsDigit(r) || r == '-' || r == '+' {
  556. return lexer.LexNumber()
  557. }
  558. if r == '\n' || r == '.' {
  559. return lexer.LexEOX()
  560. }
  561. if r == '"' {
  562. return lexer.LexString()
  563. }
  564. if r == '`' {
  565. return lexer.LexLongString()
  566. }
  567. if r == '!' {
  568. return lexer.LexBuiltin()
  569. }
  570. if r == ':' {
  571. return lexer.LexSymbol()
  572. }
  573. switch TokenKind(r) {
  574. case TokenKindGet:
  575. fallthrough
  576. case TokenKindSet:
  577. fallthrough
  578. case TokenKindOpenBlock:
  579. fallthrough
  580. case TokenKindCloseBlock:
  581. fallthrough
  582. case TokenKindOpenList:
  583. fallthrough
  584. case TokenKindCloseList:
  585. fallthrough
  586. case TokenKindOpenParen:
  587. fallthrough
  588. case TokenKindCloseParen:
  589. lexer.Next()
  590. return lexer.MakeToken(TokenKind(r))
  591. default:
  592. }
  593. if unicode.IsLetter(r) {
  594. if unicode.IsUpper(r) {
  595. return lexer.LexType()
  596. } else {
  597. return lexer.LexWord()
  598. }
  599. }
  600. // EOF character
  601. if r == 0x7f {
  602. return lexer.MakeEOFToken()
  603. }
  604. return lexer.MakeErrorfToken("Unknown character: %c", r)
  605. }
  606. func (lexer *Lexer) Lex() Token {
  607. res := lexer.lex()
  608. lexer.ClearBuffer() // ensure buffer is cleared after lexing, always.
  609. return res
  610. }
  611. func (lexer *Lexer) LexAll() []Token {
  612. var token Token
  613. res := make([]Token, 0)
  614. for token = lexer.Lex(); !token.IsLast(); token = lexer.Lex() {
  615. res = append(res, token)
  616. }
  617. res = append(res, token)
  618. return res
  619. }
  620. func NewLexer(scanner io.RuneScanner, filename string) *Lexer {
  621. lexer := &Lexer{}
  622. lexer.RuneScanner = scanner
  623. lexer.Position.FileName = filename
  624. lexer.Position.Column = 1
  625. lexer.Position.Line = 1
  626. lexer.LoggerWrapper = LoggerWrapper{nil}
  627. return lexer
  628. }
  629. func (lexer * Lexer) Report() {
  630. if lexer == nil {
  631. fmt.Printf("Lexer: is nil\n")
  632. } else {
  633. fmt.Printf("Lexer: %s:%d:%d\n",
  634. lexer.Position.FileName,
  635. lexer.Position.Column,
  636. lexer.Position.Line)
  637. }
  638. }
  639. func NewLexerFromString(input string) *Lexer {
  640. reader := strings.NewReader(input)
  641. return NewLexer(reader, "<input>")
  642. }
  643. func NewLexerFromFilename(filename string) (*Lexer, error) {
  644. read, err := os.Open(filename)
  645. if err == nil {
  646. bread := bufio.NewReader(read)
  647. lex := NewLexer(bread, filename)
  648. return lex, nil
  649. }
  650. return nil, err
  651. }