lexer.go 15 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739
  1. package muesli
  2. import (
  3. "bufio"
  4. _ "bytes"
  5. _ "errors"
  6. "fmt"
  7. "io"
  8. _ "io"
  9. "os"
  10. _ "reflect"
  11. _ "runtime"
  12. "strconv"
  13. "strings"
  14. "unicode"
  15. _ "unicode"
  16. // "gitlab.com/beoran/woe/graphviz"
  17. // _ "gitlab.com/beoran/woe/monolog"
  18. )
/* A Lexer splits scanned input into tokens.
 */
type Lexer struct {
	Position       // current source position (file name, line, column) of the lexer
	Index int      // count of runes consumed so far, advanced by advance()
	Start int      // NOTE(review): not referenced anywhere in this file — confirm before removing
	io.RuneScanner // underlying input source; must support ReadRune/UnreadRune for Peek
	buffer []rune  // runes accumulated for the token currently being built
	Current rune   // most recently consumed rune, recorded by advance()
	LoggerWrapper  // debug logging facade; replaced via SetLogger
}
/* SetLogger installs the given logger for the lexer's debug output. */
func (lexer *Lexer) SetLogger(logger Logger) {
	lexer.LoggerWrapper = LoggerWrapper{logger}
}
  33. func (lexer *Lexer) ClearBuffer() {
  34. lexer.buffer = make([]rune, 0)
  35. }
  36. func (lexer *Lexer) MakeIntegerToken() Token {
  37. var sbuffer = string(lexer.buffer)
  38. i, err := strconv.ParseInt(sbuffer, 0, 64)
  39. if err == nil {
  40. lexer.ClearBuffer()
  41. return NewToken(TokenKindInteger, IntValue(i), lexer.Position)
  42. } else {
  43. lexer.ClearBuffer()
  44. return lexer.MakeErrorToken(err)
  45. }
  46. }
  47. func (lexer *Lexer) MakeFloatToken() Token {
  48. var sbuffer = string(lexer.buffer)
  49. f, err := strconv.ParseFloat(sbuffer, 64)
  50. if err == nil {
  51. lexer.ClearBuffer()
  52. return NewToken(TokenKindFloat, FloatValue(f), lexer.Position)
  53. } else {
  54. lexer.ClearBuffer()
  55. return lexer.MakeErrorToken(err)
  56. }
  57. }
  58. func (lexer *Lexer) MakeBooleanToken(b bool) Token {
  59. lexer.ClearBuffer()
  60. if b {
  61. return NewToken(TokenKindBoolean, TrueValue, lexer.Position)
  62. } else {
  63. return NewToken(TokenKindBoolean, FalseValue, lexer.Position)
  64. }
  65. }
/* MakeNilToken clears the token buffer and returns a nil token at the
 * lexer's current position. */
func (lexer *Lexer) MakeNilToken() Token {
	lexer.ClearBuffer()
	return NewToken(TokenKindNil, NilValue, lexer.Position)
}
  70. func (lexer *Lexer) MakeBuiltinToken() Token {
  71. var sbuffer = string(lexer.buffer)
  72. lexer.ClearBuffer()
  73. if sbuffer == "true" {
  74. lexer.ClearBuffer()
  75. return NewToken(TokenKindBoolean, TrueValue, lexer.Position)
  76. } else if sbuffer == "false" {
  77. return NewToken(TokenKindBoolean, FalseValue, lexer.Position)
  78. } else if sbuffer == "nil" {
  79. return NewToken(TokenKindNil, NilValue, lexer.Position)
  80. } else {
  81. return lexer.MakeErrorfToken("Not a builtin: %s", sbuffer)
  82. }
  83. }
  84. func (lexer *Lexer) MakeStringValueToken(kind TokenKind) Token {
  85. var sbuffer = string(lexer.buffer)
  86. return NewToken(kind, StringValue(sbuffer), lexer.Position)
  87. }
  88. func (lexer *Lexer) MakeToken(kind TokenKind) Token {
  89. switch kind {
  90. case TokenKindInteger:
  91. return lexer.MakeIntegerToken()
  92. case TokenKindFloat:
  93. return lexer.MakeFloatToken()
  94. case TokenKindString:
  95. fallthrough
  96. case TokenKindSymbol:
  97. fallthrough
  98. case TokenKindType:
  99. fallthrough
  100. case TokenKindError:
  101. fallthrough
  102. case TokenKindWord:
  103. return lexer.MakeStringValueToken(kind)
  104. case TokenKindNil:
  105. fallthrough
  106. case TokenKindBoolean:
  107. return lexer.MakeBuiltinToken()
  108. case TokenKindGet:
  109. fallthrough
  110. case TokenKindSet:
  111. fallthrough
  112. case TokenKindOpenBlock:
  113. fallthrough
  114. case TokenKindCloseBlock:
  115. fallthrough
  116. case TokenKindOpenList:
  117. fallthrough
  118. case TokenKindCloseList:
  119. fallthrough
  120. case TokenKindOpenParen:
  121. fallthrough
  122. case TokenKindCloseParen:
  123. fallthrough
  124. case TokenKindEOX:
  125. fallthrough
  126. case TokenKindEOF:
  127. val := StringValue(string(lexer.buffer))
  128. lexer.ClearBuffer()
  129. return NewToken(kind, val, lexer.Position)
  130. default:
  131. return lexer.MakeErrorfToken("Internal error on token type %s", kind)
  132. }
  133. }
/* MakeErrorToken wraps err in an error token at the lexer's current
 * position. Value receiver: does not modify the lexer. */
func (lexer Lexer) MakeErrorToken(err error) Token {
	return NewToken(TokenKindError, ErrorValue{err}, lexer.Position)
}
  137. func (lexer Lexer) MakeErrorfToken(format string, va ...interface{}) Token {
  138. err := fmt.Errorf(format, va...)
  139. return lexer.MakeErrorToken(err)
  140. }
/* MakeEOFToken returns a token marking the end of the input. */
func (lexer Lexer) MakeEOFToken() Token {
	return NewToken(TokenKindEOF, &EmptyValue{}, lexer.Position)
}
/* Peek returns the next rune in the input without consuming it, by
 * reading one rune and immediately unreading it. The unread is
 * attempted unconditionally; if the read succeeded but the unread
 * failed, the unread error is reported instead. */
func (lexer *Lexer) Peek() (rune, error) {
	r, _, err := lexer.RuneScanner.ReadRune()
	err2 := lexer.RuneScanner.UnreadRune()
	if err == nil {
		err = err2
	}
	return r, err
}
  152. /* Advances the lexer's position based on the rune r read. */
  153. func (lexer *Lexer) advance(r rune) {
  154. lexer.Current = r
  155. lexer.Index++
  156. lexer.Position.Column++
  157. if r == '\n' {
  158. lexer.Position.Column = 1
  159. lexer.Position.Line++
  160. }
  161. }
/* Append a rune to the lexer's token buffer. */
func (lexer *Lexer) appendRune(r rune) {
	lexer.buffer = append(lexer.buffer, r)
}
  166. /* Advances the lexer's input buffer but does not store the rune read,
  167. * but just returns it. */
  168. func (lexer *Lexer) Skip() (rune, error) {
  169. r, _, err := lexer.RuneScanner.ReadRune()
  170. if err != nil {
  171. return 0, err
  172. }
  173. lexer.advance(r)
  174. return r, nil
  175. }
  176. /* Actually reads the next rune from the lexer's input source and stores
  177. * them in the lexer's token buffer.
  178. * Shorthand for r, err := lexer.Skip() ; lexer.appendRune(r) */
  179. func (lexer *Lexer) Next() (rune, error) {
  180. r, err := lexer.Skip()
  181. if err == nil {
  182. lexer.appendRune(r)
  183. }
  184. return r, nil
  185. }
  186. func (lexer *Lexer) DoIf(predicate func(rune) bool,
  187. todo func(*Lexer) (rune, error)) (bool, error) {
  188. r, err := lexer.Peek()
  189. if err != nil {
  190. return false, err
  191. }
  192. if predicate(r) {
  193. r, err = todo(lexer)
  194. if err != nil {
  195. return true, err
  196. }
  197. return true, nil
  198. }
  199. return false, nil
  200. }
/* NextIf consumes and buffers one rune when predicate accepts the next
 * rune. Returns whether a rune was consumed. */
func (lexer *Lexer) NextIf(predicate func(rune) bool) (bool, error) {
	return lexer.DoIf(predicate, (*Lexer).Next)
}
/* SkipIf consumes (without buffering) one rune when predicate accepts
 * the next rune. Returns whether a rune was consumed. */
func (lexer *Lexer) SkipIf(predicate func(rune) bool) (bool, error) {
	return lexer.DoIf(predicate, (*Lexer).Skip)
}
  207. func (lexer *Lexer) NextWhile(predicate func(rune) bool) (bool, error) {
  208. result := true
  209. ok, err := lexer.NextIf(predicate)
  210. result = result || ok
  211. for ; ok && (err == nil); ok, err = lexer.NextIf(predicate) {
  212. result = result || ok
  213. }
  214. return result, err
  215. }
  216. func (lexer *Lexer) SkipWhile(predicate func(rune) bool) (bool, error) {
  217. result := true
  218. ok, err := lexer.SkipIf(predicate)
  219. result = result || ok
  220. for ; ok && (err == nil); ok, err = lexer.SkipIf(predicate) {
  221. result = result || ok
  222. }
  223. return result, err
  224. }
  225. func isEOX(r rune) bool {
  226. return r == '\n' || r == '.'
  227. }
  228. func isSpace(r rune) bool {
  229. return r == ' ' || r == '\t' || r == '\v' || r == '\r'
  230. }
  231. func isSpaceOrEOX(r rune) bool {
  232. return r == ' ' || r == '\t' || r == '\v' || r == '\r' || r == '\n' || r == '.'
  233. }
  234. func isComment(r rune) bool {
  235. return r == '#'
  236. }
/* SkipSpace discards any run of horizontal whitespace. */
func (lexer *Lexer) SkipSpace() error {
	_, err := lexer.SkipWhile(isSpace)
	return err
}
  241. func (lexer *Lexer) SkipBlockComment() error {
  242. var err error
  243. var r rune
  244. lexer.LogDebug("Skipping block comment.")
  245. for block := 1; block > 0 && err == nil; {
  246. _, err = lexer.Skip()
  247. if err != nil {
  248. return err
  249. }
  250. r, err = lexer.Peek()
  251. if r == '{' {
  252. block++
  253. } else if r == '}' {
  254. block--
  255. }
  256. lexer.LogDebug("Skipping block comment: %d", block)
  257. }
  258. _, err = lexer.Skip()
  259. return err
  260. }
/* SkipComment skips a comment introduced by '#'. A "#{" opens a
 * nestable block comment handled by SkipBlockComment; otherwise the
 * comment runs to the end of the line and the terminating newline is
 * consumed as well. */
func (lexer *Lexer) SkipComment() error {
	r, err := lexer.Skip() // consume the '#'
	lexer.LogDebug("Skipping %c.", r)
	if err != nil {
		return err
	}
	r, err = lexer.Peek()
	if r == '{' { // "#{" starts a block comment
		return lexer.SkipBlockComment()
	}
	// Line comment: skip runes until the newline. If Peek failed above,
	// r is 0 and the err check below ends the loop on the next pass.
	for r != '\n' && err == nil {
		lexer.LogDebug("Skipping comment %c.", r)
		_, err = lexer.Skip()
		if err != nil {
			return err
		}
		r, err = lexer.Peek()
	}
	if err != nil {
		return err
	}
	// Consume the newline that ended the comment.
	_, err = lexer.Skip()
	return err
}
  285. /* Handles errors including EOF by either returning an error token or an
  286. * EOF token.
  287. */
  288. func (lexer *Lexer) handleError(err error) Token {
  289. if err == io.EOF {
  290. return lexer.MakeEOFToken()
  291. } else {
  292. return lexer.MakeErrorToken(err)
  293. }
  294. }
  295. func (lexer *Lexer) LexNumber() Token {
  296. isFloat := false
  297. // skip any first - or +
  298. _, err := lexer.NextIf(func(r rune) bool {
  299. return r == '-' || r == '+'
  300. })
  301. _, err = lexer.NextWhile(func(r rune) bool {
  302. if unicode.IsDigit(r) {
  303. return true
  304. } else if r == '.' {
  305. if isFloat {
  306. return false // double point in floating point
  307. } else {
  308. isFloat = true
  309. return true
  310. }
  311. } else {
  312. return false
  313. }
  314. })
  315. if err != nil {
  316. return lexer.MakeErrorfToken("when parsing number: %s", err)
  317. }
  318. if isFloat {
  319. return lexer.MakeToken(TokenKindFloat)
  320. } else {
  321. return lexer.MakeToken(TokenKindInteger)
  322. }
  323. }
  324. func isDoubleQuote(r rune) bool {
  325. return r == '"'
  326. }
/* handleEscapeHexChars reads exactly amount hexadecimal digits from
 * the input, decodes them as a single code point, and appends the
 * resulting rune to the token buffer. Used for \xHH, \uHHHH and
 * \UHHHHHH escapes inside strings. Returns an error on a non-hex
 * digit, a read failure, or an unparsable value. */
func (lexer *Lexer) handleEscapeHexChars(amount int) error {
	buffer := make([]byte, 0)
	r, err := lexer.Skip()
	for index := 0; err == nil && index < amount; {
		if unicode.Is(unicode.ASCII_Hex_Digit, r) {
			buffer = append(buffer, byte(r))
		} else {
			return fmt.Errorf("Not a hexadecimal digit: %c", r)
		}
		index++
		// Only read ahead while more digits are still expected, so the
		// rune after the escape is left in the input.
		if index < amount {
			r, err = lexer.Skip()
		}
	}
	if err != nil {
		return err
	}
	// 32-bit bound: the value must fit in a rune.
	i, err := strconv.ParseInt(string(buffer), 16, 32)
	if err != nil {
		return err
	}
	lexer.appendRune(rune(i))
	// NOTE(review): this trailing Peek propagates EOF even though the
	// escape itself decoded fine — confirm that is intended.
	_, err = lexer.Peek()
	return err
}
  352. func (lexer *Lexer) handleEscape() error {
  353. r, err := lexer.Skip()
  354. if err != nil {
  355. return err
  356. }
  357. switch r {
  358. case 'a':
  359. lexer.appendRune('\a')
  360. case 'b':
  361. lexer.appendRune('\b')
  362. case 'e':
  363. lexer.appendRune('\033')
  364. case 'f':
  365. lexer.appendRune('\f')
  366. case 'n':
  367. lexer.appendRune('\n')
  368. case 'r':
  369. lexer.appendRune('\r')
  370. case 't':
  371. lexer.appendRune('\t')
  372. case '\\':
  373. lexer.appendRune('\\')
  374. case '"':
  375. lexer.appendRune('"')
  376. // case 'o': fallthrough // No octals, for now.
  377. case 'x':
  378. err = lexer.handleEscapeHexChars(2)
  379. case 'u':
  380. err = lexer.handleEscapeHexChars(4)
  381. case 'U':
  382. err = lexer.handleEscapeHexChars(6)
  383. default:
  384. return fmt.Errorf("Unknown escape sequence character %c: %d", r, r)
  385. }
  386. return err
  387. }
  388. func (lexer *Lexer) LexString() Token {
  389. var err error
  390. var r rune
  391. _, err = lexer.Skip() // Skip first "
  392. if err != nil {
  393. return lexer.handleError(err)
  394. }
  395. r, err = lexer.Skip()
  396. for r != '"' && err == nil {
  397. if r == '\\' {
  398. err = lexer.handleEscape()
  399. if err != nil {
  400. return lexer.handleError(err)
  401. }
  402. } else {
  403. lexer.appendRune(r)
  404. // still inside the string
  405. }
  406. r, err = lexer.Skip()
  407. }
  408. if err != nil {
  409. return lexer.MakeErrorfToken("when parsing string: %s", err)
  410. }
  411. if err != nil {
  412. return lexer.handleError(err)
  413. }
  414. return lexer.MakeToken(TokenKindString)
  415. }
  416. func (lexer *Lexer) LexLongString() Token {
  417. var err error
  418. _, err = lexer.Skip()
  419. if err != nil {
  420. return lexer.handleError(err)
  421. }
  422. _, err = lexer.NextWhile(func(r rune) bool {
  423. return r != '`'
  424. })
  425. if err != nil {
  426. return lexer.MakeErrorfToken("when parsing long string: %s", err)
  427. }
  428. _, err = lexer.Skip()
  429. if err != nil {
  430. return lexer.handleError(err)
  431. }
  432. return lexer.MakeToken(TokenKindString)
  433. }
/* LexWordOrType lexes an identifier and returns it as a token of the
 * given kind (word or type). The keywords true, false and nil are
 * instead turned into their builtin tokens.
 * NOTE(review): Next() below already consumed the identifier's first
 * rune, so the "first" branch of the predicate actually applies to the
 * second character — confirm whether that is intended. */
func (lexer *Lexer) LexWordOrType(kind TokenKind) Token {
	var err error
	first := true
	_, err = lexer.Next()
	if err != nil {
		return lexer.handleError(err)
	}
	_, err = lexer.NextWhile(func(r rune) bool {
		if first {
			first = false
			// letter or underscore only
			return unicode.IsLetter(r) || r == '_'
		} else {
			// subsequent characters may also be digits
			return unicode.IsLetter(r) || unicode.IsNumber(r) || r == '_'
		}
	})
	if err != nil {
		return lexer.handleError(err)
	}
	sbuffer := string(lexer.buffer)
	// handle key words
	switch sbuffer {
	case "true":
		return lexer.MakeBooleanToken(true)
	case "false":
		return lexer.MakeBooleanToken(false)
	case "nil":
		return lexer.MakeNilToken()
	default:
	}
	return lexer.MakeToken(kind)
}
/* LexWord lexes a lowercase-initial identifier as a word token. */
func (lexer *Lexer) LexWord() Token {
	return lexer.LexWordOrType(TokenKindWord)
}
/* LexType lexes an uppercase-initial identifier as a type token. */
func (lexer *Lexer) LexType() Token {
	return lexer.LexWordOrType(TokenKindType)
}
  471. func (lexer *Lexer) LexSymbol() Token {
  472. var err error
  473. _, err = lexer.Skip()
  474. if err != nil {
  475. return lexer.handleError(err)
  476. }
  477. _, err = lexer.NextWhile(func(r rune) bool {
  478. return !isSpaceOrEOX(r)
  479. })
  480. if err != nil {
  481. return lexer.handleError(err)
  482. }
  483. return lexer.MakeToken(TokenKindSymbol)
  484. }
  485. func (lexer *Lexer) LexBuiltin() Token {
  486. var err error
  487. _, err = lexer.Skip()
  488. if err != nil {
  489. return lexer.handleError(err)
  490. }
  491. _, err = lexer.NextWhile(func(r rune) bool {
  492. return !isSpaceOrEOX(r)
  493. })
  494. if err != nil {
  495. return lexer.handleError(err)
  496. }
  497. return lexer.MakeBuiltinToken()
  498. }
/* skipSpaceAndCommentAndPeek skips any interleaved run of whitespace
 * and comments and returns the first significant rune after them,
 * without consuming it. */
func (lexer *Lexer) skipSpaceAndCommentAndPeek() (rune, error) {
	r, err := lexer.Peek()
	if err != nil {
		return r, err
	}
	i := 0 // iteration counter, used only in the debug log below
	for isSpace(r) || isComment(r) {
		if isSpace(r) {
			err = lexer.SkipSpace()
		} else if isComment(r) {
			err = lexer.SkipComment()
		}
		if err != nil {
			return r, err
		}
		i++
		// Re-peek: more space or another comment may follow.
		r, err = lexer.Peek()
		lexer.LogDebug("Peeked again: >%c< %v %v %d", r, isSpace(r), isComment(r), i)
		if err != nil {
			return r, err
		}
	}
	return r, err
}
  523. func (lexer *Lexer) LexEOX() Token {
  524. lexer.Next()
  525. _, err := lexer.skipSpaceAndCommentAndPeek()
  526. if err != nil {
  527. return lexer.handleError(err)
  528. }
  529. _, err = lexer.NextWhile(func(r rune) bool {
  530. return isSpaceOrEOX(r) || r == '\n' || r == '.'
  531. })
  532. if err != nil {
  533. return lexer.handleError(err)
  534. }
  535. return lexer.MakeToken(TokenKindEOX)
  536. }
  537. func (lexer *Lexer) lex() Token {
  538. r, err := lexer.skipSpaceAndCommentAndPeek()
  539. lexer.LogDebug(" After skip: >%c< >%v<\n", r, err)
  540. if err != nil {
  541. return lexer.handleError(err)
  542. }
  543. if unicode.IsDigit(r) || r == '-' || r == '+' {
  544. return lexer.LexNumber()
  545. }
  546. if r == '\n' || r == '.' {
  547. return lexer.LexEOX()
  548. }
  549. if r == '"' {
  550. return lexer.LexString()
  551. }
  552. if r == '`' {
  553. return lexer.LexLongString()
  554. }
  555. if r == '!' {
  556. return lexer.LexBuiltin()
  557. }
  558. if r == ':' {
  559. return lexer.LexSymbol()
  560. }
  561. switch TokenKind(r) {
  562. case TokenKindGet:
  563. fallthrough
  564. case TokenKindSet:
  565. fallthrough
  566. case TokenKindOpenBlock:
  567. fallthrough
  568. case TokenKindCloseBlock:
  569. fallthrough
  570. case TokenKindOpenList:
  571. fallthrough
  572. case TokenKindCloseList:
  573. fallthrough
  574. case TokenKindOpenParen:
  575. fallthrough
  576. case TokenKindCloseParen:
  577. lexer.Next()
  578. return lexer.MakeToken(TokenKind(r))
  579. default:
  580. }
  581. if unicode.IsLetter(r) {
  582. if unicode.IsUpper(r) {
  583. return lexer.LexType()
  584. } else {
  585. return lexer.LexWord()
  586. }
  587. }
  588. return lexer.MakeErrorfToken("Unknown character: %c", r)
  589. }
/* Lex scans and returns the next token, then clears the token buffer
 * so a failed or partial scan cannot leak runes into the next token. */
func (lexer *Lexer) Lex() Token {
	res := lexer.lex()
	lexer.ClearBuffer() // ensure buffer is cleared after lexing, always.
	return res
}
  595. func (lexer *Lexer) LexAll() []Token {
  596. var token Token
  597. res := make([]Token, 0)
  598. for token = lexer.Lex(); !token.IsLast(); token = lexer.Lex() {
  599. res = append(res, token)
  600. }
  601. res = append(res, token)
  602. return res
  603. }
  604. func NewLexer(scanner io.RuneScanner, filename string) *Lexer {
  605. lexer := &Lexer{}
  606. lexer.RuneScanner = scanner
  607. lexer.Position.FileName = filename
  608. lexer.Position.Column = 1
  609. lexer.Position.Line = 1
  610. lexer.LoggerWrapper = LoggerWrapper{nil}
  611. return lexer
  612. }
  613. func (lexer * Lexer) Report() {
  614. if lexer == nil {
  615. fmt.Printf("Lexer: is nil\n")
  616. } else {
  617. fmt.Printf("Lexer: %s:%d:%d\n",
  618. lexer.Position.FileName,
  619. lexer.Position.Column,
  620. lexer.Position.Line)
  621. }
  622. }
  623. func NewLexerFromString(input string) *Lexer {
  624. reader := strings.NewReader(input)
  625. return NewLexer(reader, "<input>")
  626. }
  627. func NewLexerFromFilename(filename string) (*Lexer, error) {
  628. read, err := os.Open(filename)
  629. if err == nil {
  630. bread := bufio.NewReader(read)
  631. lex := NewLexer(bread, filename)
  632. return lex, nil
  633. }
  634. return nil, err
  635. }