// lexer.go
  1. package muesli
  2. import (
  3. "bufio"
  4. _ "bytes"
  5. _ "errors"
  6. "fmt"
  7. "io"
  8. _ "io"
  9. "os"
  10. _ "reflect"
  11. _ "runtime"
  12. "strconv"
  13. "strings"
  14. "unicode"
  15. _ "unicode"
  16. // "gitlab.com/beoran/woe/graphviz"
  17. // _ "gitlab.com/beoran/woe/monolog"
  18. )
/* A Lexer splits scanned input into tokens.
 */
type Lexer struct {
	Position             // Current position (file name, line, column) in the input.
	Index          int   // Number of runes consumed so far.
	Start          int   // Start index of the current token. NOTE(review): not updated in this chunk — confirm use elsewhere.
	io.RuneScanner       // Underlying input source; must support unreading one rune for Peek.
	buffer         []rune // Runes accumulated for the token currently being lexed.
	Current        rune  // Last rune read by advance.
	LoggerWrapper        // Optional debug logger; a nil logger disables output.
}
// SetLogger installs the given logger by wrapping it in a LoggerWrapper.
func (lexer *Lexer) SetLogger(logger Logger) {
	lexer.LoggerWrapper = LoggerWrapper{logger}
}
  33. func (lexer *Lexer) ClearBuffer() {
  34. lexer.buffer = make([]rune, 0)
  35. }
  36. func (lexer *Lexer) MakeIntegerToken() Token {
  37. var sbuffer = string(lexer.buffer)
  38. i, err := strconv.ParseInt(sbuffer, 0, 64)
  39. if err == nil {
  40. lexer.ClearBuffer()
  41. return NewToken(TokenKindInteger, IntValue(i), lexer.Position)
  42. } else {
  43. lexer.ClearBuffer()
  44. return lexer.MakeErrorToken(err)
  45. }
  46. }
  47. func (lexer *Lexer) MakeFloatToken() Token {
  48. var sbuffer = string(lexer.buffer)
  49. f, err := strconv.ParseFloat(sbuffer, 64)
  50. if err == nil {
  51. lexer.ClearBuffer()
  52. return NewToken(TokenKindFloat, FloatValue(f), lexer.Position)
  53. } else {
  54. lexer.ClearBuffer()
  55. return lexer.MakeErrorToken(err)
  56. }
  57. }
// MakeBooleanToken clears the token buffer and returns a boolean token
// carrying b at the lexer's current position.
func (lexer *Lexer) MakeBooleanToken(b bool) Token {
	lexer.ClearBuffer()
	return NewToken(TokenKindBoolean, BoolValue(b), lexer.Position)
}
// MakeStringValueToken returns a token of the given kind whose value is
// the buffered text. Unlike the numeric token makers it does NOT clear
// the buffer; Lex() clears it after every token instead.
func (lexer *Lexer) MakeStringValueToken(kind TokenKind) Token {
	var sbuffer = string(lexer.buffer)
	return NewToken(kind, StringValue(sbuffer), lexer.Position)
}
  66. func (lexer *Lexer) MakeToken(kind TokenKind) Token {
  67. switch kind {
  68. case TokenKindInteger:
  69. return lexer.MakeIntegerToken()
  70. case TokenKindFloat:
  71. return lexer.MakeFloatToken()
  72. case TokenKindString:
  73. fallthrough
  74. case TokenKindSymbol:
  75. fallthrough
  76. case TokenKindType:
  77. fallthrough
  78. case TokenKindError:
  79. fallthrough
  80. case TokenKindWord:
  81. return lexer.MakeStringValueToken(kind)
  82. case TokenKindBoolean:
  83. fallthrough
  84. case TokenKindGet:
  85. fallthrough
  86. case TokenKindSet:
  87. fallthrough
  88. case TokenKindOpenBlock:
  89. fallthrough
  90. case TokenKindCloseBlock:
  91. fallthrough
  92. case TokenKindOpenList:
  93. fallthrough
  94. case TokenKindCloseList:
  95. fallthrough
  96. case TokenKindOpenParen:
  97. fallthrough
  98. case TokenKindCloseParen:
  99. fallthrough
  100. case TokenKindEOX:
  101. fallthrough
  102. case TokenKindEOF:
  103. val := StringValue(string(lexer.buffer))
  104. lexer.ClearBuffer()
  105. return NewToken(kind, val, lexer.Position)
  106. default:
  107. return lexer.MakeErrorfToken("Internal error on token type %s", kind)
  108. }
  109. }
// MakeErrorToken wraps err in an error token at the current position.
func (lexer Lexer) MakeErrorToken(err error) Token {
	return NewToken(TokenKindError, ErrorValue{err}, lexer.Position)
}
  113. func (lexer Lexer) MakeErrorfToken(format string, va ...interface{}) Token {
  114. err := fmt.Errorf(format, va...)
  115. return lexer.MakeErrorToken(err)
  116. }
// MakeEOFToken returns an end-of-file token at the current position.
func (lexer Lexer) MakeEOFToken() Token {
	return NewToken(TokenKindEOF, &EmptyValue{}, lexer.Position)
}
  120. func (lexer *Lexer) Peek() (rune, error) {
  121. r, _, err := lexer.RuneScanner.ReadRune()
  122. err2 := lexer.RuneScanner.UnreadRune()
  123. if err == nil {
  124. err = err2
  125. }
  126. return r, err
  127. }
  128. /* Advances the lexer's position based on the rune r read. */
  129. func (lexer *Lexer) advance(r rune) {
  130. lexer.Current = r
  131. lexer.Index++
  132. lexer.Position.Column++
  133. if r == '\n' {
  134. lexer.Position.Column = 1
  135. lexer.Position.Line++
  136. }
  137. }
/* Append a rune to the lexer's buffer. */
func (lexer *Lexer) appendRune(r rune) {
	lexer.buffer = append(lexer.buffer, r)
}
  142. /* Advances the lexer's input buffer but does not store the rune read,
  143. * but just returns it. */
  144. func (lexer *Lexer) Skip() (rune, error) {
  145. r, _, err := lexer.RuneScanner.ReadRune()
  146. if err != nil {
  147. return 0, err
  148. }
  149. lexer.advance(r)
  150. return r, nil
  151. }
  152. /* Actually reads the next rune from the lexer's input source and stores
  153. * them in the lexer's token buffer.
  154. * Shorthand for r, err := lexer.Skip() ; lexer.appendRune(r) */
  155. func (lexer *Lexer) Next() (rune, error) {
  156. r, err := lexer.Skip()
  157. if err == nil {
  158. lexer.appendRune(r)
  159. }
  160. return r, nil
  161. }
  162. func (lexer *Lexer) DoIf(predicate func(rune) bool,
  163. todo func(*Lexer) (rune, error)) (bool, error) {
  164. r, err := lexer.Peek()
  165. if err != nil {
  166. return false, err
  167. }
  168. if predicate(r) {
  169. r, err = todo(lexer)
  170. if err != nil {
  171. return true, err
  172. }
  173. return true, nil
  174. }
  175. return false, nil
  176. }
// NextIf consumes and buffers the next rune if predicate accepts it,
// reporting whether it matched.
func (lexer *Lexer) NextIf(predicate func(rune) bool) (bool, error) {
	return lexer.DoIf(predicate, (*Lexer).Next)
}
// SkipIf consumes (without buffering) the next rune if predicate
// accepts it, reporting whether it matched.
func (lexer *Lexer) SkipIf(predicate func(rune) bool) (bool, error) {
	return lexer.DoIf(predicate, (*Lexer).Skip)
}
  183. func (lexer *Lexer) NextWhile(predicate func(rune) bool) (bool, error) {
  184. result := true
  185. ok, err := lexer.NextIf(predicate)
  186. result = result || ok
  187. for ; ok && (err == nil); ok, err = lexer.NextIf(predicate) {
  188. result = result || ok
  189. }
  190. return result, err
  191. }
  192. func (lexer *Lexer) SkipWhile(predicate func(rune) bool) (bool, error) {
  193. result := true
  194. ok, err := lexer.SkipIf(predicate)
  195. result = result || ok
  196. for ; ok && (err == nil); ok, err = lexer.SkipIf(predicate) {
  197. result = result || ok
  198. }
  199. return result, err
  200. }
  201. func isSpace(r rune) bool {
  202. return r == ' ' || r == '\t' || r == '\v' || r == '\r'
  203. }
  204. func isComment(r rune) bool {
  205. return r == '#'
  206. }
// SkipSpace consumes any run of intra-line whitespace (see isSpace).
func (lexer *Lexer) SkipSpace() error {
	_, err := lexer.SkipWhile(isSpace)
	return err
}
  211. func (lexer *Lexer) SkipBlockComment() error {
  212. var err error
  213. var r rune
  214. lexer.LogDebug("Skipping block comment.")
  215. for block := 1; block > 0 && err == nil; {
  216. _, err = lexer.Skip()
  217. if err != nil {
  218. return err
  219. }
  220. r, err = lexer.Peek()
  221. if r == '{' {
  222. block++
  223. } else if r == '}' {
  224. block--
  225. }
  226. lexer.LogDebug("Skipping block comment: %d", block)
  227. }
  228. _, err = lexer.Skip()
  229. return err
  230. }
// SkipComment skips a comment: either a block comment when the rune
// after the comment starter is '{', or a line comment up to and
// including the terminating newline.
func (lexer *Lexer) SkipComment() error {
	// Consume the comment-starting rune itself (normally '#').
	r, err := lexer.Skip()
	lexer.LogDebug("Skipping %c.", r)
	if err != nil {
		return err
	}
	r, err = lexer.Peek()
	// NOTE(review): err is not checked before r is compared here; on a
	// read failure r is the zero rune, so control falls into the loop,
	// which exits on err — confirm this is the intended error path.
	if r == '{' {
		return lexer.SkipBlockComment()
	}
	for r != '\n' && err == nil {
		lexer.LogDebug("Skipping comment %c.", r)
		_, err = lexer.Skip()
		if err != nil {
			return err
		}
		r, err = lexer.Peek()
	}
	if err != nil {
		return err
	}
	// Consume the terminating newline as well.
	_, err = lexer.Skip()
	return err
}
  255. /* Handles errors including EOF by either returning an error token or an
  256. * EOF token.
  257. */
  258. func (lexer *Lexer) handleError(err error) Token {
  259. if err == io.EOF {
  260. return lexer.MakeEOFToken()
  261. } else {
  262. return lexer.MakeErrorToken(err)
  263. }
  264. }
  265. func (lexer *Lexer) LexNumber() Token {
  266. isFloat := false
  267. // skip any first -
  268. _, err := lexer.NextIf(func(r rune) bool {
  269. return r == '-'
  270. })
  271. _, err = lexer.NextWhile(func(r rune) bool {
  272. if unicode.IsDigit(r) {
  273. return true
  274. } else if r == '.' {
  275. if isFloat {
  276. return false // double point in floating point
  277. } else {
  278. isFloat = true
  279. return true
  280. }
  281. } else {
  282. return false
  283. }
  284. })
  285. if err != nil {
  286. return lexer.MakeErrorfToken("when parsing number: %s", err)
  287. }
  288. if isFloat {
  289. return lexer.MakeToken(TokenKindFloat)
  290. } else {
  291. return lexer.MakeToken(TokenKindInteger)
  292. }
  293. }
  294. func isDoubleQuote(r rune) bool {
  295. return r == '"'
  296. }
// handleEscapeHexChars reads exactly amount hexadecimal digits from the
// input, decodes them as one code point, and appends the resulting rune
// to the token buffer. Used for \x.. (2), \u.... (4) and \U......
// (6 digit) escapes.
func (lexer *Lexer) handleEscapeHexChars(amount int) error {
	buffer := make([]byte, 0)
	r, err := lexer.Skip()
	for index := 0; err == nil && index < amount; {
		if unicode.Is(unicode.ASCII_Hex_Digit, r) {
			buffer = append(buffer, byte(r))
		} else {
			return fmt.Errorf("Not a hexadecimal digit: %c", r)
		}
		index++
		// Only read another rune while more digits are still expected,
		// so exactly `amount` runes are consumed.
		if index < amount {
			r, err = lexer.Skip()
		}
	}
	if err != nil {
		return err
	}
	// Decode the collected digits (base 16, 32 bit) into a code point.
	i, err := strconv.ParseInt(string(buffer), 16, 32)
	if err != nil {
		return err
	}
	lexer.appendRune(rune(i))
	// Peek so a pending read failure is surfaced to the caller now.
	_, err = lexer.Peek()
	return err
}
  322. func (lexer *Lexer) handleEscape() error {
  323. r, err := lexer.Skip()
  324. if err != nil {
  325. return err
  326. }
  327. switch r {
  328. case 'a':
  329. lexer.appendRune('\a')
  330. case 'b':
  331. lexer.appendRune('\b')
  332. case 'e':
  333. lexer.appendRune('\033')
  334. case 'f':
  335. lexer.appendRune('\f')
  336. case 'n':
  337. lexer.appendRune('\n')
  338. case 'r':
  339. lexer.appendRune('\r')
  340. case 't':
  341. lexer.appendRune('\t')
  342. case '\\':
  343. lexer.appendRune('\\')
  344. case '"':
  345. lexer.appendRune('"')
  346. // case 'o': fallthrough // No octals, for now.
  347. case 'x':
  348. err = lexer.handleEscapeHexChars(2)
  349. case 'u':
  350. err = lexer.handleEscapeHexChars(4)
  351. case 'U':
  352. err = lexer.handleEscapeHexChars(6)
  353. default:
  354. return fmt.Errorf("Unknown escape sequence character %c: %d", r, r)
  355. }
  356. return err
  357. }
  358. func (lexer *Lexer) LexString() Token {
  359. var err error
  360. var r rune
  361. _, err = lexer.Skip() // Skip first "
  362. if err != nil {
  363. return lexer.handleError(err)
  364. }
  365. r, err = lexer.Skip()
  366. for r != '"' && err == nil {
  367. if r == '\\' {
  368. err = lexer.handleEscape()
  369. if err != nil {
  370. return lexer.handleError(err)
  371. }
  372. } else {
  373. lexer.appendRune(r)
  374. // still inside the string
  375. }
  376. r, err = lexer.Skip()
  377. }
  378. if err != nil {
  379. return lexer.MakeErrorfToken("when parsing string: %s", err)
  380. }
  381. _, err = lexer.Skip() // skip last "
  382. if err != nil {
  383. return lexer.handleError(err)
  384. }
  385. return lexer.MakeToken(TokenKindString)
  386. }
  387. func (lexer *Lexer) LexLongString() Token {
  388. var err error
  389. _, err = lexer.Skip()
  390. if err != nil {
  391. return lexer.handleError(err)
  392. }
  393. _, err = lexer.NextWhile(func(r rune) bool {
  394. return r != '`'
  395. })
  396. if err != nil {
  397. return lexer.MakeErrorfToken("when parsing long string: %s", err)
  398. }
  399. _, err = lexer.Skip()
  400. if err != nil {
  401. return lexer.handleError(err)
  402. }
  403. return lexer.MakeToken(TokenKindString)
  404. }
// LexWordOrType lexes an identifier-like token of the given kind. The
// first rune is consumed unconditionally (the caller has already
// checked it is a letter); subsequent runes must be letters, digits or
// underscores. The keywords "true" and "false" become boolean tokens.
func (lexer *Lexer) LexWordOrType(kind TokenKind) Token {
	var err error
	first := true
	_, err = lexer.Next()
	if err != nil {
		return lexer.handleError(err)
	}
	_, err = lexer.NextWhile(func(r rune) bool {
		// NOTE(review): `first` guards the first call of this predicate,
		// which actually sees the SECOND rune of the word (the first was
		// consumed by Next above), so a digit is rejected in the second
		// position only — confirm this restriction is intentional.
		if first {
			first = false
			return unicode.IsLetter(r) || r == '_'
		} else {
			return unicode.IsLetter(r) || unicode.IsNumber(r) || r == '_'
		}
	})
	if err != nil {
		return lexer.handleError(err)
	}
	sbuffer := string(lexer.buffer)
	// handle key words
	switch sbuffer {
	case "true":
		return lexer.MakeBooleanToken(true)
	case "false":
		return lexer.MakeBooleanToken(false)
	default:
	}
	return lexer.MakeToken(kind)
}
// LexWord lexes a lowercase-initial identifier as a word token.
func (lexer *Lexer) LexWord() Token {
	return lexer.LexWordOrType(TokenKindWord)
}
// LexType lexes an uppercase-initial identifier as a type token.
func (lexer *Lexer) LexType() Token {
	return lexer.LexWordOrType(TokenKindType)
}
  440. func (lexer *Lexer) LexSymbol() Token {
  441. var err error
  442. _, err = lexer.Skip()
  443. if err != nil {
  444. return lexer.handleError(err)
  445. }
  446. _, err = lexer.NextWhile(func(r rune) bool {
  447. return !unicode.IsSpace(r)
  448. })
  449. if err != nil {
  450. return lexer.handleError(err)
  451. }
  452. return lexer.MakeToken(TokenKindSymbol)
  453. }
// skipSpaceAndCommentAndPeek consumes any mix of whitespace runs and
// comments, then returns (without consuming) the first rune of real
// input that follows.
func (lexer *Lexer) skipSpaceAndCommentAndPeek() (rune, error) {
	r, err := lexer.Peek()
	if err != nil {
		return r, err
	}
	// i only counts loop iterations for the debug log below.
	i := 0
	for isSpace(r) || isComment(r) {
		if isSpace(r) {
			err = lexer.SkipSpace()
		} else if isComment(r) {
			err = lexer.SkipComment()
		}
		if err != nil {
			return r, err
		}
		i++
		r, err = lexer.Peek()
		lexer.LogDebug("Peeked again: >%c< %v %v %d", r, isSpace(r), isComment(r), i)
		if err != nil {
			return r, err
		}
	}
	return r, err
}
  478. func (lexer *Lexer) lex() Token {
  479. r, err := lexer.skipSpaceAndCommentAndPeek()
  480. lexer.LogDebug(" After skip: >%c< >%v<\n", r, err)
  481. if err != nil {
  482. return lexer.handleError(err)
  483. }
  484. if unicode.IsDigit(r) || r == '-' {
  485. return lexer.LexNumber()
  486. }
  487. if r == '\n' || r == '.' {
  488. lexer.Next()
  489. return lexer.MakeToken(TokenKindEOX)
  490. }
  491. if r == '"' {
  492. return lexer.LexString()
  493. }
  494. if r == '`' {
  495. return lexer.LexLongString()
  496. }
  497. if r == ':' {
  498. return lexer.LexSymbol()
  499. }
  500. switch TokenKind(r) {
  501. case TokenKindGet:
  502. fallthrough
  503. case TokenKindSet:
  504. fallthrough
  505. case TokenKindOpenBlock:
  506. fallthrough
  507. case TokenKindCloseBlock:
  508. fallthrough
  509. case TokenKindOpenList:
  510. fallthrough
  511. case TokenKindCloseList:
  512. fallthrough
  513. case TokenKindOpenParen:
  514. fallthrough
  515. case TokenKindCloseParen:
  516. lexer.Next()
  517. return lexer.MakeToken(TokenKind(r))
  518. default:
  519. }
  520. if unicode.IsLetter(r) {
  521. if unicode.IsUpper(r) {
  522. return lexer.LexType()
  523. } else {
  524. return lexer.LexWord()
  525. }
  526. }
  527. return lexer.MakeErrorfToken("Unknown character: %c", r)
  528. }
// Lex returns the next token from the input, guaranteeing the token
// buffer is empty afterwards regardless of which path produced it.
func (lexer *Lexer) Lex() Token {
	res := lexer.lex()
	lexer.ClearBuffer() // ensure buffer is cleared after lexing, always.
	return res
}
  534. func (lexer *Lexer) LexAll() []Token {
  535. var token Token
  536. res := make([]Token, 0)
  537. for token = lexer.Lex(); !token.IsLast(); token = lexer.Lex() {
  538. res = append(res, token)
  539. }
  540. res = append(res, token)
  541. return res
  542. }
  543. func NewLexer(scanner io.RuneScanner, filename string) *Lexer {
  544. lexer := &Lexer{}
  545. lexer.RuneScanner = scanner
  546. lexer.Position.FileName = filename
  547. lexer.Position.Column = 1
  548. lexer.Position.Line = 1
  549. lexer.LoggerWrapper = LoggerWrapper{nil}
  550. return lexer
  551. }
// NewLexerFromString returns a lexer over the given input string, with
// "<input>" as the reported file name.
func NewLexerFromString(input string) *Lexer {
	reader := strings.NewReader(input)
	return NewLexer(reader, "<input>")
}
  556. func NewLexerFromFileName(filename string) (*Lexer, error) {
  557. read, err := os.Open(filename)
  558. if err != nil {
  559. bread := bufio.NewReader(read)
  560. lex := NewLexer(bread, filename)
  561. return lex, nil
  562. }
  563. return nil, err
  564. }