// lexer.go
  1. package muesli
  2. import (
  3. "bufio"
  4. _ "bytes"
  5. _ "errors"
  6. "fmt"
  7. "io"
  8. _ "io"
  9. "os"
  10. _ "reflect"
  11. _ "runtime"
  12. "strings"
  13. "strconv"
  14. "unicode"
  15. _ "unicode"
  16. // "gitlab.com/beoran/woe/graphviz"
  17. // _ "gitlab.com/beoran/woe/monolog"
  18. )
/* A Lexer splits scanned input into tokens.
 */
type Lexer struct {
	Position       // current file name, line and column of the lexer
	Index    int   // number of runes consumed from the input so far
	Start    int   // NOTE(review): appears unused in this file — confirm before removing
	io.RuneScanner // the input source the lexer reads runes from
	buffer   []rune // runes collected so far for the token in progress
	Current  rune   // the rune most recently consumed by advance()
}
  29. func (lexer *Lexer) ClearBuffer() {
  30. lexer.buffer = make([]rune, 0)
  31. }
  32. func (lexer *Lexer) MakeIntegerToken() Token {
  33. var sbuffer = string(lexer.buffer)
  34. i, err := strconv.ParseInt(sbuffer, 0, 64)
  35. if err == nil {
  36. lexer.ClearBuffer()
  37. return NewToken(TokenKindInteger, IntValue(i), lexer.Position)
  38. } else {
  39. lexer.ClearBuffer()
  40. return lexer.MakeErrorToken(err);
  41. }
  42. }
  43. func (lexer *Lexer) MakeFloatToken() Token {
  44. var sbuffer = string(lexer.buffer)
  45. f, err := strconv.ParseFloat(sbuffer, 64)
  46. if err == nil {
  47. lexer.ClearBuffer()
  48. return NewToken(TokenKindFloat, FloatValue(f), lexer.Position)
  49. } else {
  50. lexer.ClearBuffer()
  51. return lexer.MakeErrorToken(err);
  52. }
  53. }
  54. func (lexer *Lexer) MakeBooleanToken(b bool) Token {
  55. lexer.ClearBuffer()
  56. return NewToken(TokenKindBoolean, BoolValue(b), lexer.Position)
  57. }
  58. func (lexer *Lexer) MakeStringValueToken(kind TokenKind) Token {
  59. var sbuffer = string(lexer.buffer)
  60. return NewToken(kind, StringValue(sbuffer), lexer.Position)
  61. }
  62. func (lexer *Lexer) MakeToken(kind TokenKind) Token {
  63. switch (kind) {
  64. case TokenKindInteger : return lexer.MakeIntegerToken()
  65. case TokenKindFloat : return lexer.MakeFloatToken()
  66. case TokenKindString : fallthrough
  67. case TokenKindSymbol : fallthrough
  68. case TokenKindType : fallthrough
  69. case TokenKindError : fallthrough
  70. case TokenKindWord : return lexer.MakeStringValueToken(kind)
  71. case TokenKindBoolean : fallthrough
  72. case TokenKindGet : fallthrough
  73. case TokenKindSet : fallthrough
  74. case TokenKindOpenBlock : fallthrough
  75. case TokenKindCloseBlock: fallthrough
  76. case TokenKindOpenList : fallthrough
  77. case TokenKindCloseList : fallthrough
  78. case TokenKindOpenParen : fallthrough
  79. case TokenKindCloseParen: fallthrough
  80. case TokenKindEOX : fallthrough
  81. case TokenKindEOF :
  82. val := StringValue(string(lexer.buffer))
  83. lexer.ClearBuffer()
  84. return NewToken(kind, val, lexer.Position)
  85. default :
  86. return lexer.MakeErrorfToken("Internal error on token type %s", kind)
  87. }
  88. }
// MakeErrorToken wraps err's message in an error token at the lexer's
// current position.
func (lexer Lexer) MakeErrorToken(err error) Token {
	return NewToken(TokenKindError, err.Error(), lexer.Position)
}
  92. func (lexer Lexer) MakeErrorfToken(format string, va ...interface{}) Token {
  93. err := fmt.Errorf(format, va...)
  94. return lexer.MakeErrorToken(err)
  95. }
// MakeEOFToken returns an end-of-file token at the lexer's current
// position.
func (lexer Lexer) MakeEOFToken() Token {
	return NewToken(TokenKindEOF, "", lexer.Position)
}
  99. func (lexer *Lexer) Peek() (rune, error) {
  100. r, _, err := lexer.RuneScanner.ReadRune()
  101. err2 := lexer.RuneScanner.UnreadRune()
  102. if err == nil {
  103. err = err2
  104. }
  105. return r, err
  106. }
  107. /* Advances the lexer's position based on the rune r read. */
  108. func (lexer *Lexer) advance(r rune) {
  109. lexer.Current = r
  110. lexer.Index++
  111. lexer.Position.Column++
  112. if r == '\n' {
  113. lexer.Position.Column = 1
  114. lexer.Position.Line++
  115. }
  116. }
/* appendRune appends the rune r to the lexer's token buffer. */
func (lexer *Lexer) appendRune(r rune) {
	lexer.buffer = append(lexer.buffer, r)
}
  121. /* Advances the lexer's input buffer but does not store the rune read,
  122. * but just returns it. */
  123. func (lexer *Lexer) Skip() (rune, error) {
  124. r, _, err := lexer.RuneScanner.ReadRune()
  125. if err != nil {
  126. return 0, err
  127. }
  128. lexer.advance(r)
  129. return r, nil
  130. }
  131. /* Actually reads the next rune from the lexer's input source and stores
  132. * them in the lexer's token buffer.
  133. * Shorthand for r, err := lexer.Skip() ; lexer.appendRune(r) */
  134. func (lexer *Lexer) Next() (rune, error) {
  135. r, err := lexer.Skip()
  136. if err == nil {
  137. lexer.appendRune(r)
  138. }
  139. return r, nil
  140. }
  141. func (lexer *Lexer) NextIf(predicate func(rune) bool) (bool, error) {
  142. r, err := lexer.Peek()
  143. if err != nil {
  144. return false, err
  145. }
  146. if predicate(r) {
  147. r, err = lexer.Next()
  148. if err != nil {
  149. return true, err
  150. }
  151. return true, nil
  152. }
  153. return false, nil
  154. }
  155. func (lexer *Lexer) SkipIf(predicate func(rune) bool) (bool, error) {
  156. r, err := lexer.Peek()
  157. if err != nil {
  158. return false, err
  159. }
  160. if predicate(r) {
  161. r, err = lexer.Skip()
  162. if err != nil {
  163. return true, err
  164. }
  165. return true, nil
  166. }
  167. return false, nil
  168. }
  169. func (lexer *Lexer) NextWhile(predicate func(rune) bool) (bool, error) {
  170. result := true
  171. ok, err := lexer.NextIf(predicate)
  172. result = result || ok
  173. for ; ok && (err == nil); ok, err = lexer.NextIf(predicate) {
  174. result = result || ok
  175. }
  176. return result, err
  177. }
  178. func (lexer *Lexer) SkipWhile(predicate func(rune) bool) (bool, error) {
  179. result := true
  180. ok, err := lexer.SkipIf(predicate)
  181. result = result || ok
  182. for ; ok && (err == nil); ok, err = lexer.SkipIf(predicate) {
  183. result = result || ok
  184. }
  185. return result, err
  186. }
  187. func isSpace(r rune) bool {
  188. return r == ' ' || r == '\t'
  189. }
  190. func (lexer *Lexer) SkipSpace() error {
  191. _, err := lexer.SkipWhile(isSpace)
  192. return err
  193. }
  194. /* Handles errors including EOF by either returning an error token or an
  195. * EOF token.
  196. */
  197. func (lexer *Lexer) handleError(err error) Token {
  198. if err == io.EOF {
  199. return lexer.MakeEOFToken()
  200. } else {
  201. return lexer.MakeErrorToken(err)
  202. }
  203. }
  204. func (lexer *Lexer) LexNumber() Token {
  205. isFloat := false
  206. // skip any first -
  207. _, err := lexer.NextIf(func(r rune) bool {
  208. return r == '-'
  209. })
  210. _, err = lexer.NextWhile(func(r rune) bool {
  211. if unicode.IsDigit(r) {
  212. return true
  213. } else if r == '.' {
  214. if isFloat {
  215. return false // double point in floating point
  216. } else {
  217. isFloat = true
  218. return true
  219. }
  220. } else {
  221. return false
  222. }
  223. })
  224. if err != nil {
  225. return lexer.MakeErrorfToken("when parsing number: %s", err)
  226. }
  227. if isFloat {
  228. return lexer.MakeToken(TokenKindFloat)
  229. } else {
  230. return lexer.MakeToken(TokenKindInteger)
  231. }
  232. }
  233. func isDoubleQuote(r rune) bool {
  234. return r == '"'
  235. }
/* handleEscapeHexChars reads amount hexadecimal digits from the input
 * and appends the single rune they encode to the token buffer. Used
 * for the \x, \u and \U escapes inside string literals. Returns an
 * error on a non-hex digit, premature end of input, or an unparsable
 * value. */
func (lexer *Lexer) handleEscapeHexChars(amount int) error {
	buffer := make([]byte, 0)
	// The first digit is read before the loop; later digits are read
	// at the bottom of each iteration, so exactly amount runes are
	// consumed on success.
	r, err := lexer.Skip()
	for index := 0 ; err == nil && index < amount ; {
		if unicode.Is(unicode.ASCII_Hex_Digit, r) {
			buffer = append(buffer, byte(r))
		} else {
			return fmt.Errorf("Not a hexadecimal digit: %c", r)
		}
		index++
		if (index < amount) {
			r, err = lexer.Skip()
		}
	}
	if err != nil {
		return err
	}
	i, err := strconv.ParseInt(string(buffer), 16, 32)
	if err != nil {
		return err
	}
	lexer.appendRune(rune(i))
	// Peek so that input ending right after the escape is reported
	// here. NOTE(review): this makes a \x escape as the very last
	// input rune an error (io.EOF) — confirm that is intended.
	_, err = lexer.Peek()
	return err
}
  261. func (lexer *Lexer) handleEscape() error {
  262. r, err := lexer.Skip()
  263. if err != nil {
  264. return err
  265. }
  266. switch r {
  267. case 'a': lexer.appendRune('\a')
  268. case 'b': lexer.appendRune('\b')
  269. case 'e': lexer.appendRune('\033')
  270. case 'f': lexer.appendRune('\f')
  271. case 'n': lexer.appendRune('\n')
  272. case 'r': lexer.appendRune('\r')
  273. case 't': lexer.appendRune('\t')
  274. case '\\': lexer.appendRune('\\')
  275. case '"': lexer.appendRune('"')
  276. // case 'o': fallthrough // No octals, for now.
  277. case 'x': err = lexer.handleEscapeHexChars(2)
  278. case 'u': err = lexer.handleEscapeHexChars(4)
  279. case 'U': err = lexer.handleEscapeHexChars(6)
  280. default:
  281. return fmt.Errorf("Unknown escape sequence character %c: %d", r, r)
  282. }
  283. return err
  284. }
  285. func (lexer *Lexer) LexString() Token {
  286. var err error
  287. var r rune
  288. _, err = lexer.Skip() // Skip first "
  289. if err != nil {
  290. return lexer.handleError(err)
  291. }
  292. r, err = lexer.Skip()
  293. for ; r != '"' && err == nil ; {
  294. if r == '\\' {
  295. err = lexer.handleEscape()
  296. if err != nil {
  297. return lexer.handleError(err)
  298. }
  299. } else {
  300. lexer.appendRune(r)
  301. // still inside the string
  302. }
  303. r, err = lexer.Skip()
  304. }
  305. if err != nil {
  306. return lexer.MakeErrorfToken("when parsing string: %s", err)
  307. }
  308. _, err = lexer.Skip() // skip last "
  309. if err != nil {
  310. return lexer.handleError(err)
  311. }
  312. return lexer.MakeToken(TokenKindString)
  313. }
  314. func (lexer *Lexer) LexLongString() Token {
  315. var err error
  316. _, err = lexer.Skip()
  317. if err != nil {
  318. return lexer.handleError(err)
  319. }
  320. _, err = lexer.NextWhile(func(r rune) bool {
  321. return r != '`'
  322. })
  323. if err != nil {
  324. return lexer.MakeErrorfToken("when parsing long string: %s", err)
  325. }
  326. _, err = lexer.Skip()
  327. if err != nil {
  328. return lexer.handleError(err)
  329. }
  330. return lexer.MakeToken(TokenKindString)
  331. }
  332. func (lexer *Lexer) LexWord() Token {
  333. var err error
  334. first := true
  335. _, err = lexer.Next()
  336. if err != nil {
  337. return lexer.handleError(err)
  338. }
  339. _, err = lexer.NextWhile(func(r rune) bool {
  340. if first {
  341. first = false
  342. return unicode.IsLetter(r) || r == '_'
  343. } else {
  344. return unicode.IsLetter(r) || unicode.IsNumber(r) || r == '_'
  345. }
  346. })
  347. if err != nil {
  348. return lexer.handleError(err)
  349. }
  350. sbuffer := string(lexer.buffer)
  351. // handle key words
  352. switch sbuffer {
  353. case "true" : return lexer.MakeBooleanToken(true)
  354. case "false": return lexer.MakeBooleanToken(false)
  355. default: return lexer.MakeToken(TokenKindWord)
  356. }
  357. }
  358. func (lexer *Lexer) LexSymbol() Token {
  359. var err error
  360. _, err = lexer.Skip()
  361. if err != nil {
  362. return lexer.handleError(err)
  363. }
  364. _, err = lexer.NextWhile(func(r rune) bool {
  365. return !unicode.IsSpace(r)
  366. })
  367. if err != nil {
  368. return lexer.handleError(err)
  369. }
  370. return lexer.MakeToken(TokenKindSymbol)
  371. }
  372. func (lexer *Lexer) lex() Token {
  373. r, err := lexer.Peek()
  374. if err != nil {
  375. return lexer.handleError(err)
  376. }
  377. if isSpace(r) {
  378. err = lexer.SkipSpace()
  379. if err != nil {
  380. return lexer.handleError(err)
  381. }
  382. r, err = lexer.Peek()
  383. if err != nil {
  384. return lexer.handleError(err)
  385. }
  386. }
  387. if unicode.IsDigit(r) || r == '-' {
  388. return lexer.LexNumber()
  389. }
  390. if r == '\n' || r == '.' {
  391. lexer.Next()
  392. return lexer.MakeToken(TokenKindEOX)
  393. }
  394. if r == '"' {
  395. return lexer.LexString()
  396. }
  397. if r == '`' {
  398. return lexer.LexLongString()
  399. }
  400. if r == ':' {
  401. return lexer.LexSymbol()
  402. }
  403. switch TokenKind(r) {
  404. case TokenKindGet:
  405. fallthrough
  406. case TokenKindSet:
  407. fallthrough
  408. case TokenKindOpenBlock:
  409. fallthrough
  410. case TokenKindCloseBlock:
  411. fallthrough
  412. case TokenKindOpenList:
  413. fallthrough
  414. case TokenKindCloseList:
  415. fallthrough
  416. case TokenKindOpenParen:
  417. fallthrough
  418. case TokenKindCloseParen:
  419. lexer.Next()
  420. return lexer.MakeToken(TokenKind(r))
  421. default:
  422. }
  423. if unicode.IsLetter(r) {
  424. return lexer.LexWord()
  425. }
  426. return lexer.MakeErrorfToken("Unknown character: %c", r)
  427. }
  428. func (lexer *Lexer) Lex() Token {
  429. res := lexer.lex()
  430. lexer.ClearBuffer() // ensure buffer is cleared after lexing, always.
  431. return res
  432. }
  433. func (lexer *Lexer) LexAll() []Token {
  434. var token Token
  435. res := make([]Token, 0)
  436. for token = lexer.Lex(); !token.IsLast(); token = lexer.Lex() {
  437. res = append(res, token)
  438. }
  439. res = append(res, token)
  440. return res
  441. }
  442. func NewLexer(scanner io.RuneScanner, filename string) Lexer {
  443. lexer := Lexer{}
  444. lexer.RuneScanner = scanner
  445. lexer.Position.FileName = filename
  446. lexer.Position.Column = 1
  447. lexer.Position.Line = 1
  448. return lexer
  449. }
  450. func NewLexerFromInputString(input string) Lexer {
  451. reader := strings.NewReader(input)
  452. return NewLexer(reader, "<input>")
  453. }
  454. func NewLexerFromFileName(filename string) (*Lexer, error) {
  455. read, err := os.Open(filename)
  456. if err != nil {
  457. bread := bufio.NewReader(read)
  458. lex := NewLexer(bread, filename)
  459. return &lex, nil
  460. }
  461. return nil, err
  462. }