lexer.go 7.8 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379
  1. package muesli
  2. import (
  3. _ "bytes"
  4. _ "errors"
  5. "fmt"
  6. _ "io"
  7. _ "reflect"
  8. _ "runtime"
  9. "strings"
  10. _ "unicode"
  11. "io"
  12. "os"
  13. "bufio"
  14. "unicode"
  15. // "gitlab.com/beoran/woe/graphviz"
  16. // _ "gitlab.com/beoran/woe/monolog"
  17. )
/* A Lexer splits scanned input into tokens.
 */
type Lexer struct {
	Position        // current position (file name, line, column) — embedded
	Index int       // count of runes consumed from the scanner so far
	Start int       // NOTE(review): never written in this file — possibly vestigial; confirm before use
	io.RuneScanner  // underlying input source, embedded for direct rune access
	buffer []rune   // runes accumulated for the token currently being built
	Current rune    // most recent rune returned by Next
}
  28. func (lexer * Lexer) ClearBuffer() {
  29. lexer.buffer = make([]rune, 0)
  30. }
  31. func (lexer * Lexer) MakeToken(kind TokenKind) Token {
  32. val := StringValue(string(lexer.buffer))
  33. lexer.ClearBuffer()
  34. return NewToken(kind, val, lexer.Position)
  35. }
  36. func (lexer Lexer) MakeErrorToken(err error) Token {
  37. return NewToken(TokenKindError, err.Error(), lexer.Position)
  38. }
  39. func (lexer Lexer) MakeErrorfToken(format string, va ... interface{}) Token {
  40. err := fmt.Errorf(format, va...)
  41. return lexer.MakeErrorToken(err)
  42. }
  43. func (lexer Lexer) MakeEOFToken() Token {
  44. return NewToken(TokenKindEOF, "", lexer.Position)
  45. }
  46. func (lexer * Lexer) Peek() (rune, error) {
  47. r, _, err := lexer.RuneScanner.ReadRune()
  48. err2 := lexer.RuneScanner.UnreadRune()
  49. if err == nil {
  50. err = err2
  51. }
  52. return r, err
  53. }
  54. func (lexer * Lexer) Next() (rune, error) {
  55. r, _, err := lexer.RuneScanner.ReadRune()
  56. if err != nil {
  57. return 0, err
  58. }
  59. lexer.Current = r
  60. lexer.buffer = append(lexer.buffer, r)
  61. lexer.Index++
  62. lexer.Position.Column++
  63. if r == '\n' {
  64. lexer.Position.Column = 1
  65. lexer.Position.Line++
  66. }
  67. return lexer.buffer[len(lexer.buffer) - 1], nil
  68. }
// oldPrevious steps the lexer back one rune: it unreads the rune from the
// scanner and drops it from the token buffer, decrementing Index and
// Column. Apparently unused legacy code (note the "old" prefix and the
// debug print); kept for reference.
func (lexer * Lexer) oldPrevious() error {
	// Debug trace left over from development.
	fmt.Printf("Previous: now %c \n", lexer.Current)
	err := lexer.RuneScanner.UnreadRune()
	if err != nil {
		return err
	}
	lexer.Index--
	lexer.Position.Column--
	if (len(lexer.buffer) > 0) {
		r := lexer.buffer[len(lexer.buffer) - 1];
		lexer.buffer = lexer.buffer[0: len(lexer.buffer) - 1];
		if r == '\n' {
			// XXX wrong: the column should be restored to the length of the
			// previous line, which is no longer known at this point.
			lexer.Position.Column = 1
			lexer.Position.Line--
		}
		// NOTE(review): Current is set to the rune being removed, not to the
		// rune that now precedes it — verify before reviving this function.
		lexer.Current = r
	}
	return nil
}
  88. func (lexer * Lexer) NextIf(predicate func(rune) bool) (bool, error) {
  89. r, err := lexer.Peek()
  90. if err != nil {
  91. return false, err
  92. }
  93. if (predicate(r)) {
  94. r, err = lexer.Next()
  95. if err != nil {
  96. return true, err
  97. }
  98. return true, nil
  99. }
  100. return false, nil
  101. }
  102. func (lexer * Lexer) NextWhile(predicate func(rune) bool) (bool, error) {
  103. result := true
  104. ok, err := lexer.NextIf(predicate)
  105. result = result || ok
  106. for ; ok && (err == nil) ; ok, err = lexer.NextIf(predicate) {
  107. result = result || ok
  108. }
  109. return result, err
  110. }
  111. func isSpace(r rune) bool {
  112. return r == ' ' || r == '\t'
  113. }
  114. func (lexer * Lexer) SkipSpace() (error) {
  115. _, err := lexer.NextWhile(isSpace)
  116. if err == nil {
  117. lexer.ClearBuffer()
  118. }
  119. return err
  120. }
  121. /* Handles errors including EOF by either returning an error token or an
  122. * EOF token.
  123. */
  124. func (lexer * Lexer) handleError(err error) Token {
  125. if err == io.EOF {
  126. return lexer.MakeEOFToken()
  127. } else {
  128. return lexer.MakeErrorToken(err)
  129. }
  130. }
  131. func (lexer * Lexer) LexNumber() Token {
  132. isFloat := false
  133. _, err := lexer.NextWhile(func (r rune) bool {
  134. if unicode.IsDigit(r) {
  135. return true
  136. } else if r == '.' {
  137. if isFloat {
  138. return false // double point in floating point
  139. } else {
  140. isFloat = true
  141. return true
  142. }
  143. } else {
  144. return false
  145. }
  146. })
  147. if err != nil {
  148. return lexer.MakeErrorfToken("when parsing number: %s", err)
  149. }
  150. if isFloat {
  151. return lexer.MakeToken(TokenKindFloat)
  152. } else {
  153. return lexer.MakeToken(TokenKindInteger)
  154. }
  155. }
  156. func isDoubleQuote(r rune) bool {
  157. return r == '"'
  158. }
// LexString lexes a double-quoted string literal. Backslash escapes a
// single following character (so \" does not terminate the string).
// Note: both the opening and closing quote are consumed via Next, so they
// remain in the buffer and the token text includes the surrounding quotes.
func (lexer * Lexer) LexString() Token {
	inEscape := false
	var err error
	// Consume the opening double quote.
	_, err = lexer.Next()
	if err != nil {
		return lexer.handleError(err)
	}
	// Consume runes up to — but not including — the closing quote.
	_, err = lexer.NextWhile(func (r rune) bool {
		if r == '"' && !inEscape {
			return false
		}
		if r == '\\' {
			// TODO escape parsing, now just a single character after it
			if inEscape { // double backslash
				inEscape = false
			} else {
				inEscape = true
			}
		} else {
			inEscape = false
		}
		return true // still inside the string
	})
	if err != nil {
		return lexer.MakeErrorfToken("when parsing string: %s", err)
	}
	// Consume the closing double quote; EOF here means an unterminated
	// string and yields an EOF token via handleError.
	_, err = lexer.Next()
	if err != nil {
		return lexer.handleError(err)
	}
	return lexer.MakeToken(TokenKindString)
}
  191. func (lexer * Lexer) LexLongString() Token {
  192. var err error
  193. _, err = lexer.Next()
  194. if err != nil {
  195. return lexer.handleError(err)
  196. }
  197. _, err = lexer.NextWhile(func (r rune) bool {
  198. return r != '`'
  199. })
  200. if err != nil {
  201. return lexer.MakeErrorfToken("when parsing long string: %s", err)
  202. }
  203. _, err = lexer.Next()
  204. if err != nil {
  205. return lexer.handleError(err)
  206. }
  207. return lexer.MakeToken(TokenKindString)
  208. }
  209. func (lexer * Lexer) LexWord() Token {
  210. var err error
  211. _, err = lexer.Next()
  212. if err != nil {
  213. return lexer.handleError(err)
  214. }
  215. lexer.NextWhile(func(r rune) bool {
  216. return unicode.IsLetter(r)
  217. })
  218. return lexer.MakeToken(TokenKindWord)
  219. }
  220. func (lexer * Lexer) lex() Token {
  221. r, err := lexer.Peek()
  222. if err != nil {
  223. return lexer.handleError(err)
  224. }
  225. if isSpace(r) {
  226. err = lexer.SkipSpace()
  227. if err != nil {
  228. return lexer.handleError(err)
  229. }
  230. r, err = lexer.Peek()
  231. if err != nil {
  232. return lexer.handleError(err)
  233. }
  234. }
  235. if unicode.IsDigit(r) {
  236. return lexer.LexNumber()
  237. }
  238. if r == '\n' || r == '.' {
  239. lexer.Next()
  240. return lexer.MakeToken(TokenKindEOX)
  241. }
  242. if r == '"' {
  243. return lexer.LexString()
  244. }
  245. if r == '`' {
  246. return lexer.LexLongString()
  247. }
  248. switch (TokenKind(r)) {
  249. case TokenKindGet : fallthrough
  250. case TokenKindSet : fallthrough
  251. case TokenKindOpenBlock : fallthrough
  252. case TokenKindCloseBlock: fallthrough
  253. case TokenKindOpenList : fallthrough
  254. case TokenKindCloseList : fallthrough
  255. case TokenKindOpenParen : fallthrough
  256. case TokenKindCloseParen:
  257. lexer.Next()
  258. return lexer.MakeToken(TokenKind(r))
  259. default:
  260. }
  261. if unicode.IsLetter(r) {
  262. return lexer.LexWord()
  263. }
  264. return lexer.MakeErrorfToken("Unknown character: %c", r)
  265. }
  266. func (lexer * Lexer) Lex() Token {
  267. res := lexer.lex()
  268. lexer.ClearBuffer() // ensure buffer is cleared after lexing, always.
  269. return res
  270. }
  271. func (lexer * Lexer) LexAll() []Token {
  272. var token Token
  273. res := make([]Token, 0)
  274. for token = lexer.Lex() ; ! token.IsLast() ; token = lexer.Lex() {
  275. fmt.Printf("token: %s %v\n", token.String(), token.IsLast())
  276. res = append(res, token)
  277. }
  278. fmt.Printf("Last token: %s %v\n", token.String(), token.IsLast())
  279. res = append(res, token)
  280. return res
  281. }
  282. func NewLexer(scanner io.RuneScanner, filename string) Lexer {
  283. lexer := Lexer{}
  284. lexer.RuneScanner = scanner
  285. lexer.Position.FileName = filename
  286. lexer.Position.Column = 1
  287. lexer.Position.Line = 1
  288. return lexer
  289. }
  290. func NewLexerFromInputString(input string) Lexer {
  291. reader := strings.NewReader(input)
  292. return NewLexer(reader, "<input>")
  293. }
  294. func NewLexerFromFileName(filename string) (*Lexer, error) {
  295. read, err := os.Open(filename)
  296. if err != nil {
  297. bread := bufio.NewReader(read)
  298. lex := NewLexer(bread, filename)
  299. return &lex, nil
  300. }
  301. return nil , err
  302. }