// raku
/* Raku is an easy-to-use scripting language that is also well suited to
interactive use.

Syntax (verified LL(1)):

	PROGRAM -> STATEMENTS .
	STATEMENTS -> STATEMENT STATEMENTS | .
	STATEMENT -> EXPRESSION | BLOCK | EMPTY_LINE | comment .
	EXPRESSION -> VALUE PARAMETERS NL .
	PARAMETERS_NONEMPTY -> PARAMETER PARAMETERS .
	PARAMETERS -> PARAMETERS_NONEMPTY | .
	PARAMETER -> BLOCK | VALUE .
	EMPTY_LINE -> NL .
	BLOCK -> ob STATEMENTS cb | op STATEMENTS cp | oa STATEMENTS ca .
	NL -> nl | semicolon .
	VALUE -> string | float | integer | symbol .
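
For example, a hypothetical line such as

	print 7 13

is an EXPRESSION: a VALUE (the symbol print) followed by PARAMETERS (the
integers 7 and 13) and a NL; a PARAMETER may itself be a BLOCK
(ob STATEMENTS cb, and so on). Note that the lexer below only handles words,
numbers, comments and line endings so far; strings and the block delimiters
are still to be done.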

Lexer:
*/
package raku

import (
	"bytes"
	"fmt"
	"io"
	"strings"
	"unicode"
)

type Value string

type TokenType int

type Position struct {
	Index  int
	Row    int
	Column int
}

// Token types that mirror a literal rune reuse that rune's code point;
// synthetic token types use negative values so the two ranges cannot collide.
const (
	TokenEOS    TokenType = TokenType('.')
	TokenComma  TokenType = TokenType(',')
	TokenError  TokenType = -1
	TokenWord   TokenType = -2
	TokenEOL    TokenType = -3
	TokenEOF    TokenType = -4
	TokenNumber TokenType = -5
)

// Token is a single lexed token: its type, its text and the position at
// which it was emitted.
type Token struct {
	TokenType
	Value
	Position
}

func (me Token) String() string {
	return fmt.Sprintf("Token: %d >%s< %d %d %d.", me.TokenType, string(me.Value), me.Index, me.Row, me.Column)
}

type TokenChannel chan Token

type Lexer struct {
	Reader  io.Reader
	Current Position
	Last    Position
	Token   Token
	rule    LexerRule
	Output  TokenChannel
	buffer  []byte
	runes   []rune
}
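
// A LexerRule is a state function in the style of Rob Pike's "Lexical
// Scanning in Go" talk: each rule lexes one kind of input and returns the
// next rule to run, or nil to stop the lexer.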
type LexerRule func(lexer *Lexer) LexerRule

func (me *Lexer) Emit(t TokenType, v Value) {
	tok := Token{t, v, me.Current}
	me.Output <- tok
}

func (me *Lexer) Error(message string, args ...interface{}) {
	value := fmt.Sprintf(message, args...)
	me.Emit(TokenError, Value(value))
}

func LexError(me *Lexer) LexerRule {
	me.Error("Error")
	return nil
}
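
// SkipComment skips a comment: either #( ... ), which runs until the next
// close parenthesis, or # ..., which runs until the end of the line. It
// reports false if the input ran out before the comment was terminated.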
func (me *Lexer) SkipComment() bool {
	if me.Peek() == '#' {
		if me.Next() == '(' {
			if !me.SkipNotIn(")") {
				return false
			}
			me.SkipRune() // consume the closing parenthesis as well
			return true
		}
		return me.SkipNotIn("\r\n")
	}
	return true
}

func LexWord(me *Lexer) LexerRule {
	me.SkipWhile(unicode.IsLetter) // consume the whole word before emitting it
	me.Found(TokenWord)
	return LexNormal
}

func LexNumber(me *Lexer) LexerRule {
	me.SkipWhile(unicode.IsDigit) // consume all the digits before emitting them
	me.Found(TokenNumber)
	return LexNormal
}

func LexComment(me *Lexer) LexerRule {
	if !me.SkipComment() {
		me.Error("Unterminated comment")
		return LexError
	}
	me.Advance()
	return LexNormal
}

func LexEOS(me *Lexer) LexerRule {
	me.SkipRune() // consume the '.'
	me.Found(TokenEOS)
	return LexNormal
}

func LexEOL(me *Lexer) LexerRule {
	me.SkipIn("\r\n") // consume the line ending
	me.Found(TokenEOL)
	return LexNormal
}

// LexNormal is the start state: it dispatches to the right rule based on the
// next rune, and returns nil at the end of input or on an unknown character.
func LexNormal(me *Lexer) LexerRule {
	me.SkipWhitespace()
	me.Advance() // do not include the skipped whitespace in the next token
	peek := me.Peek()
	if peek == '#' {
		return LexComment
	} else if peek == '.' {
		return LexEOS
	} else if peek == '\n' || peek == '\r' {
		return LexEOL
	} else if unicode.IsLetter(peek) {
		return LexWord
	} else if unicode.IsDigit(peek) {
		return LexNumber
	}
	return nil
}

func OpenLexer(reader io.Reader) *Lexer {
	lexer := &Lexer{}
	lexer.Reader = reader
	lexer.Output = make(TokenChannel)
	return lexer
}

// ReadReaderOnce reads one chunk from the reader into the byte buffer. It
// reports done == true when the input is exhausted or a real error occurred.
// No TokenEOF is emitted here; Next emits it once the cursor reaches the end.
func (me *Lexer) ReadReaderOnce() (bool, error) {
	buffer := make([]byte, 1024)
	n, err := me.Reader.Read(buffer)
	if n > 0 {
		me.buffer = append(me.buffer, buffer[:n]...)
	}
	if err == io.EOF {
		return true, nil
	} else if err != nil {
		me.Error("Error reading from reader: %s", err)
		return true, err
	}
	return false, nil
}

// ReadReader slurps the whole reader into memory and decodes it to runes.
// It reports whether reading succeeded.
func (me *Lexer) ReadReader() bool {
	me.buffer = make([]byte, 0)
	done, err := me.ReadReaderOnce()
	for err == nil && !done {
		done, err = me.ReadReaderOnce()
	}
	me.runes = bytes.Runes(me.buffer)
	return err == nil
}

// Peek returns the rune under the cursor, or '\000' at the end of input.
func (me *Lexer) Peek() rune {
	if me.Current.Index >= len(me.runes) {
		return '\000'
	}
	return me.runes[me.Current.Index]
}

// PeekNext returns the rune after the cursor, or '\000' at the end of input.
func (me *Lexer) PeekNext() rune {
	if me.Current.Index+1 >= len(me.runes) {
		return '\000'
	}
	return me.runes[me.Current.Index+1]
}

// Next advances the cursor one rune, tracking row and column, and returns
// the rune now under the cursor. It emits TokenEOF once, when the end of the
// input is first reached.
func (me *Lexer) Next() rune {
	if me.Peek() == '\n' {
		me.Current.Column = 0
		me.Current.Row++
	} else {
		me.Current.Column++
	}
	if me.Current.Index < len(me.runes) {
		me.Current.Index++
		if me.Current.Index >= len(me.runes) {
			me.Emit(TokenEOF, "")
		}
	}
	return me.Peek()
}

// Previous moves the cursor back one rune, adjusting the row count when
// stepping back over a newline.
func (me *Lexer) Previous() rune {
	if me.Current.Index > 0 {
		me.Current.Index--
		if me.Peek() == '\n' {
			me.Current.Column = 0
			me.Current.Row-- // moving backwards, so the row decreases
		}
	}
	return me.Peek()
}

// SkipRune unconditionally skips a single rune.
func (me *Lexer) SkipRune() {
	me.Next()
}

// SkipIn skips runes as long as they are in the given set. It reports false
// if the end of input was reached.
func (me *Lexer) SkipIn(set string) bool {
	for strings.ContainsRune(set, me.Peek()) {
		if me.Next() == '\000' {
			return false
		}
	}
	return true
}

// SkipNotIn skips runes until one in the given set is found. It reports
// false if the end of input was reached first.
func (me *Lexer) SkipNotIn(set string) bool {
	for !strings.ContainsRune(set, me.Peek()) {
		if me.Next() == '\000' {
			return false
		}
	}
	return true
}

// SkipWhile skips runes as long as should_skip returns true for them.
func (me *Lexer) SkipWhile(should_skip func(r rune) bool) bool {
	for should_skip(me.Peek()) {
		if me.Next() == '\000' {
			return false
		}
	}
	return true
}

func (me *Lexer) SkipWhitespace() {
	me.SkipIn(" \t")
}
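
// Last and Current delimit the token currently being scanned: Last marks
// where it started, Current is the read cursor. Advance commits the scanned
// text (moving Last up to Current), Retry rewinds Current back to Last, and
// Found emits the runes between them as a token of the given kind.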
func (me *Lexer) Advance() {
	me.Last = me.Current
}

func (me *Lexer) Retry() {
	me.Current = me.Last
}

func (me *Lexer) Found(kind TokenType) {
	value := me.runes[me.Last.Index:me.Current.Index]
	svalue := string(value)
	me.Emit(kind, Value(svalue))
	me.Advance()
}

// Start reads all input, then runs the lexer's state machine until a rule
// returns nil, and finally closes the output channel. It is meant to be run
// in its own goroutine.
func (me *Lexer) Start() {
	if me.ReadReader() {
		rule := LexNormal
		for rule != nil {
			rule = rule(me)
		}
	}
	close(me.Output)
}
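
// TryLexing runs the lexer in a goroutine and prints every token it
// produces; it is only intended for interactive experimentation.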
func (me *Lexer) TryLexing() {
	go me.Start()
	for token := range me.Output {
		fmt.Printf("Token %s\n", token)
	}
}

// Parser is a stub for now: the parser for the grammar above still has to be
// written on top of the Lexer.
type Parser struct {
	Lexer
}

// Environment is a stub for the interpreter's scope chain.
type Environment struct {
	Parent *Environment
}

func main() {
	fmt.Println("Hello World!")
}
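
// ExampleLexer is a small usage sketch: it feeds a hypothetical two-line
// program to the lexer from a string and prints the tokens it produces
// (words, numbers and line endings, given the rules implemented so far).
func ExampleLexer() {
	lexer := OpenLexer(strings.NewReader("print 7\nadd 1 2\n"))
	// TryLexing starts the lexer in a goroutine and prints each token as it
	// arrives on the output channel.
	lexer.TryLexing()
}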