// raku
/* Raku is an easy-to-use scripting language that can also be used interactively.

Syntax (verified LL(1) on smlweb.cpsc.ucalgary.ca):

PROGRAM -> STATEMENTS.
STATEMENTS -> STATEMENT STATEMENTS | .
STATEMENT -> DEFINITION | EXPRESSION | BLOCK .
DEFINITION -> to WORDS BLOCK.
WORDS -> word WORDS | .
EXPRESSION -> WORD_EXPRESSION | VALUE_EXPRESSION.
WORD_EXPRESSION -> word WORD_CALLOP.
WORD_CALLOP -> WORD_OPERATION | WORD_CALL.
WORD_OPERATION -> operator PARAMETERS_NONEMPTY EOX.
WORD_CALL -> PARAMETERS EOX.
VALUE_EXPRESSION -> value VALUE_CALLOP.
VALUE_CALLOP -> VALUE_OPERATION | VALUE_CALL.
VALUE_OPERATION -> operator PARAMETERS_NONEMPTY EOX.
VALUE_CALL -> EOX.
PARAMETERS_NONEMPTY -> PARAMETER PARAMETERS.
PARAMETERS -> PARAMETERS_NONEMPTY | .
PARAMETER -> BLOCK | WORDVALUE .
BLOCK -> ob STATEMENTS cb | op STATEMENTS cp | oa STATEMENTS ca | do STATEMENTS end.
WORDVALUE -> word | VALUE.
VALUE -> string | number | symbol.
EOX -> eol | period.

Lexer:
*/
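/* A hypothetical example program for this grammar (an illustration added
here, not from the original sources): "to" begins a DEFINITION, "do ... end"
forms a BLOCK, and a period is an EOX that ends an expression:

	to greet name do
		print "hello" name.
	end
	greet "world".
*/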
package raku

import (
	"bytes"
	"fmt"
	"io"
	"reflect"
	"runtime"
	"sort"
	"strings"
	"unicode"

	"github.com/beoran/woe/monolog"
	"github.com/beoran/woe/tree"
)
type Value string
type TokenType int64

type Position struct {
	Index  int
	Row    int
	Column int
}
const (
	TokenEOS          TokenType = TokenType('.')
	TokenComma        TokenType = TokenType(',')
	TokenSemicolumn   TokenType = TokenType(';')
	TokenColumn       TokenType = TokenType(':')
	TokenOpenParen    TokenType = TokenType('(')
	TokenCloseParen   TokenType = TokenType(')')
	TokenOpenBrace    TokenType = TokenType('{')
	TokenCloseBrace   TokenType = TokenType('}')
	TokenOpenBracket  TokenType = TokenType('[')
	TokenCloseBracket TokenType = TokenType(']')
	TokenNone         TokenType = 0
	TokenError        TokenType = -1
	TokenWord         TokenType = -2
	TokenEOL          TokenType = -3
	TokenEOF          TokenType = -4
	TokenNumber       TokenType = -5
	TokenOperator     TokenType = -6
	TokenString       TokenType = -7
	TokenKeyword      TokenType = -8
	TokenLast         TokenType = -9
)
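// Positive TokenType values are the punctuation rune itself; negative
// values index tokenTypeNames below (see TokenType.String).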
type Token struct {
	TokenType
	Value
	Position
}

var tokenTypeNames []string = []string{
	"TokenNone", "TokenError", "TokenWord", "TokenEOL", "TokenEOF",
	"TokenNumber", "TokenOperator", "TokenString", "TokenKeyword",
}

// keywordList must stay sorted: IsKeyword relies on sort.SearchStrings.
var keywordList []string = []string{
	"a", "do", "end", "the", "to",
}
func (me TokenType) String() string {
	if int(me) > 0 {
		return fmt.Sprintf("Token %c", rune(me))
	} else if me > TokenLast {
		return tokenTypeNames[-int(me)]
	} else {
		return fmt.Sprintf("Unknown Token %d", int(me))
	}
}

func (me Token) String() string {
	return fmt.Sprintf("Token: %s >%s< %d %d %d.", me.TokenType, string(me.Value), me.Index, me.Row, me.Column)
}
type TokenChannel chan Token

type Lexer struct {
	Reader  io.Reader
	Current Position
	Last    Position
	Token   Token
	rule    LexerRule
	Output  TokenChannel
	buffer  []byte
	runes   []rune
}

type LexerRule func(lexer *Lexer) LexerRule
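// Each LexerRule consumes some input and returns the next rule to run, or
// nil to stop; Start below simply loops "rule = rule(me)". This is the
// state-function lexer style known from Go's text/template package.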
func (me *Lexer) Emit(t TokenType, v Value) {
	tok := Token{t, v, me.Current}
	me.Output <- tok
}

func (me *Lexer) Error(message string, args ...interface{}) {
	value := fmt.Sprintf(message, args...)
	monolog.Error("Lex Error: %s", value)
	me.Emit(TokenError, Value(value))
}

func LexError(me *Lexer) LexerRule {
	me.Error("Error")
	return nil
}
// SkipComment skips a comment: "#(" starts a comment that runs to the next
// ")", while a plain "#" starts a comment that runs to the end of the line.
func (me *Lexer) SkipComment() bool {
	if me.Peek() == '#' {
		if me.Next() == '(' {
			if !me.SkipNotIn(")") {
				return false
			}
			// Consume the closing ')' so it is not lexed as an operator.
			me.SkipRune()
			return true
		} else {
			return me.SkipNotIn("\r\n")
		}
	}
	return true
}

// IsKeyword reports whether word is in the sorted keywordList.
func IsKeyword(word string) bool {
	i := sort.SearchStrings(keywordList, word)
	if i >= len(keywordList) {
		return false
	}
	return word == keywordList[i]
}
func LexWord(me *Lexer) LexerRule {
	me.SkipNotIn(" \t\r\n'")
	if IsKeyword(me.CurrentStringValue()) {
		me.Found(TokenKeyword)
	} else {
		me.Found(TokenWord)
	}
	return LexNormal
}

func LexNumber(me *Lexer) LexerRule {
	me.SkipNotIn(" \t\r\n")
	me.Found(TokenNumber)
	return LexNormal
}
func LexWhitespace(me *Lexer) LexerRule {
	me.SkipWhitespace()
	me.Advance()
	return LexNormal
}

func LexComment(me *Lexer) LexerRule {
	if !me.SkipComment() {
		me.Error("Unterminated comment")
		return LexError
	}
	me.Advance()
	return LexNormal
}

func LexPunctuator(me *Lexer) LexerRule {
	punctuator := me.Peek()
	// Consume the punctuation rune, otherwise LexNormal would dispatch
	// here again on the same rune and the lexer would loop forever.
	_ = me.Next()
	me.Found(TokenType(punctuator))
	return LexNormal
}

func LexEOL(me *Lexer) LexerRule {
	me.SkipIn("\r\n")
	me.Found(TokenEOL)
	return LexNormal
}

func LexOperator(me *Lexer) LexerRule {
	me.SkipNotIn(" \t\r\n")
	me.Found(TokenOperator)
	return LexNormal
}

// lexEscape consumes the rune after a backslash; escape sequences are not
// interpreted further yet.
func lexEscape(me *Lexer) error {
	_ = me.Next()
	return nil
}
// LexString lexes a string delimited by the rune it starts with. Strings
// opened with a double quote process backslash escapes; backquoted strings
// are taken raw.
func LexString(me *Lexer) LexerRule {
	open := me.Peek()
	do_escape := open == '"'
	peek := me.Next()
	me.Advance()
	for ; peek != '\000'; peek = me.Next() {
		if do_escape && peek == '\\' {
			if err := lexEscape(me); err != nil {
				return LexError
			}
		} else if peek == open {
			me.Found(TokenString)
			_ = me.Next()
			me.Advance()
			return LexNormal
		}
	}
	me.Error("Unexpected EOF in string.")
	return nil
}
func LexNumberOrOperator(me *Lexer) LexerRule {
	if unicode.IsDigit(me.Next()) {
		return LexNumber
	} else {
		_ = me.Previous()
		return LexOperator
	}
}

// LexNormal is the start state: it dispatches on the next rune to the
// appropriate rule.
func LexNormal(me *Lexer) LexerRule {
	peek := me.Peek()
	if peek == '#' {
		return LexComment
	} else if strings.ContainsRune(" \t", peek) {
		return LexWhitespace
	} else if strings.ContainsRune(".,;:", peek) {
		return LexPunctuator
	} else if strings.ContainsRune("\r\n", peek) {
		return LexEOL
	} else if strings.ContainsRune("+-", peek) {
		return LexNumberOrOperator
	} else if strings.ContainsRune("\"`", peek) {
		return LexString
	} else if peek == '\000' {
		me.Emit(TokenEOF, "")
		return nil
	} else if unicode.IsLetter(peek) {
		return LexWord
	} else if unicode.IsDigit(peek) {
		return LexNumber
	} else {
		return LexOperator
	}
}
func OpenLexer(reader io.Reader) *Lexer {
	lexer := &Lexer{}
	lexer.Reader = reader
	lexer.Output = make(TokenChannel)
	return lexer
}
// ReadReaderOnce reads one chunk from the input reader and appends it to
// the internal buffer. It returns true when reading is finished, that is,
// on EOF or on a read error.
func (me *Lexer) ReadReaderOnce() (bool, error) {
	buffer := make([]byte, 1024)
	n, err := me.Reader.Read(buffer)
	monolog.Debug("read %v %d %v\n", buffer[:n], n, err)
	if n > 0 {
		me.buffer = append(me.buffer, buffer[:n]...)
		monolog.Debug("append %s", me.buffer)
	}
	if err == io.EOF {
		// Don't emit TokenEOF here: LexNormal emits it once lexing
		// actually reaches the end of the input.
		return true, nil
	} else if err != nil {
		me.Error("Error reading from reader: %s", err)
		return true, err
	}
	return false, nil
}

// ReadReader slurps the whole reader into the rune buffer before lexing.
func (me *Lexer) ReadReader() error {
	me.buffer = make([]byte, 0)
	done, err := me.ReadReaderOnce()
	for err == nil && !done {
		done, err = me.ReadReaderOnce()
	}
	me.runes = bytes.Runes(me.buffer)
	return err
}
func (me *Lexer) Peek() rune {
	if me.Current.Index >= len(me.runes) {
		return '\000'
	}
	return me.runes[me.Current.Index]
}

func (me *Lexer) PeekNext() rune {
	if (me.Current.Index + 1) >= len(me.runes) {
		return '\000'
	}
	return me.runes[me.Current.Index+1]
}

// Next advances the cursor one rune, keeping track of row and column, and
// returns the new current rune. At the end of input it returns '\000',
// which is also what Peek reports there.
func (me *Lexer) Next() rune {
	if me.Peek() == '\n' {
		me.Current.Column = 0
		me.Current.Row++
	} else {
		me.Current.Column++
	}
	me.Current.Index++
	return me.Peek()
}

// Previous steps the cursor back one rune, decrementing the row count when
// stepping back over a newline.
func (me *Lexer) Previous() rune {
	if me.Current.Index > 0 {
		me.Current.Index--
		if me.Peek() == '\n' {
			me.Current.Column = 0
			me.Current.Row--
		} else if me.Current.Column > 0 {
			me.Current.Column--
		}
	}
	return me.Peek()
}

func (me *Lexer) SkipRune() {
	_ = me.Next()
}
func (me *Lexer) SkipIn(set string) bool {
	for strings.ContainsRune(set, me.Next()) {
		monolog.Debug("SkipIn: %s %c\n", set, me.Peek())
		if me.Peek() == '\000' {
			return false
		}
	}
	return true
}

func (me *Lexer) SkipNotIn(set string) bool {
	_ = me.Next()
	for !strings.ContainsRune(set, me.Peek()) {
		if me.Next() == '\000' {
			return false
		}
	}
	return true
}

func (me *Lexer) SkipWhile(should_skip func(r rune) bool) bool {
	for should_skip(me.Peek()) {
		if me.Next() == '\000' {
			return false
		}
	}
	return true
}

func (me *Lexer) SkipWhitespace() {
	me.SkipIn(" \t")
}

// Advance marks the current position as the start of the next token.
func (me *Lexer) Advance() {
	me.Last = me.Current
}

func (me *Lexer) Rewind() {
	me.Current = me.Last
}

func (me *Lexer) CurrentRuneValue() []rune {
	return me.runes[me.Last.Index:me.Current.Index]
}

func (me *Lexer) CurrentStringValue() string {
	return string(me.CurrentRuneValue())
}

// Found emits a token of the given kind whose value is everything between
// the last Advance and the current position.
func (me *Lexer) Found(kind TokenType) {
	me.Emit(kind, Value(me.CurrentStringValue()))
	me.Advance()
}
func GetFunctionName(fun interface{}) string {
	return runtime.FuncForPC(reflect.ValueOf(fun).Pointer()).Name()
}

func (me *Lexer) Start() {
	if err := me.ReadReader(); err == nil || err == io.EOF {
		rule := LexNormal
		for rule != nil {
			monolog.Debug("Lexer Rule: %s\n", GetFunctionName(rule))
			rule = rule(me)
		}
	} else {
		me.Error("Could not read in input buffer: %s", err)
	}
	close(me.Output)
}

func (me *Lexer) TryLexing() {
	go me.Start()
	for token := range me.Output {
		monolog.Info("Token %s", token)
	}
}
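// A minimal usage sketch (an added example, not part of the original file;
// the input string is arbitrary):
//
//	lexer := OpenLexer(strings.NewReader("to greet do end"))
//	go lexer.Start()
//	for token := range lexer.Output {
//		fmt.Println(token)
//	}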
type AstType int

const (
	AstTypeProgram = AstType(iota)
	AstTypeStatements
	AstTypeStatement
	AstTypeDefinition
	AstTypeWords
	AstTypeExpression
	AstTypeWordExpression
	AstTypeWordCallop
	AstTypeWordOperation
	AstTypeWordCall
	AstTypeValueExpression
	AstTypeValueCallop
	AstTypeValueCall
	AstTypeParametersNonempty
	AstTypeParameters
	AstTypeParameter
	AstTypeBlock
	AstTypeWordvalue
	AstTypeValue
	AstTypeEox
	AstTypeError
)
type Ast struct {
	*tree.Node
	AstType
	*Token
}

func (me *Ast) NewChild(kind AstType, token *Token) *Ast {
	child := &Ast{}
	child.AstType = kind
	child.Token = token
	child.Node = me.Node.NewChild(child)
	return child
}

// Walk wraps tree.Node.Walk so that the walker sees *Ast values rather
// than raw tree nodes.
func (me *Ast) Walk(walker func(ast *Ast) *Ast) *Ast {
	node_res := me.Node.Walk(
		func(node *tree.Node) *tree.Node {
			ast_res := walker(node.Data.(*Ast))
			if ast_res == nil {
				return nil
			} else {
				return ast_res.Node
			}
		})
	// Guard against a walk in which the walker never returned a node.
	if node_res == nil {
		return nil
	}
	return node_res.Data.(*Ast)
}

func NewAst(kind AstType) *Ast {
	ast := &Ast{}
	ast.Node = tree.New(nil, ast)
	ast.AstType = kind
	ast.Token = nil
	return ast
}
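// Sketch of building and walking a small tree by hand (an added
// illustration; the walker returns nil to keep walking, as in Walk above):
//
//	program := NewAst(AstTypeProgram)
//	program.NewChild(AstTypeStatements, nil)
//	program.Walk(func(ast *Ast) *Ast {
//		fmt.Println(ast.AstType)
//		return nil
//	})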
type Parser struct {
	*Ast
	*Lexer
}

// ParseDefinition is still a stub; it should parse WORDS followed by a
// BLOCK, per the grammar above.
func (me *Parser) ParseDefinition() {
	/*
		ParseWords()
		ParseBlock()
	*/
}

func (me *Parser) ParseProgram() {
	me.Ast = NewAst(AstTypeProgram)
	token := <-me.Lexer.Output
	switch token.TokenType {
	case TokenKeyword:
		if token.Value == "to" {
			me.ParseDefinition()
			return
		}
		fallthrough
	default:
		me.Ast.NewChild(AstTypeError, &token)
	}
}
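// Hypothetical driver for the parser as it stands (an added sketch; only a
// program starting with the "to" keyword is handled so far):
//
//	lexer := OpenLexer(strings.NewReader("to greet do end"))
//	parser := &Parser{Lexer: lexer}
//	go lexer.Start()
//	parser.ParseProgram()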
type Environment struct {
	Parent *Environment
}

func main() {
	fmt.Println("Hello World!")
}