parser.go 9.3 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418
  1. package raku
  2. import (
  3. "fmt"
  4. "strings"
  5. "gitlab.com/beoran/woe/graphviz"
  6. "gitlab.com/beoran/woe/monolog"
  7. "gitlab.com/beoran/woe/tree"
  8. )
  9. type ParseAction func(parser *ManualParser) bool
  10. type RuleType int
  11. const (
  12. RuleTypeNone = RuleType(iota)
  13. RuleTypeAlternate
  14. RuleTypeSequence
  15. )
  16. type Rule struct {
  17. tree.Node
  18. Name string
  19. RuleType
  20. ParseAction
  21. }
  22. func NewRule(name string, ruty RuleType) *Rule {
  23. res := &Rule{}
  24. res.RuleType = ruty
  25. res.Name = name
  26. return res
  27. }
  28. func (me *Rule) NewChild(action ParseAction) *Rule {
  29. child := NewRule("foo", RuleTypeNone)
  30. tree.AppendChild(me, child)
  31. return child
  32. }
  33. func (me *Rule) Walk(walker func(rule *Rule) *Rule) *Rule {
  34. node_res := tree.Walk(me,
  35. func(node tree.Noder) tree.Noder {
  36. rule_res := walker(node.(*Rule))
  37. if rule_res == nil {
  38. return nil
  39. } else {
  40. return rule_res
  41. }
  42. })
  43. return node_res.(*Rule)
  44. }
  45. type ManualParser struct {
  46. *Ast
  47. *Tokenizer
  48. Classifier
  49. now *Ast
  50. position int
  51. tokens []*Token
  52. lookahead *Token
  53. }
  54. func (me *ManualParser) SetupRules() {
  55. }
  56. func (me *ManualParser) Expect(types ...TokenType) bool {
  57. monolog.Debug("Expecting: ", types, " from ", me.now.AstType, " have ", me.LookaheadType(), " \n")
  58. for _, t := range types {
  59. if me.LookaheadType() == t {
  60. monolog.Debug("Found: ", t, "\n")
  61. return true
  62. }
  63. }
  64. monolog.Debug("Not found.\n")
  65. return false
  66. }
  67. type Parsable interface {
  68. isParsable()
  69. }
  70. func (me TokenType) isParsable() {
  71. }
  72. func (me ParseAction) isParsable() {
  73. }
  74. /* Advance the lexer but only of there is no lookahead token already available in me.lookahead.
  75. */
  76. func (me *ManualParser) Advance() *Token {
  77. if me.lookahead == nil {
  78. me.lookahead = me.tokens[me.position]
  79. me.position++
  80. }
  81. return me.lookahead
  82. }
  83. func (me *ManualParser) DropLookahead() {
  84. me.lookahead = nil
  85. }
  86. func (me *ManualParser) Lookahead() *Token {
  87. return me.lookahead
  88. }
  89. func (me *ManualParser) LookaheadType() TokenType {
  90. if me.lookahead == nil {
  91. return TokenError
  92. }
  93. return me.Lookahead().TokenType
  94. }
  95. func (me *ManualParser) Consume(atyp AstType, types ...TokenType) bool {
  96. me.Advance()
  97. res := me.Expect(types...)
  98. if res {
  99. me.NewAstChild(atyp)
  100. me.DropLookahead()
  101. }
  102. return res
  103. }
  104. func (me *ManualParser) ConsumeWithoutAst(types ...TokenType) bool {
  105. me.Advance()
  106. res := me.Expect(types...)
  107. if res {
  108. me.DropLookahead()
  109. }
  110. return res
  111. }
  112. /*
  113. func (me * ManualParser) OneOf(restype AstType, options ...Parsable) bool {
  114. res := false
  115. k, v := range options {
  116. switch option := v.Type {
  117. case TokenType: res := Consume(restype, option)
  118. case ParseAction: res := option(me)
  119. }
  120. }
  121. return res
  122. }
  123. */
  124. func (me *ManualParser) ParseEOX() bool {
  125. return me.ConsumeWithoutAst(TokenEOL, TokenPeriod)
  126. }
  127. func (me *ManualParser) ParseValue() bool {
  128. return me.Consume(AstTypeValue, TokenString, TokenNumber, TokenSymbol)
  129. }
  130. func (me *ManualParser) ParseWord() bool {
  131. return me.Consume(AstTypeWord, TokenWord, TokenArticle)
  132. }
  133. func (me *ManualParser) ParseWordValue() bool {
  134. me.NewAstChildDescend(AstTypeWordValue)
  135. res := me.ParseValue() || me.ParseWord()
  136. me.AstAscend(res)
  137. return res
  138. }
  139. func (me *ManualParser) ParseParametersNonempty() bool {
  140. res := false
  141. for me.ParseParameter() {
  142. res = true
  143. }
  144. return res
  145. }
  146. func (me *ManualParser) ParseCallArgs() bool {
  147. me.NewAstChildDescend(AstTypeCallArgs)
  148. res := me.ParseParameters() && me.ParseEOX()
  149. me.AstAscend(res)
  150. return res
  151. }
  152. func (me *ManualParser) ParseOperator() bool {
  153. return me.Consume(AstTypeOperator, TokenOperator)
  154. }
  155. func (me *ManualParser) NewAstChild(tyty AstType) *Ast {
  156. return me.now.NewChild(tyty, me.lookahead)
  157. }
  158. func (me *ManualParser) NewAstChildDescend(tyty AstType) {
  159. node := me.NewAstChild(tyty)
  160. me.now = node
  161. }
  162. func (me *ManualParser) AstAscend(keep bool) {
  163. if me.now.Parent() != nil {
  164. now := me.now
  165. me.now = now.Parent().(*Ast)
  166. if !keep {
  167. now.Remove()
  168. }
  169. }
  170. }
  171. func (me TokenType) BlockCloseForOpen() (TokenType, bool) {
  172. switch me {
  173. case TokenOpenBrace:
  174. return TokenCloseBrace, true
  175. case TokenDo:
  176. return TokenEnd, true
  177. default:
  178. return TokenError, false
  179. }
  180. }
  181. func (me TokenType) ParenthesisCloseForOpen() (TokenType, bool) {
  182. switch me {
  183. case TokenOpenBracket:
  184. return TokenCloseBracket, true
  185. case TokenOpenParen:
  186. return TokenCloseParen, true
  187. default:
  188. return TokenError, false
  189. }
  190. }
  191. func (me *ManualParser) ParseBlock() bool {
  192. me.Advance()
  193. open := me.LookaheadType()
  194. done, ok := open.BlockCloseForOpen()
  195. if !ok {
  196. /* Not an opening of a block, so no block found. */
  197. return false
  198. }
  199. me.DropLookahead()
  200. me.NewAstChildDescend(AstTypeBlock)
  201. res := me.ParseStatements()
  202. me.AstAscend(res)
  203. if res {
  204. me.Advance()
  205. if me.LookaheadType() != done {
  206. return me.ParseError()
  207. }
  208. me.DropLookahead()
  209. }
  210. return res
  211. }
  212. func (me *ManualParser) ParseParenthesis() bool {
  213. me.Advance()
  214. open := me.LookaheadType()
  215. done, ok := open.ParenthesisCloseForOpen()
  216. if !ok {
  217. /* Not an opening of a parenthesis, so no parenthesis found. */
  218. return false
  219. }
  220. me.DropLookahead()
  221. me.NewAstChildDescend(AstTypeParenthesis)
  222. res := me.ParseExpression()
  223. me.AstAscend(res)
  224. if res {
  225. me.Advance()
  226. if me.LookaheadType() != done {
  227. return me.ParseError()
  228. }
  229. me.DropLookahead()
  230. }
  231. return res
  232. }
  233. func (me *ManualParser) ParseWords() bool {
  234. me.NewAstChildDescend(AstTypeWords)
  235. res := me.ParseWord()
  236. for me.ParseWord() {
  237. }
  238. me.AstAscend(res)
  239. return res
  240. }
  241. func (me *ManualParser) ParseDefinition() bool {
  242. me.Advance()
  243. res := me.Consume(AstTypeDefinition, TokenDef)
  244. if !res {
  245. return false
  246. }
  247. res = res && (me.ParseWord() || me.ParseOperator())
  248. if !res {
  249. _ = me.ParseError()
  250. }
  251. res = res && me.ParseParametersNonempty()
  252. if !res {
  253. _ = me.ParseError()
  254. }
  255. me.AstAscend(res)
  256. return res
  257. }
  258. func (me *ManualParser) ParseParameter() bool {
  259. me.NewAstChildDescend(AstTypeParameter)
  260. res := me.ParseWordValue() || me.ParseOperator() ||
  261. me.ParseParenthesis() || me.ParseBlock()
  262. me.AstAscend(res)
  263. return res
  264. }
  265. func (me *ManualParser) ParseParameters() bool {
  266. for me.ParseParameter() {
  267. }
  268. return true
  269. }
  270. func (me *ManualParser) ParseError() bool {
  271. me.now.NewChild(AstTypeError, me.lookahead)
  272. fmt.Printf("Parse error: at %s\n", me.lookahead)
  273. return false
  274. }
  275. func (me *ManualParser) ParseExpression() bool {
  276. return (me.ParseWordValue() || me.ParseOperator()) && me.ParseParameters()
  277. }
  278. func (me *ManualParser) ParseStatement() bool {
  279. me.NewAstChildDescend(AstTypeStatement)
  280. /* First case is for an empty expression/statement. */
  281. res := me.ParseEOX() ||
  282. me.ParseDefinition() ||
  283. (me.ParseExpression() && me.ParseEOX()) ||
  284. me.ParseBlock()
  285. me.AstAscend(res)
  286. return res
  287. }
  288. func (me *ManualParser) ParseEOF() bool {
  289. return me.Consume(AstTypeEox, TokenEOF)
  290. }
  291. func (me *ManualParser) ParseStatements() bool {
  292. me.NewAstChildDescend(AstTypeStatements)
  293. res := me.ParseStatement()
  294. for me.ParseStatement() {
  295. }
  296. me.AstAscend(res)
  297. return res
  298. }
  299. func (me *ManualParser) ParseProgram() bool {
  300. return me.ParseStatements() && me.ParseEOF()
  301. }
  302. func (me *Ast) DotID() string {
  303. return fmt.Sprintf("ast_%p", me)
  304. }
  305. func (me *Ast) ToGraph() *graphviz.Digraph {
  306. g := graphviz.NewDigraph("rankdir", "LR")
  307. me.Walk(func(ast *Ast) *Ast {
  308. label := ast.AstType.String()
  309. if ast.Token != nil {
  310. token := ast.Token.ShortString()
  311. label = label + "\n" + token
  312. }
  313. g.AddNode(ast.DotID(), "label", label)
  314. if ast.Parent() != nil {
  315. g.AddEdgeByName(ast.Parent().(*Ast).DotID(), ast.DotID())
  316. }
  317. return nil
  318. })
  319. return g
  320. }
  321. func (me *Ast) Dotty() {
  322. g := me.ToGraph()
  323. g.Dotty()
  324. }
  325. func (me *Ast) ToAscii() {
  326. me.Walk(func(ast *Ast) *Ast {
  327. depth := tree.Depth(ast)
  328. nchild:= tree.CountChildren(ast)
  329. label := ast.AstType.String()
  330. indent:= strings.Repeat("--", depth)
  331. if ast.Token != nil {
  332. token := ast.Token.ShortString()
  333. fmt.Printf("%s>%s: %s\n", indent, label, token);
  334. } else {
  335. fmt.Printf("%s>%s: (%d)\n", indent, label, nchild );
  336. }
  337. return nil
  338. })
  339. }
  340. /*
  341. PROGRAM -> STATEMENTS.
  342. STATEMENTS -> STATEMENT STATEMENTS | .
  343. STATEMENT -> EXPRESSION EOX | DEFINITION | BLOCK .
  344. DEFINITION -> define WORDOP WORDOPS BLOCK.
  345. WORDOPS -> WORDOP WORDOPS | .
  346. EXPRESSION -> WORDVALUE PARAMETERS.
  347. PARAMETERS -> PARAMETER PARAMETERS | .
  348. PARAMETER -> WORDVALUE | PARENTHESIS | BLOCK | operator.
  349. PARENTHESIS -> '(' EXPRESSION ')' | ot EXPRESSION ct.
  350. BLOCK -> oe STATEMENTS ce | do STATEMENTS end .
  351. WORDOP -> word | operator | a | the.
  352. WORDVALUE -> word | VALUE | a | the.
  353. VALUE -> string | number | symbol.
  354. EOX -> eol | period.
  355. )
  356. */