grammar.go 8.4 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460
  1. // Grammar describes the rules for a particular ll-parsable language.
  2. package grammar
  3. import "fmt"
  4. import "unicode"
  5. import "sort"
  6. import "strings"
  7. import . "src.eruta.nl/beoran/ll1/common"
  8. // Grammars consists of rules. A Rule can be a Terminal,
  9. // which includes the special terminals Epsilon and End,
  10. // or one of the following nonterminals: and Alternate, a Sequence or a Reference.
  11. type Rule interface {
  12. // Name is the optional name of the rule. If empty, the rule is anonymous.
  13. Name() string
  14. // Definition is the detail of how the rule should be parsed.
  15. // For a Terminal Rule this is the rule itself.
  16. Definition() Rule
  17. // Rules can be stringified
  18. String() string
  19. Check(g *Grammar, r Rule) error
  20. // The first set of the rule
  21. FirstSet() (Set, error)
  22. FollowSet(g *Grammar) (Set, error)
  23. }
  24. // A set is a set of terminals, either the first set or the follow set
  25. // of a rule in the grammar.
  26. type Set map[string]Terminal
  27. type BasicRule struct {
  28. name string
  29. }
  30. func (r BasicRule) Definition() Rule {
  31. return r
  32. }
  33. func (e BasicRule) Name() string {
  34. return e.name
  35. }
  36. func (e BasicRule) String() string {
  37. return fmt.Sprintf("%s", e.name)
  38. }
  39. func (e BasicRule) Check(g *Grammar, r Rule) error {
  40. return nil
  41. }
  42. func (e BasicRule) FirstSet() (Set, error) {
  43. res := make(Set)
  44. return res, nil
  45. }
  46. func (e BasicRule) FollowSet(g *Grammar) (Set, error) {
  47. res := make(Set)
  48. return res, nil
  49. }
  50. func (r BasicRule) IsTerminal() bool {
  51. return !unicode.IsUpper([]rune(r.name)[0])
  52. }
  53. type Terminal struct {
  54. BasicRule
  55. Kind
  56. }
  57. func (t Terminal) String() string {
  58. return fmt.Sprintf("%s(%d)", t.name, t.Kind)
  59. }
  60. func (t Terminal) FirstSet() (Set, error) {
  61. res := make(Set)
  62. res.Add(t)
  63. return res, nil
  64. }
  65. // Epsilon is the empty rule.
  66. type Epsilon struct {
  67. Terminal
  68. }
  69. func (e Epsilon) String() string {
  70. return "ε"
  71. }
  72. // End corresponds to EOF or the end of the input.
  73. type EndOfInput struct {
  74. Terminal
  75. }
  76. func (e EndOfInput) String() string {
  77. return "$"
  78. }
  79. func End() EndOfInput {
  80. terminal := Term("<end>", EndKind)
  81. return EndOfInput{terminal}
  82. }
  83. type Nonterminal struct {
  84. BasicRule
  85. Define Rule
  86. Template string
  87. // First set of the rule
  88. First Set
  89. // Follow set of the rule
  90. Follow Set
  91. // Nullable or not
  92. Nullable bool
  93. // Realizable or not
  94. Realizable bool
  95. // Recursive (i.e. LEFT-recursive, which LL(1) disallows)
  96. Recursive bool
  97. // Undefined nonterminals in this rule
  98. Undefined bool
  99. // Depth is the depth of (mutual) recursion of a rule. Limited to 16.
  100. Depth int
  101. }
  102. func (n Nonterminal) String() string {
  103. return fmt.Sprintf("%s -> %s", n.BasicRule, n.Define)
  104. }
  105. func (n Nonterminal) Definition() Rule {
  106. return n.Define
  107. }
  108. func (r BasicRule) Equals(r2 Rule) bool {
  109. // XXX probably not correct
  110. return r.Name() == r2.Name()
  111. }
  112. func (n Nonterminal) FirstSet() (Set, error) {
  113. if n.Define == nil { // XXX can this happen an when/why?
  114. return Set{}, nil
  115. }
  116. return n.Define.FirstSet()
  117. }
  118. func (n Nonterminal) FollowSet(g *Grammar) (Set, error) {
  119. if n.Define == nil {
  120. return Set{}, nil
  121. }
  122. return n.Define.FollowSet(g)
  123. }
  124. // Nonterminals can cause recursion and need to be checked
  125. // XXX this function still is very likely wrong.
  126. func (n Nonterminal) Check(g *Grammar, r Rule) error {
  127. if n.Define == nil {
  128. n.Undefined = true
  129. return fmt.Errorf("%s: rule has empty definition", n.Name())
  130. } else if n.Name() != r.Name() {
  131. return nil
  132. }
  133. // check recursively as well
  134. if n.Depth < 16 {
  135. n.Depth++
  136. return n.Define.Check(g, n)
  137. }
  138. return fmt.Errorf("%s: left recursive or recursion too deep", n)
  139. }
  140. type Sequence struct {
  141. Nonterminal
  142. Rules []Rule
  143. }
  144. func (s Sequence) String() string {
  145. sep := ""
  146. res := ""
  147. for _, rule := range s.Rules {
  148. res = res + sep + rule.String()
  149. sep = " "
  150. }
  151. return res
  152. }
  153. func (s Sequence) Check(g *Grammar, r Rule) error {
  154. // check Leftmost Rule for left recursion
  155. if len(s.Rules) > 0 {
  156. e := s.Rules[0]
  157. return e.Check(g, r)
  158. }
  159. return nil
  160. }
  161. func (s Sequence) FirstSet() (Set, error) {
  162. if len(s.Rules) > 0 {
  163. return s.Rules[0].FirstSet()
  164. }
  165. return make(Set), nil
  166. }
  167. func (a Alternates) FirstSet() (Set, error) {
  168. res := make(Set)
  169. for _, s := range a.Rules {
  170. fs, err := s.FirstSet()
  171. if err != nil {
  172. return res, err
  173. }
  174. res = res.Union(fs)
  175. }
  176. return res, nil
  177. }
  178. type Alternates struct {
  179. Nonterminal
  180. Rules []Rule
  181. }
  182. func (a Alternates) Check(g *Grammar, r Rule) error {
  183. for _, s := range a.Rules {
  184. err := s.Check(g, r)
  185. if err != nil {
  186. return err
  187. }
  188. }
  189. return nil
  190. }
  191. func (a Alternates) String() string {
  192. sep := ""
  193. res := "("
  194. for _, rule := range a.Rules {
  195. res = res + sep + rule.String()
  196. sep = " | "
  197. }
  198. return res + ")"
  199. }
  200. // Reference to anther rule by name, to enable right recursion.
  201. // This also requires apointer to the grammar for later resolution.
  202. type Reference struct {
  203. Grammar *Grammar
  204. Nonterminal
  205. To string
  206. }
  207. func (r Reference) Resolve() (Rule, error) {
  208. return r.Grammar.Lookup(r.To)
  209. }
  210. func IsEpsilon(e Rule) bool {
  211. _, ok := e.(Epsilon)
  212. return ok
  213. }
  214. func IsNonterminal(e Rule) bool {
  215. _, ok := e.(Nonterminal)
  216. return ok
  217. }
  218. // whether an Set is nullable (i.e. contains epsilon)
  219. func (s Set) IsNullable() bool {
  220. for _, e := range s {
  221. if IsEpsilon(e) {
  222. return true
  223. }
  224. }
  225. return false
  226. }
  227. func (s Set) Contains(e Terminal) bool {
  228. name := e.Name()
  229. _, ok := s[name]
  230. return ok
  231. }
  232. func (s Set) ContainsKind(k Kind) bool {
  233. kinds := s.ToKinds()
  234. for _, ok := range kinds {
  235. if ok == k {
  236. return true
  237. }
  238. }
  239. return false
  240. }
  241. func (s *Set) Add(e Terminal) bool {
  242. name := e.Name()
  243. _, ok := (*s)[name]
  244. if ok {
  245. return false
  246. }
  247. (*s)[name] = e
  248. return true
  249. }
  250. func (s Set) UnionWithoutEpsilon(s2 Set) Set {
  251. res := make(Set)
  252. if s != nil {
  253. for _, v := range s {
  254. if !IsEpsilon(v) {
  255. res.Add(v)
  256. }
  257. }
  258. }
  259. if s2 != nil {
  260. for _, v := range s2 {
  261. if !IsEpsilon(v) {
  262. res.Add(v)
  263. }
  264. }
  265. }
  266. return res
  267. }
  268. func (s Set) Union(s2 Set) Set {
  269. res := make(Set)
  270. for _, v := range s {
  271. res.Add(v)
  272. }
  273. for _, v := range s2 {
  274. res.Add(v)
  275. }
  276. return res
  277. }
  278. func (s Set) Intersect(s2 Set) Set {
  279. res := make(Set)
  280. for _, v := range s {
  281. if s2.Contains(v) {
  282. res.Add(v)
  283. }
  284. }
  285. return res
  286. }
  287. func (s Set) String() string {
  288. if len(s) == 0 {
  289. return "∅"
  290. }
  291. aid := []string{}
  292. for _, v := range s {
  293. aid = append(aid, v.String())
  294. }
  295. sort.Strings(aid)
  296. return strings.Join(aid, " ")
  297. }
  298. func (s Set) ToKinds() []Kind {
  299. if len(s) == 0 {
  300. return []Kind{}
  301. }
  302. res := []Kind{}
  303. for _, t := range s {
  304. res = append(res, t.Kind)
  305. }
  306. return res
  307. }
  308. type Grammar struct {
  309. BasicRule
  310. Top Rule
  311. Rules []Rule
  312. // All rules, terminals and nonterminals mapped by name.
  313. NamedRules map[string]Rule
  314. // List of error of the grammar. Only valid
  315. // after running Check()
  316. Errors []error
  317. }
  318. func NewGrammar() *Grammar {
  319. g := &Grammar{}
  320. g.NamedRules = map[string]Rule{}
  321. return g
  322. }
  323. func (g Grammar) String() string {
  324. res := "Top → " + g.Top.Name() + "\n"
  325. for _, r := range g.Rules {
  326. res = res + r.String() + "\n"
  327. }
  328. return res
  329. }
  330. func (g Grammar) Lookup(name string) (Rule, error) {
  331. r, ok := g.NamedRules[name]
  332. if ok {
  333. return r, nil
  334. }
  335. return nil, fmt.Errorf("Undefined rule: %s", name)
  336. }
  337. func (g *Grammar) AddRule(r Rule) Rule {
  338. g.Rules = append(g.Rules, r)
  339. if r.Name() != "" {
  340. g.NamedRules[r.Name()] = r
  341. }
  342. return r
  343. }
  344. func Term(name string, kind Kind) Terminal {
  345. term := Terminal{}
  346. term.name = name
  347. term.Kind = kind
  348. return term
  349. }
  350. func Seq(name, template string, rules ...Rule) Sequence {
  351. seq := Sequence{}
  352. seq.name = name
  353. seq.Template = template
  354. seq.Rules = rules
  355. return seq
  356. }
  357. func And(rules ...Rule) Rule {
  358. return Seq("", "", rules...)
  359. }
  360. func Alt(name, template string, rules ...Rule) Alternates {
  361. alt := Alternates{}
  362. alt.name = name
  363. alt.Template = template
  364. alt.Rules = rules
  365. return alt
  366. }
  367. func Or(seqs ...Rule) Rule {
  368. return Alt("", "", seqs...)
  369. }
  370. func Opt(name, template string, rule Rule) Rule {
  371. rules := []Rule{rule, Epsilon{}}
  372. return Alt(name, template, rules...)
  373. }
  374. func (g *Grammar) Alt(name, template string, seqs ...Rule) Rule {
  375. return g.AddRule(Alt(name, template, seqs...))
  376. }
  377. func (g *Grammar) Seq(name, template string, seqs ...Rule) Rule {
  378. return g.AddRule(Seq(name, template, seqs...))
  379. }
  380. func (g *Grammar) Term(name string, kind Kind) Rule {
  381. return g.AddRule(Term(name, kind))
  382. }
  383. func (g *Grammar) Opt(name, template string, rule Rule) Rule {
  384. return g.AddRule(Opt(name, template, rule))
  385. }
  386. func Ref(g *Grammar, name, to string) Reference {
  387. ref := Reference{}
  388. ref.Nonterminal.name = name
  389. ref.Grammar = g
  390. ref.To = to
  391. return ref
  392. }
  393. func (g *Grammar) Ref(name, to string) Rule {
  394. return g.AddRule(Ref(g, name, to))
  395. }