grammar.go 8.3 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455
  1. // Grammar describes the rules for a particular ll-parsable language.
  2. package grammar
  3. import "fmt"
  4. import "unicode"
  5. import "sort"
  6. import "strings"
  7. import . "src.eruta.nl/beoran/ll1/common"
  8. // Grammars consists of rules. A Rule can be a Terminal,
  9. // which includes the special terminals Epsilon and End,
  10. // or one of the following nonterminals: and Alternate, a Sequence or a Reference.
  11. type Rule interface {
  12. // Name is the optional name of the rule. If empty, the rule is anonymous.
  13. Name() string
  14. // Definition is the detail of how the rule should be parsed.
  15. // For a Terminal Rule this is the rule itself.
  16. Definition() Rule
  17. // Rules can be stringified
  18. String() string
  19. Check(g *Grammar, r Rule) error
  20. // The first set of the rule
  21. FirstSet() (Set, error)
  22. FollowSet(g *Grammar) (Set, error)
  23. }
  24. // A set is a set of terminals, either the first set or the follow set
  25. // of a rule in the grammar.
  26. type Set map[string]Terminal
  27. type BasicRule struct {
  28. name string
  29. }
  30. func (r BasicRule) Definition() Rule {
  31. return r
  32. }
  33. func (e BasicRule) Name() string {
  34. return e.name
  35. }
  36. func (e BasicRule) String() string {
  37. return fmt.Sprintf("%s", e.name)
  38. }
  39. func (e BasicRule) Check(g *Grammar, r Rule) error {
  40. return nil
  41. }
  42. func (e BasicRule) FirstSet() (Set, error) {
  43. res := make(Set)
  44. return res, nil
  45. }
  46. func (e BasicRule) FollowSet(g *Grammar) (Set, error) {
  47. res := make(Set)
  48. return res, nil
  49. }
  50. func (r BasicRule) IsTerminal() bool {
  51. return !unicode.IsUpper([]rune(r.name)[0])
  52. }
  53. type Terminal struct {
  54. BasicRule
  55. Kind
  56. }
  57. func (t Terminal) String() string {
  58. return fmt.Sprintf("%s(%d)", t.name, t.Kind)
  59. }
  60. func (t Terminal) FirstSet() (Set, error) {
  61. res := make(Set)
  62. res.Add(t)
  63. return res, nil
  64. }
  65. // Epsilon is the empty rule.
  66. type Epsilon struct {
  67. Terminal
  68. }
  69. func (e Epsilon) String() string {
  70. return "ε"
  71. }
  72. // End corresponds to EOF or the end of the input.
  73. type End struct {
  74. Terminal
  75. }
  76. func (e End) String() string {
  77. return "$"
  78. }
  79. type Nonterminal struct {
  80. BasicRule
  81. Define Rule
  82. Template string
  83. // First set of the rule
  84. First Set
  85. // Follow set of the rule
  86. Follow Set
  87. // Nullable or not
  88. Nullable bool
  89. // Realizable or not
  90. Realizable bool
  91. // Recursive (i.e. LEFT-recursive, which LL(1) disallows)
  92. Recursive bool
  93. // Undefined nonterminals in this rule
  94. Undefined bool
  95. // Depth is the depth of (mutual) recursion of a rule. Limited to 16.
  96. Depth int
  97. }
  98. func (n Nonterminal) String() string {
  99. return fmt.Sprintf("%s -> %s", n.BasicRule, n.Define)
  100. }
  101. func (n Nonterminal) Definition() Rule {
  102. return n.Define
  103. }
  104. func (r BasicRule) Equals(r2 Rule) bool {
  105. // XXX probably not correct
  106. return r.Name() == r2.Name()
  107. }
  108. func (n Nonterminal) FirstSet() (Set, error) {
  109. if n.Define == nil { // XXX can this happen an when/why?
  110. return Set{}, nil
  111. }
  112. return n.Define.FirstSet()
  113. }
  114. func (n Nonterminal) FollowSet(g *Grammar) (Set, error) {
  115. if n.Define == nil {
  116. return Set{}, nil
  117. }
  118. return n.Define.FollowSet(g)
  119. }
  120. // Nonterminals can cause recursion and need to be checked
  121. // XXX this function still is very likely wrong.
  122. func (n Nonterminal) Check(g *Grammar, r Rule) error {
  123. if n.Define == nil {
  124. n.Undefined = true
  125. return fmt.Errorf("%s: rule has empty definition", n.Name())
  126. } else if n.Name() != r.Name() {
  127. return nil
  128. }
  129. // check recursively as well
  130. if n.Depth < 16 {
  131. n.Depth++
  132. return n.Define.Check(g, n)
  133. }
  134. return fmt.Errorf("%s: left recursive or recursion too deep", n)
  135. }
  136. type Sequence struct {
  137. Nonterminal
  138. Rules []Rule
  139. }
  140. func (s Sequence) String() string {
  141. sep := ""
  142. res := ""
  143. for _, rule := range s.Rules {
  144. res = res + sep + rule.String()
  145. sep = " "
  146. }
  147. return res
  148. }
  149. func (s Sequence) Check(g *Grammar, r Rule) error {
  150. // check Leftmost Rule for left recursion
  151. if len(s.Rules) > 0 {
  152. e := s.Rules[0]
  153. return e.Check(g, r)
  154. }
  155. return nil
  156. }
  157. func (s Sequence) FirstSet() (Set, error) {
  158. if len(s.Rules) > 0 {
  159. return s.Rules[0].FirstSet()
  160. }
  161. return make(Set), nil
  162. }
  163. func (a Alternates) FirstSet() (Set, error) {
  164. res := make(Set)
  165. for _, s := range a.Rules {
  166. fs, err := s.FirstSet()
  167. if err != nil {
  168. return res, err
  169. }
  170. res = res.Union(fs)
  171. }
  172. return res, nil
  173. }
  174. type Alternates struct {
  175. Nonterminal
  176. Rules []Rule
  177. }
  178. func (a Alternates) Check(g *Grammar, r Rule) error {
  179. for _, s := range a.Rules {
  180. err := s.Check(g, r)
  181. if err != nil {
  182. return err
  183. }
  184. }
  185. return nil
  186. }
  187. func (a Alternates) String() string {
  188. sep := ""
  189. res := "("
  190. for _, rule := range a.Rules {
  191. res = res + sep + rule.String()
  192. sep = " | "
  193. }
  194. return res + ")"
  195. }
  196. // Reference to anther rule by name, to enable right recursion.
  197. // This also requires apointer to the grammar for later resolution.
  198. type Reference struct {
  199. Grammar *Grammar
  200. Nonterminal
  201. To string
  202. }
  203. func (r Reference) Resolve() (Rule, error) {
  204. return r.Grammar.Lookup(r.To)
  205. }
  206. func IsEpsilon(e Rule) bool {
  207. _, ok := e.(Epsilon)
  208. return ok
  209. }
  210. func IsNonterminal(e Rule) bool {
  211. _, ok := e.(Nonterminal)
  212. return ok
  213. }
  214. // whether an Set is nullable (i.e. contains epsilon)
  215. func (s Set) IsNullable() bool {
  216. for _, e := range s {
  217. if IsEpsilon(e) {
  218. return true
  219. }
  220. }
  221. return false
  222. }
  223. func (s Set) Contains(e Terminal) bool {
  224. name := e.Name()
  225. _, ok := s[name]
  226. return ok
  227. }
  228. func (s Set) ContainsKind(k Kind) bool {
  229. kinds := s.ToKinds()
  230. for _, ok := range kinds {
  231. if ok == k {
  232. return true
  233. }
  234. }
  235. return false
  236. }
  237. func (s *Set) Add(e Terminal) bool {
  238. name := e.Name()
  239. _, ok := (*s)[name]
  240. if ok {
  241. return false
  242. }
  243. (*s)[name] = e
  244. return true
  245. }
  246. func (s Set) UnionWithoutEpsilon(s2 Set) Set {
  247. res := make(Set)
  248. if s != nil {
  249. for _, v := range s {
  250. if !IsEpsilon(v) {
  251. res.Add(v)
  252. }
  253. }
  254. }
  255. if s2 != nil {
  256. for _, v := range s2 {
  257. if !IsEpsilon(v) {
  258. res.Add(v)
  259. }
  260. }
  261. }
  262. return res
  263. }
  264. func (s Set) Union(s2 Set) Set {
  265. res := make(Set)
  266. for _, v := range s {
  267. res.Add(v)
  268. }
  269. for _, v := range s2 {
  270. res.Add(v)
  271. }
  272. return res
  273. }
  274. func (s Set) Intersect(s2 Set) Set {
  275. res := make(Set)
  276. for _, v := range s {
  277. if s2.Contains(v) {
  278. res.Add(v)
  279. }
  280. }
  281. return res
  282. }
  283. func (s Set) String() string {
  284. if len(s) == 0 {
  285. return "∅"
  286. }
  287. aid := []string{}
  288. for _, v := range s {
  289. aid = append(aid, v.String())
  290. }
  291. sort.Strings(aid)
  292. return strings.Join(aid, " ")
  293. }
  294. func (s Set) ToKinds() []Kind {
  295. if len(s) == 0 {
  296. return []Kind{}
  297. }
  298. res := []Kind{}
  299. for _, t := range s {
  300. res = append(res, t.Kind)
  301. }
  302. return res
  303. }
  304. type Grammar struct {
  305. BasicRule
  306. Top Rule
  307. Rules []Rule
  308. // All rules, terminals and nonterminals mapped by name.
  309. NamedRules map[string]Rule
  310. // List of error of the grammar. Only valid
  311. // after running Check()
  312. Errors []error
  313. }
  314. func NewGrammar() *Grammar {
  315. g := &Grammar{}
  316. g.NamedRules = map[string]Rule{}
  317. return g
  318. }
  319. func (g Grammar) String() string {
  320. res := "Top → " + g.Top.Name() + "\n"
  321. for _, r := range g.Rules {
  322. res = res + r.String() + "\n"
  323. }
  324. return res
  325. }
  326. func (g Grammar) Lookup(name string) (Rule, error) {
  327. r, ok := g.NamedRules[name]
  328. if ok {
  329. return r, nil
  330. }
  331. return nil, fmt.Errorf("Undefined rule: %s", name)
  332. }
  333. func (g *Grammar) AddRule(r Rule) Rule {
  334. g.Rules = append(g.Rules, r)
  335. if r.Name() != "" {
  336. g.NamedRules[r.Name()] = r
  337. }
  338. return r
  339. }
  340. func Term(name string, kind Kind) Rule {
  341. term := Terminal{}
  342. term.name = name
  343. term.Kind = kind
  344. return term
  345. }
  346. func Seq(name, template string, rules ...Rule) Sequence {
  347. seq := Sequence{}
  348. seq.name = name
  349. seq.Template = template
  350. seq.Rules = rules
  351. return seq
  352. }
  353. func And(rules ...Rule) Rule {
  354. return Seq("", "", rules...)
  355. }
  356. func Alt(name, template string, rules ...Rule) Alternates {
  357. alt := Alternates{}
  358. alt.name = name
  359. alt.Template = template
  360. alt.Rules = rules
  361. return alt
  362. }
  363. func Or(seqs ...Rule) Rule {
  364. return Alt("", "", seqs...)
  365. }
  366. func Opt(name, template string, rule Rule) Rule {
  367. rules := []Rule{rule, Epsilon{}}
  368. return Alt(name, template, rules...)
  369. }
  370. func (g *Grammar) Alt(name, template string, seqs ...Rule) Rule {
  371. return g.AddRule(Alt(name, template, seqs...))
  372. }
  373. func (g *Grammar) Seq(name, template string, seqs ...Rule) Rule {
  374. return g.AddRule(Seq(name, template, seqs...))
  375. }
  376. func (g *Grammar) Term(name string, kind Kind) Rule {
  377. return g.AddRule(Term(name, kind))
  378. }
  379. func (g *Grammar) Opt(name, template string, rule Rule) Rule {
  380. return g.AddRule(Opt(name, template, rule))
  381. }
  382. func Ref(g *Grammar, name, to string) Reference {
  383. ref := Reference{}
  384. ref.Nonterminal.name = name
  385. ref.Grammar = g
  386. ref.To = to
  387. return ref
  388. }
  389. func (g *Grammar) Ref(name, to string) Rule {
  390. return g.AddRule(Ref(g, name, to))
  391. }