grammar.go 7.9 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431
  1. // Grammar describes the rules for a particular ll-parsable language.
  2. package grammar
  3. import "fmt"
  4. import "unicode"
  5. import "sort"
  6. import "strings"
  7. import . "src.eruta.nl/beoran/ll1/common"
  8. // Grammars consists of rules. A Rule can be a Terminal,
  9. // which includes the special terminals Epsilon and End,
  10. // or one of the following nonterminals: and Alternate, a Sequence or a Reference.
  11. type Rule interface {
  12. // Name is the optional name of the rule. If empty, the rule is anonymous.
  13. Name() string
  14. // Definition is the detail of how the rule should be parsed.
  15. // For a Terminal Rule this is the rule itself.
  16. Definition() Rule
  17. // Rules can be stringified
  18. String() string
  19. Check(g *Grammar, r Rule) error
  20. // The first set of the rule
  21. FirstSet() (Set, error)
  22. FollowSet(g *Grammar) (Set, error)
  23. }
  24. // A set is a set of terminals, either the first set or the follow set
  25. // of a rule in the grammar.
  26. type Set map[string]Terminal
  27. type BasicRule struct {
  28. name string
  29. }
  30. func (r BasicRule) Definition() Rule {
  31. return r
  32. }
  33. func (e BasicRule) Name() string {
  34. return e.name
  35. }
  36. func (e BasicRule) String() string {
  37. return fmt.Sprintf("%s", e.name)
  38. }
  39. func (e BasicRule) Check(g *Grammar, r Rule) error {
  40. return nil
  41. }
  42. func (e BasicRule) FirstSet() (Set, error) {
  43. res := make(Set)
  44. return res, nil
  45. }
  46. func (e BasicRule) FollowSet(g *Grammar) (Set, error) {
  47. res := make(Set)
  48. return res, nil
  49. }
  50. func (r BasicRule) IsTerminal() bool {
  51. return !unicode.IsUpper([]rune(r.name)[0])
  52. }
  53. type Terminal struct {
  54. BasicRule
  55. Kind
  56. }
  57. func (t Terminal) String() string {
  58. return fmt.Sprintf("%s(%d)", t.name, t.Kind)
  59. }
  60. func (t Terminal) FirstSet() (Set, error) {
  61. res := make(Set)
  62. res.Add(t)
  63. return res, nil
  64. }
  65. // Epsilon is the empty rule.
  66. type Epsilon struct {
  67. Terminal
  68. }
  69. func (e Epsilon) String() string {
  70. return "ε"
  71. }
  72. // End corresponds to EOF or the end of the input.
  73. type End struct {
  74. Terminal
  75. }
  76. func (e End) String() string {
  77. return "$"
  78. }
  79. type Nonterminal struct {
  80. BasicRule
  81. Define Rule
  82. Template string
  83. // First set of the rule
  84. First Set
  85. // Follow set of the rule
  86. Follow Set
  87. // Nullable or not
  88. Nullable bool
  89. // Realizable or not
  90. Realizable bool
  91. // Recursive (i.e. LEFT-recursive, which LL(1) disallows)
  92. Recursive bool
  93. // Undefined nonterminals in this rule
  94. Undefined bool
  95. // Depth is the depth of (mutual) recursion of a rule. Limited to 16.
  96. Depth int
  97. }
  98. func (n Nonterminal) String() string {
  99. return fmt.Sprintf("%s -> %s", n.BasicRule, n.Define)
  100. }
  101. func (n Nonterminal) Definition() Rule {
  102. return n.Define
  103. }
  104. func (r BasicRule) Equals(r2 Rule) bool {
  105. // XXX probably not correct
  106. return r.Name() == r2.Name()
  107. }
  108. func (n Nonterminal) FirstSet() (Set, error) {
  109. if n.Define == nil { // XXX can this happen an when/why?
  110. return Set{}, nil
  111. }
  112. return n.Define.FirstSet()
  113. }
  114. func (n Nonterminal) FollowSet(g *Grammar) (Set, error) {
  115. if n.Define == nil {
  116. return Set{}, nil
  117. }
  118. return n.Define.FollowSet(g)
  119. }
  120. // Nonterminals can cause recursion and need to be checked
  121. // XXX this function still is very likely wrong.
  122. func (n Nonterminal) Check(g *Grammar, r Rule) error {
  123. if n.Define == nil {
  124. n.Undefined = true
  125. return fmt.Errorf("%s: rule has empty definition", n.Name())
  126. } else if n.Name() != r.Name() {
  127. return nil
  128. }
  129. // check recursively as well
  130. if n.Depth < 16 {
  131. n.Depth++
  132. return n.Define.Check(g, n)
  133. }
  134. return fmt.Errorf("%s: left recursive or recursion too deep", n)
  135. }
  136. type Sequence struct {
  137. Nonterminal
  138. Rules []Rule
  139. }
  140. func (s Sequence) String() string {
  141. sep := ""
  142. res := ""
  143. for _, rule := range s.Rules {
  144. res = res + sep + rule.String()
  145. sep = " "
  146. }
  147. return res
  148. }
  149. func (s Sequence) Check(g *Grammar, r Rule) error {
  150. // check Leftmost Rule for left recursion
  151. if len(s.Rules) > 0 {
  152. e := s.Rules[0]
  153. return e.Check(g, r)
  154. }
  155. return nil
  156. }
  157. func (s Sequence) FirstSet() (Set, error) {
  158. if len(s.Rules) > 0 {
  159. return s.Rules[0].FirstSet()
  160. }
  161. return make(Set), nil
  162. }
  163. func (a Alternates) FirstSet() (Set, error) {
  164. res := make(Set)
  165. for _, s := range a.Rules {
  166. fs, err := s.FirstSet()
  167. if err != nil {
  168. return res, err
  169. }
  170. res = res.Union(fs)
  171. }
  172. return res, nil
  173. }
  174. type Alternates struct {
  175. Nonterminal
  176. Rules []Rule
  177. }
  178. func (a Alternates) Check(g *Grammar, r Rule) error {
  179. for _, s := range a.Rules {
  180. err := s.Check(g, r)
  181. if err != nil {
  182. return err
  183. }
  184. }
  185. return nil
  186. }
  187. func (a Alternates) String() string {
  188. sep := ""
  189. res := "("
  190. for _, rule := range a.Rules {
  191. res = res + sep + rule.String()
  192. sep = " | "
  193. }
  194. return res + ")"
  195. }
  196. // Reference to anther rule by name, to enable right recursion.
  197. // This also requires apointer to the grammar for later resolution.
  198. type Reference struct {
  199. Grammar *Grammar
  200. Nonterminal
  201. To string
  202. }
  203. func (r Reference) Resolve() (Rule, error) {
  204. return r.Grammar.Lookup(r.To)
  205. }
  206. func IsEpsilon(e Rule) bool {
  207. _, ok := e.(Epsilon)
  208. return ok
  209. }
  210. func IsNonterminal(e Rule) bool {
  211. _, ok := e.(Nonterminal)
  212. return ok
  213. }
  214. // whether an Set is nullable (i.e. contains epsilon)
  215. func (s Set) IsNullable() bool {
  216. for _, e := range s {
  217. if IsEpsilon(e) {
  218. return true
  219. }
  220. }
  221. return false
  222. }
  223. func (s Set) Contains(e Terminal) bool {
  224. name := e.Name()
  225. _, ok := s[name]
  226. return ok
  227. }
  228. func (s *Set) Add(e Terminal) bool {
  229. name := e.Name()
  230. _, ok := (*s)[name]
  231. if ok {
  232. return false
  233. }
  234. (*s)[name] = e
  235. return true
  236. }
  237. func (s Set) UnionWithoutEpsilon(s2 Set) Set {
  238. res := make(Set)
  239. if s != nil {
  240. for _, v := range s {
  241. if !IsEpsilon(v) {
  242. res.Add(v)
  243. }
  244. }
  245. }
  246. if s2 != nil {
  247. for _, v := range s2 {
  248. if !IsEpsilon(v) {
  249. res.Add(v)
  250. }
  251. }
  252. }
  253. return res
  254. }
  255. func (s Set) Union(s2 Set) Set {
  256. res := make(Set)
  257. for _, v := range s {
  258. res.Add(v)
  259. }
  260. for _, v := range s2 {
  261. res.Add(v)
  262. }
  263. return res
  264. }
  265. func (s Set) Intersect(s2 Set) Set {
  266. res := make(Set)
  267. for _, v := range s {
  268. if s2.Contains(v) {
  269. res.Add(v)
  270. }
  271. }
  272. return res
  273. }
  274. func (s Set) String() string {
  275. if len(s) == 0 {
  276. return "∅"
  277. }
  278. aid := []string{}
  279. for _, v := range s {
  280. aid = append(aid, v.String())
  281. }
  282. sort.Strings(aid)
  283. return strings.Join(aid, " ")
  284. }
  285. func (s Set) ToTokenKinds() []Kind {
  286. if len(s) == 0 {
  287. return []Kind{}
  288. }
  289. res := []Kind{}
  290. for _, t := range s {
  291. res = append(res, t.Kind)
  292. }
  293. return res
  294. }
  295. type Grammar struct {
  296. BasicRule
  297. Top Rule
  298. Rules []Rule
  299. // All rules, terminals and nonterminals mapped by name.
  300. NamedRules map[string]Rule
  301. // List of error of the grammar. Only valid
  302. // after running Check()
  303. Errors []error
  304. }
  305. func (g Grammar) String() string {
  306. res := "Top → " + g.Top.Name() + "\n"
  307. for _, r := range g.Rules {
  308. res = res + r.String() + "\n"
  309. }
  310. return res
  311. }
  312. func (g Grammar) Lookup(name string) (Rule, error) {
  313. r, ok := g.NamedRules[name]
  314. if ok {
  315. return r, nil
  316. }
  317. return nil, fmt.Errorf("Undefined rule: %s", name)
  318. }
  319. func (g *Grammar) AddRule(r Rule) Rule {
  320. g.Rules = append(g.Rules, r)
  321. if r.Name() != "" {
  322. g.NamedRules[r.Name()] = r
  323. }
  324. return r
  325. }
  326. func Term(name string, kind Kind) Rule {
  327. term := Terminal{}
  328. term.name = name
  329. term.Kind = kind
  330. return term
  331. }
  332. func Seq(name, template string, rules ...Rule) Sequence {
  333. seq := Sequence{}
  334. seq.name = name
  335. seq.Template = template
  336. seq.Rules = rules
  337. return seq
  338. }
  339. func And(rules ...Rule) Rule {
  340. return Seq("", "", rules...)
  341. }
  342. func Alt(name, template string, rules ...Rule) Alternates {
  343. alt := Alternates{}
  344. alt.name = name
  345. alt.Template = template
  346. alt.Rules = rules
  347. return alt
  348. }
  349. func Or(seqs ...Rule) Rule {
  350. return Alt("", "", seqs...)
  351. }
  352. func Opt(name, template string, rule Rule) Rule {
  353. rules := []Rule{rule, Epsilon{}}
  354. return Alt(name, template, rules...)
  355. }
  356. func (g *Grammar) Alt(name, template string, seqs ...Rule) Rule {
  357. return g.AddRule(Alt(name, template, seqs...))
  358. }
  359. func (g *Grammar) Term(name string, kind Kind) Rule {
  360. return g.AddRule(Term(name, kind))
  361. }
  362. func Ref(g *Grammar, name, to string) Reference {
  363. ref := Reference{}
  364. ref.Nonterminal.name = name
  365. ref.Grammar = g
  366. ref.To = to
  367. return ref
  368. }
  369. func (g *Grammar) Ref(name, to string) Rule {
  370. return g.AddRule(Ref(g, name, to))
  371. }