ll1.go 17 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671
  1. package main
  2. // Use the go scanner so we don't need to write a lexer(yet)
  3. import "text/scanner"
  4. import "fmt"
  5. import "unicode"
  6. import "sort"
  7. import "strings"
  8. /*
  9. The meta-grammar for ll1 is:
  10. Grammar -> Rules .
  11. Rules -> Rule eor Rules | .
  12. Rule -> Name arrow Definition.
  13. Name -> identifier .
  14. Definition -> Alternates . // Alternates have a higher priority than sequences.
  15. Alternates -> Sequence or Alternates | .
  16. Sequence -> Element Sequence | .
  17. Element -> Parenthesis | Terminal | Literal .
  18. Parenthesis -> '(' Definition ')' .
  19. Terminal -> identifier .
  20. */
  21. type Token struct {
  22. Kind rune
  23. Value string
  24. Position scanner.Position
  25. }
  26. func (t Token) String() string {
  27. return fmt.Sprintf("token: %s %s %s", t.Position, scanner.TokenString(t.Kind), t.Value)
  28. }
  29. func (t Token) ShortString() string {
  30. return t.Value
  31. }
  32. func (t Token) MakeError(mesg string) error {
  33. return fmt.Errorf("%s: error at %s: <%s>: %s", t.Position, scanner.TokenString(t.Kind), t.Value, mesg)
  34. }
  35. /*
  36. Check if a Grammar is LL(1)
  37. May 9, 2020 by admin
  38. Predictive Parser is used to construct a parsing table for a class of grammar called LL(1).
  39. The first ‘L’ means input is scanned from left to right, second ‘L’ refers to Left-Derivation,
  40. and ‘1’ means the number of lookaheads. Rules of LL(1) Grammar
  41. A grammar is LL(1) if it satisfy the following rules for a production
  42. \bf A \bf \rightarrow \pmb \alpha | \pmb \beta for all \pmb \alpha,\pmb \beta .
  43. \bf \pmb \alpha,\pmb \beta do not derives same terminal.
  44. Only one of \pmb \alpha,\pmb \beta can derive ‘Є’.
  45. If \pmb \alpha derives ‘Є’, then \bf FIRST(\pmb \beta) should not contain any terminal that is in \bf FOLLOW(A). If \pmb \beta derives ‘Є’, then \bf FIRST(\pmb \alpha) should not contain any terminal that is in \bf FOLLOW(A).
  46. The first two conditions are equivalent to saying \bf FIRST(\pmb \alpha) \cap FIRST(\pmb \beta) = \pmb \varnothing . The third condition is equivalent to saying \bf FOLLOW(A) \cap FIRST(\pmb \beta) = \pmb \varnothing if \pmb \alpha derives ‘Є’ and \bf FOLLOW(A) \cap FIRST(\pmb \alpha) = \pmb \varnothing if \pmb \beta derives ‘Є’.
  47. Note
  48. If a Grammar has Left-Recursion then it is not LL(1).Example, \bf A \bf \rightarrow A\pmb \beta . The Parser will not able to determine deterministically how many times production \bf A \bf \rightarrow A\pmb \beta must be used.
  49. */
  50. type element struct {
  51. token Token
  52. }
  53. func (element ) isElement() {
  54. }
  55. func (e element) String() string {
  56. return e.token.ShortString()
  57. }
  58. func (e element) Check(g *Grammar, r * Rule) error {
  59. return nil
  60. }
  61. func (e element) Token() Token {
  62. return e.token
  63. }
  64. func (e element) FirstSet(g *Grammar) (Set, error) {
  65. res := make(Set)
  66. res.Add(e)
  67. return res, nil
  68. }
  69. func (e element) FollowSet(g *Grammar, r *Rule) (Set, error) {
  70. // most elements, i.e. terminals and epsilon have no follow set
  71. // so return an empty set
  72. res := make(Set)
  73. return res, nil
  74. }
  75. // Epsilon is the empty rule.
  76. type Epsilon struct {
  77. element
  78. }
  79. func (e Epsilon) String() string {
  80. return "ε"
  81. }
  82. // End corresponds to EOF/ end of the input.
  83. type End struct {
  84. element
  85. }
  86. func (e End) String() string {
  87. return "$"
  88. }
  89. type Terminal struct {
  90. element
  91. }
  92. func (t Terminal) String() string {
  93. return t.Token().Value
  94. }
  95. type Nonterminal struct {
  96. element
  97. *Rule
  98. }
  99. func (n Nonterminal) String() string {
  100. return n.Token().Value
  101. }
  102. // Nonterminals can cause recursion
  103. func (n Nonterminal) Check(g *Grammar, r *Rule) error {
  104. found, err := g.Lookup(n)
  105. if err != nil {
  106. r.Undefined = true
  107. return err
  108. }
  109. if r.Equals(*found) {
  110. r.Recursive = true
  111. return n.Token().MakeError("left recursive")
  112. }
  113. // still need to check recursively as well
  114. if r.Depth < 16 {
  115. r.Depth ++
  116. return found.Definition.Check(g, r)
  117. }
  118. return n.Token().MakeError("left recursive or recursion too deep")
  119. }
  120. func (a Alternates) Check(g *Grammar, r *Rule) error {
  121. for _, s := range a.Sequences {
  122. // check Leftmost element for left recursion
  123. if (len(s.Elements) > 0) {
  124. e := s.Elements[0]
  125. err := e.Check(g, r)
  126. if err != nil {
  127. return err
  128. }
  129. }
  130. }
  131. return nil
  132. }
  133. type Literal struct {
  134. Terminal
  135. }
  136. func (l Literal) String() string {
  137. return "\"" + l.Token().Value + "\""
  138. }
  139. // Element can be one of Terminal, Nonterminal, Literal, or Alternates
  140. // in case of a sub-rule.
  141. type Element interface {
  142. isElement()
  143. Token() Token
  144. String() string
  145. Check(g *Grammar, r * Rule) error
  146. FirstSet(g *Grammar) (Set, error)
  147. FollowSet(g *Grammar, r *Rule) (Set, error)
  148. }
  149. type Sequence struct {
  150. Elements []Element
  151. }
  152. func (s Sequence) String() string {
  153. sep := ""
  154. res := ""
  155. for _, seq := range s.Elements {
  156. res = res + sep + seq.String()
  157. sep = " "
  158. }
  159. return res
  160. }
  161. type Alternates struct {
  162. element
  163. Sequences []Sequence
  164. }
  165. func (a Alternates) String() string {
  166. sep := ""
  167. res := "("
  168. for _, seq := range a.Sequences {
  169. res = res + sep + seq.String()
  170. sep = " | "
  171. }
  172. return res + ")"
  173. }
  174. type Set map[string]Element
  175. func IsEpsilon(e Element) bool {
  176. _, ok := e.(Epsilon)
  177. return ok
  178. }
  179. func IsNonterminal(e Element) bool {
  180. _, ok := e.(Nonterminal)
  181. return ok
  182. }
  183. // whether an element set is nullable (i.e. contains epsilon)
  184. func (s Set) IsNullable() bool {
  185. for _, e := range s {
  186. if IsEpsilon(e) {
  187. return true
  188. }
  189. }
  190. return false
  191. }
  192. func (s Set) Contains(e Element) bool {
  193. name := e.Token().Value
  194. v, ok := s[name]
  195. return v!= nil && ok
  196. }
  197. func (s * Set) Add(e Element) bool {
  198. name := e.Token().Value
  199. v, ok := (*s)[name]
  200. if v!= nil || ok {
  201. return false
  202. }
  203. (*s)[e.Token().Value] = e
  204. return true
  205. }
  206. func (s Set) UnionWithoutEpsilon(s2 Set) Set {
  207. res := make(Set)
  208. if s != nil {
  209. for _, v := range s {
  210. if !IsEpsilon(v) {
  211. res.Add(v)
  212. }
  213. }
  214. }
  215. if s2 != nil {
  216. for _, v := range s2 {
  217. if !IsEpsilon(v) {
  218. res.Add(v)
  219. }
  220. }
  221. }
  222. return res
  223. }
  224. func (s Set) Union(s2 Set) Set {
  225. res := make(Set)
  226. for _, v := range s {
  227. res.Add(v)
  228. }
  229. for _, v := range s2 {
  230. res.Add(v)
  231. }
  232. return res
  233. }
  234. func (s Set) Intersect(s2 Set) Set {
  235. res := make(Set)
  236. for _, v := range s {
  237. if s2.Contains(v) {
  238. res.Add(v)
  239. }
  240. }
  241. return res
  242. }
  243. func (s Set) String() string {
  244. if len(s) == 0 {
  245. return "∅"
  246. }
  247. aid := []string{}
  248. for _, v := range s {
  249. aid = append(aid, v.String())
  250. }
  251. sort.Strings(aid)
  252. return strings.Join(aid, " ")
  253. }
  254. // Definition of a rule is nothing but a set of alternates,
  255. // where alternates can contain sequences and parenthesis.
  256. type Definition = Alternates
  257. type Rule struct {
  258. Name string
  259. Definition
  260. // Template for code generation for this rule.
  261. Template string
  262. // First set of the rule
  263. First Set
  264. // Follow set of the rule
  265. Follow Set
  266. // Nullable or not
  267. Nullable bool
  268. // Realizable or not
  269. Realizable bool
  270. // Recursive (i.e. LEFT-recursive, which LL(1) disallows)
  271. Recursive bool
  272. // Undefined nonterminals in this rule
  273. Undefined bool
  274. // Depth is the depth of (mutual) recursion of a rule. Limited to 16.
  275. Depth int
  276. }
  277. func (r Rule) String() string {
  278. return r.Name + " → " + r.Definition.String()
  279. }
  280. func (r Rule) IsTerminal() bool {
  281. return !unicode.IsUpper([]rune(r.Name)[0])
  282. }
  283. type Grammar struct {
  284. Top *Rule
  285. Rules []*Rule
  286. // Unique terminals in all Rules
  287. Terminals map[string]Terminal
  288. // Unique nonterminals in all Rules
  289. NonTerminal map[string]Rule
  290. // Unique literals in all Rules
  291. Literals map[string]Literal
  292. // Whether or not the grammar is LL(1). Only valid
  293. // after running Check()
  294. LL1 bool
  295. }
  296. func (g Grammar) String() string {
  297. res := "Top → " + g.Top.Name + "\n"
  298. for _, r := range g.Rules {
  299. res = res + r.String() + "\n"
  300. }
  301. return res
  302. }
  303. func (g Grammar) Lookup(nt Nonterminal) (*Rule, error) {
  304. for _, r := range g.Rules {
  305. if r.Name == nt.Token().Value {
  306. return r, nil
  307. }
  308. }
  309. return nil, nt.Token().MakeError("Undefined non terminal")
  310. }
  311. func (n Nonterminal) FirstSet(g *Grammar) (Set, error) {
  312. found, err := g.Lookup(n)
  313. if err != nil {
  314. return Set{}, err
  315. }
  316. return found.Definition.FirstSet(g)
  317. }
  318. func (n Nonterminal) FollowSet(g *Grammar, r *Rule) (Set, error) {
  319. found, err := g.Lookup(n)
  320. if err != nil {
  321. return Set{}, err
  322. }
  323. return found.Definition.FollowSet(g, r)
  324. }
  325. func (s Sequence) FirstSet(g *Grammar) (Set, error) {
  326. res := make(Set)
  327. for _, elt := range s.Elements {
  328. fs, err := elt.FirstSet(g)
  329. if err != nil {
  330. return res, err
  331. }
  332. res = res.UnionWithoutEpsilon(fs)
  333. if !fs.IsNullable() {
  334. return res, nil
  335. }
  336. }
  337. // If we get here, all sequence elementswere nullable.
  338. // Add an epsilon to the set
  339. res.Add(Epsilon{})
  340. return res, nil
  341. }
  342. // FollowSet returns the follow set for rule r in grammar g. for this Sequence.
  343. func (seq Sequence) FollowSet(g *Grammar, r *Rule) (Set, error) {
  344. res := make(Set)
  345. // If in one of the sequences of one of the rules of the grammar
  346. // the original rule is referred to by a nonterminal
  347. // then, the follow set is the first set of the following.
  348. // If that contains epsilon, go on to the following and so on
  349. for i := 1 ; i < len(seq.Elements) ; i++ {
  350. elt := seq.Elements[i]
  351. if nt, ok := elt.(Nonterminal) ; ok {
  352. if nt.Token().Value != r.Name {
  353. continue
  354. }
  355. first, err := nt.FirstSet(g)
  356. if err != nil {
  357. return res, err
  358. }
  359. for _, e := range first {
  360. fos, err := e.FollowSet(g, r)
  361. if err != nil {
  362. return res, err
  363. }
  364. res = res.UnionWithoutEpsilon(fos)
  365. }
  366. fmt.Printf("Found follow candidate: %v at %d, %d\n", nt, i, len(seq.Elements))
  367. for j := i + 1 ; j < len(seq.Elements) ; j ++ {
  368. next := seq.Elements[j]
  369. first, err := next.FirstSet(g)
  370. if err != nil {
  371. return res, err
  372. }
  373. // union first sets until it is not nillable
  374. res = res.UnionWithoutEpsilon(first)
  375. if !first.IsNullable() {
  376. break
  377. }
  378. }
  379. }
  380. }
  381. return res, nil
  382. }
  383. func (a Alternates) FirstSet(g *Grammar) (Set, error) {
  384. res := make(Set)
  385. for _, s := range a.Sequences {
  386. // Check Leftmost element
  387. if (len(s.Elements) > 0) {
  388. e := s.Elements[0]
  389. fs, err := e.FirstSet(g)
  390. if err != nil {
  391. return res, err
  392. }
  393. res = res.Union(fs)
  394. }
  395. }
  396. return res, nil
  397. }
  398. func (a Alternates) FollowSet(g *Grammar, r *Rule) (Set, error) {
  399. res := make(Set)
  400. for _, s := range a.Sequences {
  401. fos, err := s.FollowSet(g, r)
  402. if err != nil {
  403. return res, err
  404. }
  405. res = res.UnionWithoutEpsilon(fos)
  406. }
  407. return res, nil
  408. }
  409. // Rules are compared by name
  410. func (r Rule) Equals(o Rule) bool {
  411. return r.Name == o.Name
  412. }
  413. func (r *Rule) Check(g *Grammar) error {
  414. err := r.Definition.Check(g, r)
  415. if err != nil {
  416. return err
  417. }
  418. // Now calculate the first and follow set
  419. fs, err := r.Definition.FirstSet(g)
  420. r.First = r.First.Union(fs)
  421. return err
  422. }
  423. /*
  424. Computing the Follow-sets for the nonterminals in a grammar can be done as follows:
  425. initialize Fo(S) with { $ } and every other Fo(Ai) with the empty set
  426. if there is a rule of the form Aj → wAiw' , then
  427. if the terminal a is in Fi(w' ), then add a to Fo(Ai)
  428. if ε is in Fi(w' ), then add Fo(Aj) to Fo(Ai)
  429. if w' has length 0, then add Fo(Aj) to Fo(Ai)
  430. repeat step 2 until all Fo sets stay the same.
  431. This provides the least fixed point solution to the following system:
  432. */
  433. /*
  434. * function makeFollowSets() {
  435. followSets[rules[0].left].push(END_MARKER);
  436. let isSetChanged;
  437. do {
  438. isSetChanged = false;
  439. rules.forEach(({ left, right }) => {
  440. right.forEach((item, index) => {
  441. if (!isNonterminal(item)) return;
  442. let set = followSets[item];
  443. set = union(
  444. set,
  445. index + 1 < right.length
  446. ? collectSet(set, right.slice(index + 1), followSets[left])
  447. : followSets[left],
  448. );
  449. if (followSets[item].length !== set.length) {
  450. followSets[item] = set;
  451. isSetChanged = true;
  452. }
  453. });
  454. });
  455. } while (isSetChanged);
  456. return followSets;
  457. }
  458. */
  459. // FollowSet calculates the follow set of the rule in the grammar.
  460. func (r1 *Rule) FollowSet(g *Grammar) (Set, error) {
  461. // Top level has empty follow set
  462. if r1.Equals(*g.Top) {
  463. return make(Set), nil
  464. }
  465. set := make(Set)
  466. for _, r2 := range g.Rules {
  467. fos, err := r2.Definition.FollowSet(g, r1)
  468. if err != nil {
  469. return set, err
  470. }
  471. set = set.UnionWithoutEpsilon(fos)
  472. if !r1.Equals(*r2) {
  473. fos, err := r1.Definition.FollowSet(g, r2)
  474. if err != nil {
  475. return set, err
  476. }
  477. set = set.UnionWithoutEpsilon(fos)
  478. }
  479. }
  480. return set, nil
  481. }
  482. func (g * Grammar) CalculateFollowSets() error {
  483. g.Top.Follow = make(Set)
  484. g.Top.Follow.Add(End{})
  485. for _, r := range g.Rules {
  486. if !r.Equals(*g.Top) {
  487. r.Follow = make(Set)
  488. }
  489. }
  490. changed := false
  491. for {
  492. changed = false
  493. for _, r := range g.Rules {
  494. for _, seq := range r.Definition.Sequences {
  495. for index, elt := range seq.Elements {
  496. nt, ok := elt.(Nonterminal)
  497. if !ok {
  498. continue
  499. }
  500. found, err := g.Lookup(nt)
  501. if err != nil {
  502. return err
  503. }
  504. originalLength := len(found.Follow)
  505. // if w' has length 0, then add Fo(Aj) to Fo(Ai)
  506. if index + 1 == len(seq.Elements) {
  507. found.Follow = found.Follow.UnionWithoutEpsilon(r.Follow)
  508. } else {
  509. j := 1
  510. for ; index + j < len(seq.Elements); j++ {
  511. next := seq.Elements[index + j]
  512. // if ε is in Fi(w' ), then add Fo(Aj) to Fo(Ai)
  513. first, err := next.FirstSet(g)
  514. if err != nil {
  515. return err
  516. }
  517. found.Follow = found.Follow.UnionWithoutEpsilon(first)
  518. // if ε is in Fi(w' ), then add Fo(Aj) to Fo(Ai)
  519. if first.IsNullable() {
  520. found.Follow = found.Follow.UnionWithoutEpsilon(r.Follow)
  521. } else {
  522. break; // stop adding if the i+j'th element
  523. // is not nullable
  524. }
  525. }
  526. if index + j == len(seq.Elements) {
  527. found.Follow = found.Follow.UnionWithoutEpsilon(r.Follow)
  528. }
  529. }
  530. changed = changed || (originalLength != len(found.Follow))
  531. }
  532. }
  533. }
  534. if !changed {
  535. break
  536. }
  537. }
  538. return nil
  539. }
  540. func (r *Rule) FirstSet(g *Grammar) (Set, error) {
  541. return r.Definition.FirstSet(g)
  542. }
  543. func (g *Grammar) Check() []error {
  544. errs := []error{}
  545. // Check for left recursion
  546. for i, rule := range g.Rules {
  547. err := rule.Check(g)
  548. if err != nil {
  549. errs = append(errs, err)
  550. }
  551. // Store modifications by check to rule.
  552. g.Rules[i] = rule
  553. }
  554. for _, rule := range g.Rules {
  555. var err error
  556. tok := rule.Token()
  557. switch {
  558. case rule.Recursive:
  559. err = tok.MakeError("left recursive here")
  560. case rule.Undefined:
  561. err = tok.MakeError("has undefined elements")
  562. default:
  563. err = nil
  564. }
  565. if err != nil {
  566. errs = append(errs, err)
  567. }
  568. }
  569. err := g.CalculateFollowSets()
  570. if err != nil {
  571. errs = append(errs, err)
  572. }
  573. return errs
  574. }