ll1.go 17 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690
  1. package main
  2. // Use the go scanner so we don't need to write a lexer(yet)
  3. import "text/scanner"
  4. import "fmt"
  5. import "unicode"
  6. import "sort"
  7. import "strings"
  8. /*
  9. The meta-grammar for ll1 is:
  10. Grammar -> Rules .
  11. Rules -> Rule eor Rules | .
  12. Rule -> Name arrow Definition.
  13. Name -> identifier .
  14. Definition -> Alternates . // Alternates have a higher priority than sequences.
  15. Alternates -> Sequence or Alternates | .
  16. Sequence -> Element Sequence | Operator Sequence | .
  17. Operator -> plus Element | star Element .
  18. Element -> Parenthesis | Terminal | Literal .
  19. Parenthesis -> '(' Definition ')' .
  20. Terminal -> identifier .
  21. */
  22. type Token struct {
  23. Kind rune
  24. Value string
  25. Position scanner.Position
  26. }
  27. func (t Token) String() string {
  28. return fmt.Sprintf("token: %s %s %s", t.Position, scanner.TokenString(t.Kind), t.Value)
  29. }
  30. func (t Token) ShortString() string {
  31. return t.Value
  32. }
  33. func (t Token) MakeError(mesg string) error {
  34. return fmt.Errorf("%s: error at %s: <%s>: %s", t.Position, scanner.TokenString(t.Kind), t.Value, mesg)
  35. }
  36. /*
  37. Check if a Grammar is LL(1)
  38. May 9, 2020 by admin
  39. Predictive Parser is used to construct a parsing table for a class of grammar called LL(1).
  40. The first ‘L’ means input is scanned from left to right, second ‘L’ refers to Left-Derivation,
  41. and ‘1’ means the number of lookaheads. Rules of LL(1) Grammar
  42. A grammar is LL(1) if it satisfy the following rules for a production
  43. \bf A \bf \rightarrow \pmb \alpha | \pmb \beta for all \pmb \alpha,\pmb \beta .
  44. \bf \pmb \alpha,\pmb \beta do not derives same terminal.
  45. Only one of \pmb \alpha,\pmb \beta can derive ‘Є’.
  46. If \pmb \alpha derives ‘Є’, then \bf FIRST(\pmb \beta) should not contain any terminal that is in \bf FOLLOW(A). If \pmb \beta derives ‘Є’, then \bf FIRST(\pmb \alpha) should not contain any terminal that is in \bf FOLLOW(A).
  47. The first two conditions are equivalent to saying \bf FIRST(\pmb \alpha) \cap FIRST(\pmb \beta) = \pmb \varnothing . The third condition is equivalent to saying \bf FOLLOW(A) \cap FIRST(\pmb \beta) = \pmb \varnothing if \pmb \alpha derives ‘Є’ and \bf FOLLOW(A) \cap FIRST(\pmb \alpha) = \pmb \varnothing if \pmb \beta derives ‘Є’.
  48. Note
  49. If a Grammar has Left-Recursion then it is not LL(1).Example, \bf A \bf \rightarrow A\pmb \beta . The Parser will not able to determine deterministically how many times production \bf A \bf \rightarrow A\pmb \beta must be used.
  50. */
  51. type element struct {
  52. token Token
  53. }
  54. func (element ) isElement() {
  55. }
  56. func (e element) String() string {
  57. return e.token.ShortString()
  58. }
  59. func (e element) Check(g *Grammar, r * Rule) error {
  60. return nil
  61. }
  62. func (e element) Token() Token {
  63. return e.token
  64. }
  65. func (e element) FirstSet(g *Grammar) (Set, error) {
  66. res := make(Set)
  67. res.Add(e)
  68. return res, nil
  69. }
  70. func (e element) FollowSet(g *Grammar, r *Rule) (Set, error) {
  71. // most elements, i.e. terminals and epsilon have no follow set
  72. // so return an empty set
  73. res := make(Set)
  74. return res, nil
  75. }
  76. // Epsilon is the empty rule.
  77. type Epsilon struct {
  78. element
  79. }
  80. func (e Epsilon) String() string {
  81. return "ε"
  82. }
  83. // End corresponds to EOF/ end of the input.
  84. type End struct {
  85. element
  86. }
  87. func (e End) String() string {
  88. return "$"
  89. }
  90. type Terminal struct {
  91. element
  92. }
  93. func (t Terminal) String() string {
  94. return t.Token().Value
  95. }
  96. type Nonterminal struct {
  97. element
  98. *Rule
  99. }
  100. func (n Nonterminal) String() string {
  101. return n.Token().Value
  102. }
  103. // Nonterminals can cause recursion
  104. func (n Nonterminal) Check(g *Grammar, r *Rule) error {
  105. found, err := g.Lookup(n)
  106. if err != nil {
  107. r.Undefined = true
  108. return err
  109. }
  110. if r.Equals(*found) {
  111. r.Recursive = true
  112. return n.Token().MakeError("left recursive")
  113. }
  114. // still need to check recursively as well
  115. if r.Depth < 16 {
  116. r.Depth ++
  117. return found.Definition.Check(g, r)
  118. }
  119. return n.Token().MakeError("left recursive or recursion too deep")
  120. }
  121. func (a Alternates) Check(g *Grammar, r *Rule) error {
  122. for _, s := range a.Sequences {
  123. // check Leftmost element for left recursion
  124. if (len(s.Elements) > 0) {
  125. e := s.Elements[0]
  126. err := e.Check(g, r)
  127. if err != nil {
  128. return err
  129. }
  130. }
  131. }
  132. return nil
  133. }
  134. type Literal struct {
  135. Terminal
  136. }
  137. func (l Literal) String() string {
  138. return "\"" + l.Token().Value + "\""
  139. }
  140. // Element can be one of Terminal, Nonterminal, Literal, or Alternates
  141. // in case of a sub-rule.
  142. type Element interface {
  143. isElement()
  144. Token() Token
  145. String() string
  146. Check(g *Grammar, r * Rule) error
  147. FirstSet(g *Grammar) (Set, error)
  148. FollowSet(g *Grammar, r *Rule) (Set, error)
  149. }
  150. type Sequence struct {
  151. Elements []Element
  152. }
  153. func (s Sequence) String() string {
  154. sep := ""
  155. res := ""
  156. for _, seq := range s.Elements {
  157. res = res + sep + seq.String()
  158. sep = " "
  159. }
  160. return res
  161. }
  162. type Alternates struct {
  163. element
  164. Sequences []Sequence
  165. }
  166. func (a Alternates) String() string {
  167. sep := ""
  168. res := "("
  169. for _, seq := range a.Sequences {
  170. res = res + sep + seq.String()
  171. sep = " | "
  172. }
  173. return res + ")"
  174. }
  175. type Set map[string]Element
  176. func IsEpsilon(e Element) bool {
  177. _, ok := e.(Epsilon)
  178. return ok
  179. }
  180. func IsNonterminal(e Element) bool {
  181. _, ok := e.(Nonterminal)
  182. return ok
  183. }
  184. // whether an element set is nullable (i.e. contains epsilon)
  185. func (s Set) IsNullable() bool {
  186. for _, e := range s {
  187. if IsEpsilon(e) {
  188. return true
  189. }
  190. }
  191. return false
  192. }
  193. func (s Set) Contains(e Element) bool {
  194. name := e.Token().Value
  195. v, ok := s[name]
  196. return v!= nil && ok
  197. }
  198. func (s * Set) Add(e Element) bool {
  199. name := e.Token().Value
  200. v, ok := (*s)[name]
  201. if v!= nil || ok {
  202. return false
  203. }
  204. (*s)[e.Token().Value] = e
  205. return true
  206. }
  207. func (s Set) UnionWithoutEpsilon(s2 Set) Set {
  208. res := make(Set)
  209. if s != nil {
  210. for _, v := range s {
  211. if !IsEpsilon(v) {
  212. res.Add(v)
  213. }
  214. }
  215. }
  216. if s2 != nil {
  217. for _, v := range s2 {
  218. if !IsEpsilon(v) {
  219. res.Add(v)
  220. }
  221. }
  222. }
  223. return res
  224. }
  225. func (s Set) Union(s2 Set) Set {
  226. res := make(Set)
  227. for _, v := range s {
  228. res.Add(v)
  229. }
  230. for _, v := range s2 {
  231. res.Add(v)
  232. }
  233. return res
  234. }
  235. func (s Set) Intersect(s2 Set) Set {
  236. res := make(Set)
  237. for _, v := range s {
  238. if s2.Contains(v) {
  239. res.Add(v)
  240. }
  241. }
  242. return res
  243. }
  244. func (s Set) String() string {
  245. if len(s) == 0 {
  246. return "∅"
  247. }
  248. aid := []string{}
  249. for _, v := range s {
  250. aid = append(aid, v.String())
  251. }
  252. sort.Strings(aid)
  253. return strings.Join(aid, " ")
  254. }
  255. func (s Set) ToTokenKinds() []rune {
  256. if len(s) == 0 {
  257. return []rune{}
  258. }
  259. aid := []rune{}
  260. for _, v := range s {
  261. t , ok := v.(Terminal)
  262. if !ok {
  263. continue
  264. }
  265. if len(t.token.Value) == 1 {
  266. aid = append(aid, rune(t.token.Value[0]))
  267. }
  268. }
  269. return aid
  270. }
  271. // Definition of a rule is nothing but a set of alternates,
  272. // where alternates can contain sequences and parenthesis.
  273. type Definition = Alternates
  274. type Rule struct {
  275. Name string
  276. Definition
  277. // Template for code generation for this rule.
  278. Template string
  279. // First set of the rule
  280. First Set
  281. // Follow set of the rule
  282. Follow Set
  283. // Nullable or not
  284. Nullable bool
  285. // Realizable or not
  286. Realizable bool
  287. // Recursive (i.e. LEFT-recursive, which LL(1) disallows)
  288. Recursive bool
  289. // Undefined nonterminals in this rule
  290. Undefined bool
  291. // Depth is the depth of (mutual) recursion of a rule. Limited to 16.
  292. Depth int
  293. }
  294. func (r Rule) String() string {
  295. return r.Name + " → " + r.Definition.String()
  296. }
  297. func (r Rule) IsTerminal() bool {
  298. return !unicode.IsUpper([]rune(r.Name)[0])
  299. }
  300. type Grammar struct {
  301. Top *Rule
  302. Rules []*Rule
  303. // Unique terminals in all Rules
  304. Terminals map[string]Terminal
  305. // Unique nonterminals in all Rules
  306. NonTerminal map[string]Rule
  307. // Unique literals in all Rules
  308. Literals map[string]Literal
  309. // Whether or not the grammar is LL(1). Only valid
  310. // after running Check()
  311. LL1 bool
  312. }
  313. func (g Grammar) String() string {
  314. res := "Top → " + g.Top.Name + "\n"
  315. for _, r := range g.Rules {
  316. res = res + r.String() + "\n"
  317. }
  318. return res
  319. }
  320. func (g Grammar) Lookup(nt Nonterminal) (*Rule, error) {
  321. for _, r := range g.Rules {
  322. if r.Name == nt.Token().Value {
  323. return r, nil
  324. }
  325. }
  326. return nil, nt.Token().MakeError("Undefined non terminal")
  327. }
  328. func (n Nonterminal) FirstSet(g *Grammar) (Set, error) {
  329. found, err := g.Lookup(n)
  330. if err != nil {
  331. return Set{}, err
  332. }
  333. return found.Definition.FirstSet(g)
  334. }
  335. func (n Nonterminal) FollowSet(g *Grammar, r *Rule) (Set, error) {
  336. found, err := g.Lookup(n)
  337. if err != nil {
  338. return Set{}, err
  339. }
  340. return found.Definition.FollowSet(g, r)
  341. }
  342. func (s Sequence) FirstSet(g *Grammar) (Set, error) {
  343. res := make(Set)
  344. for _, elt := range s.Elements {
  345. fs, err := elt.FirstSet(g)
  346. if err != nil {
  347. return res, err
  348. }
  349. res = res.UnionWithoutEpsilon(fs)
  350. if !fs.IsNullable() {
  351. return res, nil
  352. }
  353. }
  354. // If we get here, all sequence elementswere nullable.
  355. // Add an epsilon to the set
  356. res.Add(Epsilon{})
  357. return res, nil
  358. }
  359. // FollowSet returns the follow set for rule r in grammar g. for this Sequence.
  360. func (seq Sequence) FollowSet(g *Grammar, r *Rule) (Set, error) {
  361. res := make(Set)
  362. // If in one of the sequences of one of the rules of the grammar
  363. // the original rule is referred to by a nonterminal
  364. // then, the follow set is the first set of the following.
  365. // If that contains epsilon, go on to the following and so on
  366. for i := 1 ; i < len(seq.Elements) ; i++ {
  367. elt := seq.Elements[i]
  368. if nt, ok := elt.(Nonterminal) ; ok {
  369. if nt.Token().Value != r.Name {
  370. continue
  371. }
  372. first, err := nt.FirstSet(g)
  373. if err != nil {
  374. return res, err
  375. }
  376. for _, e := range first {
  377. fos, err := e.FollowSet(g, r)
  378. if err != nil {
  379. return res, err
  380. }
  381. res = res.UnionWithoutEpsilon(fos)
  382. }
  383. fmt.Printf("Found follow candidate: %v at %d, %d\n", nt, i, len(seq.Elements))
  384. for j := i + 1 ; j < len(seq.Elements) ; j ++ {
  385. next := seq.Elements[j]
  386. first, err := next.FirstSet(g)
  387. if err != nil {
  388. return res, err
  389. }
  390. // union first sets until it is not nillable
  391. res = res.UnionWithoutEpsilon(first)
  392. if !first.IsNullable() {
  393. break
  394. }
  395. }
  396. }
  397. }
  398. return res, nil
  399. }
  400. func (a Alternates) FirstSet(g *Grammar) (Set, error) {
  401. res := make(Set)
  402. for _, s := range a.Sequences {
  403. // Check Leftmost element
  404. if (len(s.Elements) > 0) {
  405. e := s.Elements[0]
  406. fs, err := e.FirstSet(g)
  407. if err != nil {
  408. return res, err
  409. }
  410. res = res.Union(fs)
  411. }
  412. }
  413. return res, nil
  414. }
  415. func (a Alternates) FollowSet(g *Grammar, r *Rule) (Set, error) {
  416. res := make(Set)
  417. for _, s := range a.Sequences {
  418. fos, err := s.FollowSet(g, r)
  419. if err != nil {
  420. return res, err
  421. }
  422. res = res.UnionWithoutEpsilon(fos)
  423. }
  424. return res, nil
  425. }
  426. // Rules are compared by name
  427. func (r Rule) Equals(o Rule) bool {
  428. return r.Name == o.Name
  429. }
  430. func (r *Rule) Check(g *Grammar) error {
  431. err := r.Definition.Check(g, r)
  432. if err != nil {
  433. return err
  434. }
  435. // Now calculate the first and follow set
  436. fs, err := r.Definition.FirstSet(g)
  437. r.First = r.First.Union(fs)
  438. return err
  439. }
  440. /*
  441. Computing the Follow-sets for the nonterminals in a grammar can be done as follows:
  442. initialize Fo(S) with { $ } and every other Fo(Ai) with the empty set
  443. if there is a rule of the form Aj → wAiw' , then
  444. if the terminal a is in Fi(w' ), then add a to Fo(Ai)
  445. if ε is in Fi(w' ), then add Fo(Aj) to Fo(Ai)
  446. if w' has length 0, then add Fo(Aj) to Fo(Ai)
  447. repeat step 2 until all Fo sets stay the same.
  448. This provides the least fixed point solution to the following system:
  449. */
  450. /*
  451. * function makeFollowSets() {
  452. followSets[rules[0].left].push(END_MARKER);
  453. let isSetChanged;
  454. do {
  455. isSetChanged = false;
  456. rules.forEach(({ left, right }) => {
  457. right.forEach((item, index) => {
  458. if (!isNonterminal(item)) return;
  459. let set = followSets[item];
  460. set = union(
  461. set,
  462. index + 1 < right.length
  463. ? collectSet(set, right.slice(index + 1), followSets[left])
  464. : followSets[left],
  465. );
  466. if (followSets[item].length !== set.length) {
  467. followSets[item] = set;
  468. isSetChanged = true;
  469. }
  470. });
  471. });
  472. } while (isSetChanged);
  473. return followSets;
  474. }
  475. */
  476. // FollowSet calculates the follow set of the rule in the grammar.
  477. func (r1 *Rule) FollowSet(g *Grammar) (Set, error) {
  478. // Top level has empty follow set
  479. if r1.Equals(*g.Top) {
  480. return make(Set), nil
  481. }
  482. set := make(Set)
  483. for _, r2 := range g.Rules {
  484. fos, err := r2.Definition.FollowSet(g, r1)
  485. if err != nil {
  486. return set, err
  487. }
  488. set = set.UnionWithoutEpsilon(fos)
  489. if !r1.Equals(*r2) {
  490. fos, err := r1.Definition.FollowSet(g, r2)
  491. if err != nil {
  492. return set, err
  493. }
  494. set = set.UnionWithoutEpsilon(fos)
  495. }
  496. }
  497. return set, nil
  498. }
  499. func (g * Grammar) CalculateFollowSets() error {
  500. g.Top.Follow = make(Set)
  501. g.Top.Follow.Add(End{})
  502. for _, r := range g.Rules {
  503. if !r.Equals(*g.Top) {
  504. r.Follow = make(Set)
  505. }
  506. }
  507. changed := false
  508. for {
  509. changed = false
  510. for _, r := range g.Rules {
  511. for _, seq := range r.Definition.Sequences {
  512. for index, elt := range seq.Elements {
  513. nt, ok := elt.(Nonterminal)
  514. if !ok {
  515. continue
  516. }
  517. found, err := g.Lookup(nt)
  518. if err != nil {
  519. return err
  520. }
  521. originalLength := len(found.Follow)
  522. // if w' has length 0, then add Fo(Aj) to Fo(Ai)
  523. if index + 1 == len(seq.Elements) {
  524. found.Follow = found.Follow.UnionWithoutEpsilon(r.Follow)
  525. } else {
  526. j := 1
  527. for ; index + j < len(seq.Elements); j++ {
  528. next := seq.Elements[index + j]
  529. // if ε is in Fi(w' ), then add Fo(Aj) to Fo(Ai)
  530. first, err := next.FirstSet(g)
  531. if err != nil {
  532. return err
  533. }
  534. found.Follow = found.Follow.UnionWithoutEpsilon(first)
  535. // if ε is in Fi(w' ), then add Fo(Aj) to Fo(Ai)
  536. if first.IsNullable() {
  537. found.Follow = found.Follow.UnionWithoutEpsilon(r.Follow)
  538. } else {
  539. break; // stop adding if the i+j'th element
  540. // is not nullable
  541. }
  542. }
  543. if index + j == len(seq.Elements) {
  544. found.Follow = found.Follow.UnionWithoutEpsilon(r.Follow)
  545. }
  546. }
  547. changed = changed || (originalLength != len(found.Follow))
  548. }
  549. }
  550. }
  551. if !changed {
  552. break
  553. }
  554. }
  555. return nil
  556. }
  557. func (r *Rule) FirstSet(g *Grammar) (Set, error) {
  558. return r.Definition.FirstSet(g)
  559. }
  560. func (g *Grammar) Check() []error {
  561. errs := []error{}
  562. // Check for left recursion
  563. for i, rule := range g.Rules {
  564. err := rule.Check(g)
  565. if err != nil {
  566. errs = append(errs, err)
  567. }
  568. // Store modifications by check to rule.
  569. g.Rules[i] = rule
  570. }
  571. for _, rule := range g.Rules {
  572. var err error
  573. tok := rule.Token()
  574. switch {
  575. case rule.Recursive:
  576. err = tok.MakeError("left recursive here")
  577. case rule.Undefined:
  578. err = tok.MakeError("has undefined elements")
  579. default:
  580. err = nil
  581. }
  582. if err != nil {
  583. errs = append(errs, err)
  584. }
  585. }
  586. err := g.CalculateFollowSets()
  587. if err != nil {
  588. errs = append(errs, err)
  589. }
  590. return errs
  591. }