123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690 |
- package main
- // Use the go scanner so we don't need to write a lexer(yet)
- import "text/scanner"
- import "fmt"
- import "unicode"
- import "sort"
- import "strings"
- /*
- The meta-grammar for ll1 is:
- Grammar -> Rules .
- Rules -> Rule eor Rules | .
- Rule -> Name arrow Definition.
- Name -> identifier .
- Definition -> Alternates . // Alternates have a higher priority than sequences.
- Alternates -> Sequence or Alternates | .
- Sequence -> Element Sequence | Operator Sequence | .
- Operator -> plus Element | star Element .
- Element -> Parenthesis | Terminal | Literal .
- Parenthesis -> '(' Definition ')' .
- Terminal -> identifier .
- */
- type Token struct {
- Kind rune
- Value string
- Position scanner.Position
- }
- func (t Token) String() string {
- return fmt.Sprintf("token: %s %s %s", t.Position, scanner.TokenString(t.Kind), t.Value)
- }
- func (t Token) ShortString() string {
- return t.Value
- }
- func (t Token) MakeError(mesg string) error {
- return fmt.Errorf("%s: error at %s: <%s>: %s", t.Position, scanner.TokenString(t.Kind), t.Value, mesg)
- }
- /*
- Check if a Grammar is LL(1)
- May 9, 2020 by admin
- Predictive Parser is used to construct a parsing table for a class of grammar called LL(1).
- The first ‘L’ means input is scanned from left to right, second ‘L’ refers to Left-Derivation,
- and ‘1’ means the number of lookaheads. Rules of LL(1) Grammar
- A grammar is LL(1) if it satisfy the following rules for a production
- \bf A \bf \rightarrow \pmb \alpha | \pmb \beta for all \pmb \alpha,\pmb \beta .
- \bf \pmb \alpha,\pmb \beta do not derives same terminal.
- Only one of \pmb \alpha,\pmb \beta can derive ‘Є’.
- If \pmb \alpha derives ‘Є’, then \bf FIRST(\pmb \beta) should not contain any terminal that is in \bf FOLLOW(A). If \pmb \beta derives ‘Є’, then \bf FIRST(\pmb \alpha) should not contain any terminal that is in \bf FOLLOW(A).
- The first two conditions are equivalent to saying \bf FIRST(\pmb \alpha) \cap FIRST(\pmb \beta) = \pmb \varnothing . The third condition is equivalent to saying \bf FOLLOW(A) \cap FIRST(\pmb \beta) = \pmb \varnothing if \pmb \alpha derives ‘Є’ and \bf FOLLOW(A) \cap FIRST(\pmb \alpha) = \pmb \varnothing if \pmb \beta derives ‘Є’.
- Note
- If a Grammar has Left-Recursion then it is not LL(1).Example, \bf A \bf \rightarrow A\pmb \beta . The Parser will not able to determine deterministically how many times production \bf A \bf \rightarrow A\pmb \beta must be used.
- */
- type element struct {
- token Token
- }
- func (element ) isElement() {
- }
- func (e element) String() string {
- return e.token.ShortString()
- }
- func (e element) Check(g *Grammar, r * Rule) error {
- return nil
- }
- func (e element) Token() Token {
- return e.token
- }
- func (e element) FirstSet(g *Grammar) (Set, error) {
- res := make(Set)
- res.Add(e)
- return res, nil
- }
- func (e element) FollowSet(g *Grammar, r *Rule) (Set, error) {
- // most elements, i.e. terminals and epsilon have no follow set
- // so return an empty set
- res := make(Set)
- return res, nil
- }
- // Epsilon is the empty rule.
- type Epsilon struct {
- element
- }
- func (e Epsilon) String() string {
- return "ε"
- }
- // End corresponds to EOF/ end of the input.
- type End struct {
- element
- }
- func (e End) String() string {
- return "$"
- }
- type Terminal struct {
- element
- }
- func (t Terminal) String() string {
- return t.Token().Value
- }
- type Nonterminal struct {
- element
- *Rule
- }
- func (n Nonterminal) String() string {
- return n.Token().Value
- }
- // Nonterminals can cause recursion
- func (n Nonterminal) Check(g *Grammar, r *Rule) error {
- found, err := g.Lookup(n)
- if err != nil {
- r.Undefined = true
- return err
- }
- if r.Equals(*found) {
- r.Recursive = true
- return n.Token().MakeError("left recursive")
- }
- // still need to check recursively as well
- if r.Depth < 16 {
- r.Depth ++
- return found.Definition.Check(g, r)
- }
- return n.Token().MakeError("left recursive or recursion too deep")
- }
-
- func (a Alternates) Check(g *Grammar, r *Rule) error {
- for _, s := range a.Sequences {
- // check Leftmost element for left recursion
- if (len(s.Elements) > 0) {
- e := s.Elements[0]
- err := e.Check(g, r)
- if err != nil {
- return err
- }
- }
- }
- return nil
- }
- type Literal struct {
- Terminal
- }
- func (l Literal) String() string {
- return "\"" + l.Token().Value + "\""
- }
- // Element can be one of Terminal, Nonterminal, Literal, or Alternates
- // in case of a sub-rule.
- type Element interface {
- isElement()
- Token() Token
- String() string
- Check(g *Grammar, r * Rule) error
- FirstSet(g *Grammar) (Set, error)
- FollowSet(g *Grammar, r *Rule) (Set, error)
- }
- type Sequence struct {
- Elements []Element
- }
- func (s Sequence) String() string {
- sep := ""
- res := ""
- for _, seq := range s.Elements {
- res = res + sep + seq.String()
- sep = " "
- }
- return res
- }
- type Alternates struct {
- element
- Sequences []Sequence
- }
- func (a Alternates) String() string {
- sep := ""
- res := "("
- for _, seq := range a.Sequences {
- res = res + sep + seq.String()
- sep = " | "
- }
- return res + ")"
- }
- type Set map[string]Element
- func IsEpsilon(e Element) bool {
- _, ok := e.(Epsilon)
- return ok
- }
- func IsNonterminal(e Element) bool {
- _, ok := e.(Nonterminal)
- return ok
- }
- // whether an element set is nullable (i.e. contains epsilon)
- func (s Set) IsNullable() bool {
- for _, e := range s {
- if IsEpsilon(e) {
- return true
- }
- }
- return false
- }
- func (s Set) Contains(e Element) bool {
- name := e.Token().Value
- v, ok := s[name]
- return v!= nil && ok
- }
- func (s * Set) Add(e Element) bool {
- name := e.Token().Value
- v, ok := (*s)[name]
- if v!= nil || ok {
- return false
- }
- (*s)[e.Token().Value] = e
- return true
- }
- func (s Set) UnionWithoutEpsilon(s2 Set) Set {
- res := make(Set)
- if s != nil {
- for _, v := range s {
- if !IsEpsilon(v) {
- res.Add(v)
- }
- }
- }
- if s2 != nil {
- for _, v := range s2 {
- if !IsEpsilon(v) {
- res.Add(v)
- }
- }
- }
- return res
- }
- func (s Set) Union(s2 Set) Set {
- res := make(Set)
- for _, v := range s {
- res.Add(v)
- }
- for _, v := range s2 {
- res.Add(v)
- }
- return res
- }
- func (s Set) Intersect(s2 Set) Set {
- res := make(Set)
- for _, v := range s {
- if s2.Contains(v) {
- res.Add(v)
- }
- }
- return res
- }
- func (s Set) String() string {
- if len(s) == 0 {
- return "∅"
- }
- aid := []string{}
- for _, v := range s {
- aid = append(aid, v.String())
- }
- sort.Strings(aid)
- return strings.Join(aid, " ")
- }
- func (s Set) ToTokenKinds() []rune {
- if len(s) == 0 {
- return []rune{}
- }
- aid := []rune{}
- for _, v := range s {
- t , ok := v.(Terminal)
- if !ok {
- continue
- }
- if len(t.token.Value) == 1 {
- aid = append(aid, rune(t.token.Value[0]))
- }
- }
- return aid
- }
- // Definition of a rule is nothing but a set of alternates,
- // where alternates can contain sequences and parenthesis.
- type Definition = Alternates
- type Rule struct {
- Name string
- Definition
- // Template for code generation for this rule.
- Template string
- // First set of the rule
- First Set
- // Follow set of the rule
- Follow Set
- // Nullable or not
- Nullable bool
- // Realizable or not
- Realizable bool
- // Recursive (i.e. LEFT-recursive, which LL(1) disallows)
- Recursive bool
- // Undefined nonterminals in this rule
- Undefined bool
- // Depth is the depth of (mutual) recursion of a rule. Limited to 16.
- Depth int
- }
- func (r Rule) String() string {
- return r.Name + " → " + r.Definition.String()
- }
-
- func (r Rule) IsTerminal() bool {
- return !unicode.IsUpper([]rune(r.Name)[0])
- }
-
- type Grammar struct {
- Top *Rule
- Rules []*Rule
- // Unique terminals in all Rules
- Terminals map[string]Terminal
- // Unique nonterminals in all Rules
- NonTerminal map[string]Rule
- // Unique literals in all Rules
- Literals map[string]Literal
- // Whether or not the grammar is LL(1). Only valid
- // after running Check()
- LL1 bool
- }
- func (g Grammar) String() string {
- res := "Top → " + g.Top.Name + "\n"
- for _, r := range g.Rules {
- res = res + r.String() + "\n"
- }
- return res
- }
- func (g Grammar) Lookup(nt Nonterminal) (*Rule, error) {
- for _, r := range g.Rules {
- if r.Name == nt.Token().Value {
- return r, nil
- }
- }
- return nil, nt.Token().MakeError("Undefined non terminal")
- }
- func (n Nonterminal) FirstSet(g *Grammar) (Set, error) {
- found, err := g.Lookup(n)
- if err != nil {
- return Set{}, err
- }
- return found.Definition.FirstSet(g)
- }
- func (n Nonterminal) FollowSet(g *Grammar, r *Rule) (Set, error) {
- found, err := g.Lookup(n)
- if err != nil {
- return Set{}, err
- }
- return found.Definition.FollowSet(g, r)
- }
- func (s Sequence) FirstSet(g *Grammar) (Set, error) {
- res := make(Set)
- for _, elt := range s.Elements {
- fs, err := elt.FirstSet(g)
- if err != nil {
- return res, err
- }
- res = res.UnionWithoutEpsilon(fs)
- if !fs.IsNullable() {
- return res, nil
- }
- }
- // If we get here, all sequence elementswere nullable.
- // Add an epsilon to the set
- res.Add(Epsilon{})
- return res, nil
- }
- // FollowSet returns the follow set for rule r in grammar g. for this Sequence.
- func (seq Sequence) FollowSet(g *Grammar, r *Rule) (Set, error) {
- res := make(Set)
- // If in one of the sequences of one of the rules of the grammar
- // the original rule is referred to by a nonterminal
- // then, the follow set is the first set of the following.
- // If that contains epsilon, go on to the following and so on
- for i := 1 ; i < len(seq.Elements) ; i++ {
- elt := seq.Elements[i]
- if nt, ok := elt.(Nonterminal) ; ok {
- if nt.Token().Value != r.Name {
- continue
- }
- first, err := nt.FirstSet(g)
- if err != nil {
- return res, err
- }
- for _, e := range first {
- fos, err := e.FollowSet(g, r)
- if err != nil {
- return res, err
- }
- res = res.UnionWithoutEpsilon(fos)
- }
- fmt.Printf("Found follow candidate: %v at %d, %d\n", nt, i, len(seq.Elements))
- for j := i + 1 ; j < len(seq.Elements) ; j ++ {
- next := seq.Elements[j]
- first, err := next.FirstSet(g)
- if err != nil {
- return res, err
- }
- // union first sets until it is not nillable
- res = res.UnionWithoutEpsilon(first)
- if !first.IsNullable() {
- break
- }
- }
- }
- }
- return res, nil
- }
-
- func (a Alternates) FirstSet(g *Grammar) (Set, error) {
- res := make(Set)
- for _, s := range a.Sequences {
- // Check Leftmost element
- if (len(s.Elements) > 0) {
- e := s.Elements[0]
- fs, err := e.FirstSet(g)
- if err != nil {
- return res, err
- }
- res = res.Union(fs)
- }
- }
- return res, nil
- }
- func (a Alternates) FollowSet(g *Grammar, r *Rule) (Set, error) {
- res := make(Set)
- for _, s := range a.Sequences {
- fos, err := s.FollowSet(g, r)
- if err != nil {
- return res, err
- }
- res = res.UnionWithoutEpsilon(fos)
- }
- return res, nil
- }
- // Rules are compared by name
- func (r Rule) Equals(o Rule) bool {
- return r.Name == o.Name
- }
- func (r *Rule) Check(g *Grammar) error {
- err := r.Definition.Check(g, r)
- if err != nil {
- return err
- }
- // Now calculate the first and follow set
- fs, err := r.Definition.FirstSet(g)
- r.First = r.First.Union(fs)
- return err
- }
- /*
- Computing the Follow-sets for the nonterminals in a grammar can be done as follows:
- initialize Fo(S) with { $ } and every other Fo(Ai) with the empty set
- if there is a rule of the form Aj → wAiw' , then
- if the terminal a is in Fi(w' ), then add a to Fo(Ai)
- if ε is in Fi(w' ), then add Fo(Aj) to Fo(Ai)
- if w' has length 0, then add Fo(Aj) to Fo(Ai)
- repeat step 2 until all Fo sets stay the same.
- This provides the least fixed point solution to the following system:
- */
- /*
- * function makeFollowSets() {
- followSets[rules[0].left].push(END_MARKER);
- let isSetChanged;
- do {
- isSetChanged = false;
- rules.forEach(({ left, right }) => {
- right.forEach((item, index) => {
- if (!isNonterminal(item)) return;
- let set = followSets[item];
- set = union(
- set,
- index + 1 < right.length
- ? collectSet(set, right.slice(index + 1), followSets[left])
- : followSets[left],
- );
- if (followSets[item].length !== set.length) {
- followSets[item] = set;
- isSetChanged = true;
- }
- });
- });
- } while (isSetChanged);
- return followSets;
- }
- */
- // FollowSet calculates the follow set of the rule in the grammar.
- func (r1 *Rule) FollowSet(g *Grammar) (Set, error) {
- // Top level has empty follow set
- if r1.Equals(*g.Top) {
- return make(Set), nil
- }
-
- set := make(Set)
-
- for _, r2 := range g.Rules {
- fos, err := r2.Definition.FollowSet(g, r1)
- if err != nil {
- return set, err
- }
- set = set.UnionWithoutEpsilon(fos)
- if !r1.Equals(*r2) {
- fos, err := r1.Definition.FollowSet(g, r2)
- if err != nil {
- return set, err
- }
- set = set.UnionWithoutEpsilon(fos)
- }
- }
- return set, nil
- }
- func (g * Grammar) CalculateFollowSets() error {
- g.Top.Follow = make(Set)
- g.Top.Follow.Add(End{})
- for _, r := range g.Rules {
- if !r.Equals(*g.Top) {
- r.Follow = make(Set)
- }
- }
- changed := false
- for {
- changed = false
- for _, r := range g.Rules {
- for _, seq := range r.Definition.Sequences {
- for index, elt := range seq.Elements {
- nt, ok := elt.(Nonterminal)
- if !ok {
- continue
- }
- found, err := g.Lookup(nt)
- if err != nil {
- return err
- }
- originalLength := len(found.Follow)
- // if w' has length 0, then add Fo(Aj) to Fo(Ai)
- if index + 1 == len(seq.Elements) {
- found.Follow = found.Follow.UnionWithoutEpsilon(r.Follow)
- } else {
- j := 1
- for ; index + j < len(seq.Elements); j++ {
- next := seq.Elements[index + j]
- // if ε is in Fi(w' ), then add Fo(Aj) to Fo(Ai)
- first, err := next.FirstSet(g)
- if err != nil {
- return err
- }
- found.Follow = found.Follow.UnionWithoutEpsilon(first)
- // if ε is in Fi(w' ), then add Fo(Aj) to Fo(Ai)
- if first.IsNullable() {
- found.Follow = found.Follow.UnionWithoutEpsilon(r.Follow)
- } else {
- break; // stop adding if the i+j'th element
- // is not nullable
- }
- }
- if index + j == len(seq.Elements) {
- found.Follow = found.Follow.UnionWithoutEpsilon(r.Follow)
- }
- }
- changed = changed || (originalLength != len(found.Follow))
- }
- }
- }
- if !changed {
- break
- }
- }
- return nil
- }
- func (r *Rule) FirstSet(g *Grammar) (Set, error) {
- return r.Definition.FirstSet(g)
- }
- func (g *Grammar) Check() []error {
- errs := []error{}
- // Check for left recursion
- for i, rule := range g.Rules {
- err := rule.Check(g)
- if err != nil {
- errs = append(errs, err)
- }
- // Store modifications by check to rule.
- g.Rules[i] = rule
- }
-
- for _, rule := range g.Rules {
- var err error
- tok := rule.Token()
- switch {
- case rule.Recursive:
- err = tok.MakeError("left recursive here")
- case rule.Undefined:
- err = tok.MakeError("has undefined elements")
- default:
- err = nil
- }
- if err != nil {
- errs = append(errs, err)
- }
- }
-
- err := g.CalculateFollowSets()
- if err != nil {
- errs = append(errs, err)
- }
-
- return errs
- }
|