// Grammar describes the rules for a particular ll-parsable language. package grammar import "fmt" import "unicode" import "sort" import "strings" import . "src.eruta.nl/beoran/ll1/common" // Grammars consists of rules. A Rule can be a Terminal, // which includes the special terminals Epsilon and End, // or one of the following nonterminals: and Alternate, a Sequence or a Reference. type Rule interface { // Name is the optional name of the rule. If empty, the rule is anonymous. Name() string // Definition is the detail of how the rule should be parsed. // For a Terminal Rule this is the rule itself. Definition() Rule // Rules can be stringified String() string Check(g *Grammar, r Rule) error // The first set of the rule FirstSet() (Set, error) FollowSet(g *Grammar) (Set, error) } // A set is a set of terminals, either the first set or the follow set // of a rule in the grammar. type Set map[string]Terminal type BasicRule struct { name string } func (r BasicRule) Definition() Rule { return r } func (e BasicRule) Name() string { return e.name } func (e BasicRule) String() string { return fmt.Sprintf("%s", e.name) } func (e BasicRule) Check(g *Grammar, r Rule) error { return nil } func (e BasicRule) FirstSet() (Set, error) { res := make(Set) return res, nil } func (e BasicRule) FollowSet(g *Grammar) (Set, error) { res := make(Set) return res, nil } func (r BasicRule) IsTerminal() bool { return !unicode.IsUpper([]rune(r.name)[0]) } type Terminal struct { BasicRule Kind } func (t Terminal) String() string { return fmt.Sprintf("%s(%d)", t.name, t.Kind) } func (t Terminal) FirstSet() (Set, error) { res := make(Set) res.Add(t) return res, nil } // Epsilon is the empty rule. type Epsilon struct { Terminal } func (e Epsilon) String() string { return "ε" } // End corresponds to EOF or the end of the input. type EndOfInput struct { Terminal } func (e EndOfInput) String() string { return "$" } func End() EndOfInput { terminal := Term("", EndKind) return EndOfInput{terminal} } type Nonterminal struct { BasicRule Define Rule Template string // First set of the rule First Set // Follow set of the rule Follow Set // Nullable or not Nullable bool // Realizable or not Realizable bool // Recursive (i.e. LEFT-recursive, which LL(1) disallows) Recursive bool // Undefined nonterminals in this rule Undefined bool // Depth is the depth of (mutual) recursion of a rule. Limited to 16. Depth int } func (n Nonterminal) String() string { return fmt.Sprintf("%s -> %s", n.BasicRule, n.Define) } func (n Nonterminal) Definition() Rule { return n.Define } func (r BasicRule) Equals(r2 Rule) bool { // XXX probably not correct return r.Name() == r2.Name() } func (n Nonterminal) FirstSet() (Set, error) { if n.Define == nil { // XXX can this happen an when/why? return Set{}, nil } return n.Define.FirstSet() } func (n Nonterminal) FollowSet(g *Grammar) (Set, error) { if n.Define == nil { return Set{}, nil } return n.Define.FollowSet(g) } // Nonterminals can cause recursion and need to be checked // XXX this function still is very likely wrong. func (n Nonterminal) Check(g *Grammar, r Rule) error { if n.Define == nil { n.Undefined = true return fmt.Errorf("%s: rule has empty definition", n.Name()) } else if n.Name() != r.Name() { return nil } // check recursively as well if n.Depth < 16 { n.Depth++ return n.Define.Check(g, n) } return fmt.Errorf("%s: left recursive or recursion too deep", n) } type Sequence struct { Nonterminal Rules []Rule } func (s Sequence) String() string { sep := "" res := "" for _, rule := range s.Rules { res = res + sep + rule.String() sep = " " } return res } func (s Sequence) Check(g *Grammar, r Rule) error { // check Leftmost Rule for left recursion if len(s.Rules) > 0 { e := s.Rules[0] return e.Check(g, r) } return nil } func (s Sequence) FirstSet() (Set, error) { if len(s.Rules) > 0 { return s.Rules[0].FirstSet() } return make(Set), nil } func (a Alternates) FirstSet() (Set, error) { res := make(Set) for _, s := range a.Rules { fs, err := s.FirstSet() if err != nil { return res, err } res = res.Union(fs) } return res, nil } type Alternates struct { Nonterminal Rules []Rule } func (a Alternates) Check(g *Grammar, r Rule) error { for _, s := range a.Rules { err := s.Check(g, r) if err != nil { return err } } return nil } func (a Alternates) String() string { sep := "" res := "(" for _, rule := range a.Rules { res = res + sep + rule.String() sep = " | " } return res + ")" } // Reference to anther rule by name, to enable right recursion. // This also requires apointer to the grammar for later resolution. type Reference struct { Grammar *Grammar Nonterminal To string } func (r Reference) Resolve() (Rule, error) { return r.Grammar.Lookup(r.To) } func IsEpsilon(e Rule) bool { _, ok := e.(Epsilon) return ok } func IsNonterminal(e Rule) bool { _, ok := e.(Nonterminal) return ok } // whether an Set is nullable (i.e. contains epsilon) func (s Set) IsNullable() bool { for _, e := range s { if IsEpsilon(e) { return true } } return false } func (s Set) Contains(e Terminal) bool { name := e.Name() _, ok := s[name] return ok } func (s Set) ContainsKind(k Kind) bool { kinds := s.ToKinds() for _, ok := range kinds { if ok == k { return true } } return false } func (s *Set) Add(e Terminal) bool { name := e.Name() _, ok := (*s)[name] if ok { return false } (*s)[name] = e return true } func (s Set) UnionWithoutEpsilon(s2 Set) Set { res := make(Set) if s != nil { for _, v := range s { if !IsEpsilon(v) { res.Add(v) } } } if s2 != nil { for _, v := range s2 { if !IsEpsilon(v) { res.Add(v) } } } return res } func (s Set) Union(s2 Set) Set { res := make(Set) for _, v := range s { res.Add(v) } for _, v := range s2 { res.Add(v) } return res } func (s Set) Intersect(s2 Set) Set { res := make(Set) for _, v := range s { if s2.Contains(v) { res.Add(v) } } return res } func (s Set) String() string { if len(s) == 0 { return "∅" } aid := []string{} for _, v := range s { aid = append(aid, v.String()) } sort.Strings(aid) return strings.Join(aid, " ") } func (s Set) ToKinds() []Kind { if len(s) == 0 { return []Kind{} } res := []Kind{} for _, t := range s { res = append(res, t.Kind) } return res } type Grammar struct { BasicRule Top Rule Rules []Rule // All rules, terminals and nonterminals mapped by name. NamedRules map[string]Rule // List of error of the grammar. Only valid // after running Check() Errors []error } func NewGrammar() *Grammar { g := &Grammar{} g.NamedRules = map[string]Rule{} return g } func (g Grammar) String() string { res := "Top → " + g.Top.Name() + "\n" for _, r := range g.Rules { res = res + r.String() + "\n" } return res } func (g Grammar) Lookup(name string) (Rule, error) { r, ok := g.NamedRules[name] if ok { return r, nil } return nil, fmt.Errorf("Undefined rule: %s", name) } func (g *Grammar) AddRule(r Rule) Rule { g.Rules = append(g.Rules, r) if r.Name() != "" { g.NamedRules[r.Name()] = r } return r } func Term(name string, kind Kind) Terminal { term := Terminal{} term.name = name term.Kind = kind return term } func Seq(name, template string, rules ...Rule) Sequence { seq := Sequence{} seq.name = name seq.Template = template seq.Rules = rules return seq } func And(rules ...Rule) Rule { return Seq("", "", rules...) } func Alt(name, template string, rules ...Rule) Alternates { alt := Alternates{} alt.name = name alt.Template = template alt.Rules = rules return alt } func Or(seqs ...Rule) Rule { return Alt("", "", seqs...) } func Opt(name, template string, rule Rule) Rule { rules := []Rule{rule, Epsilon{}} return Alt(name, template, rules...) } func (g *Grammar) Alt(name, template string, seqs ...Rule) Rule { return g.AddRule(Alt(name, template, seqs...)) } func (g *Grammar) Seq(name, template string, seqs ...Rule) Rule { return g.AddRule(Seq(name, template, seqs...)) } func (g *Grammar) Term(name string, kind Kind) Rule { return g.AddRule(Term(name, kind)) } func (g *Grammar) Opt(name, template string, rule Rule) Rule { return g.AddRule(Opt(name, template, rule)) } func Ref(g *Grammar, name, to string) Reference { ref := Reference{} ref.Nonterminal.name = name ref.Grammar = g ref.To = to return ref } func (g *Grammar) Ref(name, to string) Rule { return g.AddRule(Ref(g, name, to)) }