123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460 |
- // Grammar describes the rules for a particular ll-parsable language.
- package grammar
- import "fmt"
- import "unicode"
- import "sort"
- import "strings"
- import . "src.eruta.nl/beoran/ll1/common"
- // Grammars consists of rules. A Rule can be a Terminal,
- // which includes the special terminals Epsilon and End,
- // or one of the following nonterminals: and Alternate, a Sequence or a Reference.
- type Rule interface {
- // Name is the optional name of the rule. If empty, the rule is anonymous.
- Name() string
- // Definition is the detail of how the rule should be parsed.
- // For a Terminal Rule this is the rule itself.
- Definition() Rule
- // Rules can be stringified
- String() string
- Check(g *Grammar, r Rule) error
- // The first set of the rule
- FirstSet() (Set, error)
- FollowSet(g *Grammar) (Set, error)
- }
- // A set is a set of terminals, either the first set or the follow set
- // of a rule in the grammar.
- type Set map[string]Terminal
- type BasicRule struct {
- name string
- }
- func (r BasicRule) Definition() Rule {
- return r
- }
- func (e BasicRule) Name() string {
- return e.name
- }
- func (e BasicRule) String() string {
- return fmt.Sprintf("%s", e.name)
- }
- func (e BasicRule) Check(g *Grammar, r Rule) error {
- return nil
- }
- func (e BasicRule) FirstSet() (Set, error) {
- res := make(Set)
- return res, nil
- }
- func (e BasicRule) FollowSet(g *Grammar) (Set, error) {
- res := make(Set)
- return res, nil
- }
- func (r BasicRule) IsTerminal() bool {
- return !unicode.IsUpper([]rune(r.name)[0])
- }
- type Terminal struct {
- BasicRule
- Kind
- }
- func (t Terminal) String() string {
- return fmt.Sprintf("%s(%d)", t.name, t.Kind)
- }
- func (t Terminal) FirstSet() (Set, error) {
- res := make(Set)
- res.Add(t)
- return res, nil
- }
- // Epsilon is the empty rule.
- type Epsilon struct {
- Terminal
- }
- func (e Epsilon) String() string {
- return "ε"
- }
- // End corresponds to EOF or the end of the input.
- type EndOfInput struct {
- Terminal
- }
- func (e EndOfInput) String() string {
- return "$"
- }
- func End() EndOfInput {
- terminal := Term("<end>", EndKind)
- return EndOfInput{terminal}
- }
- type Nonterminal struct {
- BasicRule
- Define Rule
- Template string
- // First set of the rule
- First Set
- // Follow set of the rule
- Follow Set
- // Nullable or not
- Nullable bool
- // Realizable or not
- Realizable bool
- // Recursive (i.e. LEFT-recursive, which LL(1) disallows)
- Recursive bool
- // Undefined nonterminals in this rule
- Undefined bool
- // Depth is the depth of (mutual) recursion of a rule. Limited to 16.
- Depth int
- }
- func (n Nonterminal) String() string {
- return fmt.Sprintf("%s -> %s", n.BasicRule, n.Define)
- }
- func (n Nonterminal) Definition() Rule {
- return n.Define
- }
- func (r BasicRule) Equals(r2 Rule) bool {
- // XXX probably not correct
- return r.Name() == r2.Name()
- }
- func (n Nonterminal) FirstSet() (Set, error) {
- if n.Define == nil { // XXX can this happen an when/why?
- return Set{}, nil
- }
- return n.Define.FirstSet()
- }
- func (n Nonterminal) FollowSet(g *Grammar) (Set, error) {
- if n.Define == nil {
- return Set{}, nil
- }
- return n.Define.FollowSet(g)
- }
- // Nonterminals can cause recursion and need to be checked
- // XXX this function still is very likely wrong.
- func (n Nonterminal) Check(g *Grammar, r Rule) error {
- if n.Define == nil {
- n.Undefined = true
- return fmt.Errorf("%s: rule has empty definition", n.Name())
- } else if n.Name() != r.Name() {
- return nil
- }
- // check recursively as well
- if n.Depth < 16 {
- n.Depth++
- return n.Define.Check(g, n)
- }
- return fmt.Errorf("%s: left recursive or recursion too deep", n)
- }
- type Sequence struct {
- Nonterminal
- Rules []Rule
- }
- func (s Sequence) String() string {
- sep := ""
- res := ""
- for _, rule := range s.Rules {
- res = res + sep + rule.String()
- sep = " "
- }
- return res
- }
- func (s Sequence) Check(g *Grammar, r Rule) error {
- // check Leftmost Rule for left recursion
- if len(s.Rules) > 0 {
- e := s.Rules[0]
- return e.Check(g, r)
- }
- return nil
- }
- func (s Sequence) FirstSet() (Set, error) {
- if len(s.Rules) > 0 {
- return s.Rules[0].FirstSet()
- }
- return make(Set), nil
- }
- func (a Alternates) FirstSet() (Set, error) {
- res := make(Set)
- for _, s := range a.Rules {
- fs, err := s.FirstSet()
- if err != nil {
- return res, err
- }
- res = res.Union(fs)
- }
- return res, nil
- }
- type Alternates struct {
- Nonterminal
- Rules []Rule
- }
- func (a Alternates) Check(g *Grammar, r Rule) error {
- for _, s := range a.Rules {
- err := s.Check(g, r)
- if err != nil {
- return err
- }
- }
- return nil
- }
- func (a Alternates) String() string {
- sep := ""
- res := "("
- for _, rule := range a.Rules {
- res = res + sep + rule.String()
- sep = " | "
- }
- return res + ")"
- }
- // Reference to anther rule by name, to enable right recursion.
- // This also requires apointer to the grammar for later resolution.
- type Reference struct {
- Grammar *Grammar
- Nonterminal
- To string
- }
- func (r Reference) Resolve() (Rule, error) {
- return r.Grammar.Lookup(r.To)
- }
- func IsEpsilon(e Rule) bool {
- _, ok := e.(Epsilon)
- return ok
- }
- func IsNonterminal(e Rule) bool {
- _, ok := e.(Nonterminal)
- return ok
- }
- // whether an Set is nullable (i.e. contains epsilon)
- func (s Set) IsNullable() bool {
- for _, e := range s {
- if IsEpsilon(e) {
- return true
- }
- }
- return false
- }
- func (s Set) Contains(e Terminal) bool {
- name := e.Name()
- _, ok := s[name]
- return ok
- }
- func (s Set) ContainsKind(k Kind) bool {
- kinds := s.ToKinds()
- for _, ok := range kinds {
- if ok == k {
- return true
- }
- }
- return false
- }
- func (s *Set) Add(e Terminal) bool {
- name := e.Name()
- _, ok := (*s)[name]
- if ok {
- return false
- }
- (*s)[name] = e
- return true
- }
- func (s Set) UnionWithoutEpsilon(s2 Set) Set {
- res := make(Set)
- if s != nil {
- for _, v := range s {
- if !IsEpsilon(v) {
- res.Add(v)
- }
- }
- }
- if s2 != nil {
- for _, v := range s2 {
- if !IsEpsilon(v) {
- res.Add(v)
- }
- }
- }
- return res
- }
- func (s Set) Union(s2 Set) Set {
- res := make(Set)
- for _, v := range s {
- res.Add(v)
- }
- for _, v := range s2 {
- res.Add(v)
- }
- return res
- }
- func (s Set) Intersect(s2 Set) Set {
- res := make(Set)
- for _, v := range s {
- if s2.Contains(v) {
- res.Add(v)
- }
- }
- return res
- }
- func (s Set) String() string {
- if len(s) == 0 {
- return "∅"
- }
- aid := []string{}
- for _, v := range s {
- aid = append(aid, v.String())
- }
- sort.Strings(aid)
- return strings.Join(aid, " ")
- }
- func (s Set) ToKinds() []Kind {
- if len(s) == 0 {
- return []Kind{}
- }
- res := []Kind{}
- for _, t := range s {
- res = append(res, t.Kind)
- }
- return res
- }
- type Grammar struct {
- BasicRule
- Top Rule
- Rules []Rule
- // All rules, terminals and nonterminals mapped by name.
- NamedRules map[string]Rule
- // List of error of the grammar. Only valid
- // after running Check()
- Errors []error
- }
- func NewGrammar() *Grammar {
- g := &Grammar{}
- g.NamedRules = map[string]Rule{}
- return g
- }
- func (g Grammar) String() string {
- res := "Top → " + g.Top.Name() + "\n"
- for _, r := range g.Rules {
- res = res + r.String() + "\n"
- }
- return res
- }
- func (g Grammar) Lookup(name string) (Rule, error) {
- r, ok := g.NamedRules[name]
- if ok {
- return r, nil
- }
- return nil, fmt.Errorf("Undefined rule: %s", name)
- }
- func (g *Grammar) AddRule(r Rule) Rule {
- g.Rules = append(g.Rules, r)
- if r.Name() != "" {
- g.NamedRules[r.Name()] = r
- }
- return r
- }
- func Term(name string, kind Kind) Terminal {
- term := Terminal{}
- term.name = name
- term.Kind = kind
- return term
- }
- func Seq(name, template string, rules ...Rule) Sequence {
- seq := Sequence{}
- seq.name = name
- seq.Template = template
- seq.Rules = rules
- return seq
- }
- func And(rules ...Rule) Rule {
- return Seq("", "", rules...)
- }
- func Alt(name, template string, rules ...Rule) Alternates {
- alt := Alternates{}
- alt.name = name
- alt.Template = template
- alt.Rules = rules
- return alt
- }
- func Or(seqs ...Rule) Rule {
- return Alt("", "", seqs...)
- }
- func Opt(name, template string, rule Rule) Rule {
- rules := []Rule{rule, Epsilon{}}
- return Alt(name, template, rules...)
- }
- func (g *Grammar) Alt(name, template string, seqs ...Rule) Rule {
- return g.AddRule(Alt(name, template, seqs...))
- }
- func (g *Grammar) Seq(name, template string, seqs ...Rule) Rule {
- return g.AddRule(Seq(name, template, seqs...))
- }
- func (g *Grammar) Term(name string, kind Kind) Rule {
- return g.AddRule(Term(name, kind))
- }
- func (g *Grammar) Opt(name, template string, rule Rule) Rule {
- return g.AddRule(Opt(name, template, rule))
- }
- func Ref(g *Grammar, name, to string) Reference {
- ref := Reference{}
- ref.Nonterminal.name = name
- ref.Grammar = g
- ref.To = to
- return ref
- }
- func (g *Grammar) Ref(name, to string) Rule {
- return g.AddRule(Ref(g, name, to))
- }
|