package main import "text/scanner" import "fmt" import "io" import "os" import "unicode" // This parser is for parsing LL1 grammars, not Beo. type Parser struct { reader io.Reader scanner scanner.Scanner current Token Errors []ParserError Filename string Debug io.Writer } func NewParserFromReader(reader io.Reader, filename string, debug bool) *Parser { parser := &Parser{} parser.reader = reader parser.scanner.Init(parser.reader) parser.Filename = filename parser.current.Kind = 0 parser.Debug = nil if debug { parser.Debug = os.Stderr } parser.scanner.Mode = scanner.GoTokens parser.scanner.Error = func (s *scanner.Scanner, msg string) { parser.Panicf("%s: scanner error: %s, %s", s.Position, s.TokenText(), msg) } parser.scanner.IsIdentRune = func(ch rune, i int) bool { if i == 0 { return unicode.IsLetter(ch) } return unicode.IsLetter(ch) || unicode.IsNumber(ch) || ch == '_' || ch == '-' } return parser } func (p *Parser) Lex() Token { scanned := p.scanner.Scan() pos := p.scanner.Position pos.Filename = p.Filename value := p.scanner.TokenText() // Get rid of the quotes if scanned == scanner.Char || scanned == scanner.String || scanned == scanner.RawString { value = value[1:len(value) - 1] } token := Token { scanned, value, pos, } p.Debugf("Lexed: %s\n", p.Filename, token) return token } func (p *Parser) Advance() { token := p.Lex() p.current = token } // panic with this type on errors that would prevent the parser // from making progress. type ParserError struct { *Parser *Token Chain error } func (pe ParserError) Error() string { return pe.Token.MakeError(pe.Chain.Error()).Error() } func (parser *Parser) Errorf(message string, args ...interface{}) ParserError { err := fmt.Errorf(message, args...) pe := ParserError { Parser: parser, Token:&parser.current, Chain: err } parser.Errors = append(parser.Errors, pe) return pe } func (parser *Parser) Panicf(message string, args ...interface{}) { pe := parser.Errorf(message, args...) panic(pe) } func (p *Parser) Debugf(message string, args ...interface{}) { if p.Debug != nil { fmt.Fprintf(p.Debug, message, args) } } /* Looks at the current token and advances the lexer if the token is of any of the token kinds given in kinds. In this case it will return the accepted token and advance the parser. Otherwise, it will call parser.Panicf.*/ func (parser *Parser) Require(kinds ...rune) Token { parser.Debugf("Require: %v\n", kinds) if parser.current.Kind == 0 { parser.Advance() } expected := "" sep := "" for _, kind := range kinds { if kind == parser.current.Kind { accepted := parser.current parser.Advance() return accepted } expected = fmt.Sprintf("%s%s%s", expected, sep, scanner.TokenString(kind)) } parser.Panicf("error: expected one of the following: %s", expected) return Token{} } func (parser Parser) NextIs(kinds ...rune) bool { parser.Debugf("NextIs: %v\n", kinds) if (parser.current.Kind == 0) { parser.Advance() } for _, kind := range kinds { if kind == parser.current.Kind { return true } } return false } /* Sequence -> Element Sequence | . Element -> Parenthesis | Name | Literal . Parenthesis -> '(' Definition ')' . */ func (p *Parser) NextIsElement() (bool) { return p.NextIs('(', scanner.Ident, scanner.Char, scanner.String) } func (p *Parser) ParseElement() (Element, error) { switch { case p.NextIs('('): return p.ParseParenthesis() case p.NextIs(scanner.Ident): ident := p.Require(scanner.Ident) if ident.Value == "epsilon" || ident.Value == "ε" { ident.Value = "ε" return Epsilon{ element { token: ident } }, nil } // Upper case means a nonterminal, otherwise terminal if unicode.IsUpper(([]rune(ident.Value))[0]) { return Nonterminal{ element { token: ident }, nil }, nil } return Terminal{ element { token: ident } }, nil case p.NextIs(scanner.Char, scanner.String): literal := p.Require(scanner.Char, scanner.String) return Literal{ Terminal { element { token: literal } } }, nil default: p.Panicf("error: unexpected for grammar Element: %s", p.current) } return nil, nil } func (p *Parser) ParseSequence() (*Sequence, error) { sequence := &Sequence{} for p.NextIsElement() { element, err := p.ParseElement() p.Debugf("Sequence parsed element: %v\n", element) if err != nil { return sequence, err } sequence.Elements = append(sequence.Elements, element) } return sequence, nil } func (p *Parser) ParseParenthesis() (*Definition, error) { // Parse the sub definition p.Require('(') sub, err := p.ParseDefinition() p.Require(')') return sub, err } func (p *Parser) ParseAlternates() (*Definition, error) { alternates := &Alternates{} for p.NextIsElement() { sequence, err := p.ParseSequence() if err != nil { return alternates, err } alternates.Sequences = append(alternates.Sequences, *sequence) if !p.NextIs('.', scanner.RawString, scanner.EOF, ')') { p.Require('|') } } return alternates, nil } func (p *Parser) ParseDefinition() (*Definition, error) { return p.ParseAlternates() /*if p.NextIs('(') { sub, err := p.ParseParenthesis() if err != nil { return nil, err } }*/ } func (p *Parser) ParseRule() (*Rule, error) { rule := &Rule{} name := p.Require(scanner.Ident) // XXX Require or → or -> separator. // This is a hack, needs to be moved to a lexer. if p.NextIs('→') { p.Require('→') } else { p.Require('-') p.Require('>') } definition, err := p.ParseDefinition() if err != nil { return rule, err } if p.NextIs(scanner.RawString) { rs := p.Require(scanner.RawString) rule.Template = rs.Value } // require '.' terminator p.Require('.') rule.Name = name.Value rule.Definition = *definition return rule, nil } func (p *Parser) ParseRules() ([]*Rule, error) { var rules []*Rule for p.current.Kind != scanner.EOF { rule, err := p.ParseRule() if err != nil { return rules, err } appended := false for i, existing := range(rules) { if existing.Name == rule.Name { // Add to the definition in stead existing.Definition.Sequences = append(existing.Definition.Sequences, rule.Definition.Sequences...) rules[i] = existing appended = true break } } if !appended { rules = append(rules, rule) } } return rules, nil } func (p *Parser) Parse() (g *Grammar, e error) { defer func() { thrown := recover() if thrown != nil { g = nil e = thrown.(error) return } }() p.Advance() rules, err := p.ParseRules() if err != nil { return nil, err } if len(rules) < 1 { return nil, nil } grammar := &Grammar { Top: rules[0], Rules: rules, } return grammar, nil } func ParseFile(filename string, debug bool) (*Parser, *Grammar, error) { file, err := os.Open(filename) defer file.Close() parser := NewParserFromReader(file, filename, debug) grammar, err := parser.Parse() return parser, grammar, err }