123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309 |
- package main
- import "text/scanner"
- import "fmt"
- import "io"
- import "os"
- import "unicode"
- // This parser is for parsing LL1 grammars, not Beo.
- type Parser struct {
- reader io.Reader
- scanner scanner.Scanner
- current Token
- Errors []ParserError
- Filename string
- Debug io.Writer
- }
- func NewParserFromReader(reader io.Reader, filename string, debug bool) *Parser {
- parser := &Parser{}
- parser.reader = reader
- parser.scanner.Init(parser.reader)
- parser.Filename = filename
- parser.current.Kind = 0
- parser.Debug = nil
- if debug {
- parser.Debug = os.Stderr
- }
- parser.scanner.Mode = scanner.GoTokens
- parser.scanner.Error = func (s *scanner.Scanner, msg string) {
- parser.Panicf("%s: scanner error: %s, %s", s.Position, s.TokenText(), msg)
- }
- parser.scanner.IsIdentRune = func(ch rune, i int) bool {
- if i == 0 {
- return unicode.IsLetter(ch)
- }
- return unicode.IsLetter(ch) ||
- unicode.IsNumber(ch) ||
- ch == '_' ||
- ch == '-'
- }
-
- return parser
- }
- func (p *Parser) Lex() Token {
- scanned := p.scanner.Scan()
- pos := p.scanner.Position
- pos.Filename = p.Filename
- value := p.scanner.TokenText()
- // Get rid of the quotes
- if scanned == scanner.Char ||
- scanned == scanner.String ||
- scanned == scanner.RawString {
- value = value[1:len(value) - 1]
- }
- token := Token {
- scanned,
- value,
- pos,
- }
- p.Debugf("Lexed: %s\n", p.Filename, token)
- return token
- }
- func (p *Parser) Advance() {
- token := p.Lex()
- p.current = token
- }
- // panic with this type on errors that would prevent the parser
- // from making progress.
- type ParserError struct {
- *Parser
- *Token
- Chain error
- }
- func (pe ParserError) Error() string {
- return pe.Token.MakeError(pe.Chain.Error()).Error()
- }
- func (parser *Parser) Errorf(message string, args ...interface{}) ParserError {
- err := fmt.Errorf(message, args...)
- pe := ParserError { Parser: parser, Token:&parser.current, Chain: err }
- parser.Errors = append(parser.Errors, pe)
- return pe
- }
- func (parser *Parser) Panicf(message string, args ...interface{}) {
- pe := parser.Errorf(message, args...)
- panic(pe)
- }
- func (p *Parser) Debugf(message string, args ...interface{}) {
- if p.Debug != nil {
- fmt.Fprintf(p.Debug, message, args)
- }
- }
- /* Looks at the current token and advances the lexer if the token is of any of
- the token kinds given in kinds. In this case it will return the accepted
- token and advance the parser. Otherwise, it will call parser.Panicf.*/
- func (parser *Parser) Require(kinds ...rune) Token {
- parser.Debugf("Require: %v\n", kinds)
- if parser.current.Kind == 0 {
- parser.Advance()
- }
-
- expected := ""
- sep := ""
- for _, kind := range kinds {
- if kind == parser.current.Kind {
- accepted := parser.current
- parser.Advance()
- return accepted
- }
- expected = fmt.Sprintf("%s%s%s", expected, sep, scanner.TokenString(kind))
- }
-
- parser.Panicf("error: expected one of the following: %s", expected)
- return Token{}
- }
- func (parser Parser) NextIs(kinds ...rune) bool {
- parser.Debugf("NextIs: %v\n", kinds)
- if (parser.current.Kind == 0) {
- parser.Advance()
- }
- for _, kind := range kinds {
- if kind == parser.current.Kind {
- return true
- }
- }
- return false
- }
- /*
- Sequence -> Element Sequence | .
- Element -> Parenthesis | Name | Literal .
- Parenthesis -> '(' Definition ')' .
- */
- func (p *Parser) NextIsElement() (bool) {
- return p.NextIs('(', scanner.Ident, scanner.Char, scanner.String)
- }
- func (p *Parser) ParseElement() (Element, error) {
- switch {
- case p.NextIs('('):
- return p.ParseParenthesis()
-
- case p.NextIs(scanner.Ident):
- ident := p.Require(scanner.Ident)
- if ident.Value == "epsilon" || ident.Value == "ε" {
- ident.Value = "ε"
- return Epsilon{ element { token: ident } }, nil
- }
- // Upper case means a nonterminal, otherwise terminal
- if unicode.IsUpper(([]rune(ident.Value))[0]) {
- return Nonterminal{ element { token: ident }, nil }, nil
- }
- return Terminal{ element { token: ident } }, nil
- case p.NextIs(scanner.Char, scanner.String):
- literal := p.Require(scanner.Char, scanner.String)
- return Literal{ Terminal { element { token: literal } } }, nil
- default:
- p.Panicf("error: unexpected for grammar Element: %s", p.current)
- }
- return nil, nil
- }
- func (p *Parser) ParseSequence() (*Sequence, error) {
- sequence := &Sequence{}
- for p.NextIsElement() {
- element, err := p.ParseElement()
- p.Debugf("Sequence parsed element: %v\n", element)
- if err != nil {
- return sequence, err
- }
- sequence.Elements = append(sequence.Elements, element)
- }
- return sequence, nil
- }
- func (p *Parser) ParseParenthesis() (*Definition, error) {
- // Parse the sub definition
- p.Require('(')
- sub, err := p.ParseDefinition()
- p.Require(')')
- return sub, err
- }
- func (p *Parser) ParseAlternates() (*Definition, error) {
- alternates := &Alternates{}
- for p.NextIsElement() {
- sequence, err := p.ParseSequence()
- if err != nil {
- return alternates, err
- }
- alternates.Sequences = append(alternates.Sequences, *sequence)
- if !p.NextIs('.', scanner.RawString, scanner.EOF, ')') {
- p.Require('|')
- }
- }
- return alternates, nil
- }
- func (p *Parser) ParseDefinition() (*Definition, error) {
- return p.ParseAlternates()
-
-
- /*if p.NextIs('(') {
- sub, err := p.ParseParenthesis()
- if err != nil {
- return nil, err
- }
- }*/
- }
- func (p *Parser) ParseRule() (*Rule, error) {
- rule := &Rule{}
- name := p.Require(scanner.Ident)
- // XXX Require or → or -> separator.
- // This is a hack, needs to be moved to a lexer.
- if p.NextIs('→') {
- p.Require('→')
- } else {
- p.Require('-')
- p.Require('>')
- }
- definition, err := p.ParseDefinition()
- if err != nil {
- return rule, err
- }
- if p.NextIs(scanner.RawString) {
- rs := p.Require(scanner.RawString)
- rule.Template = rs.Value
- }
- // require '.' terminator
- p.Require('.')
- rule.Name = name.Value
- rule.Definition = *definition
- return rule, nil
- }
- func (p *Parser) ParseRules() ([]*Rule, error) {
- var rules []*Rule
- for p.current.Kind != scanner.EOF {
- rule, err := p.ParseRule()
- if err != nil {
- return rules, err
- }
- appended := false
- for i, existing := range(rules) {
- if existing.Name == rule.Name {
- // Add to the definition in stead
- existing.Definition.Sequences = append(existing.Definition.Sequences, rule.Definition.Sequences...)
- rules[i] = existing
- appended = true
- break
- }
- }
- if !appended {
- rules = append(rules, rule)
- }
- }
- return rules, nil
- }
- func (p *Parser) Parse() (g *Grammar, e error) {
- defer func() {
- thrown := recover()
- if thrown != nil {
- g = nil
- e = thrown.(error)
- return
- }
- }()
- p.Advance()
- rules, err := p.ParseRules()
- if err != nil {
- return nil, err
- }
- if len(rules) < 1 {
- return nil, nil
- }
- grammar := &Grammar {
- Top: rules[0],
- Rules: rules,
- }
-
- return grammar, nil
- }
- func ParseFile(filename string, debug bool) (*Parser, *Grammar, error) {
- file, err := os.Open(filename)
- defer file.Close()
- parser := NewParserFromReader(file, filename, debug)
- grammar, err := parser.Parse()
- return parser, grammar, err
- }
|