package main

import (
	"fmt"
	"io"
	"os"
	"text/scanner"
	"unicode"
)

// Value is the lexical value of a lexer token.
// This is an alias for string in the case of ll1.
type Value = string

// Position is a position within a source file. Since the lexer is based on
// text/scanner, we use that package's Position.
type Position = scanner.Position

// TokenKind is the kind or type of a token.
// This has rune as the underlying type so one-character tokens can be easily
// supported. EOF will be 65535 (i.e. -1 cast to rune). Non-character token
// kinds will start from 65533 down (i.e. -3, -4, -5, etc).
type TokenKind rune

// NoTokenKind means "no token kind", i.e. no token.
const NoTokenKind TokenKind = TokenKind(0)

// TokenKindEOF means the end of the input.
const TokenKindEOF TokenKind = TokenKind(scanner.EOF)

// TokenKindError means a parsing or lexing error was encountered.
//line ll1.parser.go.tpl:80
const TokenKindError TokenKind = TokenKind(-21)

// Scanner based token kinds
const TokenKindIdent TokenKind = TokenKind(scanner.Ident)
const TokenKindInt TokenKind = TokenKind(scanner.Int)
const TokenKindFloat TokenKind = TokenKind(scanner.Float)
const TokenKindChar TokenKind = TokenKind(scanner.Char)
const TokenKindString TokenKind = TokenKind(scanner.String)
const TokenKindRawString TokenKind = TokenKind(scanner.RawString)
const TokenKindComment TokenKind = TokenKind(scanner.Comment)

// Grammar based token kinds
const (
	TokenKindEpsilon  TokenKind = TokenKind('ε')
	TokenKindOr       TokenKind = TokenKind('|')
	TokenKindArrow    TokenKind = TokenKind('→')
	TokenKindStar     TokenKind = TokenKind('*')
	TokenKindOptional TokenKind = TokenKind('?')
	TokenKindList     TokenKind = TokenKind('+')
	TokenKindRuleEnd  TokenKind = TokenKind('.')
)

// String converts a token kind to a string representation.
//line ll1.parser.go.tpl:86
func (tk TokenKind) String() string {
	return scanner.TokenString(rune(tk))
}

// Token is the result of a single lexical analysis step by the lexer.
//line ll1.parser.go.tpl:109
type Token struct {
	Position  // Position in the source where the token was found.
	TokenKind // Kind of the token.
	Value     // Value of the token.
}

// MakeToken makes a token with the given position, type and value.
//line ll1.parser.go.tpl:118
func MakeToken(pos Position, typ TokenKind, val Value) Token {
	return Token{pos, typ, val}
}

// Lexer performs the lexical analysis of the input.
//line ll1.parser.go.tpl:124
type Lexer struct {
	// Embed scanner.Scanner
	scanner.Scanner
	Filename string
}

// NewLexerFromReader creates a new lexer for the given parser and input.
//line ll1.parser.go.tpl:133
func NewLexerFromReader(parser *Parser, reader io.Reader, filename string) *Lexer {
	lexer := &Lexer{}
	lexer.Filename = filename
	lexer.Scanner.Init(reader)
	lexer.Scanner.Mode = scanner.GoTokens
	lexer.Scanner.Error = func(s *scanner.Scanner, msg string) {
		parser.Panicf("%s: scanner error: %s, %s", s.Position, s.TokenText(), msg)
	}
	// XXX: needs to be generated from the identifier rule in the syntax!
	lexer.Scanner.IsIdentRune = func(ch rune, i int) bool {
		if i == 0 {
			return unicode.IsLetter(ch)
		}
		return unicode.IsLetter(ch) || unicode.IsNumber(ch) || ch == '_' || ch == '-'
	}
	return lexer
}
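// A minimal usage sketch of the Lexer (hypothetical; lexAll is not part of
// the original file): drain the lexer until end of input. It relies on Lex,
// defined below, returning a token with TokenKindEOF when the input is
// exhausted.
func lexAll(lexer *Lexer) []Token {
	var tokens []Token
	for {
		token := lexer.Lex()
		tokens = append(tokens, token)
		if token.TokenKind == TokenKindEOF {
			return tokens
		}
	}
}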
//line ll1.parser.go.tpl:155
func (lex *Lexer) Lex() Token {
	scanned := lex.Scanner.Scan()
	pos := lex.Scanner.Position
	pos.Filename = lex.Filename
	value := lex.Scanner.TokenText()
	// Get rid of the quotes
	if scanned == scanner.Char ||
		scanned == scanner.String ||
		scanned == scanner.RawString {
		value = value[1 : len(value)-1]
	}
	token := Token{
		Position:  pos,
		TokenKind: TokenKind(scanned),
		Value:     value,
	}
	return token
}

// Parser is for parsing LL1 grammars, not Beo.
type Parser struct {
	reader   io.Reader
	scanner  scanner.Scanner
	current  Token
	Errors   []ParserError
	Filename string
	Debug    io.Writer
}

func NewParserFromReader(reader io.Reader, filename string, debug bool) *Parser {
	parser := &Parser{}
	parser.reader = reader
	parser.scanner.Init(parser.reader)
	parser.Filename = filename
	parser.current.TokenKind = NoTokenKind
	parser.Debug = nil
	if debug {
		parser.Debug = os.Stderr
	}
	parser.scanner.Mode = scanner.GoTokens
	parser.scanner.Error = func(s *scanner.Scanner, msg string) {
		parser.Panicf("%s: scanner error: %s, %s", s.Position, s.TokenText(), msg)
	}
	parser.scanner.IsIdentRune = func(ch rune, i int) bool {
		if i == 0 {
			return unicode.IsLetter(ch)
		}
		return unicode.IsLetter(ch) || unicode.IsNumber(ch) || ch == '_' || ch == '-'
	}
	return parser
}

func (p *Parser) Lex() Token {
	scanned := p.scanner.Scan()
	pos := p.scanner.Position
	pos.Filename = p.Filename
	value := p.scanner.TokenText()
	// Get rid of the quotes
	if scanned == scanner.Char ||
		scanned == scanner.String ||
		scanned == scanner.RawString {
		value = value[1 : len(value)-1]
	}
	token := Token{
		Position:  pos,
		TokenKind: TokenKind(scanned),
		Value:     value,
	}
	p.Debugf("%s: lexed: %v\n", p.Filename, token)
	return token
}

func (p *Parser) Advance() {
	token := p.Lex()
	p.current = token
}

// ParserError is panicked with on errors that would prevent the parser
// from making progress.
type ParserError struct {
	*Parser
	*Token
	Chain error
}

func (pe ParserError) Error() string {
	return pe.Token.MakeError(pe.Chain.Error()).Error()
}

func (parser *Parser) Errorf(message string, args ...interface{}) ParserError {
	err := fmt.Errorf(message, args...)
	pe := ParserError{Parser: parser, Token: &parser.current, Chain: err}
	parser.Errors = append(parser.Errors, pe)
	return pe
}

func (parser *Parser) Panicf(message string, args ...interface{}) {
	pe := parser.Errorf(message, args...)
	panic(pe)
}

func (p *Parser) Debugf(message string, args ...interface{}) {
	if p.Debug != nil {
		fmt.Fprintf(p.Debug, message, args...)
	}
}
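// The parser reports fatal problems by panicking with a ParserError and
// recovering at the top level in Parse (below). A hedged sketch of how a
// caller could invoke a lower-level Parse* method directly under the same
// convention; parseRuleSafe is hypothetical and not part of the original
// file:
func parseRuleSafe(p *Parser) (rule *Rule, err error) {
	defer func() {
		if thrown := recover(); thrown != nil {
			// Panics raised via Panicf carry a ParserError, which
			// implements the error interface.
			rule, err = nil, thrown.(error)
		}
	}()
	return p.ParseRule()
}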
// Require looks at the current token, and if it is of any of the token kinds
// given in kinds, accepts it: the accepted token is returned and the parser
// advances to the next token. Otherwise, it calls parser.Panicf.
func (parser *Parser) Require(kinds ...rune) Token {
	parser.Debugf("Require: %v\n", kinds)
	if parser.current.TokenKind == NoTokenKind {
		parser.Advance()
	}
	expected := ""
	sep := ""
	for _, kind := range kinds {
		if TokenKind(kind) == parser.current.TokenKind {
			accepted := parser.current
			parser.Advance()
			return accepted
		}
		expected = fmt.Sprintf("%s%s%s", expected, sep, scanner.TokenString(kind))
		sep = ", "
	}
	parser.Panicf("error: expected one of the following: %s", expected)
	return Token{}
}

// NextIs reports whether the current token is of any of the given kinds,
// without advancing the parser.
func (parser *Parser) NextIs(kinds ...rune) bool {
	parser.Debugf("NextIs: %v\n", kinds)
	if parser.current.TokenKind == NoTokenKind {
		parser.Advance()
	}
	for _, kind := range kinds {
		if TokenKind(kind) == parser.current.TokenKind {
			return true
		}
	}
	return false
}

/*
Sequence    -> Element Sequence | .
Element     -> Parenthesis | Name | Literal .
Parenthesis -> '(' Definition ')' .
*/
func (p *Parser) NextIsElement() bool {
	return p.NextIs('(', scanner.Ident, scanner.Char, scanner.String)
}

func (p *Parser) NextIsOperator() bool {
	return p.NextIs('+', '*')
}

func (p *Parser) ParseElement() (Element, error) {
	switch {
	case p.NextIs('('):
		return p.ParseParenthesis()
	case p.NextIs(scanner.Ident):
		ident := p.Require(scanner.Ident)
		if ident.Value == "epsilon" || ident.Value == "ε" {
			ident.Value = "ε"
			return Epsilon{element{token: ident}}, nil
		}
		// Upper case means a nonterminal, otherwise terminal
		if unicode.IsUpper(([]rune(ident.Value))[0]) {
			return Nonterminal{element{token: ident}, nil}, nil
		}
		return Terminal{element{token: ident}}, nil
	case p.NextIs(scanner.Char, scanner.String):
		literal := p.Require(scanner.Char, scanner.String)
		return Literal{Terminal{element{token: literal}}}, nil
	default:
		p.Panicf("error: unexpected token for grammar Element: %s", p.current)
	}
	return nil, nil
}

func (p *Parser) ParseSequence() (*Sequence, error) {
	sequence := &Sequence{}
	for p.NextIsElement() {
		element, err := p.ParseElement()
		p.Debugf("Sequence parsed element: %v\n", element)
		if err != nil {
			return sequence, err
		}
		sequence.Elements = append(sequence.Elements, element)
	}
	return sequence, nil
}

func (p *Parser) ParseParenthesis() (*Definition, error) {
	// Parse the sub definition between the parentheses.
	p.Require('(')
	sub, err := p.ParseDefinition()
	p.Require(')')
	return sub, err
}

func (p *Parser) ParseAlternates() (*Definition, error) {
	alternates := &Alternates{}
	for p.NextIsElement() {
		sequence, err := p.ParseSequence()
		if err != nil {
			return alternates, err
		}
		alternates.Sequences = append(alternates.Sequences, *sequence)
		if !p.NextIs('.', scanner.RawString, scanner.EOF, ')') {
			p.Require('|')
		}
	}
	return alternates, nil
}

func (p *Parser) ParseDefinition() (*Definition, error) {
	return p.ParseAlternates()
}
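// For reference, ParseRule below accepts rules of roughly this shape
// (reconstructed from the parse functions in this file; the trailing
// raw-string template is optional, and '→' may also be written '->'):
//
//	Expression → Term More .
//	More → '+' Term More | ε .
//
// An identifier starting with an upper-case letter is a nonterminal; a
// lower-case identifier or a quoted literal is a terminal.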
func (p *Parser) ParseRule() (*Rule, error) {
	rule := &Rule{}
	name := p.Require(scanner.Ident)
	// Require a → or -> separator.
	// The two-character form is a hack; it needs to be moved to a lexer.
	if p.NextIs('→') {
		p.Require('→')
	} else {
		p.Require('-')
		p.Require('>')
	}
	definition, err := p.ParseDefinition()
	if err != nil {
		return rule, err
	}
	if p.NextIs(scanner.RawString) {
		rs := p.Require(scanner.RawString)
		rule.Template = rs.Value
	}
	// Require the '.' terminator.
	p.Require('.')
	rule.Name = name.Value
	rule.Definition = *definition
	return rule, nil
}

func (p *Parser) ParseRules() ([]*Rule, error) {
	var rules []*Rule
	for p.current.TokenKind != TokenKindEOF {
		rule, err := p.ParseRule()
		if err != nil {
			return rules, err
		}
		appended := false
		for i, existing := range rules {
			if existing.Name == rule.Name {
				// Add to the existing rule's definition instead.
				existing.Definition.Sequences = append(existing.Definition.Sequences,
					rule.Definition.Sequences...)
				rules[i] = existing
				appended = true
				break
			}
		}
		if !appended {
			rules = append(rules, rule)
		}
	}
	return rules, nil
}

func (p *Parser) Parse() (g *Grammar, e error) {
	defer func() {
		if thrown := recover(); thrown != nil {
			g = nil
			e = thrown.(error)
		}
	}()
	p.Advance()
	rules, err := p.ParseRules()
	if err != nil {
		return nil, err
	}
	if len(rules) < 1 {
		return nil, nil
	}
	grammar := &Grammar{
		Top:   rules[0],
		Rules: rules,
	}
	return grammar, nil
}

func ParseFile(filename string, debug bool) (*Parser, *Grammar, error) {
	file, err := os.Open(filename)
	if err != nil {
		return nil, nil, err
	}
	defer file.Close()
	parser := NewParserFromReader(file, filename, debug)
	grammar, err := parser.Parse()
	return parser, grammar, err
}
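// A usage sketch of the top-level API (hypothetical; exampleParseGrammar is
// not part of the original file): parse a grammar file and dump its rules.
// Grammar and Rule are defined elsewhere in this package.
func exampleParseGrammar(filename string) {
	parser, grammar, err := ParseFile(filename, false)
	if err != nil {
		fmt.Fprintln(os.Stderr, err)
		if parser != nil {
			// All parse errors encountered so far are kept on the parser.
			for _, parseError := range parser.Errors {
				fmt.Fprintln(os.Stderr, parseError.Error())
			}
		}
		return
	}
	if grammar == nil {
		return // Empty input: no rules.
	}
	for _, rule := range grammar.Rules {
		fmt.Printf("rule: %s\n", rule.Name)
	}
}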