package main

import (
    "fmt"
    "io"
    "os"
    "text/scanner"
    "unicode"
)
// Value is the lexical value of a lexer token.
// It is an alias for string in the case of ll1.
type Value = string

// Position is a position within a source file. Since the lexer is based on
// text/scanner, we use that package's Position.
type Position = scanner.Position
// TokenKind is the kind or type of a token.
// It has rune as its underlying type so that one-character tokens can be
// supported directly. TokenKindEOF is scanner.EOF (-1) cast to rune, and the
// other scanner-based token kinds count down from scanner.Ident (-2).
type TokenKind rune
// NoTokenKind means "no token kind", i.e. no token.
const NoTokenKind TokenKind = TokenKind(0)

// TokenKindEOF means the end of the input.
const TokenKindEOF TokenKind = TokenKind(scanner.EOF)

// TokenKindError means a parsing or lexing error was encountered.
//line ll1.parser.go.tpl:80
const TokenKindError TokenKind = TokenKind(-21)
// Scanner-based token kinds
const (
    TokenKindIdent     TokenKind = TokenKind(scanner.Ident)
    TokenKindInt       TokenKind = TokenKind(scanner.Int)
    TokenKindFloat     TokenKind = TokenKind(scanner.Float)
    TokenKindChar      TokenKind = TokenKind(scanner.Char)
    TokenKindString    TokenKind = TokenKind(scanner.String)
    TokenKindRawString TokenKind = TokenKind(scanner.RawString)
    TokenKindComment   TokenKind = TokenKind(scanner.Comment)
)
// Grammar-based token kinds
const (
    TokenKindEpsilon  TokenKind = TokenKind('ε')
    TokenKindOr       TokenKind = TokenKind('|')
    TokenKindArrow    TokenKind = TokenKind('→')
    TokenKindStar     TokenKind = TokenKind('*')
    TokenKindOptional TokenKind = TokenKind('?')
    TokenKindList     TokenKind = TokenKind('+')
    TokenKindRuleEnd  TokenKind = TokenKind('.')
)
// String converts the token kind to a string representation.
//line ll1.parser.go.tpl:86
func (tk TokenKind) String() string {
    return scanner.TokenString(rune(tk))
}
// Token is the result of a single lexical analysis step by the lexer.
//line ll1.parser.go.tpl:109
type Token struct {
    Position  // Position in the source where the token was found.
    TokenKind // Kind of the token.
    Value     // Value of the token.
}

// MakeToken makes a token with the given position, kind and value.
//line ll1.parser.go.tpl:118
func MakeToken(pos Position, typ TokenKind, val Value) Token {
    return Token{pos, typ, val}
}
// Lexer performs the lexical analysis of the input.
//line ll1.parser.go.tpl:124
type Lexer struct {
    // Embed scanner.Scanner.
    scanner.Scanner
    Filename string
}

// NewLexerFromReader creates a new lexer for the given parser and input.
//line ll1.parser.go.tpl:133
func NewLexerFromReader(parser *Parser, reader io.Reader, filename string) *Lexer {
    lexer := &Lexer{}
    lexer.Filename = filename
    lexer.Scanner.Init(reader)
    lexer.Scanner.Mode = scanner.GoTokens
    lexer.Scanner.Error = func(s *scanner.Scanner, msg string) {
        parser.Panicf("%s: scanner error: %s, %s", s.Position, s.TokenText(), msg)
    }
    // XXX: needs to be generated from the identifier rule in the syntax!
    lexer.Scanner.IsIdentRune = func(ch rune, i int) bool {
        if i == 0 {
            return unicode.IsLetter(ch)
        }
        return unicode.IsLetter(ch) ||
            unicode.IsNumber(ch) ||
            ch == '_' ||
            ch == '-'
    }
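    // Under this predicate an input like "rule-name_2" lexes as a single
    // identifier, while "2rule" does not, since the first rune must be a
    // letter. (Illustrative examples, not taken from the original source.)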
    return lexer
}
//line ll1.parser.go.tpl:155
func (lex *Lexer) Lex() Token {
    scanned := lex.Scanner.Scan()
    pos := lex.Scanner.Position
    pos.Filename = lex.Filename
    value := lex.Scanner.TokenText()
    // Get rid of the surrounding quotes.
    if scanned == scanner.Char ||
        scanned == scanner.String ||
        scanned == scanner.RawString {
        value = value[1 : len(value)-1]
    }
    token := Token{
        TokenKind: TokenKind(scanned),
        Value:     value,
        Position:  pos,
    }
    return token
}
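
// lexAll is an illustrative sketch (not part of the original source) of how
// Lex is meant to be driven: keep scanning until a TokenKindEOF token comes
// back.
func lexAll(lexer *Lexer) []Token {
    var tokens []Token
    for {
        token := lexer.Lex()
        tokens = append(tokens, token)
        if token.TokenKind == TokenKindEOF {
            return tokens
        }
    }
}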
// Parser parses LL1 grammars, not Beo itself.
type Parser struct {
    reader   io.Reader
    scanner  scanner.Scanner
    current  Token
    Errors   []ParserError
    Filename string
    Debug    io.Writer
}

func NewParserFromReader(reader io.Reader, filename string, debug bool) *Parser {
    parser := &Parser{}
    parser.reader = reader
    parser.scanner.Init(parser.reader)
    parser.Filename = filename
    parser.current.TokenKind = NoTokenKind
    parser.Debug = nil
    if debug {
        parser.Debug = os.Stderr
    }
    parser.scanner.Mode = scanner.GoTokens
    parser.scanner.Error = func(s *scanner.Scanner, msg string) {
        parser.Panicf("%s: scanner error: %s, %s", s.Position, s.TokenText(), msg)
    }
    parser.scanner.IsIdentRune = func(ch rune, i int) bool {
        if i == 0 {
            return unicode.IsLetter(ch)
        }
        return unicode.IsLetter(ch) ||
            unicode.IsNumber(ch) ||
            ch == '_' ||
            ch == '-'
    }

    return parser
}
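
// Illustrative usage sketch (assumes the caller imports "strings"; not part
// of the original source):
//
//	parser := NewParserFromReader(strings.NewReader(input), "input.ll1", false)
//	grammar, err := parser.Parse()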
func (p *Parser) Lex() Token {
    scanned := p.scanner.Scan()
    pos := p.scanner.Position
    pos.Filename = p.Filename
    value := p.scanner.TokenText()
    // Get rid of the surrounding quotes.
    if scanned == scanner.Char ||
        scanned == scanner.String ||
        scanned == scanner.RawString {
        value = value[1 : len(value)-1]
    }
    // Use a keyed literal: scanned is a rune and must be converted to a
    // TokenKind, and the fields are not in positional order.
    token := Token{
        Position:  pos,
        TokenKind: TokenKind(scanned),
        Value:     value,
    }
    p.Debugf("%s: lexed %s\n", p.Filename, token)
    return token
}
func (p *Parser) Advance() {
    token := p.Lex()
    p.current = token
}

// ParserError is the error type the parser panics with on errors that would
// prevent it from making progress.
type ParserError struct {
    *Parser
    *Token
    Chain error
}

func (pe ParserError) Error() string {
    return pe.Token.MakeError(pe.Chain.Error()).Error()
}

func (parser *Parser) Errorf(message string, args ...interface{}) ParserError {
    err := fmt.Errorf(message, args...)
    pe := ParserError{Parser: parser, Token: &parser.current, Chain: err}
    parser.Errors = append(parser.Errors, pe)
    return pe
}

func (parser *Parser) Panicf(message string, args ...interface{}) {
    pe := parser.Errorf(message, args...)
    panic(pe)
}

func (p *Parser) Debugf(message string, args ...interface{}) {
    if p.Debug != nil {
        // Spread args with ...; passing the slice as a single argument would
        // print it as one value.
        fmt.Fprintf(p.Debug, message, args...)
    }
}
/* Require looks at the current token and advances the lexer if the token is
of any of the token kinds given in kinds. In that case it returns the accepted
token and advances the parser. Otherwise, it calls parser.Panicf. */
func (parser *Parser) Require(kinds ...rune) Token {
    parser.Debugf("Require: %v\n", kinds)
    if parser.current.TokenKind == NoTokenKind {
        parser.Advance()
    }

    expected := ""
    sep := ""
    for _, kind := range kinds {
        if TokenKind(kind) == parser.current.TokenKind {
            accepted := parser.current
            parser.Advance()
            return accepted
        }
        expected = fmt.Sprintf("%s%s%s", expected, sep, scanner.TokenString(kind))
        sep = ", "
    }

    parser.Panicf("error: expected one of the following: %s", expected)
    return Token{}
}
// NextIs needs a pointer receiver: it may call Advance, and with a value
// receiver it would only advance a copy of the parser's scanner state.
func (parser *Parser) NextIs(kinds ...rune) bool {
    parser.Debugf("NextIs: %v\n", kinds)
    if parser.current.TokenKind == NoTokenKind {
        parser.Advance()
    }
    for _, kind := range kinds {
        if TokenKind(kind) == parser.current.TokenKind {
            return true
        }
    }
    return false
}
/*
Sequence    -> Element Sequence | .
Element     -> Parenthesis | Name | Literal .
Parenthesis -> '(' Definition ')' .
*/
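// For example (illustrative input, not taken from the source), the definition
//
//	Term | Expression '+' Term
//
// is an Alternates of two Sequences, and a parenthesized group such as
// ( '+' Term ) parses as a nested sub-Definition.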
func (p *Parser) NextIsElement() bool {
    return p.NextIs('(', scanner.Ident, scanner.Char, scanner.String)
}

func (p *Parser) NextIsOperator() bool {
    return p.NextIs('+', '*')
}
func (p *Parser) ParseElement() (Element, error) {
    switch {
    case p.NextIs('('):
        return p.ParseParenthesis()
    case p.NextIs(scanner.Ident):
        ident := p.Require(scanner.Ident)
        if ident.Value == "epsilon" || ident.Value == "ε" {
            ident.Value = "ε"
            return Epsilon{element{token: ident}}, nil
        }
        // An upper case initial means a nonterminal, otherwise a terminal.
        if unicode.IsUpper(([]rune(ident.Value))[0]) {
            return Nonterminal{element{token: ident}, nil}, nil
        }
        return Terminal{element{token: ident}}, nil
    case p.NextIs(scanner.Char, scanner.String):
        literal := p.Require(scanner.Char, scanner.String)
        return Literal{Terminal{element{token: literal}}}, nil
    default:
        p.Panicf("error: unexpected token for grammar Element: %s", p.current)
    }
    return nil, nil
}
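
// Illustrative mapping (not from the source): "Expression" yields a
// Nonterminal, "ident" a Terminal, "'+'" a Literal, and "epsilon" or "ε"
// an Epsilon.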
func (p *Parser) ParseSequence() (*Sequence, error) {
    sequence := &Sequence{}
    for p.NextIsElement() {
        element, err := p.ParseElement()
        p.Debugf("Sequence parsed element: %v\n", element)
        if err != nil {
            return sequence, err
        }
        sequence.Elements = append(sequence.Elements, element)
    }
    return sequence, nil
}
func (p *Parser) ParseParenthesis() (*Definition, error) {
    // Parse the sub definition between the parentheses.
    p.Require('(')
    sub, err := p.ParseDefinition()
    p.Require(')')
    return sub, err
}
func (p *Parser) ParseAlternates() (*Definition, error) {
    alternates := &Alternates{}
    for p.NextIsElement() {
        sequence, err := p.ParseSequence()
        if err != nil {
            return alternates, err
        }
        alternates.Sequences = append(alternates.Sequences, *sequence)
        if !p.NextIs('.', scanner.RawString, scanner.EOF, ')') {
            p.Require('|')
        }
    }
    return alternates, nil
}
func (p *Parser) ParseDefinition() (*Definition, error) {
    return p.ParseAlternates()

    /*if p.NextIs('(') {
        sub, err := p.ParseParenthesis()
        if err != nil {
            return nil, err
        }
    }*/
}
func (p *Parser) ParseRule() (*Rule, error) {
    rule := &Rule{}
    name := p.Require(scanner.Ident)
    // XXX: require a → or -> separator.
    // This is a hack, and needs to be moved into the lexer.
    if p.NextIs('→') {
        p.Require('→')
    } else {
        p.Require('-')
        p.Require('>')
    }
    definition, err := p.ParseDefinition()
    if err != nil {
        return rule, err
    }
    if p.NextIs(scanner.RawString) {
        rs := p.Require(scanner.RawString)
        rule.Template = rs.Value
    }
    // Require the '.' terminator.
    p.Require('.')
    rule.Name = name.Value
    rule.Definition = *definition
    return rule, nil
}
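
// For example (illustrative), a rule with an optional template raw string:
//
//	Statement → name ':' Expression `emit {{.Name}}` .
//
// The raw string between the definition and the '.' terminator ends up in
// rule.Template.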
func (p *Parser) ParseRules() ([]*Rule, error) {
    var rules []*Rule
    for p.current.TokenKind != TokenKindEOF {
        rule, err := p.ParseRule()
        if err != nil {
            return rules, err
        }
        appended := false
        for i, existing := range rules {
            if existing.Name == rule.Name {
                // Add to the existing definition instead.
                existing.Definition.Sequences = append(existing.Definition.Sequences, rule.Definition.Sequences...)
                rules[i] = existing
                appended = true
                break
            }
        }
        if !appended {
            rules = append(rules, rule)
        }
    }
    return rules, nil
}
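
// For example (illustrative), the two rules
//
//	A → b .
//	A → c .
//
// are merged into a single rule equivalent to A → b | c .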
func (p *Parser) Parse() (g *Grammar, e error) {
    defer func() {
        thrown := recover()
        if thrown != nil {
            g = nil
            // Use a checked type assertion: a panic value that is not an
            // error is re-raised unchanged rather than failing the assertion.
            err, ok := thrown.(error)
            if !ok {
                panic(thrown)
            }
            e = err
            return
        }
    }()
    p.Advance()
    rules, err := p.ParseRules()
    if err != nil {
        return nil, err
    }
    if len(rules) < 1 {
        return nil, nil
    }
    grammar := &Grammar{
        Top:   rules[0],
        Rules: rules,
    }

    return grammar, nil
}
func ParseFile(filename string, debug bool) (*Parser, *Grammar, error) {
    file, err := os.Open(filename)
    // Return early if the file cannot be opened, before deferring Close.
    if err != nil {
        return nil, nil, err
    }
    defer file.Close()
    parser := NewParserFromReader(file, filename, debug)
    grammar, err := parser.Parse()
    return parser, grammar, err
}
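
// Illustrative entry point (assumes a grammar file name; not part of the
// original source):
//
//	parser, grammar, err := ParseFile("grammar.ll1", true)
//	if err != nil && parser != nil {
//		for _, perr := range parser.Errors {
//			fmt.Fprintln(os.Stderr, perr)
//		}
//	}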