
WIP: parser works, but it still has end-of-file problems.

Beoran committed 2 years ago (commit e4f52ec406)
8 changed files with 257 additions and 91 deletions
  1. ast/ast.go (+12 -1)
  2. cmd/ll1lex/flexer.go (+0 -60)
  3. cmd/ll1lex/main.go (+135 -0)
  4. flexgen/flexer.ll1lex (+0 -0, renamed)
  5. flexgen/flexer_lexer.go (+18 -1)
  6. flexgen/flexer_parser.go (+17 -17)
  7. grammar/grammar.go (+25 -1)
  8. parser/parser.go (+50 -11)

+ 12 - 1
ast/ast.go

@@ -141,7 +141,18 @@ func Walk(ast Astro, walker func(node Astro) Astro) Astro {
 }
 
 func (ast BasicAst) String() string {
-	return fmt.Sprintf("Ast %s: %s", ast.Species.String(), ast.token.Value().String())
+	specname := "<no spec>"
+	if ast.Species != nil {
+		specname = ast.Species.String()
+	}
+	tokval := "<no token>"
+	if ast.token != nil {
+		tokval = ast.token.Text()
+		/* if ast.token.Value() != nil {
+			tokval = ast.token.Value().String()
+		} */
+	}
+	return fmt.Sprintf("Ast %s: %s", specname, tokval)
 }
 
 func Display(ast Astro) {
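
The guarded String above avoids panics on partially built nodes. A minimal, self-contained sketch of the same pattern (the interfaces here are stand-ins for illustration, not the actual ll1 Species and Token types):

    package main

    import "fmt"

    // Hypothetical stand-ins for grammar.Species and common.Token.
    type Species interface{ String() string }
    type Token interface{ Text() string }

    type BasicAst struct {
    	Species Species
    	token   Token
    }

    // String mirrors the nil guards introduced in this commit: a zero-valued
    // node prints placeholders instead of dereferencing nil interfaces.
    func (ast BasicAst) String() string {
    	specname := "<no spec>"
    	if ast.Species != nil {
    		specname = ast.Species.String()
    	}
    	tokval := "<no token>"
    	if ast.token != nil {
    		tokval = ast.token.Text()
    	}
    	return fmt.Sprintf("Ast %s: %s", specname, tokval)
    }

    func main() {
    	fmt.Println(BasicAst{}) // Ast <no spec>: <no token>
    }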

+ 0 - 60
cmd/ll1lex/flexer.go

@@ -1,60 +0,0 @@
-package main
-
-import "os"
-import "io"
-import "path/filepath"
-import "fmt"
-import "flag"
-import "strings"
-import "src.eruta.nl/beoran/ll1/flexer"
-
-func help() {
-	fmt.Printf("flexer usage: flexer [-o output.go] input.flex\n")
-	fmt.Printf("\nGenerates a lexer in Go language.\n")
-	os.Exit(1)
-}
-
-func showError(in string, err interface{}) {
-	fmt.Fprintf(os.Stderr, "%s: error: %s\n", in, err)
-}
-
-func fatal(err error, code int) {
-	fmt.Fprintf(os.Stderr, "flexer: error: %s\n", err)
-	os.Exit(code)
-}
-
-func main() {
-	outPtr := flag.String("o", "", "")
-	flag.Parse()
-	if flag.NArg() < 1 {
-		help()
-	}
-	inName := flag.Arg(0)
-	inFile, err := os.Open(inName)
-	if err != nil {
-		fatal(err, 1)
-	}
-	defer inFile.Close()
-	buf, err := io.ReadAll(inFile)
-	if err != nil {
-		fatal(err, 2)
-	}
-	outName := inName + ".go"
-	if outPtr != nil && *outPtr != "" {
-		outName = *outPtr
-	}
-	abs, err := filepath.Abs(outName)
-	if err != nil {
-		fatal(err, 3)
-	}
-	parts := filepath.SplitList(abs)
-	pkg := strings.TrimSuffix(filepath.Base(inName), filepath.Ext(inName))
-	if len(parts) > 2 {
-		pkg = parts[len(parts)-1]
-	}
-	tokens := flexer.LexFlexerInputString(inName, string(buf))
-	fmt.Printf("package %s\n\n", pkg)
-	for _, tok := range tokens {
-		fmt.Printf("tok: %v\n", tok)
-	}
-}

+ 135 - 0
cmd/ll1lex/main.go

@@ -0,0 +1,135 @@
+// ll1lex is a lexer generator based on regular expressions
+package main
+
+import "flag"
+import "os"
+
+// import "text/template"
+import "fmt"
+
+import "src.eruta.nl/beoran/ll1/common"
+import "src.eruta.nl/beoran/ll1/flexgen"
+import "src.eruta.nl/beoran/ll1/parser"
+import "src.eruta.nl/beoran/ll1/grammar"
+import "src.eruta.nl/beoran/ll1/ast"
+
+func showUsage() {
+	fmt.Fprintf(flag.CommandLine.Output(),
+		"%s: %s [options] input_file.ll1lex\n",
+		os.Args[0], os.Args[0])
+
+	fmt.Fprintf(flag.CommandLine.Output(),
+		"\n  [options] may be one of the following:\n\n")
+	flag.PrintDefaults()
+	fmt.Fprintf(flag.CommandLine.Output(), "\n")
+}
+
+const helpText = `
+ll1lex is a lexer generator based on regular expressions.
+
+Usage:
+    ll1lex [options] input_file.ll1lex
+
+The [options] are:
+
+    -p name
+        Name of the package to generate code for.
+    -help -h
+        Shows the help page.
+    -o file
+        Name of output file to overwrite.
+    -v
+        Be more verbose. Shows the scanned tokens as well.
+`
+
+func showHelp() {
+	fmt.Fprintf(flag.CommandLine.Output(), "\n%s\n", helpText)
+}
+
+// Ll1Lex contains the options and variables of the ll1lex program.
+type Ll1Lex struct {
+	packageName string
+	outName     string
+	usedName    string
+	appendName  string
+	help        bool
+	verbose     bool
+	fout        *os.File
+	tokens      []common.Token
+	parser      *parser.Parser
+	grammar     *grammar.Grammar
+}
+
+func main() {
+	var err error
+	flag.Usage = showUsage
+	ll1lex := Ll1Lex{}
+
+	flag.BoolVar(&ll1lex.verbose, "v", false, "Be more verbose. Shows the scanned tokens as well.")
+	flag.StringVar(&ll1lex.outName, "o", "", "Name of output `file` to overwrite.")
+	flag.StringVar(&ll1lex.packageName, "p", "lexer", "Name of the `package` to generate code for.")
+	flag.BoolVar(&ll1lex.help, "h", false, "Shows the help page.")
+	flag.BoolVar(&ll1lex.help, "help", false, "Shows the help page.")
+	flag.Parse()
+
+	if ll1lex.help {
+		showUsage()
+		showHelp()
+		os.Exit(1)
+	}
+
+	if len(flag.Args()) < 1 {
+		showUsage()
+		os.Exit(1)
+		return
+
+	ll1lexName := flag.Arg(0)
+	// Parse lexer description
+	tokens, err := flexgen.LexFileName(ll1lexName)
+
+	if err != nil {
+		fmt.Fprintf(os.Stderr, "%v\n", err)
+		os.Exit(1) // don't continue with an empty token stream
+	}
+
+	if ll1lex.verbose {
+		for _, tok := range tokens {
+			fmt.Fprintf(os.Stderr, "%s\n", tok)
+		}
+	}
+
+	ll1lex.parser = flexgen.MakeFlexerParser()
+	if ll1lex.verbose {
+		fmt.Fprintf(os.Stderr, "Parsing\n")
+	}
+
+	err = ll1lex.parser.Parse(tokens)
+	if err != nil {
+		fmt.Fprintf(os.Stderr, "%s\n", err)
+		for _, e := range ll1lex.parser.Errors {
+			fmt.Fprintf(os.Stderr, "error: %v\n", e)
+		}
+		os.Exit(2)
+	}
+
+	// Determine output file
+	ll1lex.usedName = ll1lex.outName
+	if ll1lex.outName == "" {
+		ll1lex.fout = os.Stdout
+	} else {
+		ll1lex.fout, err = os.Create(ll1lex.outName)
+		if err != nil {
+			fmt.Fprintf(os.Stderr, "Could not open output file %s: %s\n", ll1lex.outName, err)
+			os.Exit(3) // fout would be nil below
+		}
+		defer ll1lex.fout.Close()
+	}
+	if ll1lex.verbose {
+		fmt.Fprintf(os.Stderr, "Dumping\n")
+	}
+	i := ll1lex.parser.Index
+	l := len(tokens)
+	fmt.Fprintf(ll1lex.fout, "%s\nTokens:%d/%d\n", ast.Dump(ll1lex.parser.Result), i, l)
+	if i < l {
+		fmt.Fprintf(os.Stderr, "%s\n", tokens[i])
+	}
+}
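
The new main wires lexing, parsing, and dumping together. The same pipeline can also be driven from Go directly; a hedged sketch using only the APIs this commit exposes (flexgen.LexFileName, flexgen.MakeFlexerParser, Parser.Parse, Parser.Result, ast.Dump; the file name is an example):

    package main

    import (
    	"fmt"
    	"os"

    	"src.eruta.nl/beoran/ll1/ast"
    	"src.eruta.nl/beoran/ll1/flexgen"
    )

    func main() {
    	// Lex the lexer description file.
    	tokens, err := flexgen.LexFileName("flexer.ll1lex")
    	if err != nil {
    		fmt.Fprintln(os.Stderr, err)
    		os.Exit(1)
    	}
    	// Parse the token stream with the flexer grammar.
    	p := flexgen.MakeFlexerParser()
    	if err := p.Parse(tokens); err != nil {
    		fmt.Fprintln(os.Stderr, err)
    		os.Exit(2)
    	}
    	// Dump the resulting parse tree.
    	fmt.Println(ast.Dump(p.Result))
    }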

+ 0 - 0
flexgen/flexer.flex → flexgen/flexer.ll1lex


+ 18 - 1
flexgen/flexer_lexer.go

@@ -1,5 +1,8 @@
 package flexgen
 
+import "os"
+import "io"
+
 import "src.eruta.nl/beoran/ll1/common"
 import "src.eruta.nl/beoran/ll1/flexer"
 
@@ -44,7 +47,7 @@ func Check(err error) {
 	}
 }
 
-func LexcommonInputString(name, input string) []common.Token {
+func LexInputString(name, input string) []common.Token {
 	f := flexer.NewFlexer(name, input)
 	Check(f.Lexeme(FlexgenKindWhitespace, `[ \t\n\r]+`, "", nil))
 	Check(f.Lexeme(FlexgenKindLineComment, `//[^\n\r]+[\n\r]+`, "", nil))
@@ -62,3 +65,17 @@ func LexcommonInputString(name, input string) []common.Token {
 
 	return flexer.LexAll(f, skipKinds...)
 }
+
+func LexFileName(name string) ([]common.Token, error) {
+	fin, err := os.Open(name)
+	if err != nil {
+		return nil, err
+	}
+	defer fin.Close()
+	buf, err := io.ReadAll(fin)
+	if err != nil {
+		return nil, err
+	}
+	input := string(buf)
+	return LexInputString(name, input), nil
+}
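
Besides LexFileName, the renamed LexInputString lexes from memory, which is handy in tests. A sketch (the input literal is hypothetical and only meant to resemble the flexer.ll1lex syntax):

    package main

    import (
    	"fmt"

    	"src.eruta.nl/beoran/ll1/flexgen"
    )

    func main() {
    	// Hypothetical inline lexer description.
    	src := `word -> "pattern" .`
    	for _, tok := range flexgen.LexInputString("inline.ll1lex", src) {
    		fmt.Println(tok)
    	}
    }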

+ 17 - 17
flexgen/flexer_parser.go

@@ -1,26 +1,26 @@
 package flexgen
 
-import . "src.eruta.nl/beoran/ll1/common"
-import "src.eruta.nl/beoran/ll1/flexer"
-import "src.eruta.nl/beoran/ll1/ast"
 import . "src.eruta.nl/beoran/ll1/grammar"
 import "src.eruta.nl/beoran/ll1/parser"
 
 func MakeFlexerGrammar() *Grammar {
-	g := &Grammar{}
-	keywordsRef := g.AddRule(Ref("keywordsRef", "keywords"))
-	dot := g.AddRule(Term("dot", FlexgenKindDot))
-	terminal := g.AddRule(Term("terminal", FlexgenKindTerminal))
-	arrow := g.AddRule(Term("arrow", FlexgenKindArrow))
-	keyword := g.AddRule(Term("arrow", FlexgenKindKeyword))
-	keywords := g.AddRule(Opt("keywords", "", And(keyword, keywordsRef)))
-	literalString := g.AddRule(Term("literalString", FlexgenKindLiteralString))
-	literalRaw := g.AddRule(Term("literalRaw", FlexgenKindLiteralRaw))
-	literalChar := g.AddRule(Term("literalChar", FlexgenKindLiteralChar))
-	pattern := g.AddRule(Alt("pattern", "", literalString, literalRaw, literalChar))
-	lexeme := g.AddRule(Seq("lexeme", "", terminal, arrow, pattern, keywords, dot))
-	lexemes := g.AddRule(Seq("lexemes", "", lexeme, End{}))
-	top := g.AddRule(Alt("top", "", lexemes))
+	g := NewGrammar()
+	dot := g.Term("dot", FlexgenKindDot)
+	terminal := g.Term("terminal", FlexgenKindTerminal)
+	arrow := g.Term("arrow", FlexgenKindArrow)
+	action := g.Term("action", FlexgenKindAction)
+	keyword := g.Term("keyword", FlexgenKindKeyword)
+	keywordsRef := g.Ref("keywordsRef", "keywords")
+	keywords := g.Opt("keywords", "", And(keyword, keywordsRef))
+	literalString := g.Term("literalString", FlexgenKindLiteralString)
+	literalRaw := g.Term("literalRaw", FlexgenKindLiteralRaw)
+	literalChar := g.Term("literalChar", FlexgenKindLiteralChar)
+	pattern := g.Alt("pattern", "", literalString, literalRaw, literalChar)
+	optAction := g.Opt("optAction", "", action)
+	lexeme := g.Seq("lexeme", "", terminal, arrow, pattern, keywords, optAction, dot)
+	lexemesRef := g.Ref("lexemesRef", "lexemes")
+	lexemes := g.Opt("lexemes", "", And(lexeme, lexemesRef))
+	top := g.Seq("top", "", lexemes, End{})
 	g.Top = top
 	return g
 }
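
Written out as EBNF, the grammar built above amounts to (ε marks the empty alternative of an Opt rule):

    top       ::= lexemes EOF
    lexemes   ::= lexeme lexemes | ε
    lexeme    ::= terminal '->' pattern keywords optAction '.'
    keywords  ::= keyword keywords | ε
    optAction ::= action | ε
    pattern   ::= literalString | literalRaw | literalChar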

+ 25 - 1
grammar/grammar.go

@@ -266,6 +266,16 @@ func (s Set) Contains(e Terminal) bool {
 	return ok
 }
 
+func (s Set) ContainsKind(k Kind) bool {
+	for _, kind := range s.ToKinds() {
+		if kind == k {
+			return true
+		}
+	}
+	return false
+}
+
 func (s *Set) Add(e Terminal) bool {
 	name := e.Name()
 	_, ok := (*s)[name]
@@ -328,7 +338,7 @@ func (s Set) String() string {
 	return strings.Join(aid, " ")
 }
 
-func (s Set) ToTokenKinds() []Kind {
+func (s Set) ToKinds() []Kind {
 	if len(s) == 0 {
 		return []Kind{}
 	}
@@ -350,6 +360,12 @@ type Grammar struct {
 	Errors []error
 }
 
+func NewGrammar() *Grammar {
+	g := &Grammar{}
+	g.NamedRules = map[string]Rule{}
+	return g
+}
+
 func (g Grammar) String() string {
 	res := "Top → " + g.Top.Name() + "\n"
 	for _, r := range g.Rules {
@@ -414,10 +430,18 @@ func (g *Grammar) Alt(name, template string, seqs ...Rule) Rule {
 	return g.AddRule(Alt(name, template, seqs...))
 }
 
+func (g *Grammar) Seq(name, template string, seqs ...Rule) Rule {
+	return g.AddRule(Seq(name, template, seqs...))
+}
+
 func (g *Grammar) Term(name string, kind Kind) Rule {
 	return g.AddRule(Term(name, kind))
 }
 
+func (g *Grammar) Opt(name, template string, rule Rule) Rule {
+	return g.AddRule(Opt(name, template, rule))
+}
+
 func Ref(g *Grammar, name, to string) Reference {
 	ref := Reference{}
 	ref.Nonterminal.name = name
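
The new NewGrammar, Seq, and Opt helpers make grammar construction read declaratively, as the flexer_parser.go diff above shows. A hedged sketch for a comma-separated list of numbers (assuming the grammar package is dot-imported as in flexer_parser.go; kindNumber and kindComma are hypothetical token kinds defined elsewhere):

    g := NewGrammar()
    number := g.Term("number", kindNumber)
    comma := g.Term("comma", kindComma)
    itemsRef := g.Ref("itemsRef", "items")
    rest := g.Seq("rest", "", comma, number, itemsRef)
    items := g.Opt("items", "", rest) // items ::= (comma number items)?
    g.Top = g.Seq("top", "", number, items, End{})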

+ 50 - 11
parser/parser.go

@@ -9,15 +9,15 @@ import . "src.eruta.nl/beoran/ll1/flexer"
 
 // A TokenMapper maps a token to a Species.
 type TokenMapper interface {
-	// Returns the pecies to use for the token. If nil, the token
+	// Returns the species to use for the grammar rule. If nil, the rule
 	// may be skipped.
-	Map(t Token) Species
+	Map(r Rule) Species
 }
 
 type DefaultMapper struct{}
 
-func (DefaultMapper) Map(t Token) Species {
-	return MakeSpecies("default")
+func (DefaultMapper) Map(r Rule) Species {
+	return MakeSpecies(r.Name())
 }
 
 type Parser struct {
@@ -84,7 +84,7 @@ func (p *Parser) ParseTerminal(term Terminal) error {
 	if tok == nil {
 		return p.MakeError("Expected token kind: %d", term.Kind)
 	}
-	spec := p.Map(tok)
+	spec := p.Map(term)
 	NewChild(p.Now, spec, tok)
 	// p.Now should already be of suitable kind to accept a terminal token child.
 	return nil
@@ -108,8 +108,16 @@ func (p *Parser) PopUp() {
 	}
 }
 
+func (p *Parser) ParseReference(ref Reference) error {
+	rule, err := ref.Resolve()
+	if err != nil {
+		return err
+	}
+	return p.ParseRule(rule)
+}
+
 func (p *Parser) ParseSequence(seq Sequence) error {
-	spec := p.Map(p.Token())
+	spec := p.Map(seq)
 	p.Now = NewChild(p.Now, spec, p.Token())
 	defer p.PopUp()
 	for _, rule := range seq.Rules {
@@ -122,12 +130,12 @@ func (p *Parser) ParseSequence(seq Sequence) error {
 	return nil
 }
 
-func (p *Parser) ParseAlternates(alt Alternates) error {
-	spec := p.Map(p.Token())
-	p.Now = NewChild(p.Now, spec, p.Token())
-	defer p.PopUp()
-	errors := []error{}
+/*
+func (p *Parser) ParseOptionalAlternates(alt Alternates) error {
 	for _, rule := range alt.Rules {
+		if IsReference(rule) {
+
+		}
 		err := p.ParseRule(rule)
 		if err != nil {
 			errors = append(errors, err)
@@ -139,6 +147,37 @@ func (p *Parser) ParseAlternates(alt Alternates) error {
 	p.Errors = append(p.Errors, errors...)
 	return p.MakeError("Could not parse alternate.")
 }
+*/
+
+func (p *Parser) ParseAlternates(alt Alternates) error {
+	spec := p.Map(alt)
+	p.Now = NewChild(p.Now, spec, p.Token())
+	defer p.PopUp()
+	errors := []error{}
+	hasEpsilon := false
+
+	for _, rule := range alt.Rules {
+		if IsEpsilon(rule) {
+			hasEpsilon = true
+		} else {
+			first, _ := rule.FirstSet()
+			if first.ContainsKind(p.Token().Kind()) {
+				err := p.ParseRule(rule)
+				if err != nil {
+					errors = append(errors, err)
+				} else { // this alternate was OK here.
+					return nil
+				}
+			}
+		}
+	}
+	if hasEpsilon { // No match is ok.
+		return nil
+	}
+	// If we get here no alternate was ok.
+	p.Errors = append(p.Errors, errors...)
+	return p.MakeError("Could not parse alternate.")
+}
 
 func (p *Parser) ParseRule(r Rule) error {
 	p.Recursion++
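
The rewritten ParseAlternates is the heart of this commit: instead of blindly trying every alternate and collecting errors, it peeks at the current token and only descends into an alternate whose FIRST set contains that token's kind, falling back to an empty match when one of the alternates is epsilon. A self-contained sketch of that decision (stand-in types, not the ll1 ones):

    package main

    import "fmt"

    // A stand-in token kind; not the ll1 common.Kind type.
    type kind int

    // alt models one alternate of an Alternates rule: its FIRST set and
    // whether it can derive the empty string.
    type alt struct {
    	first   map[kind]bool
    	epsilon bool
    }

    // choose picks the alternate to descend into for lookahead k. It returns
    // (-1, true) when no FIRST set matches but an epsilon alternate makes an
    // empty match acceptable, and (-1, false) on a genuine mismatch.
    func choose(alts []alt, k kind) (int, bool) {
    	hasEpsilon := false
    	for i, a := range alts {
    		if a.epsilon {
    			hasEpsilon = true
    			continue
    		}
    		if a.first[k] {
    			return i, true
    		}
    	}
    	return -1, hasEpsilon
    }

    func main() {
    	alts := []alt{
    		{first: map[kind]bool{1: true}},
    		{epsilon: true},
    	}
    	fmt.Println(choose(alts, 1)) // 0 true
    	fmt.Println(choose(alts, 2)) // -1 true (epsilon: match nothing)
    }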