
WIP: AST, Grammar, Parser and Flexgen.

Beoran, 2 years ago
commit 0b47d2829d
9 changed files with 1104 additions and 177 deletions:
  1. ast/ast.go (+233 −0)
  2. common/common.go (+122 −0)
  3. flexer/flexer.go (+64 −96)
  4. flexer/flexer_test.go (+11 −10)
  5. flexgen/flexer_lexer.go (+35 −32)
  6. flexgen/flexer_parser.go (+31 −38)
  7. flexgen/generator.go (+1 −1)
  8. grammar/grammar.go (+431 −0)
  9. parser/parser.go (+176 −0)

+ 233 - 0
ast/ast.go

@@ -0,0 +1,233 @@
+// Abstract Syntax Tree
+package ast
+
+import (
+	"fmt"
+	"strings"
+)
+
+import . "src.eruta.nl/beoran/ll1/common"
+
+// Species describes the kind of an Ast node.
+type Species interface {
+	Self() Species
+	String() string
+}
+
+// BasicSpecies is a basic implementation of a species.
+type BasicSpecies struct {
+	Name string
+}
+
+func (bs BasicSpecies) Self() Species {
+	return bs
+}
+
+func (bs BasicSpecies) String() string {
+	return bs.Name
+}
+
+func MakeSpecies(name string) Species {
+	return BasicSpecies{Name: name}
+}
+
+// Astro is a read-only abstract syntax tree.
+type Astro interface {
+	Value
+	Species
+	Parent() Ast
+	Children() []Ast
+	Token() Token
+}
+
+// Ast is an abstract syntax tree with read/write capabilities
+type Ast interface {
+	Astro
+	SetParent(Ast)
+	AppendChild(child Ast) Ast
+}
+
+// BasicAst is a basic implementation of an AST.
+type BasicAst struct {
+	Species
+	parent   Ast
+	children []Ast
+	token    Token
+}
+
+func (ast BasicAst) Value() Value {
+	if ast.token == nil {
+		return nil
+	}
+	return ast.token.Value()
+}
+
+func AppendChild(parent Ast, child Ast) Ast {
+	basicParent := parent.(*BasicAst)
+	return basicParent.AppendChild(child)
+}
+
+func New(kind Species, parent Ast, children []Ast, token Token) *BasicAst {
+	ast := &BasicAst{Species: kind, parent: parent, token: token}
+	return ast.AppendChildren(children...)
+}
+
+func (ast *BasicAst) AppendChildren(children ...Ast) *BasicAst {
+	for _, child := range children {
+		ast.AppendChild(child)
+	}
+	return ast
+}
+
+func (ast *BasicAst) AppendChild(child Ast) Ast {
+	child.SetParent(ast)
+	ast.children = append(ast.children, child)
+	return ast
+}
+
+func NewChild(ast Ast, spec Species, token Token) Ast {
+	child := New(spec, ast, make([]Ast, 0), token)
+	ast.AppendChild(child)
+	return child
+}
+
+func (ast BasicAst) IsKind(species Species) bool {
+	return ast.Species == species
+}
+
+func (ast BasicAst) IsError() bool {
+	return ast.token.Kind() == ErrorKind
+}
+
+func (ast BasicAst) IsNone() bool {
+	return ast.Species == nil
+}
+
+func (ast BasicAst) Token() Token {
+	return ast.token
+}
+
+func (ast BasicAst) Parent() Ast {
+	return ast.parent
+}
+
+func (ast BasicAst) Children() []Ast {
+	return ast.children
+}
+
+func (ast BasicAst) Self() Species {
+	return ast.Species
+}
+
+func (ast *BasicAst) SetParent(parent Ast) {
+	ast.parent = parent
+}
+
+func (ast BasicAst) Child(index int) Ast {
+	count := len(ast.children)
+	if index < 0 || index >= count {
+		return nil
+	}
+	return ast.children[index]
+}
+
+func Walk(ast Astro, walker func(node Astro) Astro) Astro {
+	if found := walker(ast); found != nil {
+		return found
+	}
+	for _, child := range ast.Children() {
+		if found := Walk(child, walker); found != nil {
+			return found
+		}
+	}
+	return nil
+}
+
+func (ast BasicAst) String() string {
+	if ast.Species == nil || ast.token == nil {
+		return "Ast: empty ast or token"
+	}
+	return fmt.Sprintf("Ast %s: %s", ast.Species.String(), ast.token.Value().String())
+}
+
+func Display(ast Astro) {
+	Walk(ast, func(node Astro) Astro {
+		if node == nil {
+			fmt.Printf("Ast: nil node\n")
+			return nil
+		}
+		fmt.Printf("%s", strings.Repeat("--", Depth(node)))
+		fmt.Printf("Ast: %s\n", node.String())
+		return nil
+	})
+}
+
+func Dump(ast Astro) string {
+	result := ""
+	Walk(ast, func(node Astro) Astro {
+		if node == nil {
+			result += "Ast: nil node\n"
+			return nil
+		}
+		result += strings.Repeat("--", Depth(node))
+		result += fmt.Sprintf("Ast: %s\n", node.String())
+		return nil
+	})
+	return result
+}
+
+func Depth(ast Astro) int {
+	var depth int = 0
+	parent := ast.Parent()
+	for parent != nil {
+		depth++
+		parent = parent.Parent()
+	}
+	return depth
+}
+
+func CountChildren(ast Astro) int {
+	return len(ast.Children())
+}
+
+func IsError(ast Astro) bool {
+	return ast.Token().Kind() == ErrorKind
+}
+
+func Errors(ast Astro) []Astro {
+	res := make([]Astro, 0)
+	Walk(ast, func(node Astro) Astro {
+		if node != nil && IsError(node) {
+			res = append(res, node)
+		}
+		return nil
+	})
+	return res
+}
+
+func EmptyAstArray() []Ast {
+	return make([]Ast, 0)
+}
+
+func NewEmptyAst(species Species) *BasicAst {
+	return NewAstWithToken(species, nil)
+}
+
+func NewAstNone() *BasicAst {
+	return NewEmptyAst(nil)
+}
+
+func NewAstWithToken(species Species, token Token) *BasicAst {
+	return New(species, nil, EmptyAstArray(), token)
+}
+
+// MergeErrors returns all errors in the AST merged into a single error, or nil if there are none.
+func MergeErrors(ast Ast) error {
+	errlist := Errors(ast)
+	if len(errlist) < 1 {
+		return nil
+	}
+	sep := ""
+	res := ""
+	for _, err := range errlist {
+		res = fmt.Sprintf("%s%s%s", res, sep, err)
+		sep = "\n"
+	}
+	return fmt.Errorf("%s", res)
+}
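
A usage sketch for the new ast package (not part of the commit; it assumes the
module paths above resolve and borrows flexer.MakeToken to construct a token):

    package main

    import (
        "fmt"

        "src.eruta.nl/beoran/ll1/ast"
        "src.eruta.nl/beoran/ll1/common"
        "src.eruta.nl/beoran/ll1/flexer"
    )

    func main() {
        // Build a root node without a token and hang one child under it.
        root := ast.NewEmptyAst(ast.MakeSpecies("root"))
        tok := flexer.MakeToken(common.Location{}, common.Kind(1), "hello")
        ast.NewChild(root, ast.MakeSpecies("word"), tok)
        // Dump indents each node with "--" per level of depth.
        fmt.Print(ast.Dump(root))
    }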

+ 122 - 0
common/common.go

@@ -0,0 +1,122 @@
+// Common interfaces and shared functions for the ll1 module.
+package common
+
+import "regexp"
+import "strings"
+import "fmt"
+
+type Location struct {
+	Name *string
+	Line int
+	Col  int
+}
+
+func (p Location) String() string {
+	name := "<input>"
+	if p.Name != nil {
+		name = *p.Name
+	}
+	return fmt.Sprintf("%s:%d:%d:", name, p.Line, p.Col)
+}
+
+// Kind is the kind of token.
+type Kind int
+
+const (
+	SkipKind  Kind = -30000
+	ErrorKind Kind = -31000
+)
+
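+// Action is a callback that runs when a lexeme matches: it receives the
+// lexer, the kind of the matched lexeme and the regexp matches, and
+// returns the tokens to emit, if any.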
+type Action func(f Lexer, k Kind, matches ...string) []Token
+
+// Value is the value of a token, can be string value, integer
+// or some other custom value
+type Value interface {
+	Value() Value
+	String() string
+}
+
+type StringValue string
+
+func (sv StringValue) Value() Value {
+	return sv
+}
+
+func (sv StringValue) String() string {
+	return string(sv)
+}
+
+type ErrorValue struct {
+	Err error
+}
+
+func (ev ErrorValue) Value() Value {
+	return ev
+}
+
+func (ev ErrorValue) String() string {
+	return ev.Err.Error()
+}
+
+type IntValue int64
+
+func (iv IntValue) Value() Value {
+	return iv
+}
+
+func (iv IntValue) String() string {
+	return fmt.Sprintf("%d", iv)
+}
+
+type FloatValue float64
+
+func (fv FloatValue) Value() Value {
+	return fv
+}
+
+func (fv FloatValue) String() string {
+	return fmt.Sprintf("%f", fv)
+}
+
+type Token interface {
+	Location() Location
+	Kind() Kind
+	Text() string
+	Value() Value
+}
+
+type Lexer interface {
+	// Accept will accept a regexp and advance, returning the matches.
+	// Returns nil if no matches were found.
+	Accept(re *regexp.Regexp) []string
+	// Returns the current lexer location.
+	Location() Location
+	// Returns if the lexer is at the end or not.
+	EOF() bool
+
+	// The lexer creates a token with the current lexer location and
+	// the given kind and text.
+	MakeToken(kind Kind, form string, args ...interface{}) Token
+
+	// The lexer creates a token with the current lexer location and
+	// the given kind. The text is taken from the lexer string builder and
+	// that builder is reset.
+	MakeBuilderToken(kind Kind) Token
+
+	// The lexer has a string builder, which can be used to append
+	// strings or runes to and which can be returned and cleared when the
+	// token is complete.
+	Builder() *strings.Builder
+
+	// Lexeme adds a lexeme to the lexer.
+	Lexeme(kind Kind, re, context string, act Action) error
+	// Calls the lexer once.
+	LexOnce() []Token
+
+	// Returns the current lexer context
+	Context() string
+	// Pushes the named context on the lexer context stack
+	PushContext(name string)
+	// Pops the current context from the lexer context stack.
+	PopContext()
+}
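
The Value interface is open for extension. A hypothetical BoolValue, for
illustration only, shows what an implementation must provide:

    package main

    import (
        "fmt"

        "src.eruta.nl/beoran/ll1/common"
    )

    // BoolValue satisfies common.Value; it is not part of this commit.
    type BoolValue bool

    func (bv BoolValue) Value() common.Value { return bv }

    func (bv BoolValue) String() string { return fmt.Sprintf("%t", bv) }

    func main() {
        var v common.Value = BoolValue(true)
        fmt.Println(v.String()) // prints: true
    }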

+ 64 - 96
flexer/flexer.go

@@ -4,102 +4,70 @@ import "fmt"
 import "regexp"
 import "strings"
 import "strconv"
+import . "src.eruta.nl/beoran/ll1/common"
 
-/* Flexer is a flexible regexp and rule based
+/* Flexer is a flexible, regexp- and lexeme-based
 lexer that can be used as an implementation for
 generated code.
 */
 
-type Position struct {
-	Name *string
-	Line int
-	Col  int
-}
-
-type Kind int
-
-const (
-	SkipKind  Kind = -30000
-	ErrorKind Kind = -31000
-)
-
-type Token interface {
-	Position() Position
-	Kind() Kind
-	Text() string
-}
-
-type Lexer interface {
-	// Accept will accept a regexp and advance, returning the matches.
-	// Returns nil if no matches were found.
-	Accept(re *regexp.Regexp) []string
-	// Returns the current lexer position.
-	Position() Position
-	// Returns if the lexer is at the end or not.
-	EOF() bool
-
-	// The lexer creates a token with the current lexer position and
-	// the given kind and text.
-	MakeToken(kind Kind, form string, args ...interface{}) Token
-
-	// The lexer creates a token with the current lexer position and
-	// the given kind. The text is taken from the lexer string builder and
-	// that builser is reset.
-	MakeBuilderToken(kind Kind) Token
-
-	// The lexer has a string builder, which can be used to append
-	// strings or runes to and which can be returned and cleared when the
-	// token is complete.
-	Builder() *strings.Builder
-
-	// Adds a rule to the lexer.
-	Rule(kind Kind, re, context string, act Action) error
-	// Calls the lexer once.
-	LexOnce() []Token
-
-	// Returns the current lexer context
-	Context() string
-	// Pushes the named context on the lexer context stack
-	PushContext(name string)
-	// Pops the current context from the lexer context stack.
-	PopContext()
-}
-
-type Action func(f Lexer, k Kind, matches ...string) []Token
-
 type BasicToken struct {
-	position Position
+	location Location
 	kind     Kind
 	text     string
+	value    Value
 }
 
 func (bt BasicToken) Kind() Kind {
 	return bt.kind
 }
 
-func (bt BasicToken) Position() Position {
-	return bt.position
+func (bt BasicToken) Location() Location {
+	return bt.location
 }
 
 func (bt BasicToken) Text() string {
 	return bt.text
 }
 
-func MakeToken(position Position, kind Kind, form string,
+func (bt BasicToken) Value() Value {
+	if bt.value == nil {
+		return StringValue(bt.text)
+	}
+	return bt.value
+}
+
+func MakeToken(location Location, kind Kind, form string,
 	args ...interface{}) BasicToken {
 	text := fmt.Sprintf(form, args...)
-	return BasicToken{position, kind, text}
+	return BasicToken{location, kind, text, StringValue(text)}
+}
+
+func MakeValueToken(location Location, kind Kind, value Value) BasicToken {
+	text := value.String()
+	return BasicToken{location, kind, text, value}
 }
 
 type ErrorToken struct {
 	BasicToken
 }
 
-/* A rule for Flexer is based on a regular expression.
-* While the rule may have submatches, the lexer will consume
+func MakeErrorToken(location Location, form string, args ...interface{}) ErrorToken {
+	err := fmt.Errorf(form, args...)
+	tok := MakeValueToken(location, ErrorKind, ErrorValue{err})
+	return ErrorToken{tok}
+}
+
+func (e ErrorToken) Error() string {
+	return fmt.Sprintf("%s%s", e.Location(), e.text)
+}
+
+/* Lexeme for Flexer is based on a regular expression.
+* While the lexeme may have submatches, the lexer will consume
 * the whole match if it matches at the beginning of the current input.
  */
-type Rule struct {
+type Lexeme struct {
 	Kind
 	*regexp.Regexp
 	Context string
@@ -170,9 +138,9 @@ func EscapeAction(quote byte) func(lex Lexer, k Kind, matches ...string) []Token
 	}
 }
 
-// Try tries to apply a rule.
+// Try tries to apply a lexeme.
 // Returns nil on no match.
-func (r Rule) Try(lex Lexer) []Token {
+func (r Lexeme) Try(lex Lexer) []Token {
 	matches := lex.Accept(r.Regexp)
 	if matches == nil || len(matches) == 0 {
 		return nil
@@ -186,8 +154,8 @@ func (r Rule) Try(lex Lexer) []Token {
 
 type Flexer struct {
 	index    int
-	position Position
-	rules    []Rule
+	location Location
+	lexemes  []Lexeme
 	input    string
 	name     string
 	contexts []string
@@ -195,7 +163,7 @@ type Flexer struct {
 }
 
 func (f Flexer) MakeToken(kind Kind, form string, args ...interface{}) Token {
-	return MakeToken(f.position, kind, form, args...)
+	return MakeToken(f.location, kind, form, args...)
 }
 
 func (f *Flexer) MakeBuilderToken(kind Kind) Token {
@@ -205,7 +173,7 @@ func (f *Flexer) MakeBuilderToken(kind Kind) Token {
 }
 
 // Advances the flexer to the given index,
-// updating the position.
+// updating the location.
 func (f *Flexer) advanceTo(index int) {
 	start := f.index
 	end := index
@@ -217,10 +185,10 @@ func (f *Flexer) advanceTo(index int) {
 					i++
 				}
 			}
-			f.position.Line++
-			f.position.Col = 1
+			f.location.Line++
+			f.location.Col = 1
 		} else {
-			f.position.Col++
+			f.location.Col++
 		}
 	}
 	f.index = end
@@ -242,27 +210,27 @@ func (f *Flexer) Accept(re *regexp.Regexp) []string {
 	return matches
 }
 
-func (f *Flexer) Rule(kind Kind, expr, context string, act Action) error {
+func (f *Flexer) Lexeme(kind Kind, expr, context string, act Action) error {
 	re, err := regexp.Compile(`\A` + expr)
 	if err != nil {
 		return err
 	}
-	rule := Rule{kind, re, context, act}
-	f.rules = append(f.rules, rule)
+	lexeme := Lexeme{kind, re, context, act}
+	f.lexemes = append(f.lexemes, lexeme)
 	return nil
 }
 
-func (f *Flexer) EscapedStringRule(kind Kind, first, last, context string) {
-	f.Rule(SkipKind, first, "", ContextAction(context))
-	f.Rule(kind, last, context, PopAction(kind))
-	f.Rule(SkipKind, `\\[etnru][0-9a-f]*`, context, EscapeAction(last[0]))
-	f.Rule(SkipKind, `.`, context, StoreAction())
+func (f *Flexer) EscapedStringLexeme(kind Kind, first, last, context string) {
+	f.Lexeme(SkipKind, first, "", ContextAction(context))
+	f.Lexeme(kind, last, context, PopAction(kind))
+	f.Lexeme(SkipKind, `\\[etnru][0-9a-f]*`, context, EscapeAction(last[0]))
+	f.Lexeme(SkipKind, `.`, context, StoreAction())
 }
 
-func (f *Flexer) RawStringRule(kind Kind, first, last, context string) {
-	f.Rule(SkipKind, first, "", ContextAction(context))
-	f.Rule(kind, last, context, PopAction(kind))
-	f.Rule(SkipKind, `.`, context, StoreAction())
+func (f *Flexer) RawStringLexeme(kind Kind, first, last, context string) {
+	f.Lexeme(SkipKind, first, "", ContextAction(context))
+	f.Lexeme(kind, last, context, PopAction(kind))
+	f.Lexeme(SkipKind, `.`, context, StoreAction())
 }
 
 func (f *Flexer) PushContext(context string) {
@@ -292,11 +260,11 @@ func (f *Flexer) Builder() *strings.Builder {
 // Runs the lexer once.
 // Return nil if no more progress can be made
 func (f *Flexer) LexOnce() []Token {
-	for _, rule := range f.rules {
-		if rule.Context != f.Context() {
+	for _, lexeme := range f.lexemes {
+		if lexeme.Context != f.Context() {
 			continue
 		}
-		tokens := rule.Try(f)
+		tokens := lexeme.Try(f)
 		if tokens != nil {
 			return tokens
 		}
@@ -304,8 +272,8 @@ func (f *Flexer) LexOnce() []Token {
 	return nil
 }
 
-func (f Flexer) Position() Position {
-	return f.position
+func (f Flexer) Location() Location {
+	return f.location
 }
 
 func (f Flexer) EOF() bool {
@@ -314,9 +282,9 @@ func (f Flexer) EOF() bool {
 
 func NewFlexer(name, text string) *Flexer {
 	res := &Flexer{}
-	res.position.Line = 1
-	res.position.Col = 1
-	res.position.Name = &name
+	res.location.Line = 1
+	res.location.Col = 1
+	res.location.Name = &name
 	res.input = text
 	return res
 }
@@ -341,7 +309,7 @@ func LexAll(lex Lexer, skips ...Kind) []Token {
 	for !lex.EOF() {
 		toks := lex.LexOnce()
 		if toks == nil {
-			err := lex.MakeToken(ErrorKind, " Lexer error: no rule matches. Context:%s.", lex.Context())
+			err := lex.MakeToken(ErrorKind, " Lexer error: no lexeme matches. Context:%s.", lex.Context())
 			res = append(res, err)
 			return res
 		}
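
A minimal sketch of the EscapedStringLexeme helper, which installs in one call
the four string-lexing rules (enter context, close, escape, store) that the
test below still sets up one by one; kString is an assumed example kind:

    package main

    import (
        "fmt"

        "src.eruta.nl/beoran/ll1/common"
        "src.eruta.nl/beoran/ll1/flexer"
    )

    const kString = common.Kind(-1)

    func main() {
        f := flexer.NewFlexer("example", `"hello\nworld"`)
        f.EscapedStringLexeme(kString, `"`, `"`, "string")
        for _, tok := range flexer.LexAll(f) {
            fmt.Printf("%v %q\n", tok.Kind(), tok.Text())
        }
    }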

+ 11 - 10
flexer/flexer_test.go

@@ -1,6 +1,7 @@
 package flexer
 
 import "testing"
+import . "src.eruta.nl/beoran/ll1/common"
 
 const (
 	tWord = Kind(-1 - iota)
@@ -12,7 +13,7 @@ const (
 )
 
 func TestFlexer(t *testing.T) {
-	pos := Position{}
+	pos := Location{}
 	expected := []Token{
 		MakeToken(pos, tSpace, "\t "),
 		MakeToken(pos, tWord, "PROGRAM"),
@@ -29,15 +30,15 @@ func TestFlexer(t *testing.T) {
 		MakeToken(pos, tEos, "."),
 	}
 	f := NewFlexer(`test`, "\t PROGRAM  ->  STATEMENT+ .\nsay \"hello\\nworld\".")
-	f.Rule(tSpace, `[ \t]+`, "", nil)
-	f.Rule(tWord, `[A-Za-z_]+`, "", nil)
-	f.Rule(tArrow, `\->`, "", nil)
-	f.Rule(tPlus, `\+`, "", nil)
-	f.Rule(tEos, `\.[\n\r]*`, "", nil)
-	f.Rule(SkipKind, `"`, "", ContextAction("string"))
-	f.Rule(tString, `"`, "string", PopAction(tString))
-	f.Rule(SkipKind, `\\[etnru][0-9a-f]*`, "string", EscapeAction('"'))
-	f.Rule(SkipKind, `.`, "string", StoreAction())
+	f.Lexeme(tSpace, `[ \t]+`, "", nil)
+	f.Lexeme(tWord, `[A-Za-z_]+`, "", nil)
+	f.Lexeme(tArrow, `\->`, "", nil)
+	f.Lexeme(tPlus, `\+`, "", nil)
+	f.Lexeme(tEos, `\.[\n\r]*`, "", nil)
+	f.Lexeme(SkipKind, `"`, "", ContextAction("string"))
+	f.Lexeme(tString, `"`, "string", PopAction(tString))
+	f.Lexeme(SkipKind, `\\[etnru][0-9a-f]*`, "string", EscapeAction('"'))
+	f.Lexeme(SkipKind, `.`, "string", StoreAction())
 
 	toks := LexAll(f)
 

+ 35 - 32
flexgen/flexer_lexer.go

@@ -1,33 +1,36 @@
-package flexer
+package flexgen
+
+import "src.eruta.nl/beoran/ll1/common"
+import "src.eruta.nl/beoran/ll1/flexer"
 
 const (
-	FlexerKindDot = Kind(-1 - iota)
-	FlexerKindLiteralString
-	FlexerKindLiteralRaw
-	FlexerKindLiteralChar
-	FlexerKindTerminal
-	FlexerKindArrow
-	FlexerKindFlexerKeyword
-	FlexerKindWhitespace
-	FlexerKindLineComment
-	FlexerKindBlockComment
-	FlexerKindFlexerAction
+	FlexgenKindDot = common.Kind(-1 - iota)
+	FlexgenKindLiteralString
+	FlexgenKindLiteralRaw
+	FlexgenKindLiteralChar
+	FlexgenKindTerminal
+	FlexgenKindArrow
+	FlexgenKindKeyword
+	FlexgenKindWhitespace
+	FlexgenKindLineComment
+	FlexgenKindBlockComment
+	FlexgenKindAction
 )
 
 /*
 
-// Flexer generator's input own lexer specification
+// Flexgen's own input lexer specification
 dot          	-> '\.' .
 arrow        	-> "(?:->|→)" .
 terminal     	-> "[[:isLower:]][[:isAlNum]_-]+" .
 literal-string  -> `"` flex-esc-string  .
 literal-raw     -> "`" flex-string  .
 literal-char    -> "'" flex-string  .
-flexer-keyword	-> "flex-skip|flex-string|flex-esc-string" .
+flexgen-keyword	-> "flex-skip|flex-string|flex-esc-string" .
 whitespace   	-> "[ \t\n\r]+" flex-skip .
 line-comment 	-> "//[^\n\r]+" flex-skip .
 block-comment	-> "/\*(?ms:.)*?\* /" .
-flexer-action   -> `%{(?ms:.)*?}%` .
+flexgen-action  -> `@{(?ms:.)*?}@` .
 
 */
 
@@ -41,21 +44,21 @@ func Check(err error) {
 	}
 }
 
-func LexFlexerInputString(name, input string) []Token {
-	f := NewFlexer(name, input)
-	Check(f.Rule(FlexerKindWhitespace, `[ \t\n\r]+`, "", nil))
-	Check(f.Rule(FlexerKindLineComment, `//[^\n\r]+[\n\r]+`, "", nil))
-	Check(f.Rule(FlexerKindBlockComment, `/\*(?ms:.)*?\*/`, "", nil))
-	Check(f.Rule(FlexerKindDot, `\.`, "", nil))
-	Check(f.Rule(FlexerKindArrow, `(?:->|→)`, "", nil))
-	Check(f.Rule(FlexerKindTerminal, `[[:lower:]][[:alnum:]_-]+`, "", nil))
-	f.EscapedStringRule(FlexerKindLiteralString, `"`, `"`, "literal-string")
-	f.RawStringRule(FlexerKindLiteralRaw, "`", "`", "literal-raw")
-	f.RawStringRule(FlexerKindLiteralChar, `''`, `''`, "literal-char")
-	Check(f.Rule(FlexerKindFlexerKeyword, `flex-skip|flex-string|flex-esc-string`, "", nil))
-	Check(f.Rule(FlexerKindFlexerAction, `@{(?ms:.)*?}@`, "", nil))
-
-	skipKinds := []Kind{SkipKind, FlexerKindWhitespace, FlexerKindBlockComment, FlexerKindLineComment}
-
-	return LexAll(f, skipKinds...)
+func LexFlexgenInputString(name, input string) []common.Token {
+	f := flexer.NewFlexer(name, input)
+	Check(f.Lexeme(FlexgenKindWhitespace, `[ \t\n\r]+`, "", nil))
+	Check(f.Lexeme(FlexgenKindLineComment, `//[^\n\r]+[\n\r]+`, "", nil))
+	Check(f.Lexeme(FlexgenKindBlockComment, `/\*(?ms:.)*?\*/`, "", nil))
+	Check(f.Lexeme(FlexgenKindDot, `\.`, "", nil))
+	Check(f.Lexeme(FlexgenKindArrow, `(?:->|→)`, "", nil))
+	Check(f.Lexeme(FlexgenKindTerminal, `[[:lower:]][[:alnum:]_-]+`, "", nil))
+	f.EscapedStringLexeme(FlexgenKindLiteralString, `"`, `"`, "literal-string")
+	f.RawStringLexeme(FlexgenKindLiteralRaw, "`", "`", "literal-raw")
+	f.RawStringLexeme(FlexgenKindLiteralChar, `''`, `''`, "literal-char")
+	Check(f.Lexeme(FlexgenKindKeyword, `flex-skip|flex-string|flex-esc-string`, "", nil))
+	Check(f.Lexeme(FlexgenKindAction, `@{(?ms:.)*?}@`, "", nil))
+
+	skipKinds := []common.Kind{common.SkipKind, FlexgenKindWhitespace, FlexgenKindBlockComment, FlexgenKindLineComment}
+
+	return flexer.LexAll(f, skipKinds...)
 }

+ 31 - 38
flexgen/flexer_parser.go

@@ -1,41 +1,34 @@
-package flexer
-
-/* Parser for the flexer lexer generator.  */
-
-type GeneratorRule struct {
-	Name     string
-	Regexp   string
-	Keywords []string
-	Action   string
-}
-
-type GeneratorParser struct {
-	Tokens []Token
-	Index  int
-	Rules  []GeneratorRule
-}
-
-func (g GeneratorParser) Token() Token {
-	return g.Tokens[g.Index]
-}
-
-func (g GeneratorParser) Accept() (Token, err) {
-
-}
-
-func (g *GeneratorParser) ParseRule() error {
-	tok := g.Token()
-	if tok.Kind() != FlexerKind Terminal
-
-	for g.Index < len(g.Tokens) {
-		g.ParseRule()
-	}
-	return nil
+package flexgen
+
+import . "src.eruta.nl/beoran/ll1/common"
+import "src.eruta.nl/beoran/ll1/flexer"
+import "src.eruta.nl/beoran/ll1/ast"
+import . "src.eruta.nl/beoran/ll1/grammar"
+import "src.eruta.nl/beoran/ll1/parser"
+
+func MakeFlexerGrammar() *Grammar {
+	g := &Grammar{}
+	keywordsRef := g.AddRule(Ref(g, "keywordsRef", "keywords"))
+	dot := g.AddRule(Term("dot", FlexgenKindDot))
+	terminal := g.AddRule(Term("terminal", FlexgenKindTerminal))
+	arrow := g.AddRule(Term("arrow", FlexgenKindArrow))
+	keyword := g.AddRule(Term("keyword", FlexgenKindKeyword))
+	keywords := g.AddRule(Opt("keywords", "", And(keyword, keywordsRef)))
+	literalString := g.AddRule(Term("literalString", FlexgenKindLiteralString))
+	literalRaw := g.AddRule(Term("literalRaw", FlexgenKindLiteralRaw))
+	literalChar := g.AddRule(Term("literalChar", FlexgenKindLiteralChar))
+	pattern := g.AddRule(Alt("pattern", "", literalString, literalRaw, literalChar))
+	lexeme := g.AddRule(Seq("lexeme", "", terminal, arrow, pattern, keywords, dot))
+	lexemes := g.AddRule(Seq("lexemes", "", lexeme, End{}))
+	top := g.AddRule(Alt("top", "", lexemes))
+	g.Top = top
+	return g
 }
 
-func (g *GeneratorParser) Parse() error {
-	for g.Index < len(g.Tokens) {
-		g.ParseRule()
-	}
-	return nil
+// MakeFlexerParser returns a parser for the flexgen input language.
+func MakeFlexerParser() *parser.Parser {
+	p := &parser.Parser{}
+	p.TokenMapper = parser.DefaultMapper{}
+	p.Grammar = *MakeFlexerGrammar()
+	return p
 }
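
An end-to-end sketch wiring the flexgen lexer to the grammar and parser; it
assumes the ParseReference sketch added in parser/parser.go below:

    package main

    import (
        "fmt"

        "src.eruta.nl/beoran/ll1/ast"
        "src.eruta.nl/beoran/ll1/flexgen"
    )

    func main() {
        toks := flexgen.LexFlexgenInputString("example", "dot -> `\\.` .")
        p := flexgen.MakeFlexerParser()
        if err := p.Parse(toks); err != nil {
            fmt.Println("parse error:", err)
            return
        }
        fmt.Print(ast.Dump(p.Result))
    }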

+ 1 - 1
flexgen/generator.go

@@ -1 +1 @@
-package flexer
+package flexgen

+ 431 - 0
grammar/grammar.go

@@ -0,0 +1,431 @@
+// Grammar describes the rules for a particular ll-parsable language.
+package grammar
+
+import "fmt"
+import "unicode"
+import "sort"
+import "strings"
+
+import . "src.eruta.nl/beoran/ll1/common"
+
+// A Grammar consists of rules. A Rule is either a Terminal,
+// which includes the special terminals Epsilon and End,
+// or one of the following nonterminals: an Alternates, a Sequence or a Reference.
+type Rule interface {
+	// Name is the optional name of the rule. If empty, the rule is anonymous.
+	Name() string
+	// Definition is the detail of how the rule should be parsed.
+	// For a Terminal Rule this is the rule itself.
+	Definition() Rule
+	// Rules can be stringified
+	String() string
+	Check(g *Grammar, r Rule) error
+	// The first set of the rule
+	FirstSet() (Set, error)
+	FollowSet(g *Grammar) (Set, error)
+}
+
+// A Set is a set of terminals: either the first set or the follow set
+// of a rule in the grammar.
+type Set map[string]Terminal
+
+type BasicRule struct {
+	name string
+}
+
+func (r BasicRule) Definition() Rule {
+	return r
+}
+
+func (e BasicRule) Name() string {
+	return e.name
+}
+
+func (e BasicRule) String() string {
+	return e.name
+}
+
+func (e BasicRule) Check(g *Grammar, r Rule) error {
+	return nil
+}
+
+func (e BasicRule) FirstSet() (Set, error) {
+	res := make(Set)
+	return res, nil
+}
+
+func (e BasicRule) FollowSet(g *Grammar) (Set, error) {
+	res := make(Set)
+	return res, nil
+}
+
+func (r BasicRule) IsTerminal() bool {
+	return !unicode.IsUpper([]rune(r.name)[0])
+}
+
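+// Terminal is a rule that matches a single token of the given Kind.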
+type Terminal struct {
+	BasicRule
+	Kind
+}
+
+func (t Terminal) String() string {
+	return fmt.Sprintf("%s(%d)", t.name, t.Kind)
+}
+
+func (t Terminal) FirstSet() (Set, error) {
+	res := make(Set)
+	res.Add(t)
+	return res, nil
+}
+
+// Epsilon is the empty rule.
+type Epsilon struct {
+	Terminal
+}
+
+func (e Epsilon) String() string {
+	return "ε"
+}
+
+// End corresponds to EOF or the end of the input.
+type End struct {
+	Terminal
+}
+
+func (e End) String() string {
+	return "$"
+}
+
+type Nonterminal struct {
+	BasicRule
+	Define   Rule
+	Template string
+	// First set of the rule
+	First Set
+	// Follow set of the rule
+	Follow Set
+	// Nullable or not
+	Nullable bool
+	// Realizable or not
+	Realizable bool
+	// Recursive (i.e. LEFT-recursive, which LL(1) disallows)
+	Recursive bool
+	// Undefined nonterminals in this rule
+	Undefined bool
+	// Depth is the depth of (mutual) recursion of a rule. Limited to 16.
+	Depth int
+}
+
+func (n Nonterminal) String() string {
+	return fmt.Sprintf("%s -> %s", n.BasicRule, n.Define)
+}
+
+func (n Nonterminal) Definition() Rule {
+	return n.Define
+}
+
+func (r BasicRule) Equals(r2 Rule) bool {
+	// XXX probably not correct
+	return r.Name() == r2.Name()
+}
+
+func (n Nonterminal) FirstSet() (Set, error) {
+	if n.Define == nil { // XXX can this happen, and when/why?
+		return Set{}, nil
+	}
+	return n.Define.FirstSet()
+}
+
+func (n Nonterminal) FollowSet(g *Grammar) (Set, error) {
+	if n.Define == nil {
+		return Set{}, nil
+	}
+	return n.Define.FollowSet(g)
+}
+
+// Nonterminals can cause recursion and need to be checked
+// XXX this function still is very likely wrong.
+func (n Nonterminal) Check(g *Grammar, r Rule) error {
+	if n.Define == nil {
+		n.Undefined = true
+		return fmt.Errorf("%s: rule has empty definition", n.Name())
+	} else if n.Name() != r.Name() {
+		return nil
+	}
+	// check recursively as well
+	if n.Depth < 16 {
+		n.Depth++
+		return n.Define.Check(g, n)
+	}
+	return fmt.Errorf("%s: left recursive or recursion too deep", n)
+}
+
+type Sequence struct {
+	Nonterminal
+	Rules []Rule
+}
+
+func (s Sequence) String() string {
+	sep := ""
+	res := ""
+	for _, rule := range s.Rules {
+		res = res + sep + rule.String()
+		sep = "   "
+	}
+	return res
+}
+
+func (s Sequence) Check(g *Grammar, r Rule) error {
+	// check Leftmost Rule for left recursion
+	if len(s.Rules) > 0 {
+		e := s.Rules[0]
+		return e.Check(g, r)
+	}
+	return nil
+}
+
+func (s Sequence) FirstSet() (Set, error) {
+	if len(s.Rules) > 0 {
+		return s.Rules[0].FirstSet()
+	}
+	return make(Set), nil
+}
+
+type Alternates struct {
+	Nonterminal
+	Rules []Rule
+}
+
+func (a Alternates) FirstSet() (Set, error) {
+	res := make(Set)
+	for _, s := range a.Rules {
+		fs, err := s.FirstSet()
+		if err != nil {
+			return res, err
+		}
+		res = res.Union(fs)
+	}
+	return res, nil
+}
+
+func (a Alternates) Check(g *Grammar, r Rule) error {
+	for _, s := range a.Rules {
+		err := s.Check(g, r)
+		if err != nil {
+			return err
+		}
+	}
+	return nil
+}
+
+func (a Alternates) String() string {
+	sep := ""
+	res := "("
+	for _, rule := range a.Rules {
+		res = res + sep + rule.String()
+		sep = " | "
+	}
+	return res + ")"
+}
+
+// Reference refers to another rule by name, to enable right recursion.
+// This also requires a pointer to the grammar for later resolution.
+type Reference struct {
+	Grammar *Grammar
+	Nonterminal
+	To string
+}
+
+func (r Reference) Resolve() (Rule, error) {
+	return r.Grammar.Lookup(r.To)
+}
+
+func IsEpsilon(e Rule) bool {
+	_, ok := e.(Epsilon)
+	return ok
+}
+
+func IsNonterminal(e Rule) bool {
+	_, ok := e.(Nonterminal)
+	return ok
+}
+
+// whether a Set is nullable (i.e. contains epsilon)
+func (s Set) IsNullable() bool {
+	for _, e := range s {
+		if IsEpsilon(e) {
+			return true
+		}
+	}
+	return false
+}
+
+func (s Set) Contains(e Terminal) bool {
+	name := e.Name()
+	_, ok := s[name]
+	return ok
+}
+
+func (s *Set) Add(e Terminal) bool {
+	name := e.Name()
+	_, ok := (*s)[name]
+	if ok {
+		return false
+	}
+	(*s)[name] = e
+	return true
+}
+
+func (s Set) UnionWithoutEpsilon(s2 Set) Set {
+	res := make(Set)
+	if s != nil {
+		for _, v := range s {
+			if !IsEpsilon(v) {
+				res.Add(v)
+			}
+		}
+	}
+	if s2 != nil {
+		for _, v := range s2 {
+			if !IsEpsilon(v) {
+				res.Add(v)
+			}
+		}
+	}
+	return res
+}
+
+func (s Set) Union(s2 Set) Set {
+	res := make(Set)
+	for _, v := range s {
+		res.Add(v)
+	}
+	for _, v := range s2 {
+		res.Add(v)
+	}
+	return res
+}
+
+func (s Set) Intersect(s2 Set) Set {
+	res := make(Set)
+	for _, v := range s {
+		if s2.Contains(v) {
+			res.Add(v)
+		}
+	}
+	return res
+}
+
+func (s Set) String() string {
+	if len(s) == 0 {
+		return "∅"
+	}
+	aid := []string{}
+	for _, v := range s {
+		aid = append(aid, v.String())
+	}
+	sort.Strings(aid)
+	return strings.Join(aid, " ")
+}
+
+func (s Set) ToTokenKinds() []Kind {
+	if len(s) == 0 {
+		return []Kind{}
+	}
+	res := []Kind{}
+	for _, t := range s {
+		res = append(res, t.Kind)
+	}
+	return res
+}
+
+type Grammar struct {
+	BasicRule
+	Top   Rule
+	Rules []Rule
+	// All rules, terminals and nonterminals mapped by name.
+	NamedRules map[string]Rule
+	// List of errors of the grammar. Only valid
+	// after running Check()
+	Errors []error
+}
+
+func (g Grammar) String() string {
+	res := "Top → " + g.Top.Name() + "\n"
+	for _, r := range g.Rules {
+		res = res + r.String() + "\n"
+	}
+	return res
+}
+
+func (g Grammar) Lookup(name string) (Rule, error) {
+	r, ok := g.NamedRules[name]
+	if ok {
+		return r, nil
+	}
+	return nil, fmt.Errorf("Undefined rule: %s", name)
+}
+
+func (g *Grammar) AddRule(r Rule) Rule {
+	g.Rules = append(g.Rules, r)
+	if r.Name() != "" {
+		if g.NamedRules == nil {
+			g.NamedRules = make(map[string]Rule)
+		}
+		g.NamedRules[r.Name()] = r
+	}
+	return r
+}
+
+func Term(name string, kind Kind) Rule {
+	term := Terminal{}
+	term.name = name
+	term.Kind = kind
+	return term
+}
+
+func Seq(name, template string, rules ...Rule) Sequence {
+	seq := Sequence{}
+	seq.name = name
+	seq.Template = template
+	seq.Rules = rules
+	return seq
+}
+
+func And(rules ...Rule) Rule {
+	return Seq("", "", rules...)
+}
+
+func Alt(name, template string, rules ...Rule) Alternates {
+	alt := Alternates{}
+	alt.name = name
+	alt.Template = template
+	alt.Rules = rules
+	return alt
+}
+
+func Or(seqs ...Rule) Rule {
+	return Alt("", "", seqs...)
+}
+
+func Opt(name, template string, rule Rule) Rule {
+	rules := []Rule{rule, Epsilon{}}
+	return Alt(name, template, rules...)
+}
+
+func (g *Grammar) Alt(name, template string, seqs ...Rule) Rule {
+	return g.AddRule(Alt(name, template, seqs...))
+}
+
+func (g *Grammar) Term(name string, kind Kind) Rule {
+	return g.AddRule(Term(name, kind))
+}
+
+func Ref(g *Grammar, name, to string) Reference {
+	ref := Reference{}
+	ref.Nonterminal.name = name
+	ref.Grammar = g
+	ref.To = to
+	return ref
+}
+
+func (g *Grammar) Ref(name, to string) Rule {
+	return g.AddRule(Ref(g, name, to))
+}
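
A small grammar built with the new combinators, and its FIRST set; the token
kinds are assumed example values:

    package main

    import (
        "fmt"

        "src.eruta.nl/beoran/ll1/common"
        "src.eruta.nl/beoran/ll1/grammar"
    )

    func main() {
        g := &grammar.Grammar{}
        word := g.Term("word", common.Kind(-1))
        dot := g.Term("dot", common.Kind(-2))
        g.Top = g.AddRule(grammar.Seq("sentence", "", word, dot))
        fmt.Println(g)
        if first, err := g.Top.FirstSet(); err == nil {
            fmt.Println("FIRST(sentence):", first) // word(-1)
        }
    }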

+ 176 - 0
parser/parser.go

@@ -0,0 +1,176 @@
+// Uses an ll1 grammar to parse an input from an ll1 flexer to an Ast.
+package parser
+
+// import "fmt"
+import . "src.eruta.nl/beoran/ll1/common"
+import . "src.eruta.nl/beoran/ll1/ast"
+import . "src.eruta.nl/beoran/ll1/grammar"
+import . "src.eruta.nl/beoran/ll1/flexer"
+
+// A TokenMapper maps a token to a Species.
+type TokenMapper interface {
+	// Returns the Species to use for the token. If nil, the token
+	// may be skipped.
+	Map(t Token) Species
+}
+
+type DefaultMapper struct{}
+
+func (DefaultMapper) Map(t Token) Species {
+	return MakeSpecies("default")
+}
+
+type Parser struct {
+	Grammar
+	TokenMapper
+	Tokens []Token
+	Index  int
+	Errors []error
+	Result Ast
+	// current location in the ast being built.
+	Now       Ast
+	Recursion int
+}
+
+func (p *Parser) MakeError(msg string, args ...interface{}) error {
+	return MakeErrorToken(p.Token().Location(), msg, args...)
+}
+
+func (p *Parser) AddError(msg string, args ...interface{}) error {
+	err := MakeErrorToken(p.Token().Location(), msg, args...)
+	p.Errors = append(p.Errors, err)
+	return err
+}
+
+func (p Parser) Token() Token {
+	if p.Index < len(p.Tokens) {
+		return p.Tokens[p.Index]
+	}
+	if len(p.Tokens) == 0 {
+		return MakeErrorToken(Location{}, "No input to parse.")
+	}
+	last := p.Tokens[len(p.Tokens)-1]
+	return MakeErrorToken(last.Location(), "Unexpected end of input.")
+}
+
+func (p *Parser) Expect(kind Kind) Token {
+	tok := p.Token()
+	if tok.Kind() == kind {
+		p.Index++
+		return tok
+	}
+	return nil
+}
+
+func (p *Parser) Accept(kinds ...Kind) Token {
+	tok := p.Token()
+	for _, kind := range kinds {
+		if tok.Kind() == kind {
+			p.Index++
+			return tok
+		}
+	}
+	return nil
+}
+
+func (p *Parser) Require(kinds ...Kind) Token {
+	tok := p.Accept(kinds...)
+	if tok != nil {
+		return tok
+	}
+	p.AddError("Expected: %v", kinds)
+	return nil
+}
+
+func (p *Parser) ParseTerminal(term Terminal) error {
+	tok := p.Expect(term.Kind)
+	if tok == nil {
+		return p.MakeError("Expected token kind: %d", term.Kind)
+	}
+	spec := p.Map(tok)
+	NewChild(p.Now, spec, tok)
+	// p.Now should already be of a suitable kind to accept a terminal token child.
+	return nil
+}
+
+func (g *Parser) ParseEpsilon(term Epsilon) error {
+	return nil
+}
+
+func (p *Parser) ParseEnd(term End) error {
+	if p.Index >= len(p.Tokens) {
+		return nil
+	} else {
+		return p.MakeError("Expected end of input.")
+	}
+}
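+
+// ParseReference resolves a rule reference and parses the resolved rule.
+// Hypothetical minimal implementation: ParseRule below dispatches on
+// Reference, but this commit defines no ParseReference anywhere.
+func (p *Parser) ParseReference(ref Reference) error {
+	rule, err := ref.Resolve()
+	if err != nil {
+		return p.MakeError("Cannot resolve reference: %s", err)
+	}
+	return p.ParseRule(rule)
+}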
+
+func (p *Parser) PopUp() {
+	if p.Now != nil && p.Now.Parent() != nil {
+		p.Now = p.Now.Parent()
+	}
+}
+
+func (p *Parser) ParseSequence(seq Sequence) error {
+	spec := p.Map(p.Token())
+	p.Now = NewChild(p.Now, spec, p.Token())
+	defer p.PopUp()
+	for _, rule := range seq.Rules {
+		err := p.ParseRule(rule)
+		if err != nil {
+			p.Errors = append(p.Errors, err)
+			return err
+		}
+	}
+	return nil
+}
+
+func (p *Parser) ParseAlternates(alt Alternates) error {
+	spec := p.Map(p.Token())
+	p.Now = NewChild(p.Now, spec, p.Token())
+	defer p.PopUp()
+	errors := []error{}
+	save := p.Index
+	for _, rule := range alt.Rules {
+		err := p.ParseRule(rule)
+		if err != nil {
+			// Backtrack so the next alternate starts at the same token.
+			p.Index = save
+			errors = append(errors, err)
+		} else { // one alternate was OK here.
+			return nil
+		}
+	}
+	// If we get here no alternate was ok.
+	p.Errors = append(p.Errors, errors...)
+	return p.MakeError("Could not parse alternate.")
+}
+
+func (p *Parser) ParseRule(r Rule) error {
+	p.Recursion++
+	if p.Recursion > 800 {
+		panic("Too much recursion in grammar.")
+	}
+	var err error
+	switch rv := r.(type) {
+	case Epsilon:
+		err = p.ParseEpsilon(rv)
+	case End:
+		err = p.ParseEnd(rv)
+	case Terminal:
+		err = p.ParseTerminal(rv)
+	case Alternates:
+		err = p.ParseAlternates(rv)
+	case Sequence:
+		err = p.ParseSequence(rv)
+	case Reference:
+		err = p.ParseReference(rv)
+	default:
+		panic("Unknown rule type in ParseRule.")
+	}
+	return err
+}
+
+func (p *Parser) Parse(input []Token) error {
+	p.Tokens = input
+	p.Recursion = 0
+	p.Errors = []error{}
+	p.Result = NewAstNone()
+	p.Now = p.Result
+	return p.ParseRule(p.Grammar.Top)
+}
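
DefaultMapper gives every node the same species. A hypothetical custom
TokenMapper, for illustration, that names nodes after their token kind:

    package main

    import (
        "fmt"

        "src.eruta.nl/beoran/ll1/ast"
        "src.eruta.nl/beoran/ll1/common"
        "src.eruta.nl/beoran/ll1/flexer"
        "src.eruta.nl/beoran/ll1/parser"
    )

    // KindMapper is hypothetical; it satisfies parser.TokenMapper.
    type KindMapper struct{}

    func (KindMapper) Map(t common.Token) ast.Species {
        return ast.MakeSpecies(fmt.Sprintf("kind%d", t.Kind()))
    }

    func main() {
        tok := flexer.MakeToken(common.Location{}, common.Kind(-1), "x")
        p := &parser.Parser{TokenMapper: KindMapper{}}
        fmt.Println(p.Map(tok)) // kind-1
    }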