Browse Source

WIP lexer generator

Beoran 2 years ago
parent
commit
f8d4540c67
8 changed files with 253 additions and 27 deletions
  1. 40 23
      README.md
  2. 60 0
      cmd/ll1lex/flexer.go
  3. 31 3
      flexer/flexer.go
  4. 18 0
      flexgen/flexer.flex
  5. 61 0
      flexgen/flexer_lexer.go
  6. 41 0
      flexgen/flexer_parser.go
  7. 1 0
      flexgen/generator.go
  8. 1 1
      go.mod

+ 40 - 23
README.md

@@ -1,8 +1,8 @@
 # ll1
 
-ll1 is a tool to parse and check LL(1) specifications, and to generate 
-code or reports using Go templates based on these specifications. 
-ll1 specifications must contain a definition for an ll1 grammar, and 
+ll1 is a tool to parse and check LL(1) specifications, and to generate
+code or reports using Go templates based on these specifications.
+ll1 specifications must contain a definition for an ll1 grammar, and
 may optionally also specify a lexer for that grammar.
 
 # Usage
@@ -14,27 +14,27 @@ The [options] are:
     -append file
         Name of output file to append. Takes precedence over -out.
     -define definition
-    	Add a definition for the template, in the form of key:value or 
-        []key:value. Keys that start with a [] are arrays and can be 
+    	Add a definition for the template, in the form of key:value or
+        []key:value. Keys that start with a [] are arrays and can be
         concatenated to by specifying the same definition key again.
-        Non array keys will be overwoitten if they are specified again. 
+        Non-array keys will be overwritten if they are specified again.
     -help
         Shows the help page.
     -out file
-        Name of output file to overwrite. 
+        Name of output file to overwrite.
     -template file
-    	Template file to expand. This may be repeated to make use 
+    	Template file to expand. This may be repeated to make use
         of several templates to generate one output file.
     -verbose
     	Be more verbose. Shows the scanned tokens as well.
 
-The names of template files may be given with the -t option, or after the 
+The names of template files may be given with the -t option, or after the
 ll1 input file.
 
 # Syntax
 
-The syntax of an LL1 grammar itself is: 
-    
+The syntax of an LL1 grammar itself is:
+
     Specification -> Grammar OptLexer.
     Grammar -> Rules.
     Rules -> Rule OptRules .
@@ -43,10 +43,10 @@ The syntax of an LL1 grammar itself is:
     Name -> ruleName .
     Template -> rawString | epsilon .
     // Alternates consist of sequences.
-    Definition -> Alternates . 
+    Definition -> Alternates .
     Alternates -> Sequence OptSequences .
     OptSequences -> or Alternates | epsilon.
-    Sequence -> Element OptElements . 
+    Sequence -> Element OptElements .
     OptElements -> Element OptElements | epsilon .
     Element -> Parenthesis .
     Element -> Name .
@@ -54,7 +54,7 @@ The syntax of an LL1 grammar itself is:
     Parenthesis -> '(' Definition ')' .
     OptLexer -> LexerTerminal OptLexerTerminals | epsilon .
     LexerTerminal -> terminalName arrow LexerDefinition Template .
-    LexerDefinition -> LexerAlternates . 
+    LexerDefinition -> LexerAlternates .
     LexerAlternates -> LexerPattern OptLexerMatches .
     OptLexerMatches -> or LexerPattern | epsilon.
     LexerPattern -> literal .
@@ -71,38 +71,55 @@ The syntax of an LL1 grammar itself is:
     epsilon      -> "epsilon" | 'ε'
     arrow        -> "->" | '→'
 
-The syntax of an ll1 grammar has the following elements:  
+The syntax of an ll1 grammar has the following elements:
   - //comment : Line comments start with //, /*block comments*/ are C-like
-  - RuleName  : names that start with an upper case letter are 
+  - RuleName  : names that start with an upper case letter are
                 rule names or nonterminals defined by the grammar.
-  - terminal  : names that start with a lower case letter are names of 
+  - terminal  : names that start with a lower case letter are names of
                terminals that the lexer produces.
   - 'l'       : single quoted strings are rune literals that the lexer produces.
   - "literal" : double quoted strings are rune literals that the lexer produces.
   - arrow     : a literal -> → as a separator.
  - epsilon   : a literal "epsilon" or 'ε', which indicates the empty rule.
-                this is used in conjunction with alternates to make a rule 
+                this is used in conjunction with alternates to make a rule
                 optional.
 
 # Templates
 
-If no templates are given, ll1 simply checks the grammar and outputs a 
+If no templates are given, ll1 simply checks the grammar and outputs a
 simple text report to the output file.
 
-If a template is given, it will be expanded and output to the output file. 
+If a template is given, it will be expanded and output to the output file.
 
-Inside the template the following variables are available: 
+Inside the template the following variables are available:
   - .Grammar: contains the .Rules of the grammar.
   - .InName: contains the name of the ll1 input file.
   - .OutName: contains the name of the output file specified with -a or -o.
   - .Templates: contains the names of the templates read.
   - .Definitions: contains the keys of the available definitions.
   - All other variables defined with -d
-    
+
 Inside the ll1 templates, the following template functions are available:
   - Most functions from the strings package (see go doc strings).
   - CompileRegexp compiles a regexp package regexp which can be used as such.
  - ToString to convert anything that isn't a string to a string.
  - NewMap creates a map based on its arguments, which have string keys and interface{} values.
    This is handy to pass multiple arguments to a sub-template.
-  - NewList creates a list from the given arguments.
+  - NewList creates a list from the given arguments.
+
+
+
+Follow conflict example:
+
+A -> B | C .
+
+B -> D e .
+C -> e .
+D -> f | epsilon .
+
+Since D is optional and can be empty, an LL(1)
+parser cannot decide between B and C, because
+when D is empty, both alternatives start with e.
+
+
+

+ 60 - 0
cmd/ll1lex/flexer.go

@@ -0,0 +1,60 @@
+package main
+
+import "os"
+import "io"
+import "path/filepath"
+import "fmt"
+import "flag"
+import "strings"
+import "src.eruta.nl/beoran/ll1/flexer"
+
// help prints the usage message for the flexer tool on standard
// output and exits with status 1.
func help() {
	const usage = "flexer usage: flexer [-o output.go] input.flex\n" +
		"\nGenerates a lexer in Go language.\n"
	fmt.Print(usage)
	os.Exit(1)
}
+
// showError reports err, attributed to the input in, on standard error.
func showError(in string, err interface{}) {
	msg := fmt.Sprintf("%s: error: %s\n", in, err)
	fmt.Fprint(os.Stderr, msg)
}
+
// fatal reports err on standard error and terminates the program
// with the given exit code.
func fatal(err error, code int) {
	fmt.Fprint(os.Stderr, "flexer: error: "+err.Error()+"\n")
	os.Exit(code)
}
+
+func main() {
+	outPtr := flag.String("o", "", "")
+	flag.Parse()
+	if flag.NArg() < 1 {
+		help()
+	}
+	inName := flag.Arg(0)
+	inFile, err := os.Open(inName)
+	if err != nil {
+		fatal(err, 1)
+	}
+	defer inFile.Close()
+	buf, err := io.ReadAll(inFile)
+	if err != nil {
+		fatal(err, 2)
+	}
+	outName := inName + ".go"
+	if outPtr != nil && *outPtr != "" {
+		outName = *outPtr
+	}
+	abs, err := filepath.Abs(outName)
+	if err != nil {
+		fatal(err, 3)
+	}
+	parts := filepath.SplitList(abs)
+	pkg := strings.TrimSuffix(filepath.Base(inName), filepath.Ext(inName))
+	if len(parts) > 2 {
+		pkg = parts[len(parts)-1]
+	}
+	tokens := flexer.LexFlexerInputString(inName, string(buf))
+	fmt.Printf("package %s\n\n", pkg)
+	for _, tok := range tokens {
+		fmt.Printf("tok: %v\n", tok)
+	}
+}

+ 31 - 3
flexer/flexer.go

@@ -252,6 +252,19 @@ func (f *Flexer) Rule(kind Kind, expr, context string, act Action) error {
 	return nil
 }
 
+func (f *Flexer) EscapedStringRule(kind Kind, first, last, context string) {
+	f.Rule(SkipKind, first, "", ContextAction(context))
+	f.Rule(kind, last, context, PopAction(kind))
+	f.Rule(SkipKind, `\\[etnru][0-9a-f]*`, context, EscapeAction(last[0]))
+	f.Rule(SkipKind, `.`, context, StoreAction())
+}
+
+func (f *Flexer) RawStringRule(kind Kind, first, last, context string) {
+	f.Rule(SkipKind, first, "", ContextAction(context))
+	f.Rule(kind, last, context, PopAction(kind))
+	f.Rule(SkipKind, `.`, context, StoreAction())
+}
+
 func (f *Flexer) PushContext(context string) {
 	f.contexts = append(f.contexts, context)
 }
@@ -308,11 +321,22 @@ func NewFlexer(name, text string) *Flexer {
 	return res
 }
 
+// KeepToken returns true if the token should be kept
+// and false if it is of the kind in the skip list
+func KeepToken(tok Token, skips ...Kind) bool {
+	for _, skip := range skips {
+		if skip == tok.Kind() {
+			return false
+		}
+	}
+	return true
+}
+
 // Lexes all tokens from the lexer until it reaches
 // EOF, or until it cannot progress anymore.
-// All tokens of kind SkipKind will be skipped
+// All tokens in the skip array will be skipped
 // from the results.
-func LexAll(lex Lexer) []Token {
+func LexAll(lex Lexer, skips ...Kind) []Token {
 	res := []Token{}
 	for !lex.EOF() {
 		toks := lex.LexOnce()
@@ -321,7 +345,11 @@ func LexAll(lex Lexer) []Token {
 			res = append(res, err)
 			return res
 		}
-		res = append(res, toks...)
+		for _, tok := range toks {
+			if KeepToken(tok, skips...) {
+				res = append(res, tok)
+			}
+		}
 	}
 	return res
 }

+ 18 - 0
flexgen/flexer.flex

@@ -0,0 +1,18 @@
+// The flexer generator's own input lexer specification
+line-comment 	-> "//[^\n\r]+" flex-skip .
+block-comment	-> "/\*(?ms:.)*?\*/" flex-skip .
+dot          	-> '\.' .
+literal-string  -> `"` flex-esc-string  .
+literal-raw     -> "`" flex-string  .
+literal-char    -> "'" flex-string  .
+terminal     	-> "[[:lower:]][[:alnum:]_-]+" .
+epsilon      	-> "(?:epsilon|ε)" .
+arrow        	-> "(?:->|→)" .
+flexer-keyword	-> "flex-skip|flex-string|flex-esc-string" .
+whitespace   	-> "[ \t\n\r]+" flex-skip .
+flexer-action   -> `@{(?ms:.)*?}@`
+@{
+// This is the rule action.
+@ { } @
+}@
+.

+ 61 - 0
flexgen/flexer_lexer.go

@@ -0,0 +1,61 @@
+package flexer
+
// Token kinds for the flexer generator's own input language (see
// flexer.flex). The -1 - iota expression makes them count down from
// -1, presumably to keep them clear of other Kind values — TODO
// confirm against the Kind type's other users.
const (
	FlexerKindDot = Kind(-1 - iota) // '.' that terminates a rule
	FlexerKindLiteralString         // double-quoted string with escapes
	FlexerKindLiteralRaw            // backquoted raw string
	FlexerKindLiteralChar           // single-quoted character literal
	FlexerKindTerminal              // lower-case terminal name
	FlexerKindArrow                 // "->" or '→'
	FlexerKindFlexerKeyword         // flex-skip, flex-string, flex-esc-string
	FlexerKindWhitespace            // spaces, tabs, newlines
	FlexerKindLineComment           // // line comment
	FlexerKindBlockComment          // /* block comment */
	FlexerKindFlexerAction          // @{ ... }@ action block
)
+
+/*
+
+// Flexer generator's input own lexer specification
+dot          	-> '\.' .
+arrow        	-> "(?:->|→)" .
+terminal     	-> "[[:lower:]][[:alnum:]_-]+" .
+literal-string  -> `"` flex-esc-string  .
+literal-raw     -> "`" flex-string  .
+literal-char    -> "'" flex-string  .
+flexer-keyword	-> "flex-skip|flex-string|flex-esc-string" .
+whitespace   	-> "[ \t\n\r]+" flex-skip .
+line-comment 	-> "//[^\n\r]+" flex-skip .
+block-comment	-> "/\*(?ms:.)*?\* /" .
+flexer-action   -> `@{(?ms:.)*?}@` .
+
+*/
+
// CheckedError wraps an error raised by Check, so that such panics
// can be told apart from any other panic when recovered.
type CheckedError struct {
	Error error
}

// Check panics with a CheckedError wrapping err when err is not nil.
// It lets lexer rule setup bail out without explicit error plumbing.
func Check(err error) {
	if err == nil {
		return
	}
	panic(CheckedError{err})
}
+
+func LexFlexerInputString(name, input string) []Token {
+	f := NewFlexer(name, input)
+	Check(f.Rule(FlexerKindWhitespace, `[ \t\n\r]+`, "", nil))
+	Check(f.Rule(FlexerKindLineComment, `//[^\n\r]+[\n\r]+`, "", nil))
+	Check(f.Rule(FlexerKindBlockComment, `/\*(?ms:.)*?\*/`, "", nil))
+	Check(f.Rule(FlexerKindDot, `\.`, "", nil))
+	Check(f.Rule(FlexerKindArrow, `(?:->|→)`, "", nil))
+	Check(f.Rule(FlexerKindTerminal, `[[:lower:]][[:alnum:]_-]+`, "", nil))
+	f.EscapedStringRule(FlexerKindLiteralString, `"`, `"`, "literal-string")
+	f.RawStringRule(FlexerKindLiteralRaw, "`", "`", "literal-raw")
+	f.RawStringRule(FlexerKindLiteralChar, `''`, `''`, "literal-char")
+	Check(f.Rule(FlexerKindFlexerKeyword, `flex-skip|flex-string|flex-esc-string`, "", nil))
+	Check(f.Rule(FlexerKindFlexerAction, `@{(?ms:.)*?}@`, "", nil))
+
+	skipKinds := []Kind{SkipKind, FlexerKindWhitespace, FlexerKindBlockComment, FlexerKindLineComment}
+
+	return LexAll(f, skipKinds...)
+}

+ 41 - 0
flexgen/flexer_parser.go

@@ -0,0 +1,41 @@
+package flexer
+
+/* Parser for the flexer lexer generator.  */
+
// GeneratorRule is one parsed lexer rule from a .flex specification:
// a terminal name, its pattern, optional flex-* modifier keywords and
// an optional @{ ... }@ action body.
type GeneratorRule struct {
	Name     string   // terminal name to the left of the arrow
	Regexp   string   // pattern the rule matches
	Keywords []string // flex-skip / flex-string / flex-esc-string modifiers
	Action   string   // raw action text, empty when absent
}

// GeneratorParser is a simple index-based parser over the token
// stream produced by LexFlexerInputString.
type GeneratorParser struct {
	Tokens []Token         // input token stream
	Index  int             // position of the current token
	Rules  []GeneratorRule // rules accumulated so far
}
+
+func (g GeneratorParser) Token() Token {
+	return g.Tokens[g.Index]
+}
+
+func (g GeneratorParser) Accept() (Token, err) {
+
+}
+
+func (g *GeneratorParser) ParseRule() error {
+	tok := g.Token()
+	if tok.Kind() != FlexerKind Terminal
+
+	for g.Index < len(g.Tokens) {
+		g.ParseRule()
+	}
+	return nil
+}
+
+func (g *GeneratorParser) Parse() error {
+	for g.Index < len(g.Tokens) {
+		g.ParseRule()
+	}
+	return nil
+}

+ 1 - 0
flexgen/generator.go

@@ -0,0 +1 @@
+package flexer

+ 1 - 1
go.mod

@@ -1,3 +1,3 @@
-module src.eruta.nl/ll1
+module src.eruta.nl/beoran/ll1
 
 go 1.16