Quellcode durchsuchen

First checkin of Multi Use Embeddable Scripting Language Indeed.

Beoran vor 6 Jahren
Commit
e329b61fe7
5 geänderte Dateien mit 607 neuen und 0 gelöschten Zeilen
  1. 169 0
      design_muesli.muesli
  2. 304 0
      lexer.go
  3. 1 0
      lexer_test.go
  4. 19 0
      logger.go
  5. 114 0
      parser.go

+ 169 - 0
design_muesli.muesli

@@ -0,0 +1,169 @@
+# Muesli is a Multi Use Embeddable Scripting Language Indeed. 
+# It is a scripting language with a TCL or shell like syntax, but somewhat 
+# OOP and LISP like semantics.
+# First it will be an embeddable scripting language implemented in Go.
+# In what follows I will describe the design of the language through example.
+# 
+# Why is there a # before these lines? # begins a comment, until the end of the 
+# line. MUESLI does not execute comments but they are collected 
+# for documentation purposes. More on that topic later
+# Muesli is a line based language.
+# A newline preceded by a \ is ignored, even after a comment so \
+this is still comment
+# Muesli consists of newline separated statements, however, a newline 
+# after { or the do keyword is ignored and does not count as a separator.
+/*
+    C style comments are also supported, and unlike C, they DO nest, but the 
+    comment indicator and the end of comment indicator must be the first element 
+    on the line. Anything on the line of the end-of-comment indicator is also 
+    ignored
+    /* 
+        so this is fine 
+    */ this is ignored too
+*/
+
+
+# Empty lines are ignored.
+
+# ... 
+# Muesli supports integer constants with type Int
+1
+2
+378
++108
+-878
+
+# ... Character constants with escapes with type Rune
+# This may also be unicode code points if above ascii range.
+
+'a'
+
+# It also supports multi line string constants with escapes, with type String
+"Hello world\"
+" 
+
+# and multi line strings without escapes
+`
+"Pop" goes the 
+weasel's tail.
+`
+
+# And booleans constants of type Bool
+!true !false
+
+# And simple floating point constants, but no exponential notation, with type Float
++0.5
+-7.000005
+
+# Lists can be created between [ ] and may be heterogenous or homogenous. 
+# The [ ] must be space separated. 
+# The type is Any[] if heterogenous,  
+[ foo "bar" ] 
+# The type is Int[] below
+[ 1 2 3 ] 
+
+
+# A word is a sequence of a lower case letter followed by anything that is not whitespace.
+# For example: this-IS*a/single+Word._ 
+# The value of a word is a string with the word itself. 
+# If the word is at the beginning of the line it is invoked as a command.
+# Muesli's basic syntax is that of the command. Spaces separate the arguments
+# of the command. The first word is the command, the rest are the arguments. 
+print "hello" world 7 0.9
+# the previous command outputs: "hello world 7 0.9" to standard output
+
+# A command has one or more results that can be captured with a parenthesis
+print ( mul 3 ( sum 5 7 ) )
+
+
+# Commands can be grouped into blocks. The value of the block is that of its last command.
+{ 
+    print "hello"
+    print "world"
+}
+
+# Commands can also take blocks as parameters. Don't put a newline between } and {
+# to be sure multiple blocks get passed
+command {
+    print "first block"
+} and also {
+    print "second block"
+}
+
+# In Muesli all values are typed. Types can be defined by the built in type command.
+type Door ( object locked Bool keys Item[] )
+
+# Commands can be defined by the built in 'to' command. 
+# They can have many arguments and many results.
+to open [door Door key Item]  Bool {
+    if (contains (member door key) key) {
+        set 
+    }
+}
+
+
+# Unlike TCL, MUESLI values are typed, and commands
+# can be overloaded based on the types of their arguments.
+# Types are much like words, but they begin with an upper case letter, up to the
+# next whitespace. 
+# A type that ends in [] is a list, [][] a list of lists, etc, and a type 
+# that ends in ... represents a variable argument, which must be last. 
+# Muesli tries to match the types, from narrow to wide for objects in the order 
+# Outer class, Embedded Class, Object, Any, 
+# and for primitive types <Bool|Int|String|Float|Word|Type>, Primitive, Any
+# You can override commands with more specific ones but not existing ones 
+# that have the same specificity.
+# Variables are not part of the language but of the built in commands
+# Variables are set in the current scope with
+set a 10
+# And fetched in the current scope with get
+print (get a 10) 
+# To access a variable in the scope one above current use upset/upget.
+upset a 10
+
+# However, there is syntactic sugar: 
+# =foo bar gets mapped to (set foo bar)
+=a 10
+
+# $foo means (get foo)  
+print $a
+
+# combinations of = followed by $ are allowed for indirect variable assignment
+=foo a
+=$foo 20
+print $a 
+
+
+# Control structures are merely built in functions.
+
+if (less a 10) {
+    print "Less"
+} else {
+    print "More"
+}
+
+# That's all there is to the syntax. Apart from the built in commands, 
+# the semantics are up to you to implement as 
+# embedded commands.
+
+/*
+
+BLOCK
+PUSHS "More"
+CALL print
+PUSHBLOCK
+PUSHW else
+BLOCK
+PUSHS "Less"
+CALL print
+PUSHBLOCK
+PUSHI 10
+PUSHW a
+CALL less
+CALL if
+
+
+
+
+*/
+

+ 304 - 0
lexer.go

@@ -0,0 +1,304 @@
+package muesli
+
+
+import (
+    _ "bytes"
+    _ "errors"
+    "fmt"
+    _ "io"
+    _ "reflect"
+    _ "runtime"
+    "strings"
+    _ "unicode"
+    "io"
+    "os"
+    "bufio"
+    "unicode"
+    // "gitlab.com/beoran/woe/graphviz"
+    // _ "gitlab.com/beoran/woe/monolog"
+)
+
+
+// Position identifies a location in a source text by file name, line and
+// column. NewLexer starts both Line and Column at 1.
+type Position struct {
+    FileName    string
+    Line        int
+    Column      int
+}
+
+// Lexer reads runes from an io.RuneScanner and turns them into tokens.
+type Lexer struct {
+    // Position of the lexer in the input; updated by Next and Previous.
+    Position
+    // Index is incremented by Next and decremented by Previous.
+    Index       int
+    // Start is not used by any of the lexer methods in this file.
+    Start       int
+    // RuneScanner is the input source the runes are read from.
+    io.RuneScanner
+    // buffer collects the runes read so far; MakeToken uses it as the
+    // text of the token it creates.
+    buffer      []rune
+    // Current is the rune most recently read by Next or pushed back by
+    // Previous.
+    Current     rune
+}
+
+
+
+/** Token Kind. Uses a rune to easily handle single character tokens. */
+type TokenKind rune
+
+// Kinds of token. Literal, word and type kinds are tagged with a mnemonic
+// letter. Punctuation kinds use the punctuation character itself, which lets
+// Lex convert the rune it read directly into the kind with TokenKind(r).
+const (    
+    TokenKindInteger    = TokenKind('i') 
+    TokenKindFloat      = TokenKind('f')
+    TokenKindString     = TokenKind('s')
+    TokenKindBoolean    = TokenKind('b')
+    TokenKindWord       = TokenKind('w')
+    TokenKindType       = TokenKind('t')
+    TokenKindGet        = TokenKind('$')
+    TokenKindSet        = TokenKind('=')
+    TokenKindOpenBlock  = TokenKind('{')
+    TokenKindCloseBlock = TokenKind('}')
+    TokenKindOpenList   = TokenKind('[')
+    TokenKindCloseList  = TokenKind(']')
+    TokenKindOpenParen  = TokenKind('(')
+    TokenKindCloseParen = TokenKind(')')
+    // TokenKindError marks a token that is returned together with an error.
+    TokenKindError      = TokenKind('!')
+    // TokenKindEOX is the kind Lex returns for a newline or a period.
+    TokenKindEOX        = TokenKind('\n')
+)
+
+
+
+// NewToken assembles a Token from its kind, its value and the position in
+// the source it was read at.
+func NewToken(kind TokenKind, val Value, pos Position) Token {
+    tok := Token{
+        TokenKind: kind,
+        Value:     val,
+        Position:  pos,
+    }
+    return tok
+}
+
+// MakeToken creates a token of the given kind. Its value is the text held in
+// the lexer's buffer, as a StringValue, and its position is the lexer's
+// current position.
+func (lexer Lexer) MakeToken(kind TokenKind) Token {
+    text := string(lexer.buffer)
+    return NewToken(kind, StringValue(text), lexer.Position)
+}
+
+// Next reads one rune from the input, appends it to the buffer, records it
+// in Current and advances Index and the position. A newline moves the
+// position to column 1 of the next line. If the read fails, the error is
+// returned with a zero rune and the lexer state is left untouched.
+func (lexer *Lexer) Next() (rune, error) {
+    read, _, readErr := lexer.RuneScanner.ReadRune()
+    if readErr != nil {
+        return 0, readErr
+    }
+
+    lexer.buffer = append(lexer.buffer, read)
+    lexer.Current = read
+    lexer.Index++
+
+    if read == '\n' {
+        lexer.Position.Line++
+        lexer.Position.Column = 1
+    } else {
+        lexer.Position.Column++
+    }
+    return read, nil
+}
+
+// Previous pushes the most recently read rune back onto the input and undoes
+// the bookkeeping Next did for it: Index and the position step back and the
+// rune is removed from the buffer. Current is set to the rune that was
+// pushed back. The error of the underlying UnreadRune is returned unchanged,
+// in which case the lexer state is not modified.
+func (lexer *Lexer) Previous() error {
+    if err := lexer.RuneScanner.UnreadRune(); err != nil {
+        return err
+    }
+
+    lexer.Index--
+    lexer.Position.Column--
+
+    if len(lexer.buffer) == 0 {
+        return nil
+    }
+
+    last := len(lexer.buffer) - 1
+    r := lexer.buffer[last]
+    lexer.buffer = lexer.buffer[:last]
+
+    if r == '\n' {
+        // Next moved to the following line when it read this newline, so
+        // pushing it back must return to the line before it.
+        lexer.Position.Line--
+        // Restore the column by counting the runes of that line which are
+        // still buffered. This relies on the buffer holding the whole line,
+        // which is the case as long as nothing truncates the buffer.
+        column := 1
+        for i := len(lexer.buffer) - 1; i >= 0 && lexer.buffer[i] != '\n'; i-- {
+            column++
+        }
+        lexer.Position.Column = column
+    }
+
+    lexer.Current = r
+    return nil
+}
+
+
+
+    
+// SkipSpace reads runes for as long as the current rune is whitespace
+// according to unicode.IsSpace, which includes newlines, and then pushes the
+// last rune read back with Previous so that it can be read again. Previous is
+// called even when the current rune was not whitespace to begin with.
+//
+// An error from reading or from pushing back is returned; previously the
+// error from Previous was silently dropped.
+func (lexer *Lexer) SkipSpace() error {
+    r := lexer.Current
+
+    for unicode.IsSpace(r) {
+        var err error
+        if r, err = lexer.Next(); err != nil {
+            return err
+        }
+    }
+    return lexer.Previous()
+}
+
+
+func (lexer * Lexer) LexNumber() (Token, error) { 
+  isFloat := false
+  var r rune
+  var err error
+  
+  r = lexer.Current  
+    
+  for unicode.IsDigit(r) || r == '.'  {
+    if r == '.' {
+      if isFloat { // double . in floating point is an error
+        tok := lexer.MakeToken(TokenKindError) 
+        err = fmt.Errorf("Double period . in floating point constant.")
+        return tok, err
+      } else {
+        isFloat = true
+      }
+    }
+    r, err = lexer.Next()
+    if err != nil {
+      return lexer.MakeToken(TokenKindError), err
+    }
+  }
+  
+  lexer.Previous()
+  if isFloat {
+    return lexer.MakeToken(TokenKindFloat), nil
+  } else {
+    return lexer.MakeToken(TokenKindInteger), nil  
+  }
+  
+}
+
+func (lexer * Lexer) LexString() (Token, error) { 
+  inEscape := false
+  var r rune
+  var err error
+  
+  r, err = lexer.Next()
+  if err != nil {
+    return lexer.MakeToken(TokenKindError), err
+  }
+ 
+  
+  for  r != '"'  || inEscape {
+    if r == '\\' {
+      // TODO escape parsing, now just a single character after it
+      if inEscape { // double backslash
+      } else {
+        inEscape = true
+      }
+    } else {
+        inEscape = false
+    }
+    r, err = lexer.Next()
+    if err != nil {
+      return lexer.MakeToken(TokenKindError), err
+    }
+  }
+  
+  return lexer.MakeToken(TokenKindString), nil
+}
+
+func (lexer * Lexer) LexLongString() (Token, error) { 
+  var r rune
+  var err error
+  
+  r, err = lexer.Next()
+  if err != nil {
+    return lexer.MakeToken(TokenKindError), err
+  }
+  
+  for  r != '`' {
+    r, err = lexer.Next()
+    if err != nil {
+      return lexer.MakeToken(TokenKindError), err
+    }
+  }
+  
+  return lexer.MakeToken(TokenKindString), nil
+}
+
+func (lexer * Lexer) LexWord() (Token, error) { 
+  var r rune
+  var err error
+  
+  r, err = lexer.Next()
+  if err != nil {
+    return lexer.MakeToken(TokenKindError), err
+  }
+  
+  for  r != '`' {
+    r, err = lexer.Next()
+    if err != nil {
+      return lexer.MakeToken(TokenKindError), err
+    }
+  }
+  
+  return lexer.MakeToken(TokenKindString), nil
+}
+
+// Lex reads the next token from the input.
+//
+// Whitespace other than a newline is skipped before the token. Previously
+// the skipped whitespace rune itself was still examined afterwards, so any
+// blank in the input was reported as an unknown character.
+//
+// A newline or a period gives a TokenKindEOX token; whitespace that follows
+// a newline is skipped with SkipSpace. A digit starts a number, a double
+// quote a string, a backtick a long string and a letter a word. The
+// characters $ = { } [ ] ( ) each give a token whose kind is that character.
+// Anything else, and any read error, is returned with an error token.
+//
+// NOTE(review): MakeToken uses the whole buffer as token text and nothing
+// here resets the buffer, so the value of a token holds all input read so
+// far. Confirm whether the unused Start field was meant to mark where the
+// token begins.
+func (lexer *Lexer) Lex() (Token, error) {
+    r, err := lexer.Next()
+    // Newlines are whitespace too, but they separate statements, so they
+    // must not be skipped here.
+    for err == nil && r != '\n' && unicode.IsSpace(r) {
+        r, err = lexer.Next()
+    }
+    if err != nil {
+        return lexer.MakeToken(TokenKindError), err
+    }
+
+    switch {
+    case unicode.IsDigit(r):
+        return lexer.LexNumber()
+    case r == '\n':
+        // The result is deliberately ignored, as it was before: a failure
+        // to read further, such as the end of the input, presumably shows
+        // up again on the next call to Next.
+        _ = lexer.SkipSpace()
+        return lexer.MakeToken(TokenKindEOX), nil
+    case r == '.':
+        return lexer.MakeToken(TokenKindEOX), nil
+    case r == '"':
+        return lexer.LexString()
+    case r == '`':
+        return lexer.LexLongString()
+    case unicode.IsLetter(r):
+        return lexer.LexWord()
+    }
+
+    switch TokenKind(r) {
+    case TokenKindGet, TokenKindSet,
+        TokenKindOpenBlock, TokenKindCloseBlock,
+        TokenKindOpenList, TokenKindCloseList,
+        TokenKindOpenParen, TokenKindCloseParen:
+        return lexer.MakeToken(TokenKind(r)), nil
+    }
+
+    return lexer.MakeToken(TokenKindError), fmt.Errorf("Unknown character")
+}
+
+// NewLexer returns a Lexer that reads from scanner. The file name is only
+// recorded in the position, which starts at line 1, column 1.
+func NewLexer(scanner io.RuneScanner, filename string) Lexer {
+    return Lexer{
+        Position:    Position{FileName: filename, Line: 1, Column: 1},
+        RuneScanner: scanner,
+    }
+}
+
+// NewLexerFromInputString returns a Lexer that reads from the given string.
+// The file name in its position is "<input>".
+func NewLexerFromInputString(input string) Lexer {
+    return NewLexer(strings.NewReader(input), "<input>")
+}
+
+
+// NewLexerFromFileName opens the named file and returns a Lexer that reads
+// from it through a buffered reader. If the file cannot be opened, the error
+// from os.Open is returned and the Lexer is nil.
+//
+// The error check used to be inverted: a successful open returned a nil
+// Lexer and a failed open built a Lexer around a nil file.
+//
+// The opened file is not closed here and the Lexer only keeps the buffered
+// reader, so the file stays open for as long as the process runs.
+func NewLexerFromFileName(filename string) (*Lexer, error) {
+    file, err := os.Open(filename)
+    if err != nil {
+        return nil, err
+    }
+
+    lex := NewLexer(bufio.NewReader(file), filename)
+    return &lex, nil
+}
+ 
+
+

+ 1 - 0
lexer_test.go

@@ -0,0 +1 @@
+package muesli

+ 19 - 0
logger.go

@@ -0,0 +1,19 @@
+package muesli
+
+import "runtime"
+
+/** Logger interface that Muesli uses.
+ *
+ * Log receives a level, the file and line the message is attributed to, and
+ * a format string with its arguments. WriteLog calls it with the file and
+ * line reported by runtime.Caller.
+ */
+type Logger interface {
+	Log(level string, file string, line int, format string, args ...interface{})
+}
+
+// WriteLog sends a message to logger, attributed to a source location taken
+// from the call stack. depth is passed to runtime.Caller as the number of
+// stack frames to skip, where 0 is WriteLog itself. If the caller
+// information is not available, the file is "unknown" and the line is 0.
+func WriteLog(logger Logger, depth int, level string, format string, args ...interface{}) {
+	file, line := "unknown", 0
+	if _, callerFile, callerLine, ok := runtime.Caller(depth); ok {
+		file, line = callerFile, callerLine
+	}
+
+	logger.Log(level, file, line, format, args...)
+}

+ 114 - 0
parser.go

@@ -0,0 +1,114 @@
+package muesli 
+
+import (
+    _ "bytes"
+    _ "errors"
+    _ "fmt"
+    _ "io"
+    _ "reflect"
+    _ "runtime"
+    _ "strings"
+    _ "unicode"
+    _ "io"
+    _ "os"
+    _ "bufio"
+    _ "unicode"
+    // "gitlab.com/beoran/woe/graphviz"
+    // _ "gitlab.com/beoran/woe/monolog"
+    "gitlab.com/beoran/gdast/tree"
+)
+
+
+
+// AstKind is the kind of an Ast node.
+type AstKind int
+
+// The kinds of Ast node, numbered from 0 in the order they are listed.
+const (
+    AstKindProgram      = AstKind(iota)
+    AstKindStatements
+    AstKindStatement
+    AstKindCommand
+    AstKindArguments
+    AstKindBlock
+    AstKindList
+    AstKindCapture
+    AstKindWordValue
+    AstKindWord
+    AstKindType
+    AstKindValue
+    AstKindEox
+    AstKindError
+)
+
+
+/* Run time values */
+
+// Value is an empty interface, so any Go value can be used as a Value.
+type Value interface {
+}
+
+// IntValue is a value stored as an int64.
+type IntValue int64
+
+// FloatValue is a value stored as a float64.
+type FloatValue float64
+
+// StringValue is a value stored as a string. The lexer's MakeToken uses it
+// for the text of every token.
+type StringValue string
+
+// BoolValue is a value stored as a bool.
+type BoolValue bool
+
+// WordValue is a value stored as a string.
+type WordValue string
+
+// TypeValue is a value stored as a string.
+type TypeValue string
+
+// AnyValue is a struct without fields.
+type AnyValue struct {
+}
+
+// ListValue holds a slice of values.
+type ListValue struct {
+    List []Value
+}
+
+
+// Token is a lexical token: its kind, its value and the position it was
+// created at. NewToken fills the three embedded fields in this order.
+type Token struct {
+    TokenKind
+    Value
+    Position
+}
+
+
+/* AST node. It embeds a tree.Node from the gdast tree package, the kind of
+ * the node and a pointer to a token. */
+type Ast struct {
+    tree.Node
+    AstKind
+    * Token
+}
+
+
+// Parser embeds the Lexer it reads tokens from and the Ast that Parse
+// returns.
+type Parser struct {
+    Lexer
+    Ast
+}
+
+
+
+
+
+// ParseProgram is a stub for now: the parsing loop is commented out and the
+// method always returns nil.
+func (parser * Parser) ParseProgram() error {
+    /*
+    for parser.Position < len(Input) {
+    }
+    */
+    return nil
+}
+
+
+
+// Parse resets the parser's Index to 0, runs ParseProgram and returns the
+// parser's Ast together with the error from ParseProgram, if any.
+func (parser *Parser) Parse() (Ast, error) {
+    parser.Index = 0
+    if err := parser.ParseProgram(); err != nil {
+        return parser.Ast, err
+    }
+    return parser.Ast, nil
+}
+
+
+
+
+
+
+
+
+