소스 검색

Finally the lexer shows the first signs of working correctly.

Beoran 6 년 전
부모
커밋
4a8d75abdb
6개의 변경된 파일, 380개의 추가 그리고 147개의 삭제
  1. 9 0
      go.mod
  2. 193 118
      lexer.go
  3. 46 1
      lexer_test.go
  4. 0 28
      parser.go
  5. 106 0
      token.go
  6. 26 0
      value.go

+ 9 - 0
go.mod

@@ -0,0 +1,9 @@
+module muesli
+
+require gitlab.com/beoran/monolog v0.0.0
+
+require gitlab.com/beoran/gdast v0.0.0
+
+replace gitlab.com/beoran/monolog => ../monolog
+
+replace gitlab.com/beoran/gdast => ../gdast

+ 193 - 118
lexer.go

@@ -19,12 +19,10 @@ import (
 )
 )
 
 
 
 
-type Position struct {
-    FileName    string
-    Line        int
-    Column      int
-}
 
 
+
+/* A Lexer splits scanned input into tokens.
+ */
 type Lexer struct {
 type Lexer struct {
     Position
     Position
     Index       int
     Index       int
@@ -35,41 +33,42 @@ type Lexer struct {
 }
 }
 
 
 
 
+func (lexer * Lexer) ClearBuffer() {
+  lexer.buffer = make([]rune, 0)
+}
 
 
-/** Token Kind. Uses a rune to easily handle single character tokens. */
-type TokenKind rune
-
-const (    
-    TokenKindInteger    = TokenKind('i') 
-    TokenKindFloat      = TokenKind('f')
-    TokenKindString     = TokenKind('s')
-    TokenKindBoolean    = TokenKind('b')
-    TokenKindWord       = TokenKind('w')
-    TokenKindType       = TokenKind('t')
-    TokenKindGet        = TokenKind('$')
-    TokenKindSet        = TokenKind('=')
-    TokenKindOpenBlock  = TokenKind('{')
-    TokenKindCloseBlock = TokenKind('}')
-    TokenKindOpenList   = TokenKind('[')
-    TokenKindCloseList  = TokenKind(']')
-    TokenKindOpenParen  = TokenKind('(')
-    TokenKindCloseParen = TokenKind(')')
-    TokenKindError      = TokenKind('!')
-    TokenKindEOX        = TokenKind('\n')
-)
+func (lexer * Lexer) MakeToken(kind TokenKind) Token {
+    val := StringValue(string(lexer.buffer))
+    lexer.ClearBuffer()
+    return NewToken(kind, val, lexer.Position)
+}
+
+func (lexer Lexer) MakeErrorToken(err error) Token {
+    return NewToken(TokenKindError, err.Error(), lexer.Position)
+}
 
 
+func (lexer Lexer) MakeErrorfToken(format string, va ... interface{}) Token {
+    err := fmt.Errorf(format, va...)
+    return lexer.MakeErrorToken(err)
+}
 
 
 
 
-func NewToken(kind TokenKind, val Value, pos Position) Token {
-    return Token{kind, val, pos}
+func (lexer Lexer) MakeEOFToken() Token {
+    return NewToken(TokenKindEOF, "", lexer.Position)
 }
 }
 
 
-func (lexer Lexer) MakeToken(kind TokenKind) Token {
-    val := StringValue(string(lexer.buffer))
-    return NewToken(kind, val, lexer.Position)
+
+func (lexer * Lexer) Peek() (rune, error) {
+    r, _, err := lexer.RuneScanner.ReadRune()
+    err2 := lexer.RuneScanner.UnreadRune()
+    
+    if err == nil {
+        err = err2
+    }
+    return r, err
 }
 }
 
 
-func (lexer * Lexer) Next() (rune, error) {
+func (lexer * Lexer) Next()  (rune, error) {  
     r, _, err := lexer.RuneScanner.ReadRune()
     r, _, err := lexer.RuneScanner.ReadRune()
     if err != nil {
     if err != nil {
         return 0, err
         return 0, err
@@ -85,7 +84,8 @@ func (lexer * Lexer) Next() (rune, error) {
     return lexer.buffer[len(lexer.buffer) - 1], nil
     return lexer.buffer[len(lexer.buffer) - 1], nil
 }
 }
 
 
-func (lexer * Lexer) Previous() error {
+func (lexer * Lexer) oldPrevious() error {
+    fmt.Printf("Previous: now %c \n", lexer.Current)
     err := lexer.RuneScanner.UnreadRune()
     err := lexer.RuneScanner.UnreadRune()
     if err != nil {
     if err != nil {
         return err
         return err
@@ -99,8 +99,8 @@ func (lexer * Lexer) Previous() error {
         lexer.buffer = lexer.buffer[0: len(lexer.buffer) - 1];
         lexer.buffer = lexer.buffer[0: len(lexer.buffer) - 1];
     
     
         if r == '\n' {
         if r == '\n' {
-            lexer.Position.Column = 1
-            lexer.Position.Line++
+            lexer.Position.Column = 1 // XXX wrong
+            lexer.Position.Line--
         }
         }
         
         
         lexer.Current = r
         lexer.Current = r
@@ -109,141 +109,192 @@ func (lexer * Lexer) Previous() error {
 }
 }
 
 
 
 
+func (lexer * Lexer) NextIf(predicate func(rune) bool) (bool, error) {
+  r, err := lexer.Peek()
+  if err != nil {
+    return false, err
+  }
+  if (predicate(r)) {
+    r, err = lexer.Next()
+    if err != nil {
+      return true, err
+    }
+    return true, nil
+  }
+  return false, nil
+}
+
+func (lexer * Lexer) NextWhile(predicate func(rune) bool) (bool, error) {
+    result := true
+    ok, err := lexer.NextIf(predicate)
+    result = result || ok
+    for ; ok && (err == nil) ; ok, err = lexer.NextIf(predicate) {
+        result = result || ok
+    }
+    return result, err
+}
+
+
/* isSpace reports whether r is horizontal whitespace (space or tab).
 * Newlines are deliberately excluded: they terminate expressions. */
func isSpace(r rune) bool {
    switch r {
    case ' ', '\t':
        return true
    }
    return false
}
+
 
 
-    
 func (lexer * Lexer) SkipSpace() (error) {
 func (lexer * Lexer) SkipSpace() (error) {
-    var r rune
-    var err error
-    r = lexer.Current
-    
-    for unicode.IsSpace(r) { 
-      r, err = lexer.Next()
-      if err != nil { 
-        return err
-      }
+    _, err := lexer.NextWhile(isSpace)
+    if err == nil { 
+      lexer.ClearBuffer()
+    }
+    return err
+ }
+
+/* Handles errors including EOF by either returning an error token or an 
+ * EOF token.
+ */
+func (lexer * Lexer) handleError(err error) Token {
+    if err == io.EOF {
+        return lexer.MakeEOFToken()
+    } else {
+        return lexer.MakeErrorToken(err)
     }
     }
-    lexer.Previous()
-    return nil 
 }
 }
 
 
 
 
-func (lexer * Lexer) LexNumber() (Token, error) { 
+func (lexer * Lexer) LexNumber() Token { 
   isFloat := false
   isFloat := false
-  var r rune
-  var err error
   
   
-  r = lexer.Current  
-    
-  for unicode.IsDigit(r) || r == '.'  {
-    if r == '.' {
-      if isFloat { // double . in floating point is an error
-        tok := lexer.MakeToken(TokenKindError) 
-        err = fmt.Errorf("Double period . in floating point constant.")
-        return tok, err
+  _, err := lexer.NextWhile(func (r rune) bool {
+    if unicode.IsDigit(r) {
+        return true
+    } else if r == '.' {
+      if isFloat { 
+          return false // double point in floating point
       } else {
       } else {
-        isFloat = true
+          isFloat = true
+          return true
       }
       }
+    } else {
+        return false
     }
     }
-    r, err = lexer.Next()
-    if err != nil {
-      return lexer.MakeToken(TokenKindError), err
-    }
-  }
+  })
+  
   
   
-  lexer.Previous()
+  if err != nil {
+      return lexer.MakeErrorfToken("when parsing number: %s", err)
+  }
   if isFloat {
   if isFloat {
-    return lexer.MakeToken(TokenKindFloat), nil
+      return lexer.MakeToken(TokenKindFloat)
   } else {
   } else {
-    return lexer.MakeToken(TokenKindInteger), nil  
-  }
-  
+      return lexer.MakeToken(TokenKindInteger)  
+  }  
+}
+
/* isDoubleQuote reports whether r is the '"' string delimiter. */
func isDoubleQuote(r rune) bool {
    switch r {
    case '"':
        return true
    default:
        return false
    }
}
 
 
-func (lexer * Lexer) LexString() (Token, error) { 
+func (lexer * Lexer) LexString() Token { 
   inEscape := false
   inEscape := false
-  var r rune
   var err error
   var err error
   
   
-  r, err = lexer.Next()
+  _, err = lexer.Next() 
   if err != nil {
   if err != nil {
-    return lexer.MakeToken(TokenKindError), err
+      return lexer.handleError(err)
   }
   }
- 
-  
-  for  r != '"'  || inEscape {
-    if r == '\\' {
-      // TODO escape parsing, now just a single character after it
-      if inEscape { // double backslash
+    
+  _, err = lexer.NextWhile(func (r rune) bool {
+      if r == '"' && !inEscape {
+        return false
+      }
+      if r == '\\' {
+        // TODO escape parsing, now just a single character after it      
+          if inEscape { // double backslash
+            inEscape = false
+          } else {
+            inEscape = true
+          }
       } else {
       } else {
-        inEscape = true
+            inEscape = false
       }
       }
-    } else {
-        inEscape = false
-    }
-    r, err = lexer.Next()
-    if err != nil {
-      return lexer.MakeToken(TokenKindError), err
-    }
+      return true // still inside the string
+  })
+  if err != nil {
+        return lexer.MakeErrorfToken("when parsing string: %s", err)
   }
   }
   
   
-  return lexer.MakeToken(TokenKindString), nil
+  _, err = lexer.Next()
+  if err != nil {
+      return lexer.handleError(err)
+  }
+  
+  return lexer.MakeToken(TokenKindString)
 }
 }
 
 
-func (lexer * Lexer) LexLongString() (Token, error) { 
-  var r rune
+func (lexer * Lexer) LexLongString() Token { 
   var err error
   var err error
   
   
-  r, err = lexer.Next()
+  _, err = lexer.Next()
   if err != nil {
   if err != nil {
-    return lexer.MakeToken(TokenKindError), err
+      return lexer.handleError(err)
   }
   }
   
   
-  for  r != '`' {
-    r, err = lexer.Next()
-    if err != nil {
-      return lexer.MakeToken(TokenKindError), err
-    }
+  _, err = lexer.NextWhile(func (r rune) bool {
+      return r != '`'
+  })
+
+  if err != nil {
+        return lexer.MakeErrorfToken("when parsing long string: %s", err)
+  }
+  
+  _, err = lexer.Next()
+  if err != nil {
+      return lexer.handleError(err)
   }
   }
   
   
-  return lexer.MakeToken(TokenKindString), nil
+  return lexer.MakeToken(TokenKindString)
 }
 }
 
 
-func (lexer * Lexer) LexWord() (Token, error) { 
-  var r rune
+func (lexer * Lexer) LexWord() Token { 
   var err error
   var err error
   
   
-  r, err = lexer.Next()
+  _, err = lexer.Next()
   if err != nil {
   if err != nil {
-    return lexer.MakeToken(TokenKindError), err
+    return lexer.handleError(err)
   }
   }
   
   
-  for  r != '`' {
-    r, err = lexer.Next()
-    if err != nil {
-      return lexer.MakeToken(TokenKindError), err
-    }
-  }
+  lexer.NextWhile(func(r rune) bool {
+      return unicode.IsLetter(r)
+  })
   
   
-  return lexer.MakeToken(TokenKindString), nil
+  return lexer.MakeToken(TokenKindWord)
 }
 }
 
 
-func (lexer * Lexer) Lex() (Token, error) {
-    r, err := lexer.Next()
+func (lexer * Lexer) lex() Token {
+    r, err := lexer.Peek()
+ 
     if err != nil {
     if err != nil {
-        return lexer.MakeToken(TokenKindError), err
+      return lexer.handleError(err)
     }
     }
     
     
-    if unicode.IsSpace(r) {
-        lexer.SkipSpace()
+    if isSpace(r) { 
+        err = lexer.SkipSpace()
+        if err != nil {
+          return lexer.handleError(err)
+        } 
+        r, err = lexer.Peek()
+        if err != nil {
+          return lexer.handleError(err)
+        }
     }
     }
-
     
     
+
     if unicode.IsDigit(r) {
     if unicode.IsDigit(r) {
         return lexer.LexNumber()
         return lexer.LexNumber()
     }
     }
     
     
     if r == '\n' || r == '.' {
     if r == '\n' || r == '.' {
-       return lexer.MakeToken(TokenKindEOX), nil 
+       lexer.Next()
+       return lexer.MakeToken(TokenKindEOX)
     }
     }
         
         
     if r == '"' {
     if r == '"' {
@@ -264,7 +315,8 @@ func (lexer * Lexer) Lex() (Token, error) {
         case TokenKindCloseList : fallthrough
         case TokenKindCloseList : fallthrough
         case TokenKindOpenParen : fallthrough
         case TokenKindOpenParen : fallthrough
         case TokenKindCloseParen: 
         case TokenKindCloseParen: 
-            return lexer.MakeToken(TokenKind(r)), nil
+            lexer.Next()
+            return lexer.MakeToken(TokenKind(r))
         default:
         default:
     }
     }
     
     
@@ -272,9 +324,32 @@ func (lexer * Lexer) Lex() (Token, error) {
         return lexer.LexWord()
         return lexer.LexWord()
     }
     }
 
 
-    return lexer.MakeToken(TokenKindError), fmt.Errorf("Unknown character")
+    return lexer.MakeErrorfToken("Unknown character: %c", r)
+}
+
+func (lexer * Lexer) Lex() Token {
+  res := lexer.lex()
+  lexer.ClearBuffer() // ensure buffer is cleared after lexing, always.
+  return res
 }
 }
 
 
+func (lexer * Lexer) LexAll() []Token {
+    var token Token
+
+    res := make([]Token, 0)
+
+    for token = lexer.Lex() ; ! token.IsLast() ; token = lexer.Lex() {
+          fmt.Printf("token: %s %v\n", token.String(), token.IsLast())
+          res = append(res, token)
+    } 
+    
+    fmt.Printf("Last token: %s %v\n", token.String(), token.IsLast())
+    res = append(res, token)
+ 
+    return res
+}
+
+
 func NewLexer(scanner io.RuneScanner, filename string) Lexer {
 func NewLexer(scanner io.RuneScanner, filename string) Lexer {
     lexer := Lexer{}
     lexer := Lexer{}
     lexer.RuneScanner       = scanner
     lexer.RuneScanner       = scanner

+ 46 - 1
lexer_test.go

@@ -1 +1,46 @@
-package muesli
+package muesli
+
+import (
+    _ "strings"
+    "testing"
+)
+
+
+func LexText(input string) []Token {
+    lexer  := NewLexerFromInputString(input)
+    tokens := lexer.LexAll()
+    return tokens
+}
+
/* Assert reports a test error with the given text when ok is false,
 * and returns ok so checks can be chained.
 * Uses Error rather than Errorf: text is a plain message, not a
 * format string, so passing it to Errorf trips go vet (printf) and
 * mangles any '%' it happens to contain. */
func Assert(test *testing.T, ok bool, text string) bool {
    if !ok {
        test.Error(text)
    }
    return ok
}
+
+func HelperTryLexText(input string, test * testing.T) {
+  tokens := LexText(input)
+  for i:= 0; i < len(tokens) ; i++ {
+    test.Logf("%d: %s", i, tokens[i].String())
+  }
+}
+
/* TestLexing feeds a small sample script through the lexer and logs
 * the resulting tokens.  It currently only checks that lexing runs to
 * completion; assertions on the exact token stream are still to be
 * written. */
func TestLexing(test *testing.T) {
    const input = `
    greet "hi there"
    
    say "hello \"world\\"

define open a door {
    set (door open) true
}

def increment variable by value {
    =variable (add variable $value)
}
`
    test.Log("Hi test!")

    HelperTryLexText(input, test)
}

+ 0 - 28
parser.go

@@ -40,35 +40,7 @@ const (
 )
 )
 
 
 
 
-/* Run time values */
-type Value interface {
-}
-
-type IntValue int64
-
-type FloatValue float64
-
-type StringValue string
-
-type BoolValue bool
-
-type WordValue string
-
-type TypeValue string
 
 
-type AnyValue struct {
-}
-
-type ListValue struct {
-    List []Value
-}
-
-
-type Token struct {
-    TokenKind
-    Value
-    Position
-}
 
 
 
 
 /* AST node kind */
 /* AST node kind */

+ 106 - 0
token.go

@@ -0,0 +1,106 @@
+package muesli
+
+
+import (
+    "fmt"
+)
+
+
/* Position of a token in an input stream. */
type Position struct {
    FileName    string // name of the input the token came from
    Line        int    // line number within the input
    Column      int    // column number within the line
}
+
+
/* Token Kind. Uses a rune to easily handle single character tokens. */
type TokenKind rune

const (
    TokenKindInteger    = TokenKind('i')
    TokenKindFloat      = TokenKind('f')
    TokenKindString     = TokenKind('s')
    TokenKindBoolean    = TokenKind('b')
    TokenKindWord       = TokenKind('w')
    TokenKindType       = TokenKind('t')
    TokenKindGet        = TokenKind('$')
    TokenKindSet        = TokenKind('=')
    TokenKindOpenBlock  = TokenKind('{')
    TokenKindCloseBlock = TokenKind('}')
    TokenKindOpenList   = TokenKind('[')
    TokenKindCloseList  = TokenKind(']')
    TokenKindOpenParen  = TokenKind('(')
    TokenKindCloseParen = TokenKind(')')
    TokenKindError      = TokenKind('!')
    TokenKindEOX        = TokenKind('\n')
    // NOTE(review): 0x255 is 597 decimal, not 255; if a byte-sized
    // sentinel was intended this should probably be 0xFF — confirm.
    TokenKindEOF        = TokenKind(0x255)
)

/* TokenKindNames maps every token kind to a human readable name. */
var TokenKindNames map[TokenKind]string = map[TokenKind]string{
    TokenKindInteger:    "Integer",
    TokenKindFloat:      "Float",
    TokenKindString:     "String",
    TokenKindBoolean:    "Boolean",
    TokenKindWord:       "Word",
    TokenKindType:       "Type",
    TokenKindGet:        "Get",
    TokenKindSet:        "Set",
    TokenKindOpenBlock:  "OpenBlock",
    TokenKindCloseBlock: "CloseBlock",
    TokenKindOpenList:   "OpenList",
    TokenKindCloseList:  "CloseList",
    TokenKindOpenParen:  "OpenParen",
    TokenKindCloseParen: "CloseParen",
    TokenKindError:      "Error",
    TokenKindEOX:        "EOX",
    TokenKindEOF:        "EOF",
}

/* String returns the human readable name of the token kind. */
func (kind TokenKind) String() string {
    if name, ok := TokenKindNames[kind]; ok {
        return name
    }
    return "Unknown TokenKind!"
}
+
+
/* Token is a single lexeme produced by the lexer: its kind, its value
 * and the position at which it was scanned.  All three are embedded,
 * so their fields and methods promote to Token itself. */
type Token struct {
    TokenKind
    Value
    Position
}
+
+func (token Token) String() string {
+  return fmt.Sprintf("<%s:%q:%v>", token.TokenKind.String(), 
+                      token.Value, token.Position)
+}
+
+func (token Token) Error() string {
+    if token.TokenKind == TokenKindError {
+        return token.Value.(string)
+    }
+    return "No error"
+}
+
+/* Returns whether or not the token is the last to be expected, 
+ * that is either an error or EOF. */
+func (token Token) IsLast() bool {
+    switch token.TokenKind {  
+        case TokenKindError:    return true
+        case TokenKindEOF:      return true
+        default:                return false;
+    }
+}
+
+
+/* Creates a new token. */
+func NewToken(kind TokenKind, val Value, pos Position) Token {
+    return Token{kind, val, pos}
+}
+
+
+

+ 26 - 0
value.go

@@ -0,0 +1,26 @@
+package muesli
+
+
/* Run time values */

/* Value is the interface implemented by every run time value. */
type Value interface {
}

/* IntValue is an integer run time value. */
type IntValue int64

/* FloatValue is a floating point run time value. */
type FloatValue float64

/* StringValue is a string run time value. */
type StringValue string

/* BoolValue is a boolean run time value. */
type BoolValue bool

/* WordValue is presumably a bare word (identifier) — confirm against
 * the lexer's TokenKindWord usage. */
type WordValue string

/* TypeValue presumably names a type — confirm against TokenKindType. */
type TypeValue string

/* AnyValue is an empty placeholder value; its exact role is not
 * visible here — confirm with callers. */
type AnyValue struct {
}

/* ListValue is a list of run time values. */
type ListValue struct {
    List []Value
}