parser.go 7.9 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309
  1. package main
  2. import "text/scanner"
  3. import "fmt"
  4. import "io"
  5. import "os"
  6. import "unicode"
  7. // This parser is for parsing LL1 grammars, not Beo.
  8. type Parser struct {
  9. reader io.Reader
  10. scanner scanner.Scanner
  11. current Token
  12. Errors []ParserError
  13. Filename string
  14. Debug io.Writer
  15. }
  16. func NewParserFromReader(reader io.Reader, filename string, debug bool) *Parser {
  17. parser := &Parser{}
  18. parser.reader = reader
  19. parser.scanner.Init(parser.reader)
  20. parser.Filename = filename
  21. parser.current.Kind = 0
  22. parser.Debug = nil
  23. if debug {
  24. parser.Debug = os.Stderr
  25. }
  26. parser.scanner.Mode = scanner.GoTokens
  27. parser.scanner.Error = func (s *scanner.Scanner, msg string) {
  28. parser.Panicf("%s: scanner error: %s, %s", s.Position, s.TokenText(), msg)
  29. }
  30. parser.scanner.IsIdentRune = func(ch rune, i int) bool {
  31. if i == 0 {
  32. return unicode.IsLetter(ch)
  33. }
  34. return unicode.IsLetter(ch) ||
  35. unicode.IsNumber(ch) ||
  36. ch == '_' ||
  37. ch == '-'
  38. }
  39. return parser
  40. }
  41. func (p *Parser) Lex() Token {
  42. scanned := p.scanner.Scan()
  43. pos := p.scanner.Position
  44. pos.Filename = p.Filename
  45. value := p.scanner.TokenText()
  46. // Get rid of the quotes
  47. if scanned == scanner.Char ||
  48. scanned == scanner.String ||
  49. scanned == scanner.RawString {
  50. value = value[1:len(value) - 1]
  51. }
  52. token := Token {
  53. scanned,
  54. value,
  55. pos,
  56. }
  57. p.Debugf("Lexed: %s\n", p.Filename, token)
  58. return token
  59. }
  60. func (p *Parser) Advance() {
  61. token := p.Lex()
  62. p.current = token
  63. }
  64. // panic with this type on errors that would prevent the parser
  65. // from making progress.
  66. type ParserError struct {
  67. *Parser
  68. *Token
  69. Chain error
  70. }
  71. func (pe ParserError) Error() string {
  72. return pe.Token.MakeError(pe.Chain.Error()).Error()
  73. }
  74. func (parser *Parser) Errorf(message string, args ...interface{}) ParserError {
  75. err := fmt.Errorf(message, args...)
  76. pe := ParserError { Parser: parser, Token:&parser.current, Chain: err }
  77. parser.Errors = append(parser.Errors, pe)
  78. return pe
  79. }
  80. func (parser *Parser) Panicf(message string, args ...interface{}) {
  81. pe := parser.Errorf(message, args...)
  82. panic(pe)
  83. }
  84. func (p *Parser) Debugf(message string, args ...interface{}) {
  85. if p.Debug != nil {
  86. fmt.Fprintf(p.Debug, message, args)
  87. }
  88. }
  89. /* Looks at the current token and advances the lexer if the token is of any of
  90. the token kinds given in kinds. In this case it will return the accepted
  91. token and advance the parser. Otherwise, it will call parser.Panicf.*/
  92. func (parser *Parser) Require(kinds ...rune) Token {
  93. parser.Debugf("Require: %v\n", kinds)
  94. if parser.current.Kind == 0 {
  95. parser.Advance()
  96. }
  97. expected := ""
  98. sep := ""
  99. for _, kind := range kinds {
  100. if kind == parser.current.Kind {
  101. accepted := parser.current
  102. parser.Advance()
  103. return accepted
  104. }
  105. expected = fmt.Sprintf("%s%s%s", expected, sep, scanner.TokenString(kind))
  106. }
  107. parser.Panicf("error: expected one of the following: %s", expected)
  108. return Token{}
  109. }
  110. func (parser Parser) NextIs(kinds ...rune) bool {
  111. parser.Debugf("NextIs: %v\n", kinds)
  112. if (parser.current.Kind == 0) {
  113. parser.Advance()
  114. }
  115. for _, kind := range kinds {
  116. if kind == parser.current.Kind {
  117. return true
  118. }
  119. }
  120. return false
  121. }
  122. /*
  123. Sequence -> Element Sequence | .
  124. Element -> Parenthesis | Name | Literal .
  125. Parenthesis -> '(' Definition ')' .
  126. */
  127. func (p *Parser) NextIsElement() (bool) {
  128. return p.NextIs('(', scanner.Ident, scanner.Char, scanner.String)
  129. }
  130. func (p *Parser) ParseElement() (Element, error) {
  131. switch {
  132. case p.NextIs('('):
  133. return p.ParseParenthesis()
  134. case p.NextIs(scanner.Ident):
  135. ident := p.Require(scanner.Ident)
  136. if ident.Value == "epsilon" || ident.Value == "ε" {
  137. ident.Value = "ε"
  138. return Epsilon{ element { token: ident } }, nil
  139. }
  140. // Upper case means a nonterminal, otherwise terminal
  141. if unicode.IsUpper(([]rune(ident.Value))[0]) {
  142. return Nonterminal{ element { token: ident }, nil }, nil
  143. }
  144. return Terminal{ element { token: ident } }, nil
  145. case p.NextIs(scanner.Char, scanner.String):
  146. literal := p.Require(scanner.Char, scanner.String)
  147. return Literal{ Terminal { element { token: literal } } }, nil
  148. default:
  149. p.Panicf("error: unexpected for grammar Element: %s", p.current)
  150. }
  151. return nil, nil
  152. }
  153. func (p *Parser) ParseSequence() (*Sequence, error) {
  154. sequence := &Sequence{}
  155. for p.NextIsElement() {
  156. element, err := p.ParseElement()
  157. p.Debugf("Sequence parsed element: %v\n", element)
  158. if err != nil {
  159. return sequence, err
  160. }
  161. sequence.Elements = append(sequence.Elements, element)
  162. }
  163. return sequence, nil
  164. }
  165. func (p *Parser) ParseParenthesis() (*Definition, error) {
  166. // Parse the sub definition
  167. p.Require('(')
  168. sub, err := p.ParseDefinition()
  169. p.Require(')')
  170. return sub, err
  171. }
  172. func (p *Parser) ParseAlternates() (*Definition, error) {
  173. alternates := &Alternates{}
  174. for p.NextIsElement() {
  175. sequence, err := p.ParseSequence()
  176. if err != nil {
  177. return alternates, err
  178. }
  179. alternates.Sequences = append(alternates.Sequences, *sequence)
  180. if !p.NextIs('.', scanner.RawString, scanner.EOF, ')') {
  181. p.Require('|')
  182. }
  183. }
  184. return alternates, nil
  185. }
  186. func (p *Parser) ParseDefinition() (*Definition, error) {
  187. return p.ParseAlternates()
  188. /*if p.NextIs('(') {
  189. sub, err := p.ParseParenthesis()
  190. if err != nil {
  191. return nil, err
  192. }
  193. }*/
  194. }
  195. func (p *Parser) ParseRule() (*Rule, error) {
  196. rule := &Rule{}
  197. name := p.Require(scanner.Ident)
  198. // XXX Require or → or -> separator.
  199. // This is a hack, needs to be moved to a lexer.
  200. if p.NextIs('→') {
  201. p.Require('→')
  202. } else {
  203. p.Require('-')
  204. p.Require('>')
  205. }
  206. definition, err := p.ParseDefinition()
  207. if err != nil {
  208. return rule, err
  209. }
  210. if p.NextIs(scanner.RawString) {
  211. rs := p.Require(scanner.RawString)
  212. rule.Template = rs.Value
  213. }
  214. // require '.' terminator
  215. p.Require('.')
  216. rule.Name = name.Value
  217. rule.Definition = *definition
  218. return rule, nil
  219. }
  220. func (p *Parser) ParseRules() ([]*Rule, error) {
  221. var rules []*Rule
  222. for p.current.Kind != scanner.EOF {
  223. rule, err := p.ParseRule()
  224. if err != nil {
  225. return rules, err
  226. }
  227. appended := false
  228. for i, existing := range(rules) {
  229. if existing.Name == rule.Name {
  230. // Add to the definition in stead
  231. existing.Definition.Sequences = append(existing.Definition.Sequences, rule.Definition.Sequences...)
  232. rules[i] = existing
  233. appended = true
  234. break
  235. }
  236. }
  237. if !appended {
  238. rules = append(rules, rule)
  239. }
  240. }
  241. return rules, nil
  242. }
  243. func (p *Parser) Parse() (g *Grammar, e error) {
  244. defer func() {
  245. thrown := recover()
  246. if thrown != nil {
  247. g = nil
  248. e = thrown.(error)
  249. return
  250. }
  251. }()
  252. p.Advance()
  253. rules, err := p.ParseRules()
  254. if err != nil {
  255. return nil, err
  256. }
  257. if len(rules) < 1 {
  258. return nil, nil
  259. }
  260. grammar := &Grammar {
  261. Top: rules[0],
  262. Rules: rules,
  263. }
  264. return grammar, nil
  265. }
  266. func ParseFile(filename string, debug bool) (*Parser, *Grammar, error) {
  267. file, err := os.Open(filename)
  268. defer file.Close()
  269. parser := NewParserFromReader(file, filename, debug)
  270. grammar, err := parser.Parse()
  271. return parser, grammar, err
  272. }