531 lines
9.5 KiB
Go
531 lines
9.5 KiB
Go
// Package parser implements parser for anko.
|
|
package parser
|
|
|
|
import (
|
|
"errors"
|
|
"fmt"
|
|
|
|
"github.com/mattn/anko/ast"
|
|
)
|
|
|
|
const (
|
|
EOF = -1 // End of file.
|
|
EOL = '\n' // End of line.
|
|
)
|
|
|
|
// Error provides a convenient interface for handling runtime error.
|
|
// It can be Error inteface with type cast which can call Pos().
|
|
type Error struct {
|
|
Message string
|
|
Pos ast.Position
|
|
Filename string
|
|
Fatal bool
|
|
}
|
|
|
|
// Error returns the error message.
|
|
func (e *Error) Error() string {
|
|
return e.Message
|
|
}
|
|
|
|
// Scanner stores informations for lexer.
|
|
type Scanner struct {
|
|
src []rune
|
|
offset int
|
|
lineHead int
|
|
line int
|
|
}
|
|
|
|
// opName is correction of operation names.
|
|
var opName = map[string]int{
|
|
"func": FUNC,
|
|
"return": RETURN,
|
|
"var": VAR,
|
|
"throw": THROW,
|
|
"if": IF,
|
|
"for": FOR,
|
|
"break": BREAK,
|
|
"continue": CONTINUE,
|
|
"in": IN,
|
|
"else": ELSE,
|
|
"new": NEW,
|
|
"true": TRUE,
|
|
"false": FALSE,
|
|
"nil": NIL,
|
|
"module": MODULE,
|
|
"try": TRY,
|
|
"catch": CATCH,
|
|
"finally": FINALLY,
|
|
"switch": SWITCH,
|
|
"case": CASE,
|
|
"default": DEFAULT,
|
|
"go": GO,
|
|
"chan": CHAN,
|
|
"make": MAKE,
|
|
}
|
|
|
|
// Init resets code to scan.
|
|
func (s *Scanner) Init(src string) {
|
|
s.src = []rune(src)
|
|
}
|
|
|
|
// Scan analyses token, and decide identify or literals.
|
|
func (s *Scanner) Scan() (tok int, lit string, pos ast.Position, err error) {
|
|
retry:
|
|
s.skipBlank()
|
|
pos = s.pos()
|
|
switch ch := s.peek(); {
|
|
case isLetter(ch):
|
|
lit, err = s.scanIdentifier()
|
|
if err != nil {
|
|
return
|
|
}
|
|
if name, ok := opName[lit]; ok {
|
|
tok = name
|
|
} else {
|
|
tok = IDENT
|
|
}
|
|
case isDigit(ch):
|
|
tok = NUMBER
|
|
lit, err = s.scanNumber()
|
|
if err != nil {
|
|
return
|
|
}
|
|
case ch == '"':
|
|
tok = STRING
|
|
lit, err = s.scanString('"')
|
|
if err != nil {
|
|
return
|
|
}
|
|
case ch == '\'':
|
|
tok = STRING
|
|
lit, err = s.scanString('\'')
|
|
if err != nil {
|
|
return
|
|
}
|
|
case ch == '`':
|
|
tok = STRING
|
|
lit, err = s.scanRawString()
|
|
if err != nil {
|
|
return
|
|
}
|
|
default:
|
|
switch ch {
|
|
case EOF:
|
|
tok = EOF
|
|
case '#':
|
|
for !isEOL(s.peek()) {
|
|
s.next()
|
|
}
|
|
goto retry
|
|
case '!':
|
|
s.next()
|
|
switch s.peek() {
|
|
case '=':
|
|
tok = NEQ
|
|
lit = "!="
|
|
default:
|
|
s.back()
|
|
tok = int(ch)
|
|
lit = string(ch)
|
|
}
|
|
case '=':
|
|
s.next()
|
|
switch s.peek() {
|
|
case '=':
|
|
tok = EQEQ
|
|
lit = "=="
|
|
default:
|
|
s.back()
|
|
tok = int(ch)
|
|
lit = string(ch)
|
|
}
|
|
case '+':
|
|
s.next()
|
|
switch s.peek() {
|
|
case '+':
|
|
tok = PLUSPLUS
|
|
lit = "++"
|
|
case '=':
|
|
tok = PLUSEQ
|
|
lit = "+="
|
|
default:
|
|
s.back()
|
|
tok = int(ch)
|
|
lit = string(ch)
|
|
}
|
|
case '-':
|
|
s.next()
|
|
switch s.peek() {
|
|
case '-':
|
|
tok = MINUSMINUS
|
|
lit = "--"
|
|
case '=':
|
|
tok = MINUSEQ
|
|
lit = "-="
|
|
default:
|
|
s.back()
|
|
tok = int(ch)
|
|
lit = string(ch)
|
|
}
|
|
case '*':
|
|
s.next()
|
|
switch s.peek() {
|
|
case '*':
|
|
tok = POW
|
|
lit = "**"
|
|
case '=':
|
|
tok = MULEQ
|
|
lit = "*="
|
|
default:
|
|
s.back()
|
|
tok = int(ch)
|
|
lit = string(ch)
|
|
}
|
|
case '/':
|
|
s.next()
|
|
switch s.peek() {
|
|
case '=':
|
|
tok = DIVEQ
|
|
lit = "/="
|
|
default:
|
|
s.back()
|
|
tok = int(ch)
|
|
lit = string(ch)
|
|
}
|
|
case '>':
|
|
s.next()
|
|
switch s.peek() {
|
|
case '=':
|
|
tok = GE
|
|
lit = ">="
|
|
case '>':
|
|
tok = SHIFTRIGHT
|
|
lit = ">>"
|
|
default:
|
|
s.back()
|
|
tok = int(ch)
|
|
lit = string(ch)
|
|
}
|
|
case '<':
|
|
s.next()
|
|
switch s.peek() {
|
|
case '-':
|
|
tok = OPCHAN
|
|
lit = "<-"
|
|
case '=':
|
|
tok = LE
|
|
lit = "<="
|
|
case '<':
|
|
tok = SHIFTLEFT
|
|
lit = "<<"
|
|
default:
|
|
s.back()
|
|
tok = int(ch)
|
|
lit = string(ch)
|
|
}
|
|
case '|':
|
|
s.next()
|
|
switch s.peek() {
|
|
case '|':
|
|
tok = OROR
|
|
lit = "||"
|
|
case '=':
|
|
tok = OREQ
|
|
lit = "|="
|
|
default:
|
|
s.back()
|
|
tok = int(ch)
|
|
lit = string(ch)
|
|
}
|
|
case '&':
|
|
s.next()
|
|
switch s.peek() {
|
|
case '&':
|
|
tok = ANDAND
|
|
lit = "&&"
|
|
case '=':
|
|
tok = ANDEQ
|
|
lit = "&="
|
|
default:
|
|
s.back()
|
|
tok = int(ch)
|
|
lit = string(ch)
|
|
}
|
|
case '.':
|
|
s.next()
|
|
if s.peek() == '.' {
|
|
s.next()
|
|
if s.peek() == '.' {
|
|
tok = VARARG
|
|
} else {
|
|
err = fmt.Errorf(`syntax error "%s"`, "..")
|
|
return
|
|
}
|
|
} else {
|
|
s.back()
|
|
tok = int(ch)
|
|
lit = string(ch)
|
|
}
|
|
case '(', ')', ':', ';', '%', '?', '{', '}', ',', '[', ']', '^', '\n':
|
|
s.next()
|
|
if ch == '[' && s.peek() == ']' {
|
|
s.next()
|
|
if isLetter(s.peek()) {
|
|
s.back()
|
|
tok = ARRAYLIT
|
|
lit = "[]"
|
|
} else {
|
|
s.back()
|
|
s.back()
|
|
tok = int(ch)
|
|
lit = string(ch)
|
|
}
|
|
} else {
|
|
s.back()
|
|
tok = int(ch)
|
|
lit = string(ch)
|
|
}
|
|
default:
|
|
err = fmt.Errorf(`syntax error "%s"`, string(ch))
|
|
return
|
|
}
|
|
s.next()
|
|
}
|
|
return
|
|
}
|
|
|
|
// isLetter returns true if the rune is a letter for identity.
|
|
func isLetter(ch rune) bool {
|
|
return 'a' <= ch && ch <= 'z' || 'A' <= ch && ch <= 'Z' || ch == '_'
|
|
}
|
|
|
|
// isDigit returns true if the rune is a number.
|
|
func isDigit(ch rune) bool {
|
|
return '0' <= ch && ch <= '9'
|
|
}
|
|
|
|
// isHex returns true if the rune is a hex digits.
|
|
func isHex(ch rune) bool {
|
|
return ('0' <= ch && ch <= '9') || ('a' <= ch && ch <= 'f') || ('A' <= ch && ch <= 'F')
|
|
}
|
|
|
|
// isEOL returns true if the rune is at end-of-line or end-of-file.
|
|
func isEOL(ch rune) bool {
|
|
return ch == '\n' || ch == -1
|
|
}
|
|
|
|
// isBlank returns true if the rune is empty character..
|
|
func isBlank(ch rune) bool {
|
|
return ch == ' ' || ch == '\t' || ch == '\r'
|
|
}
|
|
|
|
// peek returns current rune in the code.
|
|
func (s *Scanner) peek() rune {
|
|
if s.reachEOF() {
|
|
return EOF
|
|
}
|
|
return s.src[s.offset]
|
|
}
|
|
|
|
// next moves offset to next.
|
|
func (s *Scanner) next() {
|
|
if !s.reachEOF() {
|
|
if s.peek() == '\n' {
|
|
s.lineHead = s.offset + 1
|
|
s.line++
|
|
}
|
|
s.offset++
|
|
}
|
|
}
|
|
|
|
// current returns the current offset.
|
|
func (s *Scanner) current() int {
|
|
return s.offset
|
|
}
|
|
|
|
// offset sets the offset value.
|
|
func (s *Scanner) set(o int) {
|
|
s.offset = o
|
|
}
|
|
|
|
// back moves back offset once to top.
|
|
func (s *Scanner) back() {
|
|
s.offset--
|
|
}
|
|
|
|
// reachEOF returns true if offset is at end-of-file.
|
|
func (s *Scanner) reachEOF() bool {
|
|
return len(s.src) <= s.offset
|
|
}
|
|
|
|
// pos returns the position of current.
|
|
func (s *Scanner) pos() ast.Position {
|
|
return ast.Position{Line: s.line + 1, Column: s.offset - s.lineHead + 1}
|
|
}
|
|
|
|
// skipBlank moves position into non-black character.
|
|
func (s *Scanner) skipBlank() {
|
|
for isBlank(s.peek()) {
|
|
s.next()
|
|
}
|
|
}
|
|
|
|
// scanIdentifier returns identifier begining at current position.
|
|
func (s *Scanner) scanIdentifier() (string, error) {
|
|
var ret []rune
|
|
for {
|
|
if !isLetter(s.peek()) && !isDigit(s.peek()) {
|
|
break
|
|
}
|
|
ret = append(ret, s.peek())
|
|
s.next()
|
|
}
|
|
return string(ret), nil
|
|
}
|
|
|
|
// scanNumber returns number begining at current position.
|
|
func (s *Scanner) scanNumber() (string, error) {
|
|
var ret []rune
|
|
ch := s.peek()
|
|
ret = append(ret, ch)
|
|
s.next()
|
|
if ch == '0' && s.peek() == 'x' {
|
|
ret = append(ret, s.peek())
|
|
s.next()
|
|
for isHex(s.peek()) {
|
|
ret = append(ret, s.peek())
|
|
s.next()
|
|
}
|
|
} else {
|
|
for isDigit(s.peek()) || s.peek() == '.' {
|
|
ret = append(ret, s.peek())
|
|
s.next()
|
|
}
|
|
if s.peek() == 'e' {
|
|
ret = append(ret, s.peek())
|
|
s.next()
|
|
if isDigit(s.peek()) || s.peek() == '+' || s.peek() == '-' {
|
|
ret = append(ret, s.peek())
|
|
s.next()
|
|
for isDigit(s.peek()) || s.peek() == '.' {
|
|
ret = append(ret, s.peek())
|
|
s.next()
|
|
}
|
|
}
|
|
for isDigit(s.peek()) || s.peek() == '.' {
|
|
ret = append(ret, s.peek())
|
|
s.next()
|
|
}
|
|
}
|
|
if isLetter(s.peek()) {
|
|
return "", errors.New("identifier starts immediately after numeric literal")
|
|
}
|
|
}
|
|
return string(ret), nil
|
|
}
|
|
|
|
// scanRawString returns raw-string starting at current position.
|
|
func (s *Scanner) scanRawString() (string, error) {
|
|
var ret []rune
|
|
for {
|
|
s.next()
|
|
if s.peek() == EOF {
|
|
return "", errors.New("unexpected EOF")
|
|
break
|
|
}
|
|
if s.peek() == '`' {
|
|
s.next()
|
|
break
|
|
}
|
|
ret = append(ret, s.peek())
|
|
}
|
|
return string(ret), nil
|
|
}
|
|
|
|
// scanString returns string starting at current position.
|
|
// This handles backslash escaping.
|
|
func (s *Scanner) scanString(l rune) (string, error) {
|
|
var ret []rune
|
|
eos:
|
|
for {
|
|
s.next()
|
|
switch s.peek() {
|
|
case EOL:
|
|
return "", errors.New("unexpected EOL")
|
|
case EOF:
|
|
return "", errors.New("unexpected EOF")
|
|
case l:
|
|
s.next()
|
|
break eos
|
|
case '\\':
|
|
s.next()
|
|
switch s.peek() {
|
|
case 'b':
|
|
ret = append(ret, '\b')
|
|
continue
|
|
case 'f':
|
|
ret = append(ret, '\f')
|
|
continue
|
|
case 'r':
|
|
ret = append(ret, '\r')
|
|
continue
|
|
case 'n':
|
|
ret = append(ret, '\n')
|
|
continue
|
|
case 't':
|
|
ret = append(ret, '\t')
|
|
continue
|
|
}
|
|
ret = append(ret, s.peek())
|
|
continue
|
|
default:
|
|
ret = append(ret, s.peek())
|
|
}
|
|
}
|
|
return string(ret), nil
|
|
}
|
|
|
|
// Lexer provides inteface to parse codes.
|
|
type Lexer struct {
|
|
s *Scanner
|
|
lit string
|
|
pos ast.Position
|
|
e error
|
|
stmts []ast.Stmt
|
|
}
|
|
|
|
// Lex scans the token and literals.
|
|
func (l *Lexer) Lex(lval *yySymType) int {
|
|
tok, lit, pos, err := l.s.Scan()
|
|
if err != nil {
|
|
l.e = &Error{Message: fmt.Sprintf("%s", err.Error()), Pos: pos, Fatal: true}
|
|
}
|
|
lval.tok = ast.Token{Tok: tok, Lit: lit}
|
|
lval.tok.SetPosition(pos)
|
|
l.lit = lit
|
|
l.pos = pos
|
|
return tok
|
|
}
|
|
|
|
// Error sets parse error.
|
|
func (l *Lexer) Error(msg string) {
|
|
l.e = &Error{Message: msg, Pos: l.pos, Fatal: false}
|
|
}
|
|
|
|
// Parser provides way to parse the code using Scanner.
|
|
func Parse(s *Scanner) ([]ast.Stmt, error) {
|
|
l := Lexer{s: s}
|
|
if yyParse(&l) != 0 {
|
|
return nil, l.e
|
|
}
|
|
return l.stmts, l.e
|
|
}
|
|
|
|
// ParserSrc provides way to parse the code from source.
|
|
func ParseSrc(src string) ([]ast.Stmt, error) {
|
|
scanner := &Scanner{
|
|
src: []rune(src),
|
|
}
|
|
return Parse(scanner)
|
|
}
|