package parser

import (
	"errors"
	"fmt"
	"unicode"

	"github.com/mattn/kinako/ast"
)

// Sentinel runes used by the scanner.
const (
	EOF = -1   // End of file.
	EOL = '\n' // End of line.
)

// Error is the error value recorded by the lexer for scan and parse failures.
type Error struct {
	Message  string // Text returned by Error.
	Filename string // Source file name; never assigned by the code in this file.
	Fatal    bool   // True when the scanner raised the error, false when the parser reported it.
}

// Error returns the error message.
func (e *Error) Error() string {
	return e.Message
}

// Scanner stores the lexer state: the source runes and the current position.
type Scanner struct {
	src      []rune // Source text being scanned.
	offset   int    // Index into src of the current rune.
	lineHead int    // Index into src of the first rune of the current line.
	line     int    // Zero-based number of the current line.
}

// Init resets the scanner so that it scans src from its beginning.
func (s *Scanner) Init(src string) {
	s.src = []rune(src)
	// Rewind the position as well; otherwise a reused Scanner would resume
	// at the offset and line where the previous input left off.
	s.offset = 0
	s.lineHead = 0
	s.line = 0
}

// Scan reads the next token and returns its token code, its literal text, the
// position at which it starts and any scan error.
//
// Blanks and '#' comments are skipped. Newlines are not skipped; they are
// returned as a token of their own. At end of input tok is EOF and lit is
// empty. On an unrecognised rune Scan returns a syntax error together with
// that rune as tok and lit, and does not advance past it.
func (s *Scanner) Scan() (tok int, lit string, pos ast.Position, err error) {
	// A comment runs up to, but does not include, the end of the line, so the
	// newline that terminates it is still reported as a token.
	for {
		s.skipBlank()
		if s.peek() != '#' {
			break
		}
		for !isEOL(s.peek()) {
			s.next()
		}
	}
	pos = s.pos()

	ch := s.peek()
	switch {
	case isLetter(ch):
		lit, err = s.scanIdentifier()
		return IDENT, lit, pos, err
	case isDigit(ch):
		lit, err = s.scanNumber()
		return NUMBER, lit, pos, err
	case ch == '"', ch == '\'':
		lit, err = s.scanString(ch)
		return STRING, lit, pos, err
	case ch == '`':
		lit, err = s.scanRawString()
		return STRING, lit, pos, err
	case ch == EOF:
		return EOF, "", pos, nil
	}

	switch ch {
	case '!', '=', '>', '<', '|', '&':
		// These runes may start a two-rune operator: look one rune ahead and
		// step back again when the pair is not an operator.
		s.next()
		lit = string([]rune{ch, s.peek()})
		switch lit {
		case "!=":
			tok = NEQ
		case "==":
			tok = EQEQ
		case ">=":
			tok = GE
		case ">>":
			tok = SHIFTRIGHT
		case "<=":
			tok = LE
		case "<<":
			tok = SHIFTLEFT
		case "||":
			tok = OROR
		case "&&":
			tok = ANDAND
		default:
			s.back()
			tok, lit = int(ch), string(ch)
		}
	case '+', '-', '*', '/', '.', '\n',
		'(', ')', ':', ';', '%', '?', '{', '}', ',', '[', ']', '^':
		// Single-rune tokens use the rune itself as the token code.
		tok, lit = int(ch), string(ch)
	default:
		return int(ch), string(ch), pos, fmt.Errorf(`syntax error "%s"`, string(ch))
	}

	// Step past the last rune of the token.
	s.next()
	return tok, lit, pos, nil
}

// isLetter returns true if the rune may appear in an identifier: a Unicode
// letter or an underscore.
func isLetter(ch rune) bool {
	return ch == '_' || unicode.IsLetter(ch)
}

// isDigit returns true if the rune is an ASCII decimal digit.
func isDigit(ch rune) bool {
	return ch >= '0' && ch <= '9'
}

// isHex returns true if the rune is a hexadecimal digit.
func isHex(ch rune) bool {
	switch {
	case isDigit(ch):
		return true
	case 'a' <= ch && ch <= 'f':
		return true
	case 'A' <= ch && ch <= 'F':
		return true
	}
	return false
}

// isEOL returns true if the rune is at end-of-line or end-of-file.
func isEOL(ch rune) bool {
	return ch == EOL || ch == EOF
}

// isBlank returns true if the rune is a space, a tab or a carriage return.
// A newline is not a blank.
func isBlank(ch rune) bool {
	switch ch {
	case ' ', '\t', '\r':
		return true
	}
	return false
}

// peek returns the rune at the current offset, or EOF once the input is
// exhausted. It does not advance.
func (s *Scanner) peek() rune {
	if s.reachEOF() {
		return EOF
	}
	return s.src[s.offset]
}

// next advances the offset by one rune and updates the line bookkeeping when
// it steps over a newline. It does nothing at end of input.
func (s *Scanner) next() {
	if s.reachEOF() {
		return
	}
	if s.src[s.offset] == '\n' {
		s.lineHead = s.offset + 1
		s.line++
	}
	s.offset++
}

// current returns the current offset.
func (s *Scanner) current() int {
	return s.offset
}

// set sets the offset value. The line bookkeeping is left untouched.
func (s *Scanner) set(o int) {
	s.offset = o
}

// back moves the offset back by one rune. It does not undo the line
// bookkeeping done by next, so it must not be used to step back over a
// newline.
func (s *Scanner) back() {
	s.offset--
}

// reachEOF returns true if the offset is at or past the end of the input.
func (s *Scanner) reachEOF() bool {
	return s.offset >= len(s.src)
}

// pos returns the one-based line and column of the current offset.
func (s *Scanner) pos() ast.Position {
	return ast.Position{Line: s.line + 1, Column: s.offset - s.lineHead + 1}
}

// skipBlank advances the position to the next non-blank character.
func (s *Scanner) skipBlank() {
	for isBlank(s.peek()) {
		s.next()
	}
}

// scanIdentifier returns the identifier beginning at the current position: the
// longest run of letters and digits. The returned error is always nil.
func (s *Scanner) scanIdentifier() (string, error) {
	var ret []rune
	for ch := s.peek(); isLetter(ch) || isDigit(ch); ch = s.peek() {
		ret = append(ret, ch)
		s.next()
	}
	return string(ret), nil
}

// scanNumber returns the numeric literal beginning at the current position.
//
// The first rune is taken unconditionally. A literal starting with "0x" is
// followed by any number of hex digits. Any other literal is a run of digits
// and dots, optionally followed by 'e', one optional digit or sign, and a
// further run of digits and dots. The text is not validated beyond that, so
// literals such as "1.2.3" or "1e" are returned as they stand. A letter
// directly after a non-hex literal is an error.
func (s *Scanner) scanNumber() (string, error) {
	var ret []rune
	// take appends the current rune to the literal and steps past it.
	take := func() {
		ret = append(ret, s.peek())
		s.next()
	}
	// takeDigitsAndDots consumes a run of digits and dots.
	takeDigitsAndDots := func() {
		for isDigit(s.peek()) || s.peek() == '.' {
			take()
		}
	}

	first := s.peek()
	take()
	if first == '0' && s.peek() == 'x' {
		take() // the 'x'
		for isHex(s.peek()) {
			take()
		}
		return string(ret), nil
	}

	takeDigitsAndDots()
	if s.peek() == 'e' {
		take() // the 'e'
		if isDigit(s.peek()) || s.peek() == '+' || s.peek() == '-' {
			take()
		}
		takeDigitsAndDots()
	}
	if isLetter(s.peek()) {
		return "", errors.New("identifier starts immediately after numeric literal")
	}
	return string(ret), nil
}

// scanRawString returns the raw string starting at the current position, which
// is on the opening backquote. Everything up to the closing backquote is
// returned unchanged, and the scanner is left just past that backquote.
// Reaching end of input first is an error.
func (s *Scanner) scanRawString() (string, error) {
	var ret []rune
	for {
		// On the first pass this steps over the opening backquote; afterwards
		// it steps over the rune appended by the previous pass.
		s.next()
		switch ch := s.peek(); ch {
		case EOF:
			return "", errors.New("unexpected EOF")
		case '`':
			s.next()
			return string(ret), nil
		default:
			ret = append(ret, ch)
		}
	}
}

// scanString returns the string starting at the current position, which is on
// the opening quote l. The scanner is left just past the closing quote.
// This handles backslash escaping.
func (s *Scanner) scanString(l rune) (string, error) { var ret []rune eos: for { s.next() switch s.peek() { case EOL: return "", errors.New("unexpected EOL") case EOF: return "", errors.New("unexpected EOF") case l: s.next() break eos case '\\': s.next() switch s.peek() { case 'b': ret = append(ret, '\b') continue case 'f': ret = append(ret, '\f') continue case 'r': ret = append(ret, '\r') continue case 'n': ret = append(ret, '\n') continue case 't': ret = append(ret, '\t') continue } ret = append(ret, s.peek()) continue default: ret = append(ret, s.peek()) } } return string(ret), nil } // Lexer provides inteface to parse codes. type Lexer struct { s *Scanner lit string pos ast.Position e error stmts []ast.Stmt } // Lex scans the token and literals. func (l *Lexer) Lex(lval *yySymType) int { tok, lit, pos, err := l.s.Scan() if err != nil { l.e = &Error{Message: fmt.Sprintf("%s", err.Error()), Fatal: true} } lval.tok = ast.Token{Tok: tok, Lit: lit} l.lit = lit l.pos = pos return tok } // Error sets parse error. func (l *Lexer) Error(msg string) { l.e = &Error{Message: msg, Fatal: false} } // Parser provides way to parse the code using Scanner. func Parse(s *Scanner) ([]ast.Stmt, error) { l := Lexer{s: s} if yyParse(&l) != 0 { return nil, l.e } return l.stmts, l.e } func EnableErrorVerbose() { yyErrorVerbose = true } // ParserSrc provides way to parse the code from source. func ParseSrc(src string) ([]ast.Stmt, error) { scanner := &Scanner{ src: []rune(src), } return Parse(scanner) }