blob: 745185b3f3b8139b34673450711e0f572dbec7a1 [file] [log] [blame]
// Yacc grammar file for the vanadium VDL langage.
// http://goto/veyron:vdl
//
// Similar to Go, the formal grammar uses semicolons ';' as terminators, but
// idiomatic usage may omit most semicolons using the following rules:
// 1) During the tokenization phase, semicolons are always auto-inserted at
// the end of each line after certain tokens. This is implemented in
// the lexer via the autoSemi function.
// 2) Semicolons may be omitted before a closing ')' or '}'. This is
// implemented via the osemi rule below.
//
// To generate the grammar.go source file containing the parser, run
// grammar_gen.sh in this same directory, or run go generate on this package.
////////////////////////////////////////////////////////////////////////
// Declarations section.
%{
// This grammar.y.go file was auto-generated by yacc from grammar.y.
package parse
import (
"math/big"
"strings"
)
type intPos struct {
int *big.Int
pos Pos
}
type ratPos struct {
rat *big.Rat
pos Pos
}
type imagPos struct {
imag *BigImag
pos Pos
}
// typeListToStrList converts a slice of Type to a slice of StringPos. Each
// type must be a TypeNamed with an empty PackageName, otherwise errors are
// reported, and ok=false is returned.
func typeListToStrList(yylex yyLexer, typeList []Type) (strList []StringPos, ok bool) {
ok = true
for _, t := range typeList {
var tn *TypeNamed
if tn, ok = t.(*TypeNamed); !ok {
lexPosErrorf(yylex, t.Pos(), "%s invalid (expected one or more variable names)", t.String())
return
}
if strings.ContainsRune(tn.Name, '.') {
ok = false
lexPosErrorf(yylex, t.Pos(), "%s invalid (expected one or more variable names).", tn.Name)
return
}
strList = append(strList, StringPos{tn.Name, tn.P})
}
return
}
%}
// This union is turned into the struct type yySymType. Most symbols include
// positional information; this is necessary since Go yacc doesn't support
// passing positional information, so we need to track it ourselves.
%union {
pos Pos
strpos StringPos
intpos intPos
ratpos ratPos
imagpos imagPos
namepos NamePos
nameposes []NamePos
typeexpr Type
typeexprs []Type
fields []*Field
iface *Interface
constexpr ConstExpr
constexprs []ConstExpr
complit *ConstCompositeLit
kvlit KVLit
kvlits []KVLit
errordef ErrorDef
}
// Terminal tokens. We leave single-char tokens as-is using their ascii code as
// their id, to make the grammar more readable; multi-char tokens get their own
// id. The start* tokens are dummy tokens to kick off the parse.
%token startFileImports startFile startConfigImports startConfig
%token startExprs
%token <pos> ';' ':' ',' '.' '(' ')' '[' ']' '{' '}' '<' '>' '='
%token <pos> '!' '+' '-' '*' '/' '%' '|' '&' '^' '?'
%token <pos> tOROR tANDAND tLE tGE tNE tEQEQ tLSH tRSH
%token <pos> tCONST tENUM tERROR tIMPORT tINTERFACE tMAP tPACKAGE
%token <pos> tSET tSTREAM tSTRUCT tTYPE tTYPEOBJECT tUNION
%token <strpos> tIDENT tSTRLIT
%token <intpos> tINTLIT
%token <ratpos> tRATLIT
%token <imagpos> tIMAGLIT
// Labeled rules holding typed values.
%type <strpos> nameref dotnameref
%type <namepos> label_spec
%type <nameposes> label_spec_list
%type <typeexpr> type type_no_typeobject otype
%type <typeexprs> type_comma_list streamargs
%type <fields> field_spec_list field_spec named_arg_list inargs outargs
%type <iface> iface_item_list iface_item
%type <constexpr> expr unary_expr operand
%type <constexprs> tags expr_comma_list
%type <complit> comp_lit
%type <kvlit> kv_lit
%type <kvlits> kv_lit_list
%type <errordef> error_details error_detail_list error_detail
// There are 5 precedence levels for operators, all left-associative, just like
// Go. Lines are listed in order of increasing precedence.
%left tOROR
%left tANDAND
%left '<' '>' tLE tGE tNE tEQEQ
%left '+' '-' '|' '^'
%left '*' '/' '%' '&' tLSH tRSH
%left notPackage notConfig
%start start
%%
////////////////////////////////////////////////////////////////////////
// Rules section.
// Note that vdl files and config files use an identical grammar, other than the
// initial package or config clause respectively. Error checking for config
// files that include error, type or interface definitions occurs afterwards, to
// improve error reporting.
start:
startFileImports package imports gen_imports_eof
| startFile package imports defs
| startConfigImports config imports gen_imports_eof
| startConfig config imports defs
| startExprs expr_comma_list ';'
{ lexStoreExprs(yylex, $2) }
// Dummy rule to terminate the parse after the imports, regardless of whether
// there are any defs. Defs always start with either the tTYPE, tCONST or
// tERROR tokens, and the rule handles all cases - either there's no trailing
// text (the empty case, which would have resulted in EOF anyways), or there's
// one or more defs, where we need to force an EOF.
gen_imports_eof:
// Empty.
{ lexGenEOF(yylex) }
| tTYPE
{ lexGenEOF(yylex) }
| tCONST
{ lexGenEOF(yylex) }
| tERROR
{ lexGenEOF(yylex) }
// PACKAGE
package:
%prec notPackage
{ lexPosErrorf(yylex, Pos{}, "vdl file must start with package clause") }
| tPACKAGE tIDENT ';'
{ lexVDLFile(yylex).PackageDef = NamePos{Name:$2.String, Pos:$2.Pos} }
// CONFIG
config:
%prec notConfig
{ lexPosErrorf(yylex, Pos{}, "config file must start with config clause") }
| tIDENT '=' expr ';'
{
// We allow "config" as an identifier; it is not a keyword. So we check
// manually to make sure the syntax is correct.
if $1.String != "config" {
lexPosErrorf(yylex, $1.Pos, "config file must start with config clause")
return 1 // Any non-zero code indicates an error
}
file := lexVDLFile(yylex)
file.PackageDef = NamePos{Name:"config", Pos:$1.Pos}
file.ConstDefs = []*ConstDef{{Expr:$3}}
}
// IMPORTS
imports:
// Empty.
| imports import ';'
import:
tIMPORT '(' ')'
| tIMPORT '(' import_spec_list osemi ')'
| tIMPORT import_spec
import_spec_list:
import_spec
| import_spec_list ';' import_spec
import_spec:
tSTRLIT
{
imps := &lexVDLFile(yylex).Imports
*imps = append(*imps, &Import{Path:$1.String, NamePos:NamePos{Pos:$1.Pos}})
}
| tIDENT tSTRLIT
{
imps := &lexVDLFile(yylex).Imports
*imps = append(*imps, &Import{Path:$2.String, NamePos:NamePos{Name:$1.String, Pos:$1.Pos}})
}
// DEFINITIONS
defs:
// Empty.
| defs type_def ';'
| defs const_def ';'
| defs error_def ';'
type_def:
tTYPE '(' ')'
| tTYPE '(' type_spec_list osemi ')'
| tTYPE type_spec
| tTYPE interface_spec
const_def:
tCONST '(' ')'
| tCONST '(' const_spec_list osemi ')'
| tCONST const_spec
error_def:
tERROR '(' ')'
| tERROR '(' error_spec_list osemi ')'
| tERROR error_spec
// TYPE DEFINITIONS
type_spec_list:
type_spec
| type_spec_list ';' type_spec
type_spec:
tIDENT type
{
tds := &lexVDLFile(yylex).TypeDefs
*tds = append(*tds, &TypeDef{Type:$2, NamePos:NamePos{Name:$1.String, Pos:$1.Pos}})
}
// The type_no_typeobject rule is necessary to avoid a shift/reduce conflict
// between type conversions and typeobject const expressions. E.g.
// type(expr) // type conversion
// typeobject(type) // typeobject const expression
//
// We've chosen similar syntax to make it easier for the user to remember how to
// use the feature, but since "typeobject" is itself a type, there is a problem.
// We resolve the conflict by restricting the type conversion to the rule:
// type_no_typeobject '(' expr ')'
//
// Note that if we wanted to add general-purpose functions with the func(expr)
// syntax, we'll need to pull nameref out of type_no_typeobject, and parse both
// func(expr) and nameref(expr) into a generic structure. We can't use that
// same mechanism for typeobject, since the thing inside the parens is a value
// expression for type conversions, but a type expression for typeobject.
type_no_typeobject:
nameref
{ $$ = &TypeNamed{Name:$1.String, P:$1.Pos} }
| tERROR // Special-case to allow the "error" keyword as a named type.
{ $$ = &TypeNamed{Name:"error", P:$1} }
| '[' tINTLIT ']' type
{ $$ = &TypeArray{Len:int($2.int.Int64()), Elem:$4, P:$1} }
| '[' ']' type
{ $$ = &TypeList{Elem:$3, P:$1} }
| tENUM '{' label_spec_list osemi '}'
{ $$ = &TypeEnum{Labels:$3, P:$1} }
| tSET '[' type ']'
{ $$ = &TypeSet{Key:$3, P:$1} }
| tMAP '[' type ']' type
{ $$ = &TypeMap{Key:$3, Elem:$5, P:$1} }
| tSTRUCT '{' field_spec_list osemi '}'
{ $$ = &TypeStruct{Fields:$3, P:$1} }
| tSTRUCT '{' '}'
{ $$ = &TypeStruct{P:$1} }
| tUNION '{' field_spec_list osemi '}'
{ $$ = &TypeUnion{Fields:$3, P:$1} }
| tUNION '{' '}'
{ $$ = &TypeUnion{P:$1} }
| '?' type
{ $$ = &TypeOptional{Base:$2, P:$1} }
// The type rule expands to all the actual types, including typeobject.
type:
type_no_typeobject
{ $$ = $1}
| tTYPEOBJECT
{ $$ = &TypeNamed{Name:"typeobject", P:$1} }
label_spec_list:
label_spec
{ $$ = []NamePos{$1} }
| label_spec_list ';' label_spec
{ $$ = append($1, $3) }
label_spec:
tIDENT
{ $$ = NamePos{Name:$1.String, Pos:$1.Pos} }
field_spec_list:
field_spec
{ $$ = $1 }
| field_spec_list ';' field_spec
{ $$ = append($1, $3...) }
// The field_spec rule is intended to capture the following patterns:
// var type
// var0, var1, var2 type
// where var* refers to a variable name, and type refers to a type. Each var
// is expressed as an identifier. An oddity here is that we use a type_list to
// capture the list of variables rather than using a list of IDENTS. This means
// the grammar accepts invalid constructions, and we must validate afterwards.
//
// We do this to avoid a LALR reduce/reduce conflict with function arguments.
// The problem is exhibited by the in-args of these two functions, where func1
// has three args respectively named A, B, C all of type t1, and func2 has three
// args with name and type t2, t3 and t4 respectively. The func1 style is
// captured by field_spec in named_arg_list, while the func2 style is captured
// by type_list in args.
// func1(A, B, C t1)
// func2(t2, t3, t4)
//
// If we used an ident_list to capture "A, B, C" in func1, but used a type_list
// to capture "t2, t3, t4" in func2, we'd have a reduce/reduce conflict since
// yacc cannot determine whether to reduce as an ident_list or as a type_list;
// we don't know until we've reached token t1 in func1, or token ')' in func2.
//
// The fix can be considered both beautiful and a huge hack. To avoid the
// conflict we force both forms to use type_list to capture both "A, B, C" and
// "t2, t3, t4". This avoids the conflict since we're now always reducing via
// type_list, but allows invalid constructions like "[]int, []int []int". So we
// validate in the action and throw errors.
//
// An alternate fix would have been to remove the IDENT case from the type rule,
// use ident_list to capture both cases, and manually "expand" the grammar to
// distinguish the cases appropriately. That would ensure we don't allow
// constructions like "int, int int" in the grammar itself, but would lead to a
// much more complicated grammar. As a bonus, with the type_list solution we
// can give better error messages.
field_spec:
type_comma_list type
{
if names, ok := typeListToStrList(yylex, $1); ok {
for _, n := range names {
$$ = append($$, &Field{Type:$2, NamePos:NamePos{Name:n.String, Pos:n.Pos}})
}
} else {
lexPosErrorf(yylex, $2.Pos(), "perhaps you forgot a comma before %q?.", $2.String())
}
}
type_comma_list:
type
{ $$ = []Type{$1} }
| type_comma_list ',' type
{ $$ = append($1, $3) }
// INTERFACE DEFINITIONS
interface_spec:
tIDENT tINTERFACE '{' '}'
{
ifs := &lexVDLFile(yylex).Interfaces
*ifs = append(*ifs, &Interface{NamePos:NamePos{Name:$1.String, Pos:$1.Pos}})
}
| tIDENT tINTERFACE '{' iface_item_list osemi '}'
{
$4.Name, $4.Pos = $1.String, $1.Pos
ifs := &lexVDLFile(yylex).Interfaces
*ifs = append(*ifs, $4)
}
iface_item_list:
iface_item
{ $$ = $1 }
| iface_item_list ';' iface_item
{
$1.Embeds = append($1.Embeds, $3.Embeds...)
$1.Methods = append($1.Methods, $3.Methods...)
$$ = $1
}
iface_item:
tIDENT inargs streamargs outargs tags
{ $$ = &Interface{Methods: []*Method{{InArgs:$2, InStream:$3[0], OutStream:$3[1], OutArgs:$4, Tags:$5, NamePos:NamePos{Name:$1.String, Pos:$1.Pos}}}} }
| nameref
{ $$ = &Interface{Embeds: []*NamePos{{Name:$1.String, Pos:$1.Pos}}} }
inargs:
'(' ')'
{ $$ = nil }
| '(' named_arg_list ocomma ')'
{ $$ = $2 }
| '(' type_comma_list ocomma ')'
// Just like Go, we allow a list of types without variable names. See the
// field_spec rule for a workaround to avoid a reduce/reduce conflict.
{
for _, t := range $2 {
$$ = append($$, &Field{Type:t, NamePos:NamePos{Pos:t.Pos()}})
}
}
// The named_arg_list rule is just like the field_spec_list, but uses comma ','
// as a delimiter rather than semicolon ';'.
named_arg_list:
field_spec
{ $$ = $1 }
| named_arg_list ',' field_spec
{ $$ = append($1, $3...) }
// The outargs use special syntax to denote the error associated with each
// method. For parsing we accept these forms:
// error
// (string | error)
// (a, b string, c bool | error)
//
// TODO(toddw): Improve parser syntax errors.
outargs:
tERROR
{ $$ = nil }
| '(' named_arg_list ocomma '|' tERROR ')'
{ $$ = $2 }
| '(' type_comma_list ocomma '|' tERROR ')'
// Just like Go, we allow a list of types without variable names. See the
// field_spec rule for a workaround to avoid a reduce/reduce conflict.
{
for _, t := range $2 {
$$ = append($$, &Field{Type:t, NamePos:NamePos{Pos:t.Pos()}})
}
}
streamargs:
// Empty.
{ $$ = []Type{nil, nil} }
| tSTREAM '<' '>'
{ $$ = []Type{nil, nil} }
| tSTREAM '<' type '>'
{ $$ = []Type{$3, nil} }
| tSTREAM '<' type ',' type '>'
{ $$ = []Type{$3, $5} }
tags:
// Empty.
{ $$ = nil }
| '{' '}'
{ $$ = nil }
| '{' expr_comma_list ocomma '}'
{ $$ = $2 }
expr_comma_list:
expr
{ $$ = []ConstExpr{$1} }
| expr_comma_list ',' expr
{ $$ = append($1, $3) }
// CONST DEFINITIONS
const_spec_list:
const_spec
| const_spec_list ';' const_spec
const_spec:
tIDENT '=' expr
{
cds := &lexVDLFile(yylex).ConstDefs
*cds = append(*cds, &ConstDef{Expr:$3, NamePos:NamePos{Name:$1.String, Pos:$1.Pos}})
}
expr:
unary_expr
{ $$ = $1 }
| expr tOROR expr
{ $$ = &ConstBinaryOp{"||", $1, $3, $2} }
| expr tANDAND expr
{ $$ = &ConstBinaryOp{"&&", $1, $3, $2} }
| expr '<' expr
{ $$ = &ConstBinaryOp{"<", $1, $3, $2} }
| expr '>' expr
{ $$ = &ConstBinaryOp{">", $1, $3, $2} }
| expr tLE expr
{ $$ = &ConstBinaryOp{"<=", $1, $3, $2} }
| expr tGE expr
{ $$ = &ConstBinaryOp{">=", $1, $3, $2} }
| expr tNE expr
{ $$ = &ConstBinaryOp{"!=", $1, $3, $2} }
| expr tEQEQ expr
{ $$ = &ConstBinaryOp{"==", $1, $3, $2} }
| expr '+' expr
{ $$ = &ConstBinaryOp{"+", $1, $3, $2} }
| expr '-' expr
{ $$ = &ConstBinaryOp{"-", $1, $3, $2} }
| expr '*' expr
{ $$ = &ConstBinaryOp{"*", $1, $3, $2} }
| expr '/' expr
{ $$ = &ConstBinaryOp{"/", $1, $3, $2} }
| expr '%' expr
{ $$ = &ConstBinaryOp{"%", $1, $3, $2} }
| expr '|' expr
{ $$ = &ConstBinaryOp{"|", $1, $3, $2} }
| expr '&' expr
{ $$ = &ConstBinaryOp{"&", $1, $3, $2} }
| expr '^' expr
{ $$ = &ConstBinaryOp{"^", $1, $3, $2} }
| expr tLSH expr
{ $$ = &ConstBinaryOp{"<<", $1, $3, $2} }
| expr tRSH expr
{ $$ = &ConstBinaryOp{">>", $1, $3, $2} }
unary_expr:
operand
{ $$ = $1 }
| '!' unary_expr
{ $$ = &ConstUnaryOp{"!", $2, $1} }
| '+' unary_expr
{ $$ = &ConstUnaryOp{"+", $2, $1} }
| '-' unary_expr
{ $$ = &ConstUnaryOp{"-", $2, $1} }
| '^' unary_expr
{ $$ = &ConstUnaryOp{"^", $2, $1} }
| type_no_typeobject '(' expr ')'
{ $$ = &ConstTypeConv{$1, $3, $1.Pos()} }
| tTYPEOBJECT '(' type ')'
{ $$ = &ConstTypeObject{$3, $1} }
// TODO(bprosnitz) Add .real() and .imag() for complex.
operand:
tSTRLIT
{ $$ = &ConstLit{$1.String, $1.Pos} }
| tINTLIT
{ $$ = &ConstLit{$1.int, $1.pos} }
| tRATLIT
{ $$ = &ConstLit{$1.rat, $1.pos} }
| tIMAGLIT
{ $$ = &ConstLit{$1.imag, $1.pos} }
| nameref
{ $$ = &ConstNamed{$1.String, $1.Pos} }
| comp_lit
{ $$ = $1 }
| comp_lit '.' tIDENT
{ lexPosErrorf(yylex, $2, "cannot apply selector operator to unnamed constant")}
| comp_lit '[' expr ']'
{ lexPosErrorf(yylex, $2, "cannot apply index operator to unnamed constant")}
| nameref '[' expr ']'
{ $$ = &ConstIndexed{&ConstNamed{$1.String, $1.Pos}, $3, $1.Pos} }
| '(' expr ')'
{ $$ = $2 }
comp_lit:
otype '{' '}'
{ $$ = &ConstCompositeLit{$1, nil, $2} }
| otype '{' kv_lit_list ocomma '}'
{ $$ = &ConstCompositeLit{$1, $3, $2} }
kv_lit_list:
kv_lit
{ $$ = []KVLit{$1} }
| kv_lit_list ',' kv_lit
{ $$ = append($1, $3) }
kv_lit:
expr
{ $$ = KVLit{Value:$1} }
| expr ':' expr
{ $$ = KVLit{Key:$1, Value:$3} }
// ERROR DEFINITIONS
error_spec_list:
error_spec
| error_spec_list ';' error_spec
error_spec:
tIDENT inargs error_details
{
// Create *ErrorDef starting with a copy of error_details, filling in the
// name and params
ed := $3
ed.NamePos = NamePos{Name:$1.String, Pos:$1.Pos}
ed.Params = $2
eds := &lexVDLFile(yylex).ErrorDefs
*eds = append(*eds, &ed)
}
error_details:
// Empty.
{ $$ = ErrorDef{} }
| '{' '}'
{ $$ = ErrorDef{} }
| '{' error_detail_list ocomma '}'
{ $$ = $2 }
error_detail_list:
error_detail
{ $$ = $1 }
| error_detail_list ',' error_detail
{
// Merge each ErrorDef in-order to build the final ErrorDef.
$$ = $1
switch {
case len($3.Actions) > 0:
$$.Actions = append($$.Actions, $3.Actions...)
case len($3.Formats) > 0:
$$.Formats = append($$.Formats, $3.Formats...)
}
}
error_detail:
tIDENT
{ $$ = ErrorDef{Actions: []StringPos{$1}} }
| tSTRLIT ':' tSTRLIT
{ $$ = ErrorDef{Formats: []LangFmt{{Lang: $1, Fmt: $3}}} }
// MISC TOKENS
// nameref describes a named reference to another type, interface or const. We
// allow the following forms:
// foo
// foo.bar (and multi-dot variants)
// "pkg/path".foo
// "pkg/path".foo.bar (and multi-dot variants)
nameref:
dotnameref
{ $$ = $1 }
| tSTRLIT '.' dotnameref
{ $$ = StringPos{"\""+$1.String+"\"."+$3.String, $1.Pos} }
// dotnameref describes just the dotted portion of nameref.
dotnameref:
tIDENT
{ $$ = $1 }
| dotnameref '.' tIDENT
{ $$ = StringPos{$1.String+"."+$3.String, $1.Pos} }
otype:
// Empty.
{ $$ = nil }
| type
{ $$ = $1 }
osemi:
// Empty.
| ';'
ocomma:
// Empty.
| ','