| // Yacc grammar file for the vanadium VDL langage. |
| // http://goto/veyron:vdl |
| // |
| // Similar to Go, the formal grammar uses semicolons ';' as terminators, but |
| // idiomatic usage may omit most semicolons using the following rules: |
| // 1) During the tokenization phase, semicolons are always auto-inserted at |
| // the end of each line after certain tokens. This is implemented in |
| // the lexer via the autoSemi function. |
| // 2) Semicolons may be omitted before a closing ')' or '}'. This is |
| // implemented via the osemi rule below. |
| // |
| // To generate the grammar.go source file containing the parser, run |
| // grammar_gen.sh in this same directory, or run go generate on this package. |
| |
| //////////////////////////////////////////////////////////////////////// |
| // Declarations section. |
| %{ |
| // This grammar.y.go file was auto-generated by yacc from grammar.y. |
| |
| package parse |
| |
| import ( |
| "math/big" |
| "strings" |
| ) |
| |
| type intPos struct { |
| int *big.Int |
| pos Pos |
| } |
| |
| type ratPos struct { |
| rat *big.Rat |
| pos Pos |
| } |
| |
| // typeListToStrList converts a slice of Type to a slice of StringPos. Each |
| // type must be a TypeNamed with an empty PackageName, otherwise errors are |
| // reported, and ok=false is returned. |
| func typeListToStrList(yylex yyLexer, typeList []Type) (strList []StringPos, ok bool) { |
| ok = true |
| for _, t := range typeList { |
| var tn *TypeNamed |
| if tn, ok = t.(*TypeNamed); !ok { |
| lexPosErrorf(yylex, t.Pos(), "%s invalid (expected one or more variable names)", t.String()) |
| return |
| } |
| if strings.ContainsRune(tn.Name, '.') { |
| ok = false |
| lexPosErrorf(yylex, t.Pos(), "%s invalid (expected one or more variable names).", tn.Name) |
| return |
| } |
| strList = append(strList, StringPos{tn.Name, tn.P}) |
| } |
| return |
| } |
| %} |
| |
| // This union is turned into the struct type yySymType. Most symbols include |
| // positional information; this is necessary since Go yacc doesn't support |
| // passing positional information, so we need to track it ourselves. |
| %union { |
| pos Pos |
| strpos StringPos |
| intpos intPos |
| ratpos ratPos |
| namepos NamePos |
| nameposes []NamePos |
| typeexpr Type |
| typeexprs []Type |
| fields []*Field |
| iface *Interface |
| constexpr ConstExpr |
| constexprs []ConstExpr |
| complit *ConstCompositeLit |
| kvlit KVLit |
| kvlits []KVLit |
| errordef ErrorDef |
| } |
| |
| // Terminal tokens. We leave single-char tokens as-is using their ascii code as |
| // their id, to make the grammar more readable; multi-char tokens get their own |
| // id. The start* tokens are dummy tokens to kick off the parse. |
| %token startFileImports startFile startConfigImports startConfig |
| %token startExprs |
| %token <pos> ';' ':' ',' '.' '(' ')' '[' ']' '{' '}' '<' '>' '=' |
| %token <pos> '!' '+' '-' '*' '/' '%' '|' '&' '^' '?' |
| %token <pos> tOROR tANDAND tLE tGE tNE tEQEQ tLSH tRSH |
| %token <pos> tCONST tENUM tERROR tIMPORT tINTERFACE tMAP tPACKAGE |
| %token <pos> tSET tSTREAM tSTRUCT tTYPE tTYPEOBJECT tUNION |
| %token <strpos> tIDENT tSTRLIT |
| %token <intpos> tINTLIT |
| %token <ratpos> tRATLIT |
| |
| // Labeled rules holding typed values. |
| %type <strpos> nameref dotnameref |
| %type <namepos> label_spec |
| %type <nameposes> label_spec_list |
| %type <typeexpr> type type_no_typeobject otype |
| %type <typeexprs> type_comma_list streamargs |
| %type <fields> field_spec_list field_spec named_arg_list inargs outargs |
| %type <iface> iface_item_list iface_item |
| %type <constexpr> expr unary_expr operand |
| %type <constexprs> tags expr_comma_list |
| %type <complit> comp_lit |
| %type <kvlit> kv_lit |
| %type <kvlits> kv_lit_list |
| %type <errordef> error_details error_detail_list error_detail |
| |
| // There are 5 precedence levels for operators, all left-associative, just like |
| // Go. Lines are listed in order of increasing precedence. |
| %left tOROR |
| %left tANDAND |
| %left '<' '>' tLE tGE tNE tEQEQ |
| %left '+' '-' '|' '^' |
| %left '*' '/' '%' '&' tLSH tRSH |
| |
| %left notPackage notConfig |
| |
| %start start |
| |
| %% |
| //////////////////////////////////////////////////////////////////////// |
| // Rules section. |
| |
| // Note that vdl files and config files use an identical grammar, other than the |
| // initial package or config clause respectively. Error checking for config |
| // files that include error, type or interface definitions occurs afterwards, to |
| // improve error reporting. |
| start: |
| startFileImports package imports gen_imports_eof |
| | startFile package imports defs |
| | startConfigImports config imports gen_imports_eof |
| | startConfig config imports defs |
| | startExprs expr_comma_list ';' |
| { lexStoreExprs(yylex, $2) } |
| |
| // Dummy rule to terminate the parse after the imports, regardless of whether |
| // there are any defs. Defs always start with either the tTYPE, tCONST or |
| // tERROR tokens, and the rule handles all cases - either there's no trailing |
| // text (the empty case, which would have resulted in EOF anyways), or there's |
| // one or more defs, where we need to force an EOF. |
| gen_imports_eof: |
| // Empty. |
| { lexGenEOF(yylex) } |
| | tTYPE |
| { lexGenEOF(yylex) } |
| | tCONST |
| { lexGenEOF(yylex) } |
| | tERROR |
| { lexGenEOF(yylex) } |
| |
| // PACKAGE |
| package: |
| %prec notPackage |
| { lexPosErrorf(yylex, Pos{}, "vdl file must start with package clause") } |
| | tPACKAGE tIDENT ';' |
| { lexVDLFile(yylex).PackageDef = NamePos{Name:$2.String, Pos:$2.Pos} } |
| |
| // CONFIG |
| config: |
| %prec notConfig |
| { lexPosErrorf(yylex, Pos{}, "config file must start with config clause") } |
| | tIDENT '=' expr ';' |
| { |
| // We allow "config" as an identifier; it is not a keyword. So we check |
| // manually to make sure the syntax is correct. |
| if $1.String != "config" { |
| lexPosErrorf(yylex, $1.Pos, "config file must start with config clause") |
| return 1 // Any non-zero code indicates an error |
| } |
| file := lexVDLFile(yylex) |
| file.PackageDef = NamePos{Name:"config", Pos:$1.Pos} |
| file.ConstDefs = []*ConstDef{{Expr:$3}} |
| } |
| |
| // IMPORTS |
| imports: |
| // Empty. |
| | imports import ';' |
| |
| import: |
| tIMPORT '(' ')' |
| | tIMPORT '(' import_spec_list osemi ')' |
| | tIMPORT import_spec |
| |
| import_spec_list: |
| import_spec |
| | import_spec_list ';' import_spec |
| |
| import_spec: |
| tSTRLIT |
| { |
| imps := &lexVDLFile(yylex).Imports |
| *imps = append(*imps, &Import{Path:$1.String, NamePos:NamePos{Pos:$1.Pos}}) |
| } |
| | tIDENT tSTRLIT |
| { |
| imps := &lexVDLFile(yylex).Imports |
| *imps = append(*imps, &Import{Path:$2.String, NamePos:NamePos{Name:$1.String, Pos:$1.Pos}}) |
| } |
| |
| // DEFINITIONS |
| defs: |
| // Empty. |
| | defs type_def ';' |
| | defs const_def ';' |
| | defs error_def ';' |
| |
| type_def: |
| tTYPE '(' ')' |
| | tTYPE '(' type_spec_list osemi ')' |
| | tTYPE type_spec |
| | tTYPE interface_spec |
| |
| const_def: |
| tCONST '(' ')' |
| | tCONST '(' const_spec_list osemi ')' |
| | tCONST const_spec |
| |
| error_def: |
| tERROR '(' ')' |
| | tERROR '(' error_spec_list osemi ')' |
| | tERROR error_spec |
| |
| // TYPE DEFINITIONS |
| type_spec_list: |
| type_spec |
| | type_spec_list ';' type_spec |
| |
| type_spec: |
| tIDENT type |
| { |
| tds := &lexVDLFile(yylex).TypeDefs |
| *tds = append(*tds, &TypeDef{Type:$2, NamePos:NamePos{Name:$1.String, Pos:$1.Pos}}) |
| } |
| |
| // The type_no_typeobject rule is necessary to avoid a shift/reduce conflict |
| // between type conversions and typeobject const expressions. E.g. |
| // type(expr) // type conversion |
| // typeobject(type) // typeobject const expression |
| // |
| // We've chosen similar syntax to make it easier for the user to remember how to |
| // use the feature, but since "typeobject" is itself a type, there is a problem. |
| // We resolve the conflict by restricting the type conversion to the rule: |
| // type_no_typeobject '(' expr ')' |
| // |
| // Note that if we wanted to add general-purpose functions with the func(expr) |
| // syntax, we'll need to pull nameref out of type_no_typeobject, and parse both |
| // func(expr) and nameref(expr) into a generic structure. We can't use that |
| // same mechanism for typeobject, since the thing inside the parens is a value |
| // expression for type conversions, but a type expression for typeobject. |
| type_no_typeobject: |
| nameref |
| { $$ = &TypeNamed{Name:$1.String, P:$1.Pos} } |
| | tERROR // Special-case to allow the "error" keyword as a named type. |
| { $$ = &TypeNamed{Name:"error", P:$1} } |
| | '[' tINTLIT ']' type |
| { $$ = &TypeArray{Len:int($2.int.Int64()), Elem:$4, P:$1} } |
| | '[' ']' type |
| { $$ = &TypeList{Elem:$3, P:$1} } |
| | tENUM '{' label_spec_list osemi '}' |
| { $$ = &TypeEnum{Labels:$3, P:$1} } |
| | tSET '[' type ']' |
| { $$ = &TypeSet{Key:$3, P:$1} } |
| | tMAP '[' type ']' type |
| { $$ = &TypeMap{Key:$3, Elem:$5, P:$1} } |
| | tSTRUCT '{' field_spec_list osemi '}' |
| { $$ = &TypeStruct{Fields:$3, P:$1} } |
| | tSTRUCT '{' '}' |
| { $$ = &TypeStruct{P:$1} } |
| | tUNION '{' field_spec_list osemi '}' |
| { $$ = &TypeUnion{Fields:$3, P:$1} } |
| | tUNION '{' '}' |
| { $$ = &TypeUnion{P:$1} } |
| | '?' type |
| { $$ = &TypeOptional{Base:$2, P:$1} } |
| |
| // The type rule expands to all the actual types, including typeobject. |
| type: |
| type_no_typeobject |
| { $$ = $1} |
| | tTYPEOBJECT |
| { $$ = &TypeNamed{Name:"typeobject", P:$1} } |
| |
| label_spec_list: |
| label_spec |
| { $$ = []NamePos{$1} } |
| | label_spec_list ';' label_spec |
| { $$ = append($1, $3) } |
| |
| label_spec: |
| tIDENT |
| { $$ = NamePos{Name:$1.String, Pos:$1.Pos} } |
| |
| field_spec_list: |
| field_spec |
| { $$ = $1 } |
| | field_spec_list ';' field_spec |
| { $$ = append($1, $3...) } |
| |
| // The field_spec rule is intended to capture the following patterns: |
| // var type |
| // var0, var1, var2 type |
| // where var* refers to a variable name, and type refers to a type. Each var |
| // is expressed as an identifier. An oddity here is that we use a type_list to |
| // capture the list of variables rather than using a list of IDENTS. This means |
| // the grammar accepts invalid constructions, and we must validate afterwards. |
| // |
| // We do this to avoid a LALR reduce/reduce conflict with function arguments. |
| // The problem is exhibited by the in-args of these two functions, where func1 |
| // has three args respectively named A, B, C all of type t1, and func2 has three |
| // args with name and type t2, t3 and t4 respectively. The func1 style is |
| // captured by field_spec in named_arg_list, while the func2 style is captured |
| // by type_list in args. |
| // func1(A, B, C t1) |
| // func2(t2, t3, t4) |
| // |
| // If we used an ident_list to capture "A, B, C" in func1, but used a type_list |
| // to capture "t2, t3, t4" in func2, we'd have a reduce/reduce conflict since |
| // yacc cannot determine whether to reduce as an ident_list or as a type_list; |
| // we don't know until we've reached token t1 in func1, or token ')' in func2. |
| // |
| // The fix can be considered both beautiful and a huge hack. To avoid the |
| // conflict we force both forms to use type_list to capture both "A, B, C" and |
| // "t2, t3, t4". This avoids the conflict since we're now always reducing via |
| // type_list, but allows invalid constructions like "[]int, []int []int". So we |
| // validate in the action and throw errors. |
| // |
| // An alternate fix would have been to remove the IDENT case from the type rule, |
| // use ident_list to capture both cases, and manually "expand" the grammar to |
| // distinguish the cases appropriately. That would ensure we don't allow |
| // constructions like "int, int int" in the grammar itself, but would lead to a |
| // much more complicated grammar. As a bonus, with the type_list solution we |
| // can give better error messages. |
| field_spec: |
| type_comma_list type |
| { |
| if names, ok := typeListToStrList(yylex, $1); ok { |
| for _, n := range names { |
| $$ = append($$, &Field{Type:$2, NamePos:NamePos{Name:n.String, Pos:n.Pos}}) |
| } |
| } else { |
| lexPosErrorf(yylex, $2.Pos(), "perhaps you forgot a comma before %q?.", $2.String()) |
| } |
| } |
| |
| type_comma_list: |
| type |
| { $$ = []Type{$1} } |
| | type_comma_list ',' type |
| { $$ = append($1, $3) } |
| |
| // INTERFACE DEFINITIONS |
| interface_spec: |
| tIDENT tINTERFACE '{' '}' |
| { |
| ifs := &lexVDLFile(yylex).Interfaces |
| *ifs = append(*ifs, &Interface{NamePos:NamePos{Name:$1.String, Pos:$1.Pos}}) |
| } |
| | tIDENT tINTERFACE '{' iface_item_list osemi '}' |
| { |
| $4.Name, $4.Pos = $1.String, $1.Pos |
| ifs := &lexVDLFile(yylex).Interfaces |
| *ifs = append(*ifs, $4) |
| } |
| |
| iface_item_list: |
| iface_item |
| { $$ = $1 } |
| | iface_item_list ';' iface_item |
| { |
| $1.Embeds = append($1.Embeds, $3.Embeds...) |
| $1.Methods = append($1.Methods, $3.Methods...) |
| $$ = $1 |
| } |
| |
| iface_item: |
| tIDENT inargs streamargs outargs tags |
| { $$ = &Interface{Methods: []*Method{{InArgs:$2, InStream:$3[0], OutStream:$3[1], OutArgs:$4, Tags:$5, NamePos:NamePos{Name:$1.String, Pos:$1.Pos}}}} } |
| | nameref |
| { $$ = &Interface{Embeds: []*NamePos{{Name:$1.String, Pos:$1.Pos}}} } |
| |
| inargs: |
| '(' ')' |
| { $$ = nil } |
| | '(' named_arg_list ocomma ')' |
| { $$ = $2 } |
| | '(' type_comma_list ocomma ')' |
| // Just like Go, we allow a list of types without variable names. See the |
| // field_spec rule for a workaround to avoid a reduce/reduce conflict. |
| { |
| for _, t := range $2 { |
| $$ = append($$, &Field{Type:t, NamePos:NamePos{Pos:t.Pos()}}) |
| } |
| } |
| |
| // The named_arg_list rule is just like the field_spec_list, but uses comma ',' |
| // as a delimiter rather than semicolon ';'. |
| named_arg_list: |
| field_spec |
| { $$ = $1 } |
| | named_arg_list ',' field_spec |
| { $$ = append($1, $3...) } |
| |
| // The outargs use special syntax to denote the error associated with each |
| // method. For parsing we accept these forms: |
| // error |
| // (string | error) |
| // (a, b string, c bool | error) |
| // |
| // TODO(toddw): Improve parser syntax errors. |
| outargs: |
| tERROR |
| { $$ = nil } |
| | '(' named_arg_list ocomma '|' tERROR ')' |
| { $$ = $2 } |
| | '(' type_comma_list ocomma '|' tERROR ')' |
| // Just like Go, we allow a list of types without variable names. See the |
| // field_spec rule for a workaround to avoid a reduce/reduce conflict. |
| { |
| for _, t := range $2 { |
| $$ = append($$, &Field{Type:t, NamePos:NamePos{Pos:t.Pos()}}) |
| } |
| } |
| |
| streamargs: |
| // Empty. |
| { $$ = []Type{nil, nil} } |
| | tSTREAM '<' '>' |
| { $$ = []Type{nil, nil} } |
| | tSTREAM '<' type '>' |
| { $$ = []Type{$3, nil} } |
| | tSTREAM '<' type ',' type '>' |
| { $$ = []Type{$3, $5} } |
| |
| tags: |
| // Empty. |
| { $$ = nil } |
| | '{' '}' |
| { $$ = nil } |
| | '{' expr_comma_list ocomma '}' |
| { $$ = $2 } |
| |
| expr_comma_list: |
| expr |
| { $$ = []ConstExpr{$1} } |
| | expr_comma_list ',' expr |
| { $$ = append($1, $3) } |
| |
| // CONST DEFINITIONS |
| const_spec_list: |
| const_spec |
| | const_spec_list ';' const_spec |
| |
| const_spec: |
| tIDENT '=' expr |
| { |
| cds := &lexVDLFile(yylex).ConstDefs |
| *cds = append(*cds, &ConstDef{Expr:$3, NamePos:NamePos{Name:$1.String, Pos:$1.Pos}}) |
| } |
| |
| expr: |
| unary_expr |
| { $$ = $1 } |
| | expr tOROR expr |
| { $$ = &ConstBinaryOp{"||", $1, $3, $2} } |
| | expr tANDAND expr |
| { $$ = &ConstBinaryOp{"&&", $1, $3, $2} } |
| | expr '<' expr |
| { $$ = &ConstBinaryOp{"<", $1, $3, $2} } |
| | expr '>' expr |
| { $$ = &ConstBinaryOp{">", $1, $3, $2} } |
| | expr tLE expr |
| { $$ = &ConstBinaryOp{"<=", $1, $3, $2} } |
| | expr tGE expr |
| { $$ = &ConstBinaryOp{">=", $1, $3, $2} } |
| | expr tNE expr |
| { $$ = &ConstBinaryOp{"!=", $1, $3, $2} } |
| | expr tEQEQ expr |
| { $$ = &ConstBinaryOp{"==", $1, $3, $2} } |
| | expr '+' expr |
| { $$ = &ConstBinaryOp{"+", $1, $3, $2} } |
| | expr '-' expr |
| { $$ = &ConstBinaryOp{"-", $1, $3, $2} } |
| | expr '*' expr |
| { $$ = &ConstBinaryOp{"*", $1, $3, $2} } |
| | expr '/' expr |
| { $$ = &ConstBinaryOp{"/", $1, $3, $2} } |
| | expr '%' expr |
| { $$ = &ConstBinaryOp{"%", $1, $3, $2} } |
| | expr '|' expr |
| { $$ = &ConstBinaryOp{"|", $1, $3, $2} } |
| | expr '&' expr |
| { $$ = &ConstBinaryOp{"&", $1, $3, $2} } |
| | expr '^' expr |
| { $$ = &ConstBinaryOp{"^", $1, $3, $2} } |
| | expr tLSH expr |
| { $$ = &ConstBinaryOp{"<<", $1, $3, $2} } |
| | expr tRSH expr |
| { $$ = &ConstBinaryOp{">>", $1, $3, $2} } |
| |
| unary_expr: |
| operand |
| { $$ = $1 } |
| | '!' unary_expr |
| { $$ = &ConstUnaryOp{"!", $2, $1} } |
| | '+' unary_expr |
| { $$ = &ConstUnaryOp{"+", $2, $1} } |
| | '-' unary_expr |
| { $$ = &ConstUnaryOp{"-", $2, $1} } |
| | '^' unary_expr |
| { $$ = &ConstUnaryOp{"^", $2, $1} } |
| | type_no_typeobject '(' expr ')' |
| { $$ = &ConstTypeConv{$1, $3, $1.Pos()} } |
| | tTYPEOBJECT '(' type ')' |
| { $$ = &ConstTypeObject{$3, $1} } |
| |
| operand: |
| tSTRLIT |
| { $$ = &ConstLit{$1.String, $1.Pos} } |
| | tINTLIT |
| { $$ = &ConstLit{$1.int, $1.pos} } |
| | tRATLIT |
| { $$ = &ConstLit{$1.rat, $1.pos} } |
| | nameref |
| { $$ = &ConstNamed{$1.String, $1.Pos} } |
| | comp_lit |
| { $$ = $1 } |
| | comp_lit '.' tIDENT |
| { lexPosErrorf(yylex, $2, "cannot apply selector operator to unnamed constant")} |
| | comp_lit '[' expr ']' |
| { lexPosErrorf(yylex, $2, "cannot apply index operator to unnamed constant")} |
| | nameref '[' expr ']' |
| { $$ = &ConstIndexed{&ConstNamed{$1.String, $1.Pos}, $3, $1.Pos} } |
| | '(' expr ')' |
| { $$ = $2 } |
| |
| comp_lit: |
| otype '{' '}' |
| { $$ = &ConstCompositeLit{$1, nil, $2} } |
| | otype '{' kv_lit_list ocomma '}' |
| { $$ = &ConstCompositeLit{$1, $3, $2} } |
| |
| kv_lit_list: |
| kv_lit |
| { $$ = []KVLit{$1} } |
| | kv_lit_list ',' kv_lit |
| { $$ = append($1, $3) } |
| |
| kv_lit: |
| expr |
| { $$ = KVLit{Value:$1} } |
| | expr ':' expr |
| { $$ = KVLit{Key:$1, Value:$3} } |
| |
| // ERROR DEFINITIONS |
| error_spec_list: |
| error_spec |
| | error_spec_list ';' error_spec |
| |
| error_spec: |
| tIDENT inargs error_details |
| { |
| // Create *ErrorDef starting with a copy of error_details, filling in the |
| // name and params |
| ed := $3 |
| ed.NamePos = NamePos{Name:$1.String, Pos:$1.Pos} |
| ed.Params = $2 |
| eds := &lexVDLFile(yylex).ErrorDefs |
| *eds = append(*eds, &ed) |
| } |
| |
| error_details: |
| // Empty. |
| { $$ = ErrorDef{} } |
| | '{' '}' |
| { $$ = ErrorDef{} } |
| | '{' error_detail_list ocomma '}' |
| { $$ = $2 } |
| |
| error_detail_list: |
| error_detail |
| { $$ = $1 } |
| | error_detail_list ',' error_detail |
| { |
| // Merge each ErrorDef in-order to build the final ErrorDef. |
| $$ = $1 |
| switch { |
| case len($3.Actions) > 0: |
| $$.Actions = append($$.Actions, $3.Actions...) |
| case len($3.Formats) > 0: |
| $$.Formats = append($$.Formats, $3.Formats...) |
| } |
| } |
| |
| error_detail: |
| tIDENT |
| { $$ = ErrorDef{Actions: []StringPos{$1}} } |
| | tSTRLIT ':' tSTRLIT |
| { $$ = ErrorDef{Formats: []LangFmt{{Lang: $1, Fmt: $3}}} } |
| |
| // MISC TOKENS |
| |
| // nameref describes a named reference to another type, interface or const. We |
| // allow the following forms: |
| // foo |
| // foo.bar (and multi-dot variants) |
| // "pkg/path".foo |
| // "pkg/path".foo.bar (and multi-dot variants) |
| nameref: |
| dotnameref |
| { $$ = $1 } |
| | tSTRLIT '.' dotnameref |
| { $$ = StringPos{"\""+$1.String+"\"."+$3.String, $1.Pos} } |
| |
| // dotnameref describes just the dotted portion of nameref. |
| dotnameref: |
| tIDENT |
| { $$ = $1 } |
| | dotnameref '.' tIDENT |
| { $$ = StringPos{$1.String+"."+$3.String, $1.Pos} } |
| |
| otype: |
| // Empty. |
| { $$ = nil } |
| | type |
| { $$ = $1 } |
| |
| osemi: |
| // Empty. |
| | ';' |
| |
| ocomma: |
| // Empty. |
| | ',' |