// Copyright 2015 The Vanadium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

// Package parse implements the VDL parser, converting source files into a parse
// tree. The ParseFile function is the main entry point.
package parse

//go:generate ./grammar_gen.sh

// This is the only file in this package that uses the yacc-generated parser
// with entrypoint yyParse. The result of the parse is the simple parse.File
// representation, which is used by the compilation stage.
//
// TODO(toddw): The yacc-generated parser returns pretty lousy error messages;
// basically "syntax error" is the only string returned. Improve them.
import (
	"fmt"
	"io"
	"log"
	"math/big"
	"path"
	"strconv"
	"strings"
	"text/scanner"

	"v.io/x/ref/lib/vdl/vdlutil"
)

// Opts specifies vdl parsing options.
type Opts struct {
	ImportsOnly bool // Only parse imports; skip everything else.
}

// ParseFile takes a file name, the contents of the vdl file src, and the
// accumulated errors, and parses the vdl into a parse.File containing the parse
// tree. Returns nil if any errors are encountered, with errs containing more
// information. Otherwise returns the parsed File.
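//
// A minimal usage sketch (a hedged example; it assumes the caller constructs
// the errs accumulator, e.g. via a vdlutil constructor such as NewErrors):
//
//	errs := vdlutil.NewErrors(-1)
//	file := ParseFile("a.vdl", strings.NewReader(src), Opts{}, errs)
//	if file == nil {
//		// errs describes what went wrong.
//	}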
func ParseFile(fileName string, src io.Reader, opts Opts, errs *vdlutil.Errors) *File {
	start := startFile
	if opts.ImportsOnly {
		start = startFileImports
	}
	return parse(fileName, src, start, errs)
}

// ParseConfig takes a file name, the contents of the config file src, and the
// accumulated errors, and parses the config into a parse.Config containing the
// parse tree. Returns nil if any errors are encountered, with errs containing
// more information. Otherwise returns the parsed Config.
func ParseConfig(fileName string, src io.Reader, opts Opts, errs *vdlutil.Errors) *Config {
	start := startConfig
	if opts.ImportsOnly {
		start = startConfigImports
	}
	// Since the syntax is so similar between config files and vdl files, we just
	// parse it as a vdl file and populate Config afterwards.
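	//
	// Roughly, the mapping is (a sketch against a hypothetical config file;
	// the exact syntax is defined by the grammar):
	//
	//	config = Main{...}  // -> ConfigDef, and ConstDefs[0].Expr becomes Config
	//	import "a/b/c"      // -> Imports
	//	const Aux = true    // -> ConstDefs[1:]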
	file := parse(fileName, src, start, errs)
	if file == nil {
		return nil
	}
	if len(file.ErrorDefs) > 0 || len(file.TypeDefs) > 0 || len(file.Interfaces) > 0 {
		errs.Errorf("%s: config files may not contain error, type or interface definitions", fileName)
		return nil
	}
	config := &Config{
		FileName:  fileName,
		Doc:       file.Doc,
		ConfigDef: file.PackageDef,
		Imports:   file.Imports,
		Config:    file.ConstDefs[0].Expr,
		ConstDefs: file.ConstDefs[1:],
	}
	if len(config.ConstDefs) == 0 {
		config.ConstDefs = nil
	}
	if opts.ImportsOnly {
		// Clear out the const expression from the config clause.
		config.Config = nil
		config.ConstDefs = nil
	}
	return config
}

func parse(fileName string, src io.Reader, startTok int, errs *vdlutil.Errors) *File {
	if errs == nil {
		log.Fatal("Nil errors specified for Parse")
	}
	origErrs := errs.NumErrors()
	lex := newLexer(fileName, src, startTok, errs)
	if errCode := yyParse(lex); errCode != 0 {
		errs.Errorf("%s: yyParse returned error code %v", fileName, errCode)
	}
	lex.attachComments()
	if startTok == startFile || startTok == startConfig {
		vdlutil.Vlog.Printf("PARSE RESULTS\n\n%v\n\n", lex.vdlFile)
	}
	if origErrs != errs.NumErrors() {
		return nil
	}
	return lex.vdlFile
}

// ParseExprs parses data into a slice of parsed const expressions. The input
// data is specified in VDL syntax, with commas separating multiple expressions.
// There must be at least one expression specified in data. Errors are returned
// in errs.
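//
// A minimal sketch (a hedged example, assuming the two expressions below are
// valid VDL consts):
//
//	exprs := ParseExprs(`"abc", 1+2`, errs)
//	// on success, exprs holds two ConstExprs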
func ParseExprs(data string, errs *vdlutil.Errors) []ConstExpr {
	const name = "exprs"
	lex := newLexer(name, strings.NewReader(data), startExprs, errs)
	if errCode := yyParse(lex); errCode != 0 {
		errs.Errorf("vdl: yyParse returned error code %d", errCode)
	}
	return lex.exprs
}

// ExtractExprPackagePaths returns any package paths that appear in named
// constants in expr, e.g. "a/b/c".Foo => "a/b/c".
func ExtractExprPackagePaths(expr ConstExpr) []string {
	var paths []string
	switch e := expr.(type) {
	case *ConstNamed:
		if path := packageFromName(e.Name); len(path) > 0 {
			paths = append(paths, path)
		}
	case *ConstCompositeLit:
		for _, kv := range e.KVList {
			paths = append(paths, ExtractExprPackagePaths(kv.Key)...)
			paths = append(paths, ExtractExprPackagePaths(kv.Value)...)
		}
		paths = append(paths, ExtractTypePackagePaths(e.Type)...)
	case *ConstIndexed:
		paths = append(paths, ExtractExprPackagePaths(e.Expr)...)
		paths = append(paths, ExtractExprPackagePaths(e.IndexExpr)...)
	case *ConstTypeConv:
		paths = append(paths, ExtractTypePackagePaths(e.Type)...)
		paths = append(paths, ExtractExprPackagePaths(e.Expr)...)
	case *ConstTypeObject:
		paths = append(paths, ExtractTypePackagePaths(e.Type)...)
	case *ConstBinaryOp:
		paths = append(paths, ExtractExprPackagePaths(e.Lexpr)...)
		paths = append(paths, ExtractExprPackagePaths(e.Rexpr)...)
	case *ConstUnaryOp:
		paths = append(paths, ExtractExprPackagePaths(e.Expr)...)
	default:
		// leaf expression with no embedded expressions or types.
	}
	return paths
}

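// ExtractTypePackagePaths returns any package paths that appear in named types
// in typ, e.g. the named type "a/b/c".Foo yields "a/b/c".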
func ExtractTypePackagePaths(typ Type) []string {
	var paths []string
	switch t := typ.(type) {
	case *TypeNamed:
		if path := packageFromName(t.Name); len(path) > 0 {
			paths = append(paths, path)
		}
	case *TypeArray:
		paths = append(paths, ExtractTypePackagePaths(t.Elem)...)
	case *TypeList:
		paths = append(paths, ExtractTypePackagePaths(t.Elem)...)
	case *TypeSet:
		paths = append(paths, ExtractTypePackagePaths(t.Key)...)
	case *TypeMap:
		paths = append(paths, ExtractTypePackagePaths(t.Key)...)
		paths = append(paths, ExtractTypePackagePaths(t.Elem)...)
	case *TypeStruct:
		for _, f := range t.Fields {
			paths = append(paths, ExtractTypePackagePaths(f.Type)...)
		}
	case *TypeUnion:
		for _, f := range t.Fields {
			paths = append(paths, ExtractTypePackagePaths(f.Type)...)
		}
	case *TypeOptional:
		paths = append(paths, ExtractTypePackagePaths(t.Base)...)
	default:
		// leaf type with no embedded types.
	}
	return paths
}

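// packageFromName extracts the package path from a qualified name of the form
// `"a/b/c".Foo`, returning the empty string if name isn't in that form.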
func packageFromName(name string) string {
	if strings.HasPrefix(name, `"`) {
		if parts := strings.SplitN(name[1:], `".`, 2); len(parts) == 2 {
			return parts[0]
		}
	}
	return ""
}

// lexer implements the yyLexer interface for the yacc-generated parser.
//
// An oddity: lexer also holds the result of the parse. Most yacc examples hold
// parse results in package-scoped (global) variables, but doing that would mean
// we wouldn't be able to run separate parses concurrently. To enable that we'd
// need each invocation of yyParse to mutate its own result, but unfortunately
// the Go yacc tool doesn't provide any way to pass extra arguments to yyParse.
//
// So we cheat and hold the parse result in the lexer, and in the yacc rules we
// call lexVDLFile(yylex) to convert from the yyLexer interface back to the
// concrete lexer type, and retrieve a pointer to the parse result.
type lexer struct {
	// Fields for lexing / scanning the input source file.
	name     string
	scanner  scanner.Scanner
	errs     *vdlutil.Errors
	startTok int   // One of our dummy start tokens.
	started  bool  // Has the dummy start token already been emitted?
	sawEOF   bool  // Have we already seen the end-of-file?
	prevTok  token // Previous token, used for auto-semicolons and errors.

	// Fields holding the result of file and config parsing.
	comments commentMap
	vdlFile  *File

	// Field holding the result of expr parsing.
	exprs []ConstExpr
}

func newLexer(fileName string, src io.Reader, startTok int, errs *vdlutil.Errors) *lexer {
	l := &lexer{name: fileName, errs: errs, startTok: startTok, vdlFile: &File{BaseName: path.Base(fileName)}}
	l.comments.init()
	l.scanner.Init(src)
	// Don't produce character literal tokens, but do scan comments.
	l.scanner.Mode = scanner.ScanIdents | scanner.ScanFloats | scanner.ScanStrings | scanner.ScanRawStrings | scanner.ScanComments
	// Don't treat '\n' as whitespace, so we can auto-insert semicolons.
	l.scanner.Whitespace = 1<<'\t' | 1<<'\r' | 1<<' '
	l.scanner.Error = func(s *scanner.Scanner, msg string) {
		l.Error(msg)
	}
	return l
}

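// token is a single scanned token: its rune type (as reported by
// text/scanner), original text, and position.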
type token struct {
	t    rune
	text string
	pos  Pos
}

func (t token) String() string {
	return fmt.Sprintf("%v %U %s", t.pos, t.t, t.text)
}

// The lex* functions below all convert the yyLexer input arg into a concrete
// lexer as their first step. The type conversion is always safe since we're
// the ones who called yyParse, and thus know the concrete type is always lexer.

// lexVDLFile retrieves the File parse result from the yyLexer interface. This
// is called in the yacc rules to fill in the parse result.
func lexVDLFile(yylex yyLexer) *File {
	return yylex.(*lexer).vdlFile
}

// lexPosErrorf adds an error with positional information, on a type
// implementing the yyLexer interface. This is called in the yacc rules to
// throw errors.
func lexPosErrorf(yylex yyLexer, pos Pos, format string, v ...interface{}) {
	yylex.(*lexer).posErrorf(pos, format, v...)
}

// lexGenEOF tells the lexer to generate EOF tokens from now on, as if the end
// of file had been seen. This is called in the yacc rules to terminate the
// parse even if the file still has tokens.
func lexGenEOF(yylex yyLexer) {
	yylex.(*lexer).sawEOF = true
}

// lexStoreExprs stores the parsed exprs in the lexer.
func lexStoreExprs(yylex yyLexer, exprs []ConstExpr) {
	yylex.(*lexer).exprs = exprs
}

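// keywords maps each VDL keyword to its token id in the yacc-generated parser.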
var keywords = map[string]int{
	"const":      tCONST,
	"enum":       tENUM,
	"error":      tERROR,
	"import":     tIMPORT,
	"interface":  tINTERFACE,
	"map":        tMAP,
	"package":    tPACKAGE,
	"set":        tSET,
	"stream":     tSTREAM,
	"struct":     tSTRUCT,
	"type":       tTYPE,
	"typeobject": tTYPEOBJECT,
	"union":      tUNION,
}

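// nextRune describes the second rune of a 2-rune punctuation combo, along with
// the token id to emit when it matches.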
type nextRune struct {
	t  rune
	id int
}

// knownPunct is a map of our known punctuation. We support 1 and 2 rune
// combinations, where 2 rune combos must be immediately adjacent with no
// intervening whitespace. The 2-rune combos always take precedence over the
// 1-rune combos. Every entry is a valid 1-rune combo, which is returned as-is
// without a special token id; the ascii value represents itself.
var knownPunct = map[rune][]nextRune{
	';': nil,
	':': nil,
	',': nil,
	'.': nil,
	'*': nil,
	'(': nil,
	')': nil,
	'[': nil,
	']': nil,
	'{': nil,
	'}': nil,
	'+': nil,
	'-': nil,
	'/': nil,
	'%': nil,
	'^': nil,
	'?': nil,
	'!': {{'=', tNE}},
	'=': {{'=', tEQEQ}},
	'<': {{'=', tLE}, {'<', tLSH}},
	'>': {{'=', tGE}, {'>', tRSH}},
	'|': {{'|', tOROR}},
	'&': {{'&', tANDAND}},
}

// autoSemi determines whether to automatically add a semicolon, based on the
// rule that semicolons are always added at the end of each line after certain
// tokens. The Go auto-semicolon rule is described here:
// http://golang.org/ref/spec#Semicolons
func autoSemi(prevTok token) bool {
	return prevAutoSemi[prevTok.t] && prevTok.pos.IsValid()
}

var prevAutoSemi = map[rune]bool{
	scanner.Ident:     true,
	scanner.Int:       true,
	scanner.Float:     true,
	scanner.String:    true,
	scanner.RawString: true,
	')':               true,
	']':               true,
	'}':               true,
	'>':               true, // not in Go's rule; presumably so a line ending a stream<A,B> type list still gets its semicolon.
}

const yaccEOF int = 0 // yacc interprets 0 as the end-of-file marker

func init() {
	// yyDebug is defined in the yacc-generated grammar.go file. Setting it to 1
	// only produces output on syntax errors; set it to 4 to generate full debug
	// output. Sadly yacc doesn't give position information describing the error.
	yyDebug = 1
}

// A note on the comment-tracking strategy. During lexing we generate
// commentBlocks, defined as a sequence of adjacent or abutting comments (either
// // or /**/) with no intervening tokens. Adjacent means that the previous
// comment ends on the line immediately before the next one starts, and abutting
// means that the previous comment ends on the same line as the next one starts.
//
// At the end of the parse we try to attach comment blocks to parse tree items.
// We use a heuristic that works for common cases, but isn't perfect - it
// mis-associates some styles of comments, and we don't ensure all comment
// blocks will be associated to an item.
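//
// For example:
//
//	// previous comment
//	// adjacent: starts on the line immediately after the previous one ends
//	/* previous */ // abutting: starts on the same line the previous one ends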

type commentBlock struct {
	text      string
	firstLine int
	lastLine  int
}

// update returns true and adds tok to this block if tok is adjacent or
// abutting, otherwise it returns false without mutating the block. Since we're
// handling newlines explicitly in the lexer, we never get comment tokens with
// trailing newlines. We can get embedded newlines via /**/ style comments.
func (cb *commentBlock) update(tok token) bool {
	if cb.text == "" {
		// First update in this block.
		cb.text = tok.text
		cb.firstLine = tok.pos.Line
		cb.lastLine = tok.pos.Line + strings.Count(tok.text, "\n")
		return true
	}
	if cb.lastLine >= tok.pos.Line-1 {
		// The tok is adjacent or abutting.
		if cb.lastLine == tok.pos.Line-1 {
			// The tok is adjacent - need a newline.
			cb.text += "\n"
			cb.lastLine++
		}
		cb.text += tok.text
		cb.lastLine += strings.Count(tok.text, "\n")
		return true
	}
	return false
}

// commentMap keeps track of blocks of comments in a file. We store comment
// blocks in maps by first line, and by last line. Note that technically there
// could be more than one commentBlock ending on the same line, due to /**/
// style comments. We ignore this rare case and just keep the first one.
type commentMap struct {
	byFirst      map[int]commentBlock
	byLast       map[int]commentBlock
	cur          commentBlock
	prevTokenPos Pos
}

func (cm *commentMap) init() {
	cm.byFirst = make(map[int]commentBlock)
	cm.byLast = make(map[int]commentBlock)
}

// addComment adds a comment token to the map, either appending to the current
// block or ending the current block and starting a new one.
func (cm *commentMap) addComment(tok token) {
	if !cm.cur.update(tok) {
		cm.endBlock()
		if !cm.cur.update(tok) {
			panic(fmt.Errorf("vdl: couldn't update current comment block with token %v", tok))
		}
	}
	// Here's an example of why we need the special case endBlock logic.
	//
	//   type Foo struct {
	//     // doc1
	//     A int // doc2
	//     // doc3
	//     B int
	//   }
	//
	// The problem is that without the special-case, we'd group doc2 and doc3
	// together into the same block. That may actually be correct some times, but
	// it's more common for doc3 to be semantically associated with field B. Thus
	// if we've already seen any token on the same line as this comment block, we
	// end the block immediately. This means that comments appearing on the same
	// line as any other token are forced to be a single comment block.
	if cm.prevTokenPos.Line == tok.pos.Line {
		cm.endBlock()
	}
}

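// handleToken notes that a regular (non-comment) token was scanned: it ends
// the current comment block and records the token position, which addComment
// uses for its same-line check.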
func (cm *commentMap) handleToken(tok token) {
	cm.endBlock()
	cm.prevTokenPos = tok.pos
}

// endBlock adds the current comment block to the map, and resets it in
// preparation for new comments to be added. In the rare case where we see
// comment blocks that either start or end on the same line, we just keep the
// first comment block that was inserted.
func (cm *commentMap) endBlock() {
	_, inFirst := cm.byFirst[cm.cur.firstLine]
	_, inLast := cm.byLast[cm.cur.lastLine]
	if cm.cur.text != "" && !inFirst && !inLast {
		cm.byFirst[cm.cur.firstLine] = cm.cur
		cm.byLast[cm.cur.lastLine] = cm.cur
	}
	cm.cur.text = ""
	cm.cur.firstLine = 0
	cm.cur.lastLine = 0
}

// getDoc returns the documentation string associated with pos. Our rule is the
// last line of the documentation must end on the line immediately before pos.
// Once a comment block has been returned it isn't eligible to be attached to
// any other item, and is deleted from the map.
//
// The returned string is either empty, or is newline terminated.
func (cm *commentMap) getDoc(pos Pos) string {
	block := cm.byLast[pos.Line-1]
	if block.text == "" {
		return ""
	}
	doc := block.text + "\n"
	delete(cm.byFirst, block.firstLine)
	delete(cm.byLast, block.lastLine)
	return doc
}

// getDocSuffix returns the suffix documentation associated with pos. Our rule
// is the first line of the documentation must be on the same line as pos. Once
// a comment block has been returned it isn't eligible to be attached to any
// other item, and is deleted from the map.
//
// The returned string is either empty, or has a leading space.
func (cm *commentMap) getDocSuffix(pos Pos) string {
	block := cm.byFirst[pos.Line]
	if block.text == "" {
		return ""
	}
	doc := " " + block.text
	delete(cm.byFirst, block.firstLine)
	delete(cm.byLast, block.lastLine)
	return doc
}

// getFileDoc returns the file documentation. Our rule is that the first line
// of the documentation must occur on the first line of the file, and all other
// comments must have already been attached. Once a comment block has been
// returned it isn't eligible to be attached to any other item, and is deleted
// from the map.
//
// The returned string is either empty, or is newline terminated.
func (cm *commentMap) getFileDoc() string {
	block := cm.byFirst[1]
	if block.text == "" {
		return ""
	}
	doc := block.text + "\n"
	delete(cm.byFirst, block.firstLine)
	delete(cm.byLast, block.lastLine)
	return doc
}

func attachTypeComments(t Type, cm *commentMap, suffix bool) {
	switch tu := t.(type) {
	case *TypeEnum:
		for _, label := range tu.Labels {
			if suffix {
				label.DocSuffix = cm.getDocSuffix(label.Pos)
			} else {
				label.Doc = cm.getDoc(label.Pos)
			}
		}
	case *TypeArray:
		attachTypeComments(tu.Elem, cm, suffix)
	case *TypeList:
		attachTypeComments(tu.Elem, cm, suffix)
	case *TypeSet:
		attachTypeComments(tu.Key, cm, suffix)
	case *TypeMap:
		attachTypeComments(tu.Key, cm, suffix)
		attachTypeComments(tu.Elem, cm, suffix)
	case *TypeStruct:
		for _, field := range tu.Fields {
			if suffix {
				field.DocSuffix = cm.getDocSuffix(field.Pos)
			} else {
				field.Doc = cm.getDoc(field.Pos)
			}
			attachTypeComments(field.Type, cm, suffix)
		}
	case *TypeUnion:
		for _, field := range tu.Fields {
			if suffix {
				field.DocSuffix = cm.getDocSuffix(field.Pos)
			} else {
				field.Doc = cm.getDoc(field.Pos)
			}
			attachTypeComments(field.Type, cm, suffix)
		}
	case *TypeOptional:
		attachTypeComments(tu.Base, cm, suffix)
	case *TypeNamed:
		// Terminate the recursion at named types.
	default:
		panic(fmt.Errorf("vdl: unhandled type %#v", t))
	}
}

// attachComments causes all comments collected during the parse to be attached
// to the appropriate parse tree items. This should only be called after the
// parse has completed.
func (l *lexer) attachComments() {
	f := l.vdlFile
	// First attach all suffix docs - these occur on the same line.
	f.PackageDef.DocSuffix = l.comments.getDocSuffix(f.PackageDef.Pos)
	for _, x := range f.Imports {
		x.DocSuffix = l.comments.getDocSuffix(x.Pos)
	}
	for _, x := range f.ErrorDefs {
		x.DocSuffix = l.comments.getDocSuffix(x.Pos)
	}
	for _, x := range f.TypeDefs {
		x.DocSuffix = l.comments.getDocSuffix(x.Pos)
		attachTypeComments(x.Type, &l.comments, true)
	}
	for _, x := range f.ConstDefs {
		x.DocSuffix = l.comments.getDocSuffix(x.Pos)
	}
	for _, x := range f.Interfaces {
		x.DocSuffix = l.comments.getDocSuffix(x.Pos)
		for _, y := range x.Embeds {
			y.DocSuffix = l.comments.getDocSuffix(y.Pos)
		}
		for _, y := range x.Methods {
			y.DocSuffix = l.comments.getDocSuffix(y.Pos)
		}
	}
	// Now attach the docs - these occur on the line immediately before.
	f.PackageDef.Doc = l.comments.getDoc(f.PackageDef.Pos)
	for _, x := range f.Imports {
		x.Doc = l.comments.getDoc(x.Pos)
	}
	for _, x := range f.ErrorDefs {
		x.Doc = l.comments.getDoc(x.Pos)
	}
	for _, x := range f.TypeDefs {
		x.Doc = l.comments.getDoc(x.Pos)
		attachTypeComments(x.Type, &l.comments, false)
	}
	for _, x := range f.ConstDefs {
		x.Doc = l.comments.getDoc(x.Pos)
	}
	for _, x := range f.Interfaces {
		x.Doc = l.comments.getDoc(x.Pos)
		for _, y := range x.Embeds {
			y.Doc = l.comments.getDoc(y.Pos)
		}
		for _, y := range x.Methods {
			y.Doc = l.comments.getDoc(y.Pos)
		}
	}
	// Finally attach the top-level file doc - this occurs on the first line.
	f.Doc = l.comments.getFileDoc()
}

// nextToken uses the text/scanner package to scan the input for the next token.
func (l *lexer) nextToken() (tok token) {
	tok.t = l.scanner.Scan()
	tok.text = l.scanner.TokenText()
	// Both Pos and scanner.Position start line and column numbering at 1.
	tok.pos = Pos{Line: l.scanner.Position.Line, Col: l.scanner.Position.Column}
	return
}

// handleImag handles imaginary literals "[number]i" by peeking ahead.
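// For example, "3i" scans as the Int token "3" immediately followed by the
// rune 'i'; we peek for the 'i' and consume it here.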
func (l *lexer) handleImag(tok token, lval *yySymType) bool {
	if l.scanner.Peek() != 'i' {
		return false
	}
	l.scanner.Next()

	rat := new(big.Rat)
	if _, ok := rat.SetString(tok.text); !ok {
		l.posErrorf(tok.pos, "can't convert token [%v] to imaginary literal", tok)
	}
	lval.imagpos.pos = tok.pos
	lval.imagpos.imag = (*BigImag)(rat)
	return true
}

// translateToken takes the token we just scanned, and translates it into a
// token usable by yacc (lval and id). The done return arg is true when a real
// yacc token was generated, or false if we need another next/translate pass.
func (l *lexer) translateToken(tok token, lval *yySymType) (id int, done bool) {
	switch tok.t {
	case scanner.EOF:
		l.sawEOF = true
		if autoSemi(l.prevTok) {
			return ';', true
		}
		return yaccEOF, true

	case '\n':
		if autoSemi(l.prevTok) {
			return ';', true
		}
		// Returning done=false ensures next/translate will be called again so that
		// this newline is skipped; id=yaccEOF is a dummy value that's ignored.
		return yaccEOF, false

	case scanner.String, scanner.RawString:
		var err error
		lval.strpos.Pos = tok.pos
		lval.strpos.String, err = strconv.Unquote(tok.text)
		if err != nil {
			l.posErrorf(tok.pos, "can't convert token [%v] to string literal", tok)
		}
		return tSTRLIT, true

	case scanner.Int:
		if l.handleImag(tok, lval) {
			return tIMAGLIT, true
		}
		lval.intpos.pos = tok.pos
		lval.intpos.int = new(big.Int)
		if _, ok := lval.intpos.int.SetString(tok.text, 0); !ok {
			l.posErrorf(tok.pos, "can't convert token [%v] to integer literal", tok)
		}
		return tINTLIT, true

	case scanner.Float:
		if l.handleImag(tok, lval) {
			return tIMAGLIT, true
		}
		lval.ratpos.pos = tok.pos
		lval.ratpos.rat = new(big.Rat)
		if _, ok := lval.ratpos.rat.SetString(tok.text); !ok {
			l.posErrorf(tok.pos, "can't convert token [%v] to float literal", tok)
		}
		return tRATLIT, true

	case scanner.Ident:
		// Either the identifier is a known keyword, or we pass it through as IDENT.
		if keytok, ok := keywords[tok.text]; ok {
			lval.pos = tok.pos
			return keytok, true
		}
		lval.strpos.Pos = tok.pos
		lval.strpos.String = tok.text
		return tIDENT, true

	case scanner.Comment:
		l.comments.addComment(tok)
		// Comments aren't considered tokens, just like the '\n' case.
		return yaccEOF, false

	default:
		// Either the rune is in our known punctuation whitelist, or we've hit a
		// syntax error.
		if nextRunes, ok := knownPunct[tok.t]; ok {
			// Peek at the next rune and compare against our list of next runes. If
			// we find a match we return the id in next, otherwise just return the
			// original rune. This means that 2-rune tokens always take precedence
			// over 1-rune tokens. Either way the pos is set to the original rune.
			lval.pos = tok.pos
			peek := l.scanner.Peek()
			for _, next := range nextRunes {
				if peek == next.t {
					l.scanner.Next()
					return next.id, true
				}
			}
			return int(tok.t), true
		}
		l.posErrorf(tok.pos, "unexpected token [%v]", tok)
		l.sawEOF = true
		return yaccEOF, true
	}
}

// Lex is part of the yyLexer interface, called by the yacc-generated parser.
func (l *lexer) Lex(lval *yySymType) int {
	// Emit a dummy start token indicating what type of parse we're performing.
	if !l.started {
		l.started = true
		switch l.startTok {
		case startFileImports, startFile, startConfigImports, startConfig, startExprs:
			return l.startTok
		default:
			panic(fmt.Errorf("vdl: unhandled parse start token %d", l.startTok))
		}
	}
	// Always return EOF after we've scanned it. This ensures we emit EOF on the
	// next Lex call after scanning EOF and adding an auto-semicolon.
	if l.sawEOF {
		return yaccEOF
	}
	// Run next/translate in a loop to handle newline-triggered auto-semicolons;
	// nextToken needs to generate newline tokens so that we can trigger the
	// auto-semicolon logic, but if the newline doesn't generate an auto-semicolon
	// we should skip the token and move on to the next one.
	for {
		tok := l.nextToken()
		if id, done := l.translateToken(tok, lval); done {
			l.prevTok = tok
			l.comments.handleToken(tok)
			return id
		}
	}
}

// Error is part of the yyLexer interface, called by the yacc-generated parser.
// Unfortunately yacc doesn't give good error information - we dump the position
// of the previous scanned token as an approximation of where the error is.
func (l *lexer) Error(s string) {
	l.posErrorf(l.prevTok.pos, "%s", s)
}

// posErrorf generates an error with file and pos info.
func (l *lexer) posErrorf(pos Pos, format string, v ...interface{}) {
	var posstr string
	if pos.IsValid() {
		posstr = pos.String()
	}
	l.errs.Errorf(l.name+":"+posstr+" "+format, v...)
}