// Copyright 2015 The Vanadium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

// Package parse implements the VDL parser, converting source files into a parse
// tree. The ParseFile function is the main entry point.
package parse

//go:generate ./grammar_gen.sh

// This is the only file in this package that uses the yacc-generated parser
// with entrypoint yyParse. The result of the parse is the simple parse.File
// representation, which is used by the compilation stage.
//
// TODO(toddw): The yacc-generated parser returns pretty lousy error messages;
// basically "syntax error" is the only string returned. Improve them.
import (
	"fmt"
	"io"
	"log"
	"math/big"
	"path"
	"strconv"
	"strings"
	"text/scanner"

	"v.io/x/ref/lib/vdl/vdlutil"
)

// Opts specifies vdl parsing options.
type Opts struct {
	ImportsOnly bool // Only parse imports; skip everything else.
}

// ParseFile takes a file name, the contents of the vdl file src, and the
// accumulated errors, and parses the vdl into a parse.File containing the parse
// tree. Returns nil if any errors are encountered, with errs containing more
// information. Otherwise returns the parsed File.
func ParseFile(fileName string, src io.Reader, opts Opts, errs *vdlutil.Errors) *File {
	start := startFile
	if opts.ImportsOnly {
		start = startFileImports
	}
	return parse(fileName, src, start, errs)
}
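
// A minimal usage sketch (illustrative only; how the *vdlutil.Errors collector
// errs is constructed is up to the caller and isn't shown here, and "demo.vdl"
// is a made-up name):
//
//	src := strings.NewReader("package demo\n")
//	file := ParseFile("demo.vdl", src, Opts{}, errs)
//	if file == nil {
//		// errs describes why the parse failed.
//	}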

// ParseConfig takes a file name, the contents of the config file src, and the
// accumulated errors, and parses the config into a parse.Config containing the
// parse tree. Returns nil if any errors are encountered, with errs containing
// more information. Otherwise returns the parsed Config.
func ParseConfig(fileName string, src io.Reader, opts Opts, errs *vdlutil.Errors) *Config {
	start := startConfig
	if opts.ImportsOnly {
		start = startConfigImports
	}
	// Since the syntax is so similar between config files and vdl files, we just
	// parse it as a vdl file and populate Config afterwards.
	file := parse(fileName, src, start, errs)
	if file == nil {
		return nil
	}
	if len(file.ErrorDefs) > 0 || len(file.TypeDefs) > 0 || len(file.Interfaces) > 0 {
		errs.Errorf("%s: config files may not contain error, type or interface definitions", fileName)
		return nil
	}
	config := &Config{
		FileName:  fileName,
		Doc:       file.Doc,
		ConfigDef: file.PackageDef,
		Imports:   file.Imports,
		Config:    file.ConstDefs[0].Expr,
		ConstDefs: file.ConstDefs[1:],
	}
	if len(config.ConstDefs) == 0 {
		config.ConstDefs = nil
	}
	if opts.ImportsOnly {
		// Clear out the const expression from the config clause.
		config.Config = nil
		config.ConstDefs = nil
	}
	return config
}

func parse(fileName string, src io.Reader, startTok int, errs *vdlutil.Errors) *File {
	if errs == nil {
		log.Fatal("Nil errors specified for Parse")
	}
	origErrs := errs.NumErrors()
	lex := newLexer(fileName, src, startTok, errs)
	if errCode := yyParse(lex); errCode != 0 {
		errs.Errorf("%s: yyParse returned error code %v", fileName, errCode)
	}
	lex.attachComments()
	if startTok == startFile || startTok == startConfig {
		vdlutil.Vlog.Printf("PARSE RESULTS\n\n%v\n\n", lex.vdlFile)
	}
	if origErrs != errs.NumErrors() {
		return nil
	}
	return lex.vdlFile
}

// ParseExprs parses data into a slice of parsed const expressions. The input
// data is specified in VDL syntax, with commas separating multiple expressions.
// There must be at least one expression specified in data. Errors are returned
// in errs.
func ParseExprs(data string, errs *vdlutil.Errors) []ConstExpr {
	const name = "exprs"
	lex := newLexer(name, strings.NewReader(data), startExprs, errs)
	if errCode := yyParse(lex); errCode != 0 {
		errs.Errorf("vdl: yyParse returned error code %d", errCode)
	}
	return lex.exprs
}
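
// For example (sketch), ParseExprs(`1 + 2, "abc"`, errs) yields two ConstExpr
// values - one for the binary expression and one for the string literal -
// assuming errs is a non-nil *vdlutil.Errors collector.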

// ExtractExprPackagePaths returns any package paths that appear in named
// constants in expr, e.g. "a/b/c".Foo => "a/b/c".
func ExtractExprPackagePaths(expr ConstExpr) []string {
	var paths []string
	switch e := expr.(type) {
	case *ConstNamed:
		if path := packageFromName(e.Name); len(path) > 0 {
			paths = append(paths, path)
		}
	case *ConstCompositeLit:
		for _, kv := range e.KVList {
			paths = append(paths, ExtractExprPackagePaths(kv.Key)...)
			paths = append(paths, ExtractExprPackagePaths(kv.Value)...)
		}
		paths = append(paths, ExtractTypePackagePaths(e.Type)...)
	case *ConstIndexed:
		paths = append(paths, ExtractExprPackagePaths(e.Expr)...)
		paths = append(paths, ExtractExprPackagePaths(e.IndexExpr)...)
	case *ConstTypeConv:
		paths = append(paths, ExtractTypePackagePaths(e.Type)...)
		paths = append(paths, ExtractExprPackagePaths(e.Expr)...)
	case *ConstTypeObject:
		paths = append(paths, ExtractTypePackagePaths(e.Type)...)
	case *ConstBinaryOp:
		paths = append(paths, ExtractExprPackagePaths(e.Lexpr)...)
		paths = append(paths, ExtractExprPackagePaths(e.Rexpr)...)
	case *ConstUnaryOp:
		paths = append(paths, ExtractExprPackagePaths(e.Expr)...)
	default:
		// leaf expression with no embedded expressions or types.
	}
	return paths
}

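// ExtractTypePackagePaths returns any package paths that appear in named types
// nested in typ, e.g. "a/b/c".Foo => "a/b/c".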
func ExtractTypePackagePaths(typ Type) []string {
	var paths []string
	switch t := typ.(type) {
	case *TypeNamed:
		if path := packageFromName(t.Name); len(path) > 0 {
			paths = append(paths, path)
		}
	case *TypeArray:
		paths = append(paths, ExtractTypePackagePaths(t.Elem)...)
	case *TypeList:
		paths = append(paths, ExtractTypePackagePaths(t.Elem)...)
	case *TypeSet:
		paths = append(paths, ExtractTypePackagePaths(t.Key)...)
	case *TypeMap:
		paths = append(paths, ExtractTypePackagePaths(t.Key)...)
		paths = append(paths, ExtractTypePackagePaths(t.Elem)...)
	case *TypeStruct:
		for _, f := range t.Fields {
			paths = append(paths, ExtractTypePackagePaths(f.Type)...)
		}
	case *TypeUnion:
		for _, f := range t.Fields {
			paths = append(paths, ExtractTypePackagePaths(f.Type)...)
		}
	case *TypeOptional:
		paths = append(paths, ExtractTypePackagePaths(t.Base)...)
	default:
		// leaf type with no embedded types.
	}
	return paths
}

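// packageFromName extracts the package path from a qualified name of the form
// `"a/b/c".Foo`, returning "a/b/c"; it returns "" if name isn't in that form.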
func packageFromName(name string) string {
	if strings.HasPrefix(name, `"`) {
		if parts := strings.SplitN(name[1:], `".`, 2); len(parts) == 2 {
			return parts[0]
		}
	}
	return ""
}

// lexer implements the yyLexer interface for the yacc-generated parser.
//
// An oddity: lexer also holds the result of the parse. Most yacc examples hold
// parse results in package-scoped (global) variables, but doing that would mean
// we wouldn't be able to run separate parses concurrently. To enable that we'd
// need each invocation of yyParse to mutate its own result, but unfortunately
// the Go yacc tool doesn't provide any way to pass extra arguments to yyParse.
//
// So we cheat and hold the parse result in the lexer, and in the yacc rules we
// call lexVDLFile(yylex) to convert from the yyLexer interface back to the
// concrete lexer type, and retrieve a pointer to the parse result.
type lexer struct {
	// Fields for lexing / scanning the input source file.
	name     string
	scanner  scanner.Scanner
	errs     *vdlutil.Errors
	startTok int   // One of our dummy start tokens.
	started  bool  // Has the dummy start token already been emitted?
	sawEOF   bool  // Have we already seen the end-of-file?
	prevTok  token // Previous token, used for auto-semicolons and errors.

	// Fields holding the result of file and config parsing.
	comments commentMap
	vdlFile  *File

	// Field holding the result of expr parsing.
	exprs []ConstExpr
}

func newLexer(fileName string, src io.Reader, startTok int, errs *vdlutil.Errors) *lexer {
	l := &lexer{name: fileName, errs: errs, startTok: startTok, vdlFile: &File{BaseName: path.Base(fileName)}}
	l.comments.init()
	l.scanner.Init(src)
	// Don't produce character literal tokens, but do scan comments.
	l.scanner.Mode = scanner.ScanIdents | scanner.ScanFloats | scanner.ScanStrings | scanner.ScanRawStrings | scanner.ScanComments
	// Don't treat '\n' as whitespace, so we can auto-insert semicolons.
	l.scanner.Whitespace = 1<<'\t' | 1<<'\r' | 1<<' '
	l.scanner.Error = func(s *scanner.Scanner, msg string) {
		l.Error(msg)
	}
	return l
}

type token struct {
	t    rune
	text string
	pos  Pos
}

func (t token) String() string {
	return fmt.Sprintf("%v %U %s", t.pos, t.t, t.text)
}

// The lex* functions below all convert the yyLexer input arg into a concrete
// lexer as their first step. The type conversion is always safe since we're
// the ones who called yyParse, and thus know the concrete type is always lexer.

// lexVDLFile retrieves the File parse result from the yyLexer interface. This
// is called in the yacc rules to fill in the parse result.
func lexVDLFile(yylex yyLexer) *File {
	return yylex.(*lexer).vdlFile
}

// lexPosErrorf adds an error with positional information, on a type
// implementing the yyLexer interface. This is called in the yacc rules to
// throw errors.
func lexPosErrorf(yylex yyLexer, pos Pos, format string, v ...interface{}) {
	yylex.(*lexer).posErrorf(pos, format, v...)
}

// lexGenEOF tells the lexer to generate EOF tokens from now on, as if the end
// of file had been seen. This is called in the yacc rules to terminate the
// parse even if the file still has tokens.
func lexGenEOF(yylex yyLexer) {
	yylex.(*lexer).sawEOF = true
}

// lexStoreExprs stores the parsed exprs in the lexer.
func lexStoreExprs(yylex yyLexer, exprs []ConstExpr) {
	yylex.(*lexer).exprs = exprs
}

var keywords = map[string]int{
	"const":      tCONST,
	"enum":       tENUM,
	"error":      tERROR,
	"import":     tIMPORT,
	"interface":  tINTERFACE,
	"map":        tMAP,
	"package":    tPACKAGE,
	"set":        tSET,
	"stream":     tSTREAM,
	"struct":     tSTRUCT,
	"type":       tTYPE,
	"typeobject": tTYPEOBJECT,
	"union":      tUNION,
}

type nextRune struct {
	t  rune
	id int
}

// knownPunct is a map of our known punctuation. We support 1 and 2 rune
// combinations, where 2 rune combos must be immediately adjacent with no
// intervening whitespace. The 2-rune combos always take precedence over the
// 1-rune combos. Every entry is a valid 1-rune combo, which is returned as-is
// without a special token id; the ascii value represents itself.
var knownPunct = map[rune][]nextRune{
	';': nil,
	':': nil,
	',': nil,
	'.': nil,
	'*': nil,
	'(': nil,
	')': nil,
	'[': nil,
	']': nil,
	'{': nil,
	'}': nil,
	'+': nil,
	'-': nil,
	'/': nil,
	'%': nil,
	'^': nil,
	'?': nil,
	'!': {{'=', tNE}},
	'=': {{'=', tEQEQ}},
	'<': {{'=', tLE}, {'<', tLSH}},
	'>': {{'=', tGE}, {'>', tRSH}},
	'|': {{'|', tOROR}},
	'&': {{'&', tANDAND}},
}
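
// For example, "<=" with no intervening whitespace is returned as the single
// tLE token, while "< =" is returned as '<' followed by '='; a lone '<' is
// returned as its own ascii value.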

// autoSemi determines whether to automatically add a semicolon, based on the
// rule that semicolons are always added at the end of each line after certain
// tokens. The Go auto-semicolon rule is described here:
// http://golang.org/ref/spec#Semicolons
func autoSemi(prevTok token) bool {
	return prevAutoSemi[prevTok.t] && prevTok.pos.IsValid()
}

var prevAutoSemi = map[rune]bool{
	scanner.Ident:     true,
	scanner.Int:       true,
	scanner.Float:     true,
	scanner.String:    true,
	scanner.RawString: true,
	')':               true,
	']':               true,
	'}':               true,
	'>':               true,
}
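
// For example, a newline directly after an identifier, a literal, or a closing
// ')', ']', '}' or '>' triggers an automatic ';', while a newline after ',' or
// '{' does not.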

const yaccEOF int = 0 // yacc interprets 0 as the end-of-file marker

func init() {
	// yyDebug is defined in the yacc-generated grammar.go file. Setting it to 1
	// only produces output on syntax errors; set it to 4 to generate full debug
	// output. Sadly yacc doesn't give position information describing the error.
	yyDebug = 1
}

// A note on the comment-tracking strategy. During lexing we generate
// commentBlocks, defined as a sequence of adjacent or abutting comments (either
// // or /**/) with no intervening tokens. Adjacent means that the previous
// comment ends on the line immediately before the next one starts, and abutting
// means that the previous comment ends on the same line as the next one starts.
//
// At the end of the parse we try to attach comment blocks to parse tree items.
// We use a heuristic that works for common cases, but isn't perfect - it
// mis-associates some styles of comments, and we don't ensure all comment
// blocks will be associated to an item.
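//
// For example, two // comments on consecutive lines with nothing between them
// form a single block, and a /**/ comment that ends on the line where the next
// comment starts joins that same block; a blank line or any other intervening
// token ends the current block.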

type commentBlock struct {
	text      string
	firstLine int
	lastLine  int
}

// update returns true and adds tok to this block if tok is adjacent or
// abutting, otherwise it returns false without mutating the block. Since we're
// handling newlines explicitly in the lexer, we never get comment tokens with
// trailing newlines. We can get embedded newlines via /**/ style comments.
func (cb *commentBlock) update(tok token) bool {
	if cb.text == "" {
		// First update in this block.
		cb.text = tok.text
		cb.firstLine = tok.pos.Line
		cb.lastLine = tok.pos.Line + strings.Count(tok.text, "\n")
		return true
	}
	if cb.lastLine >= tok.pos.Line-1 {
		// The tok is adjacent or abutting.
		if cb.lastLine == tok.pos.Line-1 {
			// The tok is adjacent - need a newline.
			cb.text += "\n"
			cb.lastLine++
		}
		cb.text += tok.text
		cb.lastLine += strings.Count(tok.text, "\n")
		return true
	}
	return false
}

// commentMap keeps track of blocks of comments in a file. We store comment
// blocks in maps by first line, and by last line. Note that technically there
// could be more than one commentBlock ending on the same line, due to /**/
// style comments. We ignore this rare case and just keep the first one.
type commentMap struct {
	byFirst      map[int]commentBlock
	byLast       map[int]commentBlock
	cur          commentBlock
	prevTokenPos Pos
}

func (cm *commentMap) init() {
	cm.byFirst = make(map[int]commentBlock)
	cm.byLast = make(map[int]commentBlock)
}

// addComment adds a comment token to the map, either appending to the current
// block or ending the current block and starting a new one.
func (cm *commentMap) addComment(tok token) {
	if !cm.cur.update(tok) {
		cm.endBlock()
		if !cm.cur.update(tok) {
			panic(fmt.Errorf("vdl: couldn't update current comment block with token %v", tok))
		}
	}
	// Here's an example of why we need the special case endBlock logic.
	//
	//   type Foo struct {
	//     // doc1
	//     A int // doc2
	//     // doc3
	//     B int
	//   }
	//
	// The problem is that without the special-case, we'd group doc2 and doc3
	// together into the same block. That may actually be correct sometimes, but
	// it's more common for doc3 to be semantically associated with field B. Thus
	// if we've already seen any token on the same line as this comment block, we
	// end the block immediately. This means that comments appearing on the same
	// line as any other token are forced to be a single comment block.
	if cm.prevTokenPos.Line == tok.pos.Line {
		cm.endBlock()
	}
}

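// handleToken records that a regular (non-comment) token was seen: it ends the
// current comment block and remembers the token position, which addComment uses
// to detect comments sharing a line with another token.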
func (cm *commentMap) handleToken(tok token) {
	cm.endBlock()
	cm.prevTokenPos = tok.pos
}

// endBlock adds the current comment block to the map, and resets it in
// preparation for new comments to be added. In the rare case where we see
// comment blocks that either start or end on the same line, we just keep the
// first comment block that was inserted.
func (cm *commentMap) endBlock() {
	_, inFirst := cm.byFirst[cm.cur.firstLine]
	_, inLast := cm.byLast[cm.cur.lastLine]
	if cm.cur.text != "" && !inFirst && !inLast {
		cm.byFirst[cm.cur.firstLine] = cm.cur
		cm.byLast[cm.cur.lastLine] = cm.cur
	}
	cm.cur.text = ""
	cm.cur.firstLine = 0
	cm.cur.lastLine = 0
}

// getDoc returns the documentation string associated with pos. Our rule is the
// last line of the documentation must end on the line immediately before pos.
// Once a comment block has been returned it isn't eligible to be attached to
// any other item, and is deleted from the map.
//
// The returned string is either empty, or is newline terminated.
func (cm *commentMap) getDoc(pos Pos) string {
	block := cm.byLast[pos.Line-1]
	if block.text == "" {
		return ""
	}
	doc := block.text + "\n"
	delete(cm.byFirst, block.firstLine)
	delete(cm.byLast, block.lastLine)
	return doc
}
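
// For example, a comment block whose last line is line 9 is returned as the doc
// for an item whose Pos.Line is 10, while a block separated from the item by a
// blank line is left unattached.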

// getDocSuffix returns the suffix documentation associated with pos. Our rule
// is the first line of the documentation must be on the same line as pos. Once
// a comment block has been returned it isn't eligible to be attached to any
// other item, and is deleted from the map.
//
// The returned string is either empty, or has a leading space.
func (cm *commentMap) getDocSuffix(pos Pos) string {
	block := cm.byFirst[pos.Line]
	if block.text == "" {
		return ""
	}
	doc := " " + block.text
	delete(cm.byFirst, block.firstLine)
	delete(cm.byLast, block.lastLine)
	return doc
}

// getFileDoc returns the file documentation. Our rule is that the first line
// of the documentation must occur on the first line of the file, and all other
// comments must have already been attached. Once a comment block has been
// returned it isn't eligible to be attached to any other item, and is deleted
// from the map.
//
// The returned string is either empty, or is newline terminated.
func (cm *commentMap) getFileDoc() string {
	block := cm.byFirst[1]
	if block.text == "" {
		return ""
	}
	doc := block.text + "\n"
	delete(cm.byFirst, block.firstLine)
	delete(cm.byLast, block.lastLine)
	return doc
}

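// attachTypeComments recursively attaches doc comments (or suffix doc comments,
// if suffix is true) to the enum labels and struct/union fields nested in t.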
func attachTypeComments(t Type, cm *commentMap, suffix bool) {
	switch tu := t.(type) {
	case *TypeEnum:
		for _, label := range tu.Labels {
			if suffix {
				label.DocSuffix = cm.getDocSuffix(label.Pos)
			} else {
				label.Doc = cm.getDoc(label.Pos)
			}
		}
	case *TypeArray:
		attachTypeComments(tu.Elem, cm, suffix)
	case *TypeList:
		attachTypeComments(tu.Elem, cm, suffix)
	case *TypeSet:
		attachTypeComments(tu.Key, cm, suffix)
	case *TypeMap:
		attachTypeComments(tu.Key, cm, suffix)
		attachTypeComments(tu.Elem, cm, suffix)
	case *TypeStruct:
		for _, field := range tu.Fields {
			if suffix {
				field.DocSuffix = cm.getDocSuffix(field.Pos)
			} else {
				field.Doc = cm.getDoc(field.Pos)
			}
			attachTypeComments(field.Type, cm, suffix)
		}
	case *TypeUnion:
		for _, field := range tu.Fields {
			if suffix {
				field.DocSuffix = cm.getDocSuffix(field.Pos)
			} else {
				field.Doc = cm.getDoc(field.Pos)
			}
			attachTypeComments(field.Type, cm, suffix)
		}
	case *TypeOptional:
		attachTypeComments(tu.Base, cm, suffix)
	case *TypeNamed:
		// Terminate the recursion at named types.
	default:
		panic(fmt.Errorf("vdl: unhandled type %#v", t))
	}
}

// attachComments causes all comments collected during the parse to be attached
// to the appropriate parse tree items. This should only be called after the
// parse has completed.
func (l *lexer) attachComments() {
	f := l.vdlFile
	// First attach all suffix docs - these occur on the same line.
	f.PackageDef.DocSuffix = l.comments.getDocSuffix(f.PackageDef.Pos)
	for _, x := range f.Imports {
		x.DocSuffix = l.comments.getDocSuffix(x.Pos)
	}
	for _, x := range f.ErrorDefs {
		x.DocSuffix = l.comments.getDocSuffix(x.Pos)
	}
	for _, x := range f.TypeDefs {
		x.DocSuffix = l.comments.getDocSuffix(x.Pos)
		attachTypeComments(x.Type, &l.comments, true)
	}
	for _, x := range f.ConstDefs {
		x.DocSuffix = l.comments.getDocSuffix(x.Pos)
	}
	for _, x := range f.Interfaces {
		x.DocSuffix = l.comments.getDocSuffix(x.Pos)
		for _, y := range x.Embeds {
			y.DocSuffix = l.comments.getDocSuffix(y.Pos)
		}
		for _, y := range x.Methods {
			y.DocSuffix = l.comments.getDocSuffix(y.Pos)
		}
	}
	// Now attach the docs - these occur on the line immediately before.
	f.PackageDef.Doc = l.comments.getDoc(f.PackageDef.Pos)
	for _, x := range f.Imports {
		x.Doc = l.comments.getDoc(x.Pos)
	}
	for _, x := range f.ErrorDefs {
		x.Doc = l.comments.getDoc(x.Pos)
	}
	for _, x := range f.TypeDefs {
		x.Doc = l.comments.getDoc(x.Pos)
		attachTypeComments(x.Type, &l.comments, false)
	}
	for _, x := range f.ConstDefs {
		x.Doc = l.comments.getDoc(x.Pos)
	}
	for _, x := range f.Interfaces {
		x.Doc = l.comments.getDoc(x.Pos)
		for _, y := range x.Embeds {
			y.Doc = l.comments.getDoc(y.Pos)
		}
		for _, y := range x.Methods {
			y.Doc = l.comments.getDoc(y.Pos)
		}
	}
	// Finally attach the top-level file doc - this occurs on the first line.
	f.Doc = l.comments.getFileDoc()
}

// nextToken uses the text/scanner package to scan the input for the next token.
func (l *lexer) nextToken() (tok token) {
	tok.t = l.scanner.Scan()
	tok.text = l.scanner.TokenText()
	// Both Pos and scanner.Position start line and column numbering at 1.
	tok.pos = Pos{Line: l.scanner.Position.Line, Col: l.scanner.Position.Column}
	return
}

// handleImag handles imaginary literals "[number]i" by peeking ahead.
func (l *lexer) handleImag(tok token, lval *yySymType) bool {
	if l.scanner.Peek() != 'i' {
		return false
	}
	l.scanner.Next()

	rat := new(big.Rat)
	if _, ok := rat.SetString(tok.text); !ok {
		l.posErrorf(tok.pos, "can't convert token [%v] to imaginary literal", tok)
	}
	lval.imagpos.pos = tok.pos
	lval.imagpos.imag = (*BigImag)(rat)
	return true
}
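
// For example, the input "3.5i" reaches translateToken as the Float token
// "3.5"; handleImag peeks the trailing 'i', consumes it, and yields an
// imaginary literal instead of a rational literal.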

// translateToken takes the token we just scanned, and translates it into a
// token usable by yacc (lval and id). The done return arg is true when a real
// yacc token was generated, or false if we need another next/translate pass.
func (l *lexer) translateToken(tok token, lval *yySymType) (id int, done bool) {
	switch tok.t {
	case scanner.EOF:
		l.sawEOF = true
		if autoSemi(l.prevTok) {
			return ';', true
		}
		return yaccEOF, true

	case '\n':
		if autoSemi(l.prevTok) {
			return ';', true
		}
		// Returning done=false ensures next/translate will be called again so that
		// this newline is skipped; id=yaccEOF is a dummy value that's ignored.
		return yaccEOF, false

	case scanner.String, scanner.RawString:
		var err error
		lval.strpos.Pos = tok.pos
		lval.strpos.String, err = strconv.Unquote(tok.text)
		if err != nil {
			l.posErrorf(tok.pos, "can't convert token [%v] to string literal", tok)
		}
		return tSTRLIT, true

	case scanner.Int:
		if l.handleImag(tok, lval) {
			return tIMAGLIT, true
		}
		lval.intpos.pos = tok.pos
		lval.intpos.int = new(big.Int)
		if _, ok := lval.intpos.int.SetString(tok.text, 0); !ok {
			l.posErrorf(tok.pos, "can't convert token [%v] to integer literal", tok)
		}
		return tINTLIT, true

	case scanner.Float:
		if l.handleImag(tok, lval) {
			return tIMAGLIT, true
		}
		lval.ratpos.pos = tok.pos
		lval.ratpos.rat = new(big.Rat)
		if _, ok := lval.ratpos.rat.SetString(tok.text); !ok {
			l.posErrorf(tok.pos, "can't convert token [%v] to float literal", tok)
		}
		return tRATLIT, true

	case scanner.Ident:
		// Either the identifier is a known keyword, or we pass it through as IDENT.
		if keytok, ok := keywords[tok.text]; ok {
			lval.pos = tok.pos
			return keytok, true
		}
		lval.strpos.Pos = tok.pos
		lval.strpos.String = tok.text
		return tIDENT, true

	case scanner.Comment:
		l.comments.addComment(tok)
		// Comments aren't considered tokens, just like the '\n' case.
		return yaccEOF, false

	default:
		// Either the rune is in our known punctuation whitelist, or we've hit a
		// syntax error.
		if nextRunes, ok := knownPunct[tok.t]; ok {
			// Peek at the next rune and compare against our list of next runes. If
			// we find a match we return the id in next, otherwise just return the
			// original rune. This means that 2-rune tokens always take precedence
			// over 1-rune tokens. Either way the pos is set to the original rune.
			lval.pos = tok.pos
			peek := l.scanner.Peek()
			for _, next := range nextRunes {
				if peek == next.t {
					l.scanner.Next()
					return next.id, true
				}
			}
			return int(tok.t), true
		}
		l.posErrorf(tok.pos, "unexpected token [%v]", tok)
		l.sawEOF = true
		return yaccEOF, true
	}
}

// Lex is part of the yyLexer interface, called by the yacc-generated parser.
func (l *lexer) Lex(lval *yySymType) int {
	// Emit a dummy start token indicating what type of parse we're performing.
	if !l.started {
		l.started = true
		switch l.startTok {
		case startFileImports, startFile, startConfigImports, startConfig, startExprs:
			return l.startTok
		default:
			panic(fmt.Errorf("vdl: unhandled parse start token %d", l.startTok))
		}
	}
	// Always return EOF after we've scanned it. This ensures we emit EOF on the
	// next Lex call after scanning EOF and adding an auto-semicolon.
	if l.sawEOF {
		return yaccEOF
	}
	// Run next/translate in a loop to handle newline-triggered auto-semicolons;
	// nextToken needs to generate newline tokens so that we can trigger the
	// auto-semicolon logic, but if the newline doesn't generate an auto-semicolon
	// we should skip the token and move on to the next one.
	for {
		tok := l.nextToken()
		if id, done := l.translateToken(tok, lval); done {
			l.prevTok = tok
			l.comments.handleToken(tok)
			return id
		}
	}
}

// Error is part of the yyLexer interface, called by the yacc-generated parser.
// Unfortunately yacc doesn't give good error information - we dump the position
// of the previous scanned token as an approximation of where the error is.
func (l *lexer) Error(s string) {
	l.posErrorf(l.prevTok.pos, "%s", s)
}

// posErrorf generates an error with file and pos info.
func (l *lexer) posErrorf(pos Pos, format string, v ...interface{}) {
	var posstr string
	if pos.IsValid() {
		posstr = pos.String()
	}
	l.errs.Errorf(l.name+":"+posstr+" "+format, v...)
}