blob: 1d8f869c9ead1bd99f430b147c732d302c02456e [file] [log] [blame]
Jiri Simsa5293dcb2014-05-10 09:56:38 -07001package exec
2
3import (
Bogdan Caprita66ca3532015-02-05 21:08:10 -08004 "bytes"
Jiri Simsac199bc12014-05-30 12:52:24 -07005 "encoding/binary"
Jiri Simsa5293dcb2014-05-10 09:56:38 -07006 "errors"
Cosmos Nicolaou1c18c1c2014-10-08 16:37:10 -07007 "fmt"
Jiri Simsac199bc12014-05-30 12:52:24 -07008 "io"
Jiri Simsa5293dcb2014-05-10 09:56:38 -07009 "os"
10 "os/exec"
Robert Kroegerb5d6bda2015-01-30 16:10:49 -080011 "strconv"
Cosmos Nicolaou1c18c1c2014-10-08 16:37:10 -070012 "strings"
Bogdan Caprita650b1622014-11-21 15:11:05 -080013 "sync"
Jiri Simsa5293dcb2014-05-10 09:56:38 -070014 "syscall"
15 "time"
Cosmos Nicolaoubfcac5f2014-05-22 21:57:35 -070016
Jiri Simsa764efb72014-12-25 20:57:03 -080017 "v.io/core/veyron2/vlog"
Cosmos Nicolaou251a4d82014-09-30 22:28:45 -070018
Jiri Simsa764efb72014-12-25 20:57:03 -080019 "v.io/core/veyron/lib/timekeeper"
Jiri Simsa5293dcb2014-05-10 09:56:38 -070020)
21
// Errors exported by this package.
var (
	// ErrAuthTimeout reports a timeout in the auth handshake. It is not
	// referenced elsewhere in this file.
	ErrAuthTimeout = errors.New("timeout in auth handshake")
	// ErrTimeout is returned by WaitForReady and Wait when the child does
	// not respond or terminate within the requested timeout.
	ErrTimeout = errors.New("timeout waiting for child")
	// ErrSecretTooLarge reports that the secret is too large. It is not
	// referenced elsewhere in this file.
	ErrSecretTooLarge = errors.New("secret is too large")
)

28// A ParentHandle is the Parent process' means of managing a single child.
29type ParentHandle struct {
Cosmos Nicolaouee7abc22014-05-27 10:50:03 -070030 c *exec.Cmd
Cosmos Nicolaou486d3492014-09-30 22:21:20 -070031 config Config
Cosmos Nicolaouee7abc22014-05-27 10:50:03 -070032 secret string
33 statusRead *os.File
34 statusWrite *os.File
35 tk timekeeper.TimeKeeper
Bogdan Caprita650b1622014-11-21 15:11:05 -080036 waitDone bool
37 waitErr error
38 waitLock sync.Mutex
Robert Kroegerb5d6bda2015-01-30 16:10:49 -080039 callbackPid int
Jiri Simsa5293dcb2014-05-10 09:56:38 -070040}
41
// ParentHandleOpt is an option for NewParentHandle.
type ParentHandleOpt interface {
	// ExecParentHandleOpt is a signature 'dummy' method for the
	// interface: it carries no behavior and exists only so that a type can
	// mark itself as a ParentHandleOpt.
	ExecParentHandleOpt()
}

Bogdan Capritaa4d9ee42014-06-20 16:42:53 -070049// ConfigOpt can be used to seed the parent handle with a
50// config to be passed to the child.
51type ConfigOpt struct {
Cosmos Nicolaou486d3492014-09-30 22:21:20 -070052 Config
Bogdan Capritaa4d9ee42014-06-20 16:42:53 -070053}
Jiri Simsac199bc12014-05-30 12:52:24 -070054
Bogdan Capritaa4d9ee42014-06-20 16:42:53 -070055// ExecParentHandleOpt makes ConfigOpt an instance of
Jiri Simsac199bc12014-05-30 12:52:24 -070056// ParentHandleOpt.
Bogdan Capritaa4d9ee42014-06-20 16:42:53 -070057func (ConfigOpt) ExecParentHandleOpt() {}
Jiri Simsac199bc12014-05-30 12:52:24 -070058
// SecretOpt can be used to seed the parent handle with a custom secret.
type SecretOpt string

// ExecParentHandleOpt makes SecretOpt an instance of ParentHandleOpt.
func (SecretOpt) ExecParentHandleOpt() {}

Jiri Simsa5293dcb2014-05-10 09:56:38 -070065// TimeKeeperOpt can be used to seed the parent handle with a custom timekeeper.
66type TimeKeeperOpt struct {
Cosmos Nicolaouee7abc22014-05-27 10:50:03 -070067 timekeeper.TimeKeeper
Jiri Simsa5293dcb2014-05-10 09:56:38 -070068}
69
70// ExecParentHandleOpt makes TimeKeeperOpt an instance of ParentHandleOpt.
Bogdan Capritaa4d9ee42014-06-20 16:42:53 -070071func (TimeKeeperOpt) ExecParentHandleOpt() {}
Jiri Simsa5293dcb2014-05-10 09:56:38 -070072
73// NewParentHandle creates a ParentHandle for the child process represented by
74// an instance of exec.Cmd.
Jiri Simsac199bc12014-05-30 12:52:24 -070075func NewParentHandle(c *exec.Cmd, opts ...ParentHandleOpt) *ParentHandle {
Cosmos Nicolaou486d3492014-09-30 22:21:20 -070076 cfg, secret := NewConfig(), ""
Jiri Simsac199bc12014-05-30 12:52:24 -070077 tk := timekeeper.RealTime()
Jiri Simsa5293dcb2014-05-10 09:56:38 -070078 for _, opt := range opts {
79 switch v := opt.(type) {
Bogdan Capritaa4d9ee42014-06-20 16:42:53 -070080 case ConfigOpt:
81 cfg = v
Jiri Simsac199bc12014-05-30 12:52:24 -070082 case SecretOpt:
83 secret = string(v)
Jiri Simsa5293dcb2014-05-10 09:56:38 -070084 case TimeKeeperOpt:
Cosmos Nicolaouee7abc22014-05-27 10:50:03 -070085 tk = v
Jiri Simsa5293dcb2014-05-10 09:56:38 -070086 default:
87 vlog.Errorf("Unrecognized parent option: %v", v)
88 }
89 }
Jiri Simsa5293dcb2014-05-10 09:56:38 -070090 return &ParentHandle{
Jiri Simsa24e87aa2014-06-09 09:27:34 -070091 c: c,
Bogdan Capritaa4d9ee42014-06-20 16:42:53 -070092 config: cfg,
Jiri Simsa24e87aa2014-06-09 09:27:34 -070093 secret: secret,
94 tk: tk,
Jiri Simsa5293dcb2014-05-10 09:56:38 -070095 }
96}
97
98// Start starts the child process, sharing a secret with it and
99// setting up a communication channel over which to read its status.
100func (p *ParentHandle) Start() error {
Cosmos Nicolaoue5b41502014-10-29 22:55:09 -0700101 // Make sure that there are no instances of the VersionVariable
102 // already in the environment (which can happen when a subprocess
103 // creates a subprocess etc)
104 nenv := make([]string, 0, len(p.c.Env)+1)
105 for _, e := range p.c.Env {
106 if strings.HasPrefix(e, VersionVariable+"=") {
107 continue
108 }
109 nenv = append(nenv, e)
110 }
111 p.c.Env = append(nenv, VersionVariable+"="+version1)
112
Jiri Simsac199bc12014-05-30 12:52:24 -0700113 // Create anonymous pipe for communicating data between the child
114 // and the parent.
Bogdan Caprita7f491672014-11-13 14:51:08 -0800115 // TODO(caprita): As per ribrdb@, Go's exec does not prune the set
116 // of file descriptors passed down to the child process, and hence
117 // a child may get access to the files meant for another child.
118 // Do we need to ensure only one thread is allowed to create these
119 // pipes at any time?
Jiri Simsac199bc12014-05-30 12:52:24 -0700120 dataRead, dataWrite, err := os.Pipe()
Jiri Simsa5293dcb2014-05-10 09:56:38 -0700121 if err != nil {
122 return err
123 }
Jiri Simsac199bc12014-05-30 12:52:24 -0700124 defer dataRead.Close()
125 defer dataWrite.Close()
Jiri Simsa5293dcb2014-05-10 09:56:38 -0700126 statusRead, statusWrite, err := os.Pipe()
127 if err != nil {
128 return err
129 }
130 p.statusRead = statusRead
131 p.statusWrite = statusWrite
Jiri Simsac199bc12014-05-30 12:52:24 -0700132 // Add the parent-child pipes to cmd.ExtraFiles, offsetting all
133 // existing file descriptors accordingly.
Jiri Simsa5293dcb2014-05-10 09:56:38 -0700134 extraFiles := make([]*os.File, len(p.c.ExtraFiles)+2)
Jiri Simsac199bc12014-05-30 12:52:24 -0700135 extraFiles[0] = dataRead
Jiri Simsa5293dcb2014-05-10 09:56:38 -0700136 extraFiles[1] = statusWrite
137 for i, _ := range p.c.ExtraFiles {
138 extraFiles[i+2] = p.c.ExtraFiles[i]
139 }
140 p.c.ExtraFiles = extraFiles
Jiri Simsac199bc12014-05-30 12:52:24 -0700141 // Start the child process.
Jiri Simsa5293dcb2014-05-10 09:56:38 -0700142 if err := p.c.Start(); err != nil {
143 p.statusWrite.Close()
144 p.statusRead.Close()
145 return err
146 }
Jiri Simsac199bc12014-05-30 12:52:24 -0700147 // Pass data to the child using a pipe.
Bogdan Capritaa4d9ee42014-06-20 16:42:53 -0700148 serializedConfig, err := p.config.Serialize()
149 if err != nil {
150 return err
151 }
152 if err := encodeString(dataWrite, serializedConfig); err != nil {
Jiri Simsac199bc12014-05-30 12:52:24 -0700153 p.statusWrite.Close()
154 p.statusRead.Close()
155 return err
156 }
Jiri Simsa84059da2014-06-02 17:22:05 -0700157 if err := encodeString(dataWrite, p.secret); err != nil {
Jiri Simsa5293dcb2014-05-10 09:56:38 -0700158 p.statusWrite.Close()
159 p.statusRead.Close()
160 return err
161 }
162 return nil
163}
164
Bogdan Caprita66ca3532015-02-05 21:08:10 -0800165// copy is like io.Copy, but it also treats the receipt of the special eofChar
166// byte to mean io.EOF.
167func copy(w io.Writer, r io.Reader) (err error) {
168 buf := make([]byte, 1024)
169 for {
170 nRead, errRead := r.Read(buf)
171 if nRead > 0 {
172 if eofCharIndex := bytes.IndexByte(buf[:nRead], eofChar); eofCharIndex != -1 {
173 nRead = eofCharIndex
174 errRead = io.EOF
175 }
176 nWrite, errWrite := w.Write(buf[:nRead])
177 if errWrite != nil {
178 err = errWrite
179 break
180 }
181 if nRead != nWrite {
182 err = io.ErrShortWrite
183 break
184 }
185 }
186 if errRead == io.EOF {
187 break
188 }
189 if errRead != nil {
190 err = errRead
191 break
192 }
Jiri Simsa5293dcb2014-05-10 09:56:38 -0700193 }
Bogdan Caprita66ca3532015-02-05 21:08:10 -0800194 return
195}
196
197func waitForStatus(c chan interface{}, r *os.File) {
198 var readBytes bytes.Buffer
199 err := copy(&readBytes, r)
Jiri Simsa5293dcb2014-05-10 09:56:38 -0700200 r.Close()
Bogdan Caprita66ca3532015-02-05 21:08:10 -0800201 if err != nil {
202 c <- err
203 } else {
204 c <- readBytes.String()
205 }
Jiri Simsa5293dcb2014-05-10 09:56:38 -0700206 close(c)
Jiri Simsa5293dcb2014-05-10 09:56:38 -0700207}
208
209// WaitForReady will wait for the child process to become ready.
210func (p *ParentHandle) WaitForReady(timeout time.Duration) error {
Bogdan Caprita66ca3532015-02-05 21:08:10 -0800211 // An invariant of WaitForReady is that both statusWrite and statusRead
212 // get closed before WaitForStatus returns (statusRead gets closed by
213 // waitForStatus).
Jiri Simsa5293dcb2014-05-10 09:56:38 -0700214 defer p.statusWrite.Close()
Bogdan Caprita66ca3532015-02-05 21:08:10 -0800215 c := make(chan interface{}, 1)
216 go waitForStatus(c, p.statusRead)
217 // TODO(caprita): This can be simplified further by doing the reading
218 // from the status pipe here, and instead moving the timeout listener to
219 // a separate goroutine.
220 select {
221 case msg := <-c:
222 switch m := msg.(type) {
223 case error:
224 return m
225 case string:
226 if strings.HasPrefix(m, readyStatus) {
227 pid, err := strconv.Atoi(m[len(readyStatus):])
Robert Kroegerb5d6bda2015-01-30 16:10:49 -0800228 if err != nil {
229 return err
230 }
231 p.callbackPid = pid
Jiri Simsa5293dcb2014-05-10 09:56:38 -0700232 return nil
233 }
Bogdan Caprita66ca3532015-02-05 21:08:10 -0800234 if strings.HasPrefix(m, failedStatus) {
235 return fmt.Errorf("%s", strings.TrimPrefix(m, failedStatus))
Cosmos Nicolaou1c18c1c2014-10-08 16:37:10 -0700236 }
Bogdan Caprita66ca3532015-02-05 21:08:10 -0800237 return fmt.Errorf("unrecognised status from subprocess: %q", m)
238 default:
239 return fmt.Errorf("unexpected type %T", m)
Jiri Simsa5293dcb2014-05-10 09:56:38 -0700240 }
Bogdan Caprita66ca3532015-02-05 21:08:10 -0800241 case <-p.tk.After(timeout):
242 vlog.Errorf("Timed out waiting for child status")
243 // By writing the special eofChar byte to the pipe, we ensure
244 // that waitForStatus returns: the copy function treats eofChar
245 // to indicate end of read input. Note, copy could have
246 // finished for other reasons already (receipt of eofChar from
247 // the child process). Note, closing the pipe from the child
248 // (explicitly or due to crash) would NOT cause copy to read
249 // io.EOF, since we keep the statusWrite open in the parent.
250 // Hence, a child crash will eventually trigger this timeout.
251 p.statusWrite.Write([]byte{eofChar})
252 // Before returning, waitForStatus will close r, and then close
253 // c. Waiting on c ensures that r.Close() in waitForStatus
254 // already executed.
255 <-c
256 return ErrTimeout
Jiri Simsa5293dcb2014-05-10 09:56:38 -0700257 }
258 panic("unreachable")
259}
260
Bogdan Caprita650b1622014-11-21 15:11:05 -0800261// wait performs the Wait on the underlying command under lock, and only once
262// (subsequent wait calls block until the Wait is finished). It's ok to call
263// wait multiple times, and in parallel. The error from the initial Wait is
264// cached and returned for all subsequent calls.
265func (p *ParentHandle) wait() error {
266 p.waitLock.Lock()
267 defer p.waitLock.Unlock()
268 if p.waitDone {
269 return p.waitErr
270 }
271 p.waitErr = p.c.Wait()
272 p.waitDone = true
273 return p.waitErr
274}
275
Jiri Simsa5293dcb2014-05-10 09:56:38 -0700276// Wait will wait for the child process to terminate of its own accord.
277// It returns nil if the process exited cleanly with an exit status of 0,
278// any other exit code or error will result in an appropriate error return
279func (p *ParentHandle) Wait(timeout time.Duration) error {
280 c := make(chan error, 1)
Jiri Simsa5293dcb2014-05-10 09:56:38 -0700281 go func() {
Bogdan Caprita650b1622014-11-21 15:11:05 -0800282 c <- p.wait()
Jiri Simsa5293dcb2014-05-10 09:56:38 -0700283 close(c)
Jiri Simsa5293dcb2014-05-10 09:56:38 -0700284 }()
285 // If timeout is zero time.After will panic; we handle zero specially
286 // to mean infinite timeout.
287 if timeout > 0 {
288 select {
289 case <-p.tk.After(timeout):
290 return ErrTimeout
291 case err := <-c:
292 return err
293 }
294 } else {
295 return <-c
296 }
297 panic("unreachable")
298}
299
Cosmos Nicolaouee7abc22014-05-27 10:50:03 -0700300// Pid returns the pid of the child, 0 if the child process doesn't exist
301func (p *ParentHandle) Pid() int {
302 if p.c.Process != nil {
303 return p.c.Process.Pid
304 }
305 return 0
306}
307
Robert Kroegerb5d6bda2015-01-30 16:10:49 -0800308// ChildPid returns the pid of a child process as reported by its status
309// callback.
310func (p *ParentHandle) ChildPid() int {
311 return p.callbackPid
312}
313
Cosmos Nicolaouee7abc22014-05-27 10:50:03 -0700314// Exists returns true if the child process exists and can be signal'ed
315func (p *ParentHandle) Exists() bool {
316 if p.c.Process != nil {
317 return syscall.Kill(p.c.Process.Pid, 0) == nil
318 }
319 return false
320}
321
Jiri Simsa5293dcb2014-05-10 09:56:38 -0700322// Kill kills the child process.
323func (p *ParentHandle) Kill() error {
Robin Thellend44903522015-02-06 12:55:26 -0800324 if p.c.Process == nil {
325 return errors.New("no such process")
326 }
Jiri Simsa5293dcb2014-05-10 09:56:38 -0700327 return p.c.Process.Kill()
328}
329
330// Signal sends the given signal to the child process.
331func (p *ParentHandle) Signal(sig syscall.Signal) error {
Robin Thellend44903522015-02-06 12:55:26 -0800332 if p.c.Process == nil {
333 return errors.New("no such process")
334 }
Jiri Simsa5293dcb2014-05-10 09:56:38 -0700335 return syscall.Kill(p.c.Process.Pid, sig)
336}
337
338// Clean will clean up state, including killing the child process.
339func (p *ParentHandle) Clean() error {
340 if err := p.Kill(); err != nil {
341 return err
342 }
Bogdan Caprita650b1622014-11-21 15:11:05 -0800343 return p.wait()
Jiri Simsa5293dcb2014-05-10 09:56:38 -0700344}
Jiri Simsac199bc12014-05-30 12:52:24 -0700345
// encodeString writes data to w as a length-prefixed string: the byte length
// of data as a big-endian int64, followed by the bytes of data.
//
// It returns the first write error encountered, or a "partial write" error if
// w accepts fewer payload bytes than requested without reporting an error.
func encodeString(w io.Writer, data string) error {
	l := len(data)
	if err := binary.Write(w, binary.BigEndian, int64(l)); err != nil {
		return err
	}
	n, err := w.Write([]byte(data))
	if err != nil {
		return err
	}
	if n != l {
		return errors.New("partial write")
	}
	return nil
}
359}