blob: 0c31dcf32e9755df16e9e1b6b233bb1565cc319e [file] [log] [blame]
Jiri Simsa5293dcb2014-05-10 09:56:38 -07001package exec
2
3import (
Bogdan Caprita66ca3532015-02-05 21:08:10 -08004 "bytes"
Jiri Simsac199bc12014-05-30 12:52:24 -07005 "encoding/binary"
Jiri Simsa5293dcb2014-05-10 09:56:38 -07006 "errors"
Cosmos Nicolaou1c18c1c2014-10-08 16:37:10 -07007 "fmt"
Jiri Simsac199bc12014-05-30 12:52:24 -07008 "io"
Jiri Simsa5293dcb2014-05-10 09:56:38 -07009 "os"
10 "os/exec"
Robert Kroegerb5d6bda2015-01-30 16:10:49 -080011 "strconv"
Cosmos Nicolaou1c18c1c2014-10-08 16:37:10 -070012 "strings"
Bogdan Caprita650b1622014-11-21 15:11:05 -080013 "sync"
Jiri Simsa5293dcb2014-05-10 09:56:38 -070014 "syscall"
15 "time"
Cosmos Nicolaoubfcac5f2014-05-22 21:57:35 -070016
Jiri Simsa337af232015-02-27 14:36:46 -080017 "v.io/x/lib/vlog"
Cosmos Nicolaou251a4d82014-09-30 22:28:45 -070018
Jiri Simsaffceefa2015-02-28 11:03:34 -080019 "v.io/x/ref/lib/exec/consts"
20 "v.io/x/ref/lib/timekeeper"
Jiri Simsa5293dcb2014-05-10 09:56:38 -070021)
22
// Sentinel errors exported by this package.
var (
	// ErrAuthTimeout signals a timeout in the auth handshake.
	ErrAuthTimeout = errors.New("timeout in auth handshake")
	// ErrTimeout signals that a wait on the child process (WaitForReady
	// or Wait) did not complete within the requested timeout.
	ErrTimeout = errors.New("timeout waiting for child")
	// ErrSecretTooLarge signals that a secret is too large.
	ErrSecretTooLarge = errors.New("secret is too large")
)
28
29// A ParentHandle is the Parent process' means of managing a single child.
30type ParentHandle struct {
Cosmos Nicolaouee7abc22014-05-27 10:50:03 -070031 c *exec.Cmd
Cosmos Nicolaou486d3492014-09-30 22:21:20 -070032 config Config
Cosmos Nicolaouee7abc22014-05-27 10:50:03 -070033 secret string
34 statusRead *os.File
35 statusWrite *os.File
36 tk timekeeper.TimeKeeper
Bogdan Caprita650b1622014-11-21 15:11:05 -080037 waitDone bool
38 waitErr error
39 waitLock sync.Mutex
Robert Kroegerb5d6bda2015-01-30 16:10:49 -080040 callbackPid int
Jiri Simsa5293dcb2014-05-10 09:56:38 -070041}
42
// ParentHandleOpt is implemented by every option type accepted by
// NewParentHandle.
type ParentHandleOpt interface {
	// ExecParentHandleOpt is a marker method: it carries no behavior and
	// exists only to tag a type as a NewParentHandle option.
	ExecParentHandleOpt()
}
49
Bogdan Capritaa4d9ee42014-06-20 16:42:53 -070050// ConfigOpt can be used to seed the parent handle with a
51// config to be passed to the child.
52type ConfigOpt struct {
Cosmos Nicolaou486d3492014-09-30 22:21:20 -070053 Config
Bogdan Capritaa4d9ee42014-06-20 16:42:53 -070054}
Jiri Simsac199bc12014-05-30 12:52:24 -070055
Bogdan Capritaa4d9ee42014-06-20 16:42:53 -070056// ExecParentHandleOpt makes ConfigOpt an instance of
Jiri Simsac199bc12014-05-30 12:52:24 -070057// ParentHandleOpt.
Bogdan Capritaa4d9ee42014-06-20 16:42:53 -070058func (ConfigOpt) ExecParentHandleOpt() {}
Jiri Simsac199bc12014-05-30 12:52:24 -070059
// SecretOpt is a NewParentHandle option that seeds the parent handle with
// a custom secret.
type SecretOpt string

// ExecParentHandleOpt marks SecretOpt as a ParentHandleOpt.
func (SecretOpt) ExecParentHandleOpt() {}
Jiri Simsac199bc12014-05-30 12:52:24 -070065
Jiri Simsa5293dcb2014-05-10 09:56:38 -070066// TimeKeeperOpt can be used to seed the parent handle with a custom timekeeper.
67type TimeKeeperOpt struct {
Cosmos Nicolaouee7abc22014-05-27 10:50:03 -070068 timekeeper.TimeKeeper
Jiri Simsa5293dcb2014-05-10 09:56:38 -070069}
70
71// ExecParentHandleOpt makes TimeKeeperOpt an instance of ParentHandleOpt.
Bogdan Capritaa4d9ee42014-06-20 16:42:53 -070072func (TimeKeeperOpt) ExecParentHandleOpt() {}
Jiri Simsa5293dcb2014-05-10 09:56:38 -070073
74// NewParentHandle creates a ParentHandle for the child process represented by
75// an instance of exec.Cmd.
Jiri Simsac199bc12014-05-30 12:52:24 -070076func NewParentHandle(c *exec.Cmd, opts ...ParentHandleOpt) *ParentHandle {
Cosmos Nicolaou486d3492014-09-30 22:21:20 -070077 cfg, secret := NewConfig(), ""
Jiri Simsac199bc12014-05-30 12:52:24 -070078 tk := timekeeper.RealTime()
Jiri Simsa5293dcb2014-05-10 09:56:38 -070079 for _, opt := range opts {
80 switch v := opt.(type) {
Bogdan Capritaa4d9ee42014-06-20 16:42:53 -070081 case ConfigOpt:
82 cfg = v
Jiri Simsac199bc12014-05-30 12:52:24 -070083 case SecretOpt:
84 secret = string(v)
Jiri Simsa5293dcb2014-05-10 09:56:38 -070085 case TimeKeeperOpt:
Cosmos Nicolaouee7abc22014-05-27 10:50:03 -070086 tk = v
Jiri Simsa5293dcb2014-05-10 09:56:38 -070087 default:
88 vlog.Errorf("Unrecognized parent option: %v", v)
89 }
90 }
Jiri Simsa5293dcb2014-05-10 09:56:38 -070091 return &ParentHandle{
Jiri Simsa24e87aa2014-06-09 09:27:34 -070092 c: c,
Bogdan Capritaa4d9ee42014-06-20 16:42:53 -070093 config: cfg,
Jiri Simsa24e87aa2014-06-09 09:27:34 -070094 secret: secret,
95 tk: tk,
Jiri Simsa5293dcb2014-05-10 09:56:38 -070096 }
97}
98
99// Start starts the child process, sharing a secret with it and
100// setting up a communication channel over which to read its status.
101func (p *ParentHandle) Start() error {
Cosmos Nicolaoua6fef892015-02-20 23:09:03 -0800102 // Make sure that there are no instances of the consts.ExecVersionVariable
Cosmos Nicolaoue5b41502014-10-29 22:55:09 -0700103 // already in the environment (which can happen when a subprocess
104 // creates a subprocess etc)
105 nenv := make([]string, 0, len(p.c.Env)+1)
106 for _, e := range p.c.Env {
Cosmos Nicolaoua6fef892015-02-20 23:09:03 -0800107 if strings.HasPrefix(e, consts.ExecVersionVariable+"=") {
Cosmos Nicolaoue5b41502014-10-29 22:55:09 -0700108 continue
109 }
110 nenv = append(nenv, e)
111 }
Cosmos Nicolaoua6fef892015-02-20 23:09:03 -0800112 p.c.Env = append(nenv, consts.ExecVersionVariable+"="+version1)
Cosmos Nicolaoue5b41502014-10-29 22:55:09 -0700113
Jiri Simsac199bc12014-05-30 12:52:24 -0700114 // Create anonymous pipe for communicating data between the child
115 // and the parent.
Bogdan Caprita7f491672014-11-13 14:51:08 -0800116 // TODO(caprita): As per ribrdb@, Go's exec does not prune the set
117 // of file descriptors passed down to the child process, and hence
118 // a child may get access to the files meant for another child.
119 // Do we need to ensure only one thread is allowed to create these
120 // pipes at any time?
Jiri Simsac199bc12014-05-30 12:52:24 -0700121 dataRead, dataWrite, err := os.Pipe()
Jiri Simsa5293dcb2014-05-10 09:56:38 -0700122 if err != nil {
123 return err
124 }
Jiri Simsac199bc12014-05-30 12:52:24 -0700125 defer dataRead.Close()
126 defer dataWrite.Close()
Jiri Simsa5293dcb2014-05-10 09:56:38 -0700127 statusRead, statusWrite, err := os.Pipe()
128 if err != nil {
129 return err
130 }
131 p.statusRead = statusRead
132 p.statusWrite = statusWrite
Jiri Simsac199bc12014-05-30 12:52:24 -0700133 // Add the parent-child pipes to cmd.ExtraFiles, offsetting all
134 // existing file descriptors accordingly.
Jiri Simsa5293dcb2014-05-10 09:56:38 -0700135 extraFiles := make([]*os.File, len(p.c.ExtraFiles)+2)
Jiri Simsac199bc12014-05-30 12:52:24 -0700136 extraFiles[0] = dataRead
Jiri Simsa5293dcb2014-05-10 09:56:38 -0700137 extraFiles[1] = statusWrite
138 for i, _ := range p.c.ExtraFiles {
139 extraFiles[i+2] = p.c.ExtraFiles[i]
140 }
141 p.c.ExtraFiles = extraFiles
Jiri Simsac199bc12014-05-30 12:52:24 -0700142 // Start the child process.
Jiri Simsa5293dcb2014-05-10 09:56:38 -0700143 if err := p.c.Start(); err != nil {
144 p.statusWrite.Close()
145 p.statusRead.Close()
146 return err
147 }
Jiri Simsac199bc12014-05-30 12:52:24 -0700148 // Pass data to the child using a pipe.
Bogdan Capritaa4d9ee42014-06-20 16:42:53 -0700149 serializedConfig, err := p.config.Serialize()
150 if err != nil {
151 return err
152 }
153 if err := encodeString(dataWrite, serializedConfig); err != nil {
Jiri Simsac199bc12014-05-30 12:52:24 -0700154 p.statusWrite.Close()
155 p.statusRead.Close()
156 return err
157 }
Jiri Simsa84059da2014-06-02 17:22:05 -0700158 if err := encodeString(dataWrite, p.secret); err != nil {
Jiri Simsa5293dcb2014-05-10 09:56:38 -0700159 p.statusWrite.Close()
160 p.statusRead.Close()
161 return err
162 }
163 return nil
164}
165
Bogdan Caprita66ca3532015-02-05 21:08:10 -0800166// copy is like io.Copy, but it also treats the receipt of the special eofChar
167// byte to mean io.EOF.
168func copy(w io.Writer, r io.Reader) (err error) {
169 buf := make([]byte, 1024)
170 for {
171 nRead, errRead := r.Read(buf)
172 if nRead > 0 {
173 if eofCharIndex := bytes.IndexByte(buf[:nRead], eofChar); eofCharIndex != -1 {
174 nRead = eofCharIndex
175 errRead = io.EOF
176 }
177 nWrite, errWrite := w.Write(buf[:nRead])
178 if errWrite != nil {
179 err = errWrite
180 break
181 }
182 if nRead != nWrite {
183 err = io.ErrShortWrite
184 break
185 }
186 }
187 if errRead == io.EOF {
188 break
189 }
190 if errRead != nil {
191 err = errRead
192 break
193 }
Jiri Simsa5293dcb2014-05-10 09:56:38 -0700194 }
Bogdan Caprita66ca3532015-02-05 21:08:10 -0800195 return
196}
197
198func waitForStatus(c chan interface{}, r *os.File) {
199 var readBytes bytes.Buffer
200 err := copy(&readBytes, r)
Jiri Simsa5293dcb2014-05-10 09:56:38 -0700201 r.Close()
Bogdan Caprita66ca3532015-02-05 21:08:10 -0800202 if err != nil {
203 c <- err
204 } else {
205 c <- readBytes.String()
206 }
Jiri Simsa5293dcb2014-05-10 09:56:38 -0700207 close(c)
Jiri Simsa5293dcb2014-05-10 09:56:38 -0700208}
209
210// WaitForReady will wait for the child process to become ready.
211func (p *ParentHandle) WaitForReady(timeout time.Duration) error {
Bogdan Caprita66ca3532015-02-05 21:08:10 -0800212 // An invariant of WaitForReady is that both statusWrite and statusRead
213 // get closed before WaitForStatus returns (statusRead gets closed by
214 // waitForStatus).
Jiri Simsa5293dcb2014-05-10 09:56:38 -0700215 defer p.statusWrite.Close()
Bogdan Caprita66ca3532015-02-05 21:08:10 -0800216 c := make(chan interface{}, 1)
217 go waitForStatus(c, p.statusRead)
218 // TODO(caprita): This can be simplified further by doing the reading
219 // from the status pipe here, and instead moving the timeout listener to
220 // a separate goroutine.
221 select {
222 case msg := <-c:
223 switch m := msg.(type) {
224 case error:
225 return m
226 case string:
227 if strings.HasPrefix(m, readyStatus) {
228 pid, err := strconv.Atoi(m[len(readyStatus):])
Robert Kroegerb5d6bda2015-01-30 16:10:49 -0800229 if err != nil {
230 return err
231 }
232 p.callbackPid = pid
Jiri Simsa5293dcb2014-05-10 09:56:38 -0700233 return nil
234 }
Bogdan Caprita66ca3532015-02-05 21:08:10 -0800235 if strings.HasPrefix(m, failedStatus) {
236 return fmt.Errorf("%s", strings.TrimPrefix(m, failedStatus))
Cosmos Nicolaou1c18c1c2014-10-08 16:37:10 -0700237 }
Bogdan Caprita66ca3532015-02-05 21:08:10 -0800238 return fmt.Errorf("unrecognised status from subprocess: %q", m)
239 default:
240 return fmt.Errorf("unexpected type %T", m)
Jiri Simsa5293dcb2014-05-10 09:56:38 -0700241 }
Bogdan Caprita66ca3532015-02-05 21:08:10 -0800242 case <-p.tk.After(timeout):
243 vlog.Errorf("Timed out waiting for child status")
244 // By writing the special eofChar byte to the pipe, we ensure
245 // that waitForStatus returns: the copy function treats eofChar
246 // to indicate end of read input. Note, copy could have
247 // finished for other reasons already (receipt of eofChar from
248 // the child process). Note, closing the pipe from the child
249 // (explicitly or due to crash) would NOT cause copy to read
250 // io.EOF, since we keep the statusWrite open in the parent.
251 // Hence, a child crash will eventually trigger this timeout.
252 p.statusWrite.Write([]byte{eofChar})
253 // Before returning, waitForStatus will close r, and then close
254 // c. Waiting on c ensures that r.Close() in waitForStatus
255 // already executed.
256 <-c
257 return ErrTimeout
Jiri Simsa5293dcb2014-05-10 09:56:38 -0700258 }
259 panic("unreachable")
260}
261
Bogdan Caprita650b1622014-11-21 15:11:05 -0800262// wait performs the Wait on the underlying command under lock, and only once
263// (subsequent wait calls block until the Wait is finished). It's ok to call
264// wait multiple times, and in parallel. The error from the initial Wait is
265// cached and returned for all subsequent calls.
266func (p *ParentHandle) wait() error {
267 p.waitLock.Lock()
268 defer p.waitLock.Unlock()
269 if p.waitDone {
270 return p.waitErr
271 }
272 p.waitErr = p.c.Wait()
273 p.waitDone = true
274 return p.waitErr
275}
276
Jiri Simsa5293dcb2014-05-10 09:56:38 -0700277// Wait will wait for the child process to terminate of its own accord.
278// It returns nil if the process exited cleanly with an exit status of 0,
279// any other exit code or error will result in an appropriate error return
280func (p *ParentHandle) Wait(timeout time.Duration) error {
281 c := make(chan error, 1)
Jiri Simsa5293dcb2014-05-10 09:56:38 -0700282 go func() {
Bogdan Caprita650b1622014-11-21 15:11:05 -0800283 c <- p.wait()
Jiri Simsa5293dcb2014-05-10 09:56:38 -0700284 close(c)
Jiri Simsa5293dcb2014-05-10 09:56:38 -0700285 }()
286 // If timeout is zero time.After will panic; we handle zero specially
287 // to mean infinite timeout.
288 if timeout > 0 {
289 select {
290 case <-p.tk.After(timeout):
291 return ErrTimeout
292 case err := <-c:
293 return err
294 }
295 } else {
296 return <-c
297 }
298 panic("unreachable")
299}
300
Cosmos Nicolaouee7abc22014-05-27 10:50:03 -0700301// Pid returns the pid of the child, 0 if the child process doesn't exist
302func (p *ParentHandle) Pid() int {
303 if p.c.Process != nil {
304 return p.c.Process.Pid
305 }
306 return 0
307}
308
Robert Kroegerb5d6bda2015-01-30 16:10:49 -0800309// ChildPid returns the pid of a child process as reported by its status
310// callback.
311func (p *ParentHandle) ChildPid() int {
312 return p.callbackPid
313}
314
Cosmos Nicolaouee7abc22014-05-27 10:50:03 -0700315// Exists returns true if the child process exists and can be signal'ed
316func (p *ParentHandle) Exists() bool {
317 if p.c.Process != nil {
318 return syscall.Kill(p.c.Process.Pid, 0) == nil
319 }
320 return false
321}
322
Jiri Simsa5293dcb2014-05-10 09:56:38 -0700323// Kill kills the child process.
324func (p *ParentHandle) Kill() error {
Robin Thellend44903522015-02-06 12:55:26 -0800325 if p.c.Process == nil {
326 return errors.New("no such process")
327 }
Jiri Simsa5293dcb2014-05-10 09:56:38 -0700328 return p.c.Process.Kill()
329}
330
331// Signal sends the given signal to the child process.
332func (p *ParentHandle) Signal(sig syscall.Signal) error {
Robin Thellend44903522015-02-06 12:55:26 -0800333 if p.c.Process == nil {
334 return errors.New("no such process")
335 }
Jiri Simsa5293dcb2014-05-10 09:56:38 -0700336 return syscall.Kill(p.c.Process.Pid, sig)
337}
338
339// Clean will clean up state, including killing the child process.
340func (p *ParentHandle) Clean() error {
341 if err := p.Kill(); err != nil {
342 return err
343 }
Bogdan Caprita650b1622014-11-21 15:11:05 -0800344 return p.wait()
Jiri Simsa5293dcb2014-05-10 09:56:38 -0700345}
Jiri Simsac199bc12014-05-30 12:52:24 -0700346
// encodeString writes data to w as a length-prefixed string: the length of
// data in bytes as a big-endian int64, followed by the bytes themselves.
//
// It returns the error from writing either part, or a "partial write"
// error if w accepts fewer payload bytes than it was given without
// reporting an error of its own.
func encodeString(w io.Writer, data string) error {
	if err := binary.Write(w, binary.BigEndian, int64(len(data))); err != nil {
		return err
	}
	// io.WriteString lets writers with a WriteString method take the
	// string directly instead of a freshly allocated []byte copy.
	n, err := io.WriteString(w, data)
	if err != nil {
		return err
	}
	if n != len(data) {
		return errors.New("partial write")
	}
	return nil
}