lib: v23test

v23test aims to replace x/ref/{v23tests,modules}.

v23test differs from the existing packages in terms of both
API/behavior and implementation. The key differences are
outlined below.

API/behavior changes
----------------------------------------
- Shell can be used both for tests (e.g. -v23.tests style
tests) and for tools that run commands (e.g. dmrun,
servicerunner). If Shell is initialized with a testing.T,
Shell method errors trigger a test failure; otherwise, errors
trigger a panic. (These failures/panics can be disabled, but
we expect them to almost always be enabled.)
- By default, stdout and stderr from child processes are
streamed to the parent's respective output streams, as well
as to temporary files on disk. (This behavior is
configurable.)
- Improved function registration: registered functions now
accept typed parameters, so developers need not convert
arguments to/from strings when invoking registered
functions.
- New Main() method allows for "main-like" functions to be
registered and invoked with command-line flags. These
invocations behave identically to binary invocations, but
allow for the "main-like" functions to be compiled into the
parent process's binary, eliminating the need for expensive
JiriBuildGoPkg compilations at runtime.
- Simplified credentials management via ForkContext and
ForkCredentials methods.

Testing-specific API/behavior changes
----------------------------------------
- For tests, avoids the need for "go generate", thereby
avoiding extra process overhead and eliminating the need for
code generation. Test packages simply include a TestMain of
the form:
func TestMain(m *testing.M) {
os.Exit(v23test.Run(m.Run))
}
Developers avoid having to learn a new "framework" (V23Test)
and avoid adding "go generate" to their test development
workflow.
- Keeps the Shell separate from the *testing.T; developers
can interact directly with the *testing.T, just as they do
in other tests, and interact with the Shell when they wish
to perform Shell operations.
- Tests can be enabled/disabled independently of whether the
-v23.tests flag was specified.

Implementation
----------------------------------------
- Simpler, cleaner layering:
* gosh.Shell allows for spawning, managing, waiting on, and
terminating subprocesses, and is oblivious to Vanadium.
* v23test.Shell is a lightweight wrapper around gosh.Shell
that provides Vanadium-specific functionality such as
JiriBuildGoPkg, StartRootMountTable, credentials management,
etc.
- Does not use v.io/x/ref/lib/exec for subprocess
management. This library will continue to be used by device
manager (where it's needed, according to Bogdan), but it is
not needed by other clients of v23tests/modules. In
particular, secret-sharing via anonymous pipes is not
necessary for our integration tests (also, note that it's
not actually secure on all platforms), and existing tests
rely on other mechanisms to wait for subprocesses to report
that they are ready, e.g. we use the 'expect' package to
wait for the mount table to report its endpoint.

MultiPart: 1/2

Change-Id: I418270262f142c0f2cea0b5bb34b923bf4ece4f5
diff --git a/gosh/.api b/gosh/.api
new file mode 100644
index 0000000..3602790
--- /dev/null
+++ b/gosh/.api
@@ -0,0 +1,54 @@
+pkg gosh, func Call(string, ...interface{}) error
+pkg gosh, func MaybeRunFnAndExit()
+pkg gosh, func MaybeWatchParent()
+pkg gosh, func NewBufferedPipe() io.ReadWriteCloser
+pkg gosh, func NewShell(Opts) *Shell
+pkg gosh, func Register(string, interface{}) *Fn
+pkg gosh, func Run(func() int) int
+pkg gosh, func SendReady()
+pkg gosh, func SendVars(map[string]string)
+pkg gosh, func WatchParent()
+pkg gosh, method (*Cmd) AwaitReady()
+pkg gosh, method (*Cmd) AwaitVars(...string) map[string]string
+pkg gosh, method (*Cmd) CombinedOutput() []byte
+pkg gosh, method (*Cmd) Output() ([]byte, []byte)
+pkg gosh, method (*Cmd) Process() *os.Process
+pkg gosh, method (*Cmd) Run()
+pkg gosh, method (*Cmd) Shutdown(os.Signal)
+pkg gosh, method (*Cmd) Start()
+pkg gosh, method (*Cmd) StderrPipe() io.Reader
+pkg gosh, method (*Cmd) StdoutPipe() io.Reader
+pkg gosh, method (*Cmd) Wait()
+pkg gosh, method (*Fn) Call(...interface{}) error
+pkg gosh, method (*Shell) AddToCleanup(func())
+pkg gosh, method (*Shell) BuildGoPkg(string, ...string) string
+pkg gosh, method (*Shell) Cleanup()
+pkg gosh, method (*Shell) Cmd(string, ...string) *Cmd
+pkg gosh, method (*Shell) Fn(*Fn, ...interface{}) *Cmd
+pkg gosh, method (*Shell) HandleError(error)
+pkg gosh, method (*Shell) Main(*Fn, ...string) *Cmd
+pkg gosh, method (*Shell) MakeTempDir() string
+pkg gosh, method (*Shell) MakeTempFile() *os.File
+pkg gosh, method (*Shell) Ok()
+pkg gosh, method (*Shell) Popd()
+pkg gosh, method (*Shell) Pushd(string)
+pkg gosh, method (*Shell) Rename(string, string)
+pkg gosh, method (*Shell) Wait()
+pkg gosh, type Cmd struct
+pkg gosh, type Cmd struct, Args []string
+pkg gosh, type Cmd struct, OutputDir string
+pkg gosh, type Cmd struct, Stdin io.Reader
+pkg gosh, type Cmd struct, SuppressOutput bool
+pkg gosh, type Cmd struct, Vars map[string]string
+pkg gosh, type Fn struct
+pkg gosh, type Opts struct
+pkg gosh, type Opts struct, BinDir string
+pkg gosh, type Opts struct, ChildOutputDir string
+pkg gosh, type Opts struct, Errorf func(string, ...interface{})
+pkg gosh, type Opts struct, Logf func(string, ...interface{})
+pkg gosh, type Opts struct, SuppressChildOutput bool
+pkg gosh, type Shell struct
+pkg gosh, type Shell struct, Args []string
+pkg gosh, type Shell struct, Err error
+pkg gosh, type Shell struct, Opts Opts
+pkg gosh, type Shell struct, Vars map[string]string
diff --git a/gosh/buffered_pipe.go b/gosh/buffered_pipe.go
new file mode 100644
index 0000000..86757f4
--- /dev/null
+++ b/gosh/buffered_pipe.go
@@ -0,0 +1,62 @@
+// Copyright 2015 The Vanadium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package gosh
+
+import (
+	"bytes"
+	"errors"
+	"io"
+	"sync"
+)
+
+type bufferedPipe struct {
+	cond *sync.Cond
+	buf  bytes.Buffer
+	err  error
+}
+
+// NewBufferedPipe returns a new pipe backed by an unbounded in-memory buffer.
+// Writes on the pipe never block; reads on the pipe block until data is
+// available.
+func NewBufferedPipe() io.ReadWriteCloser {
+	return &bufferedPipe{cond: sync.NewCond(&sync.Mutex{})}
+}
+
+// Read reads from the pipe.
+func (p *bufferedPipe) Read(d []byte) (n int, err error) {
+	p.cond.L.Lock()
+	defer p.cond.L.Unlock()
+	for {
+		if p.buf.Len() > 0 {
+			return p.buf.Read(d)
+		}
+		if p.err != nil {
+			return 0, p.err
+		}
+		p.cond.Wait()
+	}
+}
+
+// Write appends d to the pipe and wakes every blocked reader; fails once closed.
+func (p *bufferedPipe) Write(d []byte) (n int, err error) {
+	p.cond.L.Lock()
+	defer p.cond.L.Unlock()
+	if p.err != nil {
+		return 0, errors.New("write on closed pipe")
+	}
+	defer p.cond.Broadcast()
+	return p.buf.Write(d)
+}
+
+// Close closes the pipe, waking every blocked reader so each one observes EOF.
+func (p *bufferedPipe) Close() error {
+	p.cond.L.Lock()
+	defer p.cond.L.Unlock()
+	if p.err == nil {
+		defer p.cond.Broadcast()
+		p.err = io.EOF
+	}
+	return nil
+}
diff --git a/gosh/child.go b/gosh/child.go
new file mode 100644
index 0000000..50725ee
--- /dev/null
+++ b/gosh/child.go
@@ -0,0 +1,69 @@
+// Copyright 2015 The Vanadium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package gosh
+
+// This file contains functions meant to be called from a child process.
+
+import (
+	"encoding/json"
+	"fmt"
+	"log"
+	"os"
+	"time"
+)
+
+const (
+	msgPrefix = "#! "
+	typeReady = "ready"
+	typeVars  = "vars"
+)
+
+type msg struct {
+	Type string
+	Vars map[string]string // nil if Type is typeReady
+}
+
+func send(m msg) {
+	data, err := json.Marshal(m)
+	if err != nil {
+		panic(err)
+	}
+	fmt.Printf("%s%s\n", msgPrefix, data)
+}
+
+// SendReady tells the parent process that this child process is "ready", e.g.
+// ready to serve requests.
+func SendReady() {
+	send(msg{Type: typeReady})
+}
+
+// SendVars sends the given vars to the parent process.
+func SendVars(vars map[string]string) {
+	send(msg{Type: typeVars, Vars: vars})
+}
+
+// WatchParent starts a goroutine that periodically checks whether the parent
+// process has exited and, if so, kills the current process.
+func WatchParent() {
+	go func() {
+		for {
+			if os.Getppid() == 1 {
+				log.Fatal("parent process has exited")
+			}
+			time.Sleep(time.Second)
+		}
+	}()
+}
+
+// MaybeWatchParent calls WatchParent iff this process was spawned by a
+// gosh.Shell in the parent process.
+func MaybeWatchParent() {
+	if os.Getenv(envSpawnedByShell) != "" {
+		// Our child processes should see envSpawnedByShell iff they were spawned by
+		// a gosh.Shell in this process.
+		os.Unsetenv(envSpawnedByShell)
+		WatchParent()
+	}
+}
diff --git a/gosh/cmd.go b/gosh/cmd.go
new file mode 100644
index 0000000..42bfb4d
--- /dev/null
+++ b/gosh/cmd.go
@@ -0,0 +1,434 @@
+// Copyright 2015 The Vanadium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package gosh
+
+import (
+	"bytes"
+	"encoding/json"
+	"errors"
+	"fmt"
+	"io"
+	"os"
+	"os/exec"
+	"path/filepath"
+	"sync"
+	"time"
+)
+
+var (
+	errAlreadyCalledStart = errors.New("gosh: already called Cmd.Start")
+	errAlreadyCalledWait  = errors.New("gosh: already called Cmd.Wait")
+	errDidNotCallStart    = errors.New("gosh: did not call Cmd.Start")
+)
+
+// Cmd represents a command. Not thread-safe.
+// Public fields should not be modified after calling Start.
+type Cmd struct {
+	// Vars is the map of env vars for this Cmd.
+	Vars map[string]string
+	// Args is the list of args for this Cmd.
+	Args []string
+	// SuppressOutput is inherited from Shell.Opts.SuppressChildOutput.
+	SuppressOutput bool
+	// OutputDir is inherited from Shell.Opts.ChildOutputDir.
+	OutputDir string
+	// Stdin specifies this Cmd's stdin. See comments in exec.Cmd for detailed
+	// semantics.
+	Stdin io.Reader
+	// Internal state.
+	sh             *Shell
+	c              *exec.Cmd
+	name           string
+	calledWait     bool
+	stdoutWriters  []io.Writer
+	stderrWriters  []io.Writer
+	closeAfterWait []io.Closer
+	condReady      *sync.Cond
+	recvReady      bool // protected by condReady.L
+	condVars       *sync.Cond
+	recvVars       map[string]string // protected by condVars.L
+}
+
+// StdoutPipe returns a Reader backed by a buffered pipe for this command's
+// stdout. Must be called before Start. May be called more than once; each
+// invocation creates a new pipe.
+func (c *Cmd) StdoutPipe() io.Reader {
+	c.sh.Ok()
+	res, err := c.stdoutPipe()
+	c.sh.HandleError(err)
+	return res
+}
+
+// StderrPipe returns a Reader backed by a buffered pipe for this command's
+// stderr. Must be called before Start. May be called more than once; each
+// invocation creates a new pipe.
+func (c *Cmd) StderrPipe() io.Reader {
+	c.sh.Ok()
+	res, err := c.stderrPipe()
+	c.sh.HandleError(err)
+	return res
+}
+
+// Start starts this command.
+func (c *Cmd) Start() {
+	c.sh.Ok()
+	c.sh.HandleError(c.start())
+}
+
+// AwaitReady waits for the child process to call SendReady. Must not be called
+// before Start or after Wait.
+func (c *Cmd) AwaitReady() {
+	c.sh.Ok()
+	c.sh.HandleError(c.awaitReady())
+}
+
+// AwaitVars waits for the child process to send values for the given vars
+// (using SendVars). Must not be called before Start or after Wait.
+func (c *Cmd) AwaitVars(keys ...string) map[string]string {
+	c.sh.Ok()
+	res, err := c.awaitVars(keys...)
+	c.sh.HandleError(err)
+	return res
+}
+
+// Wait waits for this command to exit.
+func (c *Cmd) Wait() {
+	c.sh.Ok()
+	c.sh.HandleError(c.wait())
+}
+
+// TODO(sadovsky): Maybe add a method to send SIGINT, wait for a bit, then send
+// SIGKILL if the process hasn't exited.
+
+// Shutdown sends the given signal to this command, then waits for it to exit.
+func (c *Cmd) Shutdown(sig os.Signal) {
+	c.sh.Ok()
+	c.sh.HandleError(c.shutdown(sig))
+}
+
+// Run calls Start followed by Wait.
+func (c *Cmd) Run() {
+	c.sh.Ok()
+	c.sh.HandleError(c.run())
+}
+
+// Output calls Start followed by Wait, then returns this command's stdout and
+// stderr.
+func (c *Cmd) Output() ([]byte, []byte) {
+	c.sh.Ok()
+	stdout, stderr, err := c.output()
+	c.sh.HandleError(err)
+	return stdout, stderr
+}
+
+// CombinedOutput calls Start followed by Wait, then returns this command's
+// combined stdout and stderr.
+func (c *Cmd) CombinedOutput() []byte {
+	c.sh.Ok()
+	res, err := c.combinedOutput()
+	c.sh.HandleError(err)
+	return res
+}
+
+// Process returns the underlying process handle for this command.
+func (c *Cmd) Process() *os.Process {
+	c.sh.Ok()
+	res, err := c.process()
+	c.sh.HandleError(err)
+	return res
+}
+
+////////////////////////////////////////
+// Internals
+
+func newCmd(sh *Shell, vars map[string]string, name string, args ...string) (*Cmd, error) {
+	// Same lookup rule as exec.Command: a bare name is resolved via LookPath.
+	if filepath.Base(name) == name {
+		resolved, err := exec.LookPath(name)
+		if err != nil {
+			return nil, err
+		}
+		name = resolved
+	}
+	cmd := &Cmd{
+		Vars:      vars,
+		Args:      args,
+		sh:        sh,
+		name:      name,
+		condReady: sync.NewCond(&sync.Mutex{}),
+		condVars:  sync.NewCond(&sync.Mutex{}),
+		recvVars:  map[string]string{},
+	}
+	// Hold cleanupMu so a signal-triggered Shell.cleanup() cannot race with us.
+	sh.cleanupMu.Lock()
+	defer sh.cleanupMu.Unlock()
+	if sh.calledCleanup {
+		return nil, errAlreadyCalledCleanup
+	}
+	sh.cmds = append(sh.cmds, cmd)
+	return cmd, nil
+}
+
+func (c *Cmd) calledStart() bool {
+	return c.c != nil
+}
+
+func closeAll(closers []io.Closer) {
+	for _, c := range closers {
+		c.Close()
+	}
+}
+
+func addWriter(writers *[]io.Writer, w io.Writer) {
+	*writers = append(*writers, w)
+}
+
+// recvWriter is an io.Writer that scans a child's output for gosh messages.
+type recvWriter struct {
+	c          *Cmd
+	buf        bytes.Buffer
+	readPrefix bool // if true, we've read len(msgPrefix) bytes of the current line
+	skipLine   bool // if true, ignore bytes until the next '\n'
+}
+
+func (w *recvWriter) Write(p []byte) (n int, err error) {
+	for _, b := range p {
+		if b == '\n' {
+			if w.readPrefix && !w.skipLine {
+				m := msg{}
+				if err := json.Unmarshal(w.buf.Bytes(), &m); err != nil {
+					return 0, err
+				}
+				switch m.Type {
+				case typeReady:
+					w.c.condReady.L.Lock()
+					w.c.recvReady = true
+					w.c.condReady.Signal()
+					w.c.condReady.L.Unlock()
+				case typeVars:
+					w.c.condVars.L.Lock()
+					w.c.recvVars = mergeMaps(w.c.recvVars, m.Vars)
+					w.c.condVars.Signal()
+					w.c.condVars.L.Unlock()
+				default:
+					return 0, fmt.Errorf("unknown message type: %q", m.Type)
+				}
+			}
+			// Reset state for next line.
+			w.readPrefix, w.skipLine = false, false
+			w.buf.Reset()
+		} else if !w.skipLine {
+			w.buf.WriteByte(b)
+			if !w.readPrefix && w.buf.Len() == len(msgPrefix) {
+				w.readPrefix = true
+				prefix := string(w.buf.Next(len(msgPrefix)))
+				if prefix != msgPrefix {
+					w.skipLine = true
+				}
+			}
+		}
+	}
+	return len(p), nil
+}
+
+func (c *Cmd) initMultiWriter(f *os.File, t string) (io.Writer, error) {
+	var writers *[]io.Writer
+	if f == os.Stdout {
+		writers = &c.stdoutWriters
+	} else {
+		writers = &c.stderrWriters
+	}
+	if !c.SuppressOutput {
+		addWriter(writers, f)
+	}
+	if c.OutputDir != "" {
+		suffix := "stderr"
+		if f == os.Stdout {
+			suffix = "stdout"
+		}
+		name := filepath.Join(c.OutputDir, filepath.Base(c.name)+"."+t+"."+suffix)
+		file, err := os.OpenFile(name, os.O_WRONLY|os.O_CREATE|os.O_EXCL, 0600)
+		if err != nil {
+			return nil, err
+		}
+		addWriter(writers, file)
+		c.closeAfterWait = append(c.closeAfterWait, file)
+	}
+	if f == os.Stdout {
+		addWriter(writers, &recvWriter{c: c})
+	}
+	return io.MultiWriter(*writers...), nil
+}
+
+func (c *Cmd) stdoutPipe() (io.Reader, error) {
+	if c.calledStart() {
+		return nil, errAlreadyCalledStart
+	}
+	p := NewBufferedPipe()
+	addWriter(&c.stdoutWriters, p)
+	c.closeAfterWait = append(c.closeAfterWait, p)
+	return p, nil
+}
+
+func (c *Cmd) stderrPipe() (io.Reader, error) {
+	if c.calledStart() {
+		return nil, errAlreadyCalledStart
+	}
+	p := NewBufferedPipe()
+	addWriter(&c.stderrWriters, p)
+	c.closeAfterWait = append(c.closeAfterWait, p)
+	return p, nil
+}
+
+func (c *Cmd) start() error {
+	if c.calledStart() {
+		return errAlreadyCalledStart
+	}
+	// Protect against Cmd.start() writing to c.c.Process concurrently with
+	// signal-triggered Shell.cleanup() reading from it.
+	c.sh.cleanupMu.Lock()
+	defer c.sh.cleanupMu.Unlock()
+	if c.sh.calledCleanup {
+		return errAlreadyCalledCleanup
+	}
+	c.c = exec.Command(c.name, c.Args...)
+	c.c.Env = mapToSlice(c.Vars)
+	c.c.Stdin = c.Stdin
+	t := time.Now().UTC().Format("20060102.150405.000000")
+	var err error
+	if c.c.Stdout, err = c.initMultiWriter(os.Stdout, t); err == nil {
+		c.c.Stderr, err = c.initMultiWriter(os.Stderr, t)
+	}
+	// TODO(sadovsky): Maybe wrap every child process with a "supervisor" process
+	// that calls WatchParent().
+	if err == nil {
+		err = c.c.Start()
+	}
+	if err != nil {
+		// The child never started: close output files and unblock pipe readers now.
+		closeAll(c.closeAfterWait)
+	}
+	return err
+}
+
+// TODO(sadovsky): Add timeouts for Cmd.{awaitReady,awaitVars,wait}.
+
+func (c *Cmd) awaitReady() error {
+	if !c.calledStart() {
+		return errDidNotCallStart
+	} else if c.calledWait {
+		return errAlreadyCalledWait
+	}
+	// http://golang.org/pkg/sync/#Cond.Wait
+	c.condReady.L.Lock()
+	for !c.recvReady {
+		c.condReady.Wait()
+	}
+	c.condReady.L.Unlock()
+	return nil
+}
+
+func (c *Cmd) awaitVars(keys ...string) (map[string]string, error) {
+	if !c.calledStart() {
+		return nil, errDidNotCallStart
+	} else if c.calledWait {
+		return nil, errAlreadyCalledWait
+	}
+	wantKeys := map[string]bool{}
+	for _, key := range keys {
+		wantKeys[key] = true
+	}
+	res := map[string]string{}
+	updateRes := func() {
+		for k, v := range c.recvVars {
+			if _, ok := wantKeys[k]; ok {
+				res[k] = v
+			}
+		}
+	}
+	// http://golang.org/pkg/sync/#Cond.Wait
+	c.condVars.L.Lock()
+	updateRes()
+	for len(res) < len(wantKeys) {
+		c.condVars.Wait()
+		updateRes()
+	}
+	c.condVars.L.Unlock()
+	return res, nil
+}
+
+func (c *Cmd) wait() error {
+	if !c.calledStart() {
+		return errDidNotCallStart
+	} else if c.calledWait {
+		return errAlreadyCalledWait
+	}
+	c.calledWait = true
+	err := c.c.Wait()
+	closeAll(c.closeAfterWait)
+	return err
+}
+
+func (c *Cmd) shutdown(sig os.Signal) error {
+	if !c.calledStart() {
+		return errDidNotCallStart
+	}
+	if err := c.c.Process.Signal(sig); err != nil {
+		return err
+	}
+	if err := c.wait(); err != nil {
+		if _, ok := err.(*exec.ExitError); !ok {
+			return err
+		}
+	}
+	return nil
+}
+
+func (c *Cmd) run() error {
+	if err := c.start(); err != nil {
+		return err
+	}
+	return c.wait()
+}
+
+func (c *Cmd) output() ([]byte, []byte, error) {
+	var stdout, stderr bytes.Buffer
+	addWriter(&c.stdoutWriters, &stdout)
+	addWriter(&c.stderrWriters, &stderr)
+	err := c.run()
+	return stdout.Bytes(), stderr.Bytes(), err
+}
+
+type threadSafeBuffer struct {
+	mu  sync.Mutex
+	buf bytes.Buffer
+}
+
+func (b *threadSafeBuffer) Write(p []byte) (int, error) {
+	b.mu.Lock()
+	defer b.mu.Unlock()
+	return b.buf.Write(p)
+}
+
+func (b *threadSafeBuffer) Bytes() []byte {
+	b.mu.Lock()
+	defer b.mu.Unlock()
+	return b.buf.Bytes()
+}
+
+func (c *Cmd) combinedOutput() ([]byte, error) {
+	buf := &threadSafeBuffer{}
+	addWriter(&c.stdoutWriters, buf)
+	addWriter(&c.stderrWriters, buf)
+	err := c.run()
+	return buf.Bytes(), err
+}
+
+func (c *Cmd) process() (*os.Process, error) {
+	if !c.calledStart() {
+		return nil, errDidNotCallStart
+	}
+	return c.c.Process, nil
+}
diff --git a/gosh/env_util.go b/gosh/env_util.go
new file mode 100644
index 0000000..3e7b815
--- /dev/null
+++ b/gosh/env_util.go
@@ -0,0 +1,69 @@
+// Copyright 2015 The Vanadium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package gosh
+
+import (
+	"sort"
+	"strings"
+)
+
+func splitKeyValue(kv string) (string, string) {
+	parts := strings.SplitN(kv, "=", 2)
+	if len(parts) != 2 {
+		panic(kv)
+	}
+	return parts[0], parts[1]
+}
+
+func joinKeyValue(k, v string) string {
+	return k + "=" + v
+}
+
+func sortByKey(vars []string) {
+	sort.Sort(keySorter(vars))
+}
+
+type keySorter []string
+
+func (s keySorter) Len() int      { return len(s) }
+func (s keySorter) Swap(i, j int) { s[i], s[j] = s[j], s[i] }
+func (s keySorter) Less(i, j int) bool {
+	ki, _ := splitKeyValue(s[i])
+	kj, _ := splitKeyValue(s[j])
+	return ki < kj
+}
+
+// sliceToMap converts a slice of "key=value" entries to a map, preferring later
+// values over earlier ones.
+func sliceToMap(s []string) map[string]string {
+	m := make(map[string]string, len(s))
+	for _, kv := range s {
+		k, v := splitKeyValue(kv)
+		m[k] = v
+	}
+	return m
+}
+
+// mapToSlice converts a map to a slice of "key=value" entries, sorted by key.
+func mapToSlice(m map[string]string) []string {
+	s := make([]string, 0, len(m))
+	for k, v := range m {
+		s = append(s, joinKeyValue(k, v))
+	}
+	sortByKey(s)
+	return s
+}
+
+// mergeMaps merges the given maps into a new map, preferring values from later
+// maps over those from earlier maps.
+func mergeMaps(maps ...map[string]string) map[string]string {
+	res := make(map[string]string)
+	for _, m := range maps {
+		for k, v := range m {
+			res[k] = v
+		}
+	}
+	return res
+}
diff --git a/gosh/internal/gosh_example/main.go b/gosh/internal/gosh_example/main.go
new file mode 100644
index 0000000..02ed614
--- /dev/null
+++ b/gosh/internal/gosh_example/main.go
@@ -0,0 +1,67 @@
+// Copyright 2015 The Vanadium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package main
+
+import (
+	"fmt"
+
+	"v.io/x/lib/gosh"
+	"v.io/x/lib/gosh/internal/gosh_example_lib"
+)
+
+func ExampleCmds() {
+	sh := gosh.NewShell(gosh.Opts{SuppressChildOutput: true})
+	defer sh.Cleanup()
+
+	// Start server.
+	binPath := sh.BuildGoPkg("v.io/x/lib/gosh/internal/gosh_example_server")
+	c := sh.Cmd(binPath)
+	c.Start()
+	c.AwaitReady()
+	addr := c.AwaitVars("Addr")["Addr"]
+	fmt.Println(addr)
+
+	// Run client.
+	binPath = sh.BuildGoPkg("v.io/x/lib/gosh/internal/gosh_example_client")
+	c = sh.Cmd(binPath, "-addr="+addr)
+	stdout, _ := c.Output()
+	fmt.Print(string(stdout))
+}
+
+var (
+	get   = gosh.Register("get", lib.Get)
+	serve = gosh.Register("serve", lib.Serve)
+)
+
+func ExampleFns() {
+	sh := gosh.NewShell(gosh.Opts{SuppressChildOutput: true})
+	defer sh.Cleanup()
+
+	// Start server.
+	c := sh.Fn(serve)
+	c.Start()
+	c.AwaitReady()
+	addr := c.AwaitVars("Addr")["Addr"]
+	fmt.Println(addr)
+
+	// Run client.
+	c = sh.Fn(get, addr)
+	stdout, _ := c.Output()
+	fmt.Print(string(stdout))
+}
+
+func ExampleShellMain() {
+	sh := gosh.NewShell(gosh.Opts{})
+	defer sh.Cleanup()
+	stdout, _ := sh.Main(lib.HelloWorldMain).Output()
+	fmt.Print(string(stdout))
+}
+
+func main() {
+	gosh.MaybeRunFnAndExit()
+	ExampleCmds()
+	ExampleFns()
+	ExampleShellMain()
+}
diff --git a/gosh/internal/gosh_example_client/main.go b/gosh/internal/gosh_example_client/main.go
new file mode 100644
index 0000000..6962d22
--- /dev/null
+++ b/gosh/internal/gosh_example_client/main.go
@@ -0,0 +1,20 @@
+// Copyright 2015 The Vanadium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package main
+
+import (
+	"flag"
+
+	"v.io/x/lib/gosh"
+	"v.io/x/lib/gosh/internal/gosh_example_lib"
+)
+
+var addr = flag.String("addr", "localhost:8080", "server addr")
+
+func main() {
+	gosh.MaybeWatchParent()
+	flag.Parse()
+	lib.Get(*addr)
+}
diff --git a/gosh/internal/gosh_example_lib/lib.go b/gosh/internal/gosh_example_lib/lib.go
new file mode 100644
index 0000000..c2158d4
--- /dev/null
+++ b/gosh/internal/gosh_example_lib/lib.go
@@ -0,0 +1,70 @@
+// Copyright 2015 The Vanadium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package lib
+
+import (
+	"fmt"
+	"io/ioutil"
+	"log"
+	"net"
+	"net/http"
+	"time"
+
+	"v.io/x/lib/gosh"
+)
+
+const helloWorld = "Hello, world!"
+
+// HelloWorldMain is used to demonstrate usage of Shell.Main.
+var HelloWorldMain = gosh.Register("HelloWorldMain", func() {
+	fmt.Println(helloWorld)
+})
+
+func Get(addr string) {
+	resp, err := http.Get("http://" + addr)
+	if err != nil {
+		log.Fatal(err)
+	}
+	defer resp.Body.Close()
+	body, err := ioutil.ReadAll(resp.Body)
+	fmt.Print(string(body))
+}
+
+// Copied from http://golang.org/src/net/http/server.go.
+type tcpKeepAliveListener struct {
+	*net.TCPListener
+}
+
+func (ln tcpKeepAliveListener) Accept() (c net.Conn, err error) {
+	tc, err := ln.AcceptTCP()
+	if err != nil {
+		return
+	}
+	tc.SetKeepAlive(true)
+	tc.SetKeepAlivePeriod(3 * time.Minute)
+	return tc, nil
+}
+
+func Serve() {
+	http.HandleFunc("/", func(w http.ResponseWriter, r *http.Request) {
+		fmt.Fprintln(w, helloWorld)
+	})
+	// Note: With http.ListenAndServe() there's no easy way to tell which port
+	// number we were assigned, so instead we use net.Listen() followed by
+	// http.Server.Serve().
+	srv := &http.Server{Addr: "localhost:0"}
+	ln, err := net.Listen("tcp", srv.Addr)
+	if err != nil {
+		panic(err)
+	}
+	gosh.SendVars(map[string]string{"Addr": ln.Addr().String()})
+	go func() {
+		time.Sleep(100 * time.Millisecond)
+		gosh.SendReady()
+	}()
+	if err = srv.Serve(tcpKeepAliveListener{ln.(*net.TCPListener)}); err != nil {
+		panic(err)
+	}
+}
diff --git a/gosh/internal/gosh_example_server/main.go b/gosh/internal/gosh_example_server/main.go
new file mode 100644
index 0000000..f7d96c2
--- /dev/null
+++ b/gosh/internal/gosh_example_server/main.go
@@ -0,0 +1,15 @@
+// Copyright 2015 The Vanadium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package main
+
+import (
+	"v.io/x/lib/gosh"
+	"v.io/x/lib/gosh/internal/gosh_example_lib"
+)
+
+func main() {
+	gosh.MaybeWatchParent()
+	lib.Serve()
+}
diff --git a/gosh/registry.go b/gosh/registry.go
new file mode 100644
index 0000000..e2511eb
--- /dev/null
+++ b/gosh/registry.go
@@ -0,0 +1,126 @@
+// Copyright 2015 The Vanadium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package gosh
+
+// Inspired by https://github.com/golang/appengine/blob/master/delay/delay.go.
+
+import (
+	"bytes"
+	"encoding/base64"
+	"encoding/gob"
+	"fmt"
+	"reflect"
+)
+
+// Fn is a registered, callable function.
+type Fn struct {
+	name  string
+	value reflect.Value
+}
+
+var (
+	fns       = map[string]*Fn{}
+	errorType = reflect.TypeOf((*error)(nil)).Elem()
+)
+
+// Register registers the given function with the given name. 'name' must be
+// unique across the dependency graph; 'fni' must be a function that accepts
+// gob-encodable arguments and returns an error or nothing.
+func Register(name string, fni interface{}) *Fn {
+	// TODO(sadovsky): Switch to using len(fns) as name, and maybe drop the name
+	// argument, if it turns out that initialization order is deterministic.
+	if _, ok := fns[name]; ok {
+		panic(fmt.Errorf("%s: already registered", name))
+	}
+	v := reflect.ValueOf(fni)
+	t := v.Type()
+	if t.Kind() != reflect.Func {
+		panic(fmt.Errorf("%s: not a function: %v", name, t.Kind()))
+	}
+	if t.NumOut() > 1 || t.NumOut() == 1 && t.Out(0) != errorType {
+		panic(fmt.Errorf("%s: function must return an error or nothing: %v", name, t))
+	}
+	// Register the function's args with gob. Needed because Shell.Fn() takes
+	// interface{} arguments.
+	for i := 0; i < t.NumIn(); i++ {
+		// Note: Clients are responsible for registering any concrete types stored
+		// inside interface{} arguments.
+		if t.In(i).Kind() == reflect.Interface {
+			continue
+		}
+		gob.Register(reflect.Zero(t.In(i)).Interface())
+	}
+	fn := &Fn{name: name, value: v}
+	fns[name] = fn
+	return fn
+}
+
+// Call looks up the function registered under name and invokes it with args.
+func Call(name string, args ...interface{}) error {
+	fn, registered := fns[name]
+	if !registered {
+		return fmt.Errorf("unknown function: %s", name)
+	}
+	return fn.Call(args...)
+}
+
+// Call calls the function fn with the input arguments args. A nil argument is
+// replaced by the zero value of the corresponding parameter type.
+func (fn *Fn) Call(args ...interface{}) error {
+	t := fn.value.Type()
+	in := make([]reflect.Value, 0, len(args))
+	for i, arg := range args {
+		av := reflect.ValueOf(arg)
+		if arg == nil {
+			// Client passed nil; construct the zero value for this argument based on
+			// the function signature. Every arg at or past the final parameter of a
+			// variadic function takes that parameter's element type.
+			if last := t.NumIn() - 1; t.IsVariadic() && i >= last {
+				av = reflect.Zero(t.In(last).Elem())
+			} else {
+				av = reflect.Zero(t.In(i))
+			}
+		}
+		in = append(in, av)
+	}
+	out := fn.value.Call(in)
+	if t.NumOut() == 1 && !out[0].IsNil() {
+		return out[0].Interface().(error)
+	}
+	return nil
+}
+
+////////////////////////////////////////
+// invocation
+
+type invocation struct {
+	Name string
+	Args []interface{}
+}
+
+// encInvocation encodes an invocation.
+func encInvocation(name string, args ...interface{}) (string, error) {
+	inv := invocation{Name: name, Args: args}
+	buf := &bytes.Buffer{}
+	if err := gob.NewEncoder(buf).Encode(inv); err != nil {
+		return "", fmt.Errorf("failed to encode invocation: %v", err)
+	}
+	// Base64-encode the gob-encoded bytes so that the result can be used as an
+	// env var value.
+	return base64.StdEncoding.EncodeToString(buf.Bytes()), nil
+}
+
+// decInvocation decodes an invocation.
+func decInvocation(s string) (name string, args []interface{}, err error) {
+	var inv invocation
+	b, err := base64.StdEncoding.DecodeString(s)
+	if err == nil {
+		err = gob.NewDecoder(bytes.NewReader(b)).Decode(&inv)
+	}
+	if err != nil {
+		return "", nil, fmt.Errorf("failed to decode invocation: %v", err)
+	}
+	return inv.Name, inv.Args, nil
+}
diff --git a/gosh/shell.go b/gosh/shell.go
new file mode 100644
index 0000000..faa1cc3
--- /dev/null
+++ b/gosh/shell.go
@@ -0,0 +1,586 @@
+// Copyright 2015 The Vanadium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// Package gosh provides facilities for running and managing processes: start
+// them, wait for them to exit, capture their output streams, pipe messages
+// between them, terminate them (e.g. on SIGINT), and so on.
+//
+// Gosh is meant to be used in situations where you might otherwise be tempted
+// to write a shell script. (Oh my gosh, no more shell scripts!)
+//
+// For usage examples, see shell_test.go and internal/gosh_example/main.go.
+package gosh
+
+import (
+	"errors"
+	"fmt"
+	"io/ioutil"
+	"log"
+	"math/rand"
+	"os"
+	"os/signal"
+	"path"
+	"path/filepath"
+	"sync"
+	"syscall"
+	"time"
+)
+
+// Names of the env vars that gosh reads and/or sets.
+const (
+	// envBinDir supplies the default for Opts.BinDir.
+	envBinDir = "GOSH_BIN_DIR"
+	// envChildOutputDir supplies the default for Opts.ChildOutputDir.
+	envChildOutputDir = "GOSH_CHILD_OUTPUT_DIR"
+	// envInvocation carries an encoded invocation (see encInvocation) to a child
+	// process started by Shell.Fn or Shell.Main; MaybeRunFnAndExit reads it.
+	envInvocation = "GOSH_INVOCATION"
+	// envSpawnedByShell is set to "1" in the environment of every command
+	// created by Shell.cmd.
+	envSpawnedByShell = "GOSH_SPAWNED_BY_SHELL"
+)
+
+// Errors reported (returned or used as panic values) on incorrect usage of
+// Shell.
+var (
+	// errAlreadyCalledCleanup: a method was called after Shell.Cleanup.
+	errAlreadyCalledCleanup = errors.New("gosh: already called Shell.Cleanup")
+	// errDidNotCallMaybeRunFnAndExit: Shell.Fn or Shell.Main was used without a
+	// prior call to MaybeRunFnAndExit.
+	errDidNotCallMaybeRunFnAndExit = errors.New("gosh: did not call Shell.MaybeRunFnAndExit")
+	// errDidNotCallNewShell: the Shell was not created via NewShell.
+	errDidNotCallNewShell = errors.New("gosh: did not call gosh.NewShell")
+	// errShellErrIsNotNil: a method was called while Shell.Err was non-nil.
+	errShellErrIsNotNil = errors.New("gosh: Shell.Err is not nil")
+)
+
+// Shell represents a shell. Not thread-safe.
+//
+// A Shell must be created with NewShell; most methods panic (via Ok) if it was
+// not, if Err is non-nil, or if Cleanup has already been called.
+type Shell struct {
+	// Err is the most recent error (may be nil).
+	Err error
+	// Opts is the Opts struct for this Shell, with default values filled in.
+	Opts Opts
+	// Vars is the map of env vars for this Shell.
+	Vars map[string]string
+	// Args is the list of args to append to subsequent command invocations.
+	Args []string
+	// Internal state.
+	calledNewShell bool       // set to true by newShell
+	dirStack       []string   // for pushd/popd
+	cleanupMu      sync.Mutex // protects the fields below; held during cleanup
+	calledCleanup  bool       // set by Cleanup and by the termination signal handler
+	cmds           []*Cmd     // commands iterated by wait and forEachRunningCmd
+	tempFiles      []*os.File // files created by makeTempFile; closed and removed by cleanup
+	tempDirs       []string   // directories created by makeTempDir; removed by cleanup
+	cleanupFns     []func()   // registered via addToCleanup; run in LIFO order by cleanup
+}
+
+// Opts configures Shell. Zero-valued fields are replaced with defaults when
+// the Shell is created (see the per-field comments).
+type Opts struct {
+	// Errorf is called whenever an error is encountered.
+	// If not specified, defaults to panic(fmt.Sprintf(format, v...)).
+	Errorf func(format string, v ...interface{})
+	// Logf is called to log things.
+	// If not specified, defaults to log.Printf(format, v...).
+	Logf func(format string, v ...interface{})
+	// Child stdout and stderr are propagated up to the parent's stdout and stderr
+	// iff SuppressChildOutput is false.
+	SuppressChildOutput bool
+	// If specified, each child's stdout and stderr streams are also piped to
+	// files in this directory.
+	// If not specified, defaults to the value of the GOSH_CHILD_OUTPUT_DIR env
+	// var (which may itself be empty).
+	ChildOutputDir string
+	// Directory where BuildGoPkg() writes compiled binaries.
+	// If not specified, defaults to the value of the GOSH_BIN_DIR env var; if
+	// that is also empty, a new temporary directory is created and used.
+	BinDir string
+}
+
+// NewShell creates a Shell configured by opts, with defaults filled in. If
+// initialization fails, the error is reported through HandleError (so it is
+// stored in the returned Shell's Err and passed to Opts.Errorf) and the Shell
+// is still returned.
+func NewShell(opts Opts) *Shell {
+	shell, initErr := newShell(opts)
+	shell.HandleError(initErr)
+	return shell
+}
+
+// HandleError records err as sh.Err. It first calls sh.Ok, so it panics if
+// the Shell is in an invalid state (including when sh.Err is already
+// non-nil). If err is non-nil and sh.Opts.Errorf is set, it then calls
+// sh.Opts.Errorf with the error.
+func (sh *Shell) HandleError(err error) {
+	sh.Ok()
+	sh.Err = err
+	if err == nil || sh.Opts.Errorf == nil {
+		return
+	}
+	sh.Opts.Errorf("%v", err)
+}
+
+// Cmd returns a Cmd for an invocation of the named program with the given
+// args. Any error encountered is reported through HandleError.
+func (sh *Shell) Cmd(name string, args ...string) *Cmd {
+	sh.Ok()
+	c, err := sh.cmd(nil, name, args...)
+	sh.HandleError(err)
+	return c
+}
+
+// Fn returns a Cmd for an invocation of the given registered Fn with the
+// given args. Any error encountered is reported through HandleError.
+func (sh *Shell) Fn(fn *Fn, args ...interface{}) *Cmd {
+	sh.Ok()
+	c, err := sh.fn(fn, args...)
+	sh.HandleError(err)
+	return c
+}
+
+// Main returns a Cmd for an invocation of the given registered main() function.
+// Intended usage: Have your program's main() call RealMain, then write a parent
+// program that uses Shell.Main to run RealMain in a child process. With this
+// approach, RealMain can be compiled into the parent program's binary. Caveat:
+// potential flag collisions.
+//
+// The registered function must take no parameters and return no results; any
+// error encountered is reported through HandleError.
+func (sh *Shell) Main(fn *Fn, args ...string) *Cmd {
+	sh.Ok()
+	c, err := sh.main(fn, args...)
+	sh.HandleError(err)
+	return c
+}
+
+// Wait waits for all commands started by this Shell to exit. The first error
+// encountered while waiting, if any, is reported through HandleError.
+func (sh *Shell) Wait() {
+	sh.Ok()
+	err := sh.wait()
+	sh.HandleError(err)
+}
+
+// Rename renames (moves) a file. It's just like os.Rename, but retries once on
+// error. A failure of the retry is reported through HandleError.
+func (sh *Shell) Rename(oldpath, newpath string) {
+	sh.Ok()
+	err := sh.rename(oldpath, newpath)
+	sh.HandleError(err)
+}
+
+// BuildGoPkg compiles a Go package using the "go build" command and writes the
+// resulting binary to sh.Opts.BinDir. Returns the path to the binary. If a
+// binary for the package already exists in sh.Opts.BinDir, it is not rebuilt.
+// Included in Shell for convenience, but could have just as easily been
+// provided as a utility function.
+func (sh *Shell) BuildGoPkg(pkg string, flags ...string) string {
+	sh.Ok()
+	binPath, err := sh.buildGoPkg(pkg, flags...)
+	sh.HandleError(err)
+	return binPath
+}
+
+// MakeTempFile creates a new temporary file in os.TempDir, opens the file for
+// reading and writing, and returns the resulting *os.File. The file is closed
+// and deleted when the Shell is cleaned up.
+func (sh *Shell) MakeTempFile() *os.File {
+	sh.Ok()
+	f, err := sh.makeTempFile()
+	sh.HandleError(err)
+	return f
+}
+
+// MakeTempDir creates a new temporary directory in os.TempDir and returns the
+// path of the new directory. The directory is deleted when the Shell is
+// cleaned up.
+func (sh *Shell) MakeTempDir() string {
+	sh.Ok()
+	dir, err := sh.makeTempDir()
+	sh.HandleError(err)
+	return dir
+}
+
+// Pushd behaves like Bash pushd: it changes the process's working directory
+// to dir and remembers the previous one so that Popd can return to it.
+func (sh *Shell) Pushd(dir string) {
+	sh.Ok()
+	err := sh.pushd(dir)
+	sh.HandleError(err)
+}
+
+// Popd behaves like Bash popd: it changes the process's working directory
+// back to the one saved by the most recent Pushd. Calling Popd with an empty
+// directory stack is an error.
+func (sh *Shell) Popd() {
+	sh.Ok()
+	err := sh.popd()
+	sh.HandleError(err)
+}
+
+// AddToCleanup registers the given function to be called by Shell.Cleanup().
+// Registered functions are called in LIFO order.
+func (sh *Shell) AddToCleanup(fn func()) {
+	sh.Ok()
+	err := sh.addToCleanup(fn)
+	sh.HandleError(err)
+}
+
+// Cleanup cleans up all resources (child processes, temporary files and
+// directories) associated with this Shell. It is safe (and recommended) to call
+// Cleanup after a Shell error: unlike most methods, it does not check sh.Err.
+// It panics if the Shell was not created via NewShell, or if Cleanup has
+// already been called.
+func (sh *Shell) Cleanup() {
+	if !sh.calledNewShell {
+		panic(errDidNotCallNewShell)
+	}
+	sh.cleanupMu.Lock()
+	defer sh.cleanupMu.Unlock()
+	if sh.calledCleanup {
+		panic(errAlreadyCalledCleanup)
+	}
+	sh.calledCleanup = true
+	sh.cleanup()
+}
+
+// Ok panics iff this Shell is in a state where it's invalid to call other
+// methods: it was not created via NewShell, its Err is non-nil, or Cleanup has
+// already been called (checked in that order). This method is public to
+// facilitate Shell wrapping.
+func (sh *Shell) Ok() {
+	switch {
+	case !sh.calledNewShell:
+		panic(errDidNotCallNewShell)
+	case sh.Err != nil:
+		// Panic on incorrect usage of Shell.
+		panic(errShellErrIsNotNil)
+	}
+	sh.cleanupMu.Lock()
+	defer sh.cleanupMu.Unlock()
+	if sh.calledCleanup {
+		panic(errAlreadyCalledCleanup)
+	}
+}
+
+////////////////////////////////////////
+// Internals
+
+// onTerminationSignal subscribes to SIGINT, SIGQUIT and SIGTERM, and starts a
+// goroutine that calls fn with the first such signal received. fn is called
+// at most once; later signals are not passed to it.
+func onTerminationSignal(fn func(os.Signal)) {
+	sigCh := make(chan os.Signal, 1)
+	signal.Notify(sigCh, syscall.SIGINT, syscall.SIGQUIT, syscall.SIGTERM)
+	go func() {
+		sig := <-sigCh
+		fn(sig)
+	}()
+}
+
+// newShell creates and initializes a Shell: it fills in defaults for opts,
+// makes this process its own process group leader, and installs a termination
+// signal handler that cleans up the Shell and exits the process.
+//
+// Note: On error, newShell returns a *Shell with Opts.Errorf initialized to
+// simplify things for the caller.
+func newShell(opts Opts) (*Shell, error) {
+	// Fill in defaults for unspecified options.
+	if opts.Errorf == nil {
+		opts.Errorf = func(format string, v ...interface{}) {
+			panic(fmt.Sprintf(format, v...))
+		}
+	}
+	if opts.Logf == nil {
+		opts.Logf = func(format string, v ...interface{}) {
+			log.Printf(format, v...)
+		}
+	}
+	if opts.ChildOutputDir == "" {
+		opts.ChildOutputDir = os.Getenv(envChildOutputDir)
+	}
+	sh := &Shell{
+		Opts:           opts,
+		Vars:           map[string]string{},
+		calledNewShell: true,
+	}
+	// BinDir falls back to the env var, then to a fresh temporary directory.
+	// This happens after sh is constructed because makeTempDir registers the
+	// directory with sh for later cleanup.
+	if sh.Opts.BinDir == "" {
+		sh.Opts.BinDir = os.Getenv(envBinDir)
+		if sh.Opts.BinDir == "" {
+			var err error
+			if sh.Opts.BinDir, err = sh.makeTempDir(); err != nil {
+				// Note: Here and below, we keep sh.calledCleanup false so that clients
+				// with a non-fatal Errorf implementation can safely defer sh.Cleanup()
+				// before checking sh.Err.
+				sh.cleanup()
+				return sh, err
+			}
+		}
+	}
+	// Set this process's PGID to its PID so that its child processes can be
+	// identified reliably.
+	// http://man7.org/linux/man-pages/man2/setpgid.2.html
+	// TODO(sadovsky): Is there any way to reliably kill all spawned subprocesses
+	// without modifying external state?
+	if err := syscall.Setpgid(0, 0); err != nil {
+		sh.cleanup()
+		return sh, err
+	}
+	// Call sh.cleanup() if needed when a termination signal is received.
+	onTerminationSignal(func(sig os.Signal) {
+		sh.logf("Received signal: %v\n", sig)
+		sh.cleanupMu.Lock()
+		defer sh.cleanupMu.Unlock()
+		if !sh.calledCleanup {
+			sh.calledCleanup = true
+			sh.cleanup()
+		}
+		// Note: We hold cleanupMu during os.Exit(1) so that the main goroutine will
+		// not call Shell.Ok() or Shell.Cleanup() and panic before we exit.
+		os.Exit(1)
+	})
+	return sh, nil
+}
+
+// logf forwards to sh.Opts.Logf, and is a no-op if Opts.Logf is nil.
+func (sh *Shell) logf(format string, v ...interface{}) {
+	if sh.Opts.Logf == nil {
+		return
+	}
+	sh.Opts.Logf(format, v...)
+}
+
+// cmd constructs a Cmd that runs name with args followed by sh.Args. The
+// child's environment is built by merging, in this order, the parent's
+// environment (os.Environ), sh.Vars, and vars, where vars additionally has
+// envSpawnedByShell set to "1". The Cmd's SuppressOutput and OutputDir are
+// taken from sh.Opts. vars may be nil; if non-nil, it is modified
+// (envSpawnedByShell is added to it).
+func (sh *Shell) cmd(vars map[string]string, name string, args ...string) (*Cmd, error) {
+	if vars == nil {
+		vars = make(map[string]string)
+	}
+	vars[envSpawnedByShell] = "1"
+	// Build the final argument list in a freshly allocated slice. Appending
+	// sh.Args directly to args could write into spare capacity of the caller's
+	// backing array (callers may pass a slice via args...), clobbering data the
+	// caller still owns.
+	allArgs := make([]string, 0, len(args)+len(sh.Args))
+	allArgs = append(allArgs, args...)
+	allArgs = append(allArgs, sh.Args...)
+	c, err := newCmd(sh, mergeMaps(sliceToMap(os.Environ()), sh.Vars, vars), name, allArgs...)
+	if err != nil {
+		return nil, err
+	}
+	c.SuppressOutput = sh.Opts.SuppressChildOutput
+	c.OutputDir = sh.Opts.ChildOutputDir
+	return c, nil
+}
+
+// fn returns a Cmd that re-executes the current binary (os.Args[0]) with the
+// encoded invocation of fn and args passed in the envInvocation env var. It
+// fails if MaybeRunFnAndExit has not been called in this process.
+func (sh *Shell) fn(fn *Fn, args ...interface{}) (*Cmd, error) {
+	// Safeguard against the developer forgetting to call MaybeRunFnAndExit, which
+	// could lead to infinite recursion.
+	if !calledMaybeRunFnAndExit {
+		return nil, errDidNotCallMaybeRunFnAndExit
+	}
+	encoded, err := encInvocation(fn.name, args...)
+	if err != nil {
+		return nil, err
+	}
+	return sh.cmd(map[string]string{envInvocation: encoded}, os.Args[0])
+}
+
+// main returns a Cmd that re-executes the current binary (os.Args[0]) with the
+// given command-line args and with the encoded invocation of fn passed in the
+// envInvocation env var. fn must take no parameters and return no results. It
+// fails if MaybeRunFnAndExit has not been called in this process.
+func (sh *Shell) main(fn *Fn, args ...string) (*Cmd, error) {
+	// Safeguard against the developer forgetting to call MaybeRunFnAndExit, which
+	// could lead to infinite recursion.
+	if !calledMaybeRunFnAndExit {
+		return nil, errDidNotCallMaybeRunFnAndExit
+	}
+	// Check that fn has the required signature.
+	if sig := fn.value.Type(); sig.NumIn() > 0 || sig.NumOut() > 0 {
+		return nil, errors.New("main function must have no input or output parameters")
+	}
+	encoded, err := encInvocation(fn.name)
+	if err != nil {
+		return nil, err
+	}
+	return sh.cmd(map[string]string{envInvocation: encoded}, os.Args[0], args...)
+}
+
+// wait waits for every command that has been started but not yet waited on.
+// Each failure is logged; the first one encountered is returned.
+func (sh *Shell) wait() error {
+	// Note: It is illegal to call newCmd() concurrently with Shell.wait(), so we
+	// need not hold cleanupMu when accessing sh.cmds below.
+	var firstErr error
+	for _, c := range sh.cmds {
+		if c.calledStart() && !c.calledWait {
+			if err := c.wait(); err != nil {
+				sh.logf("Cmd.Wait() failed: %v\n", err)
+				if firstErr == nil {
+					firstErr = err
+				}
+			}
+		}
+	}
+	return firstErr
+}
+
+// rename is os.Rename with a single retry: if the first attempt fails, it
+// sleeps for a random duration of under one second and tries once more,
+// returning the result of the second attempt.
+func (sh *Shell) rename(oldpath, newpath string) error {
+	if err := os.Rename(oldpath, newpath); err == nil {
+		return nil
+	}
+	// Concurrent, same-directory rename operations sometimes fail on certain
+	// filesystems, so we retry once after a random backoff.
+	backoff := time.Duration(rand.Int63n(1000)) * time.Millisecond
+	time.Sleep(backoff)
+	return os.Rename(oldpath, newpath)
+}
+
+// buildGoPkg builds pkg with "go build" (passing the extra flags) and returns
+// the path of the resulting binary inside sh.Opts.BinDir. The binary is named
+// after the last element of pkg. If that path already exists, the build is
+// skipped. Otherwise the binary is built in a temporary subdirectory of BinDir
+// and then renamed into place.
+func (sh *Shell) buildGoPkg(pkg string, flags ...string) (string, error) {
+	binName := path.Base(pkg)
+	binPath := filepath.Join(sh.Opts.BinDir, binName)
+	// If this binary has already been built, don't rebuild it.
+	switch _, err := os.Stat(binPath); {
+	case err == nil:
+		return binPath, nil
+	case !os.IsNotExist(err):
+		return "", err
+	}
+	// Build binary to tempBinPath, then move it to binPath.
+	tempDir, err := ioutil.TempDir(sh.Opts.BinDir, "")
+	if err != nil {
+		return "", err
+	}
+	defer os.RemoveAll(tempDir)
+	tempBinPath := filepath.Join(tempDir, binName)
+	buildArgs := append([]string{"build", "-x", "-o", tempBinPath}, flags...)
+	buildArgs = append(buildArgs, pkg)
+	build, err := sh.cmd(nil, "go", buildArgs...)
+	if err != nil {
+		return "", err
+	}
+	// The build's own output is never propagated to the parent's streams.
+	build.SuppressOutput = true
+	if err := build.run(); err != nil {
+		return "", err
+	}
+	if err := sh.rename(tempBinPath, binPath); err != nil {
+		return "", err
+	}
+	return binPath, nil
+}
+
+// makeTempFile creates a temporary file in the default temp directory and
+// records it in sh.tempFiles so that cleanup closes and removes it. It holds
+// cleanupMu throughout and fails if cleanup has already been requested.
+func (sh *Shell) makeTempFile() (*os.File, error) {
+	sh.cleanupMu.Lock()
+	defer sh.cleanupMu.Unlock()
+	if sh.calledCleanup {
+		return nil, errAlreadyCalledCleanup
+	}
+	tempFile, err := ioutil.TempFile("", "")
+	if err != nil {
+		return nil, err
+	}
+	sh.tempFiles = append(sh.tempFiles, tempFile)
+	return tempFile, nil
+}
+
+// makeTempDir creates a temporary directory in the default temp directory and
+// records it in sh.tempDirs so that cleanup removes it. It holds cleanupMu
+// throughout and fails if cleanup has already been requested.
+func (sh *Shell) makeTempDir() (string, error) {
+	sh.cleanupMu.Lock()
+	defer sh.cleanupMu.Unlock()
+	if sh.calledCleanup {
+		return "", errAlreadyCalledCleanup
+	}
+	tempDir, err := ioutil.TempDir("", "")
+	if err != nil {
+		return "", err
+	}
+	sh.tempDirs = append(sh.tempDirs, tempDir)
+	return tempDir, nil
+}
+
+// pushd changes the process's working directory to dir and, on success,
+// pushes the previous working directory onto sh.dirStack. The stack is left
+// untouched if either step fails.
+func (sh *Shell) pushd(dir string) error {
+	prevDir, err := os.Getwd()
+	if err != nil {
+		return err
+	}
+	err = os.Chdir(dir)
+	if err != nil {
+		return err
+	}
+	sh.dirStack = append(sh.dirStack, prevDir)
+	return nil
+}
+
+// popd changes the process's working directory to the directory on top of
+// sh.dirStack and, on success, pops that entry. It returns an error if the
+// stack is empty; the stack is left untouched if the chdir fails.
+func (sh *Shell) popd() error {
+	n := len(sh.dirStack)
+	if n == 0 {
+		return errors.New("dir stack is empty")
+	}
+	if err := os.Chdir(sh.dirStack[n-1]); err != nil {
+		return err
+	}
+	sh.dirStack = sh.dirStack[:n-1]
+	return nil
+}
+
+// addToCleanup appends fn to the list of functions that cleanup calls. It
+// holds cleanupMu throughout and fails if cleanup has already been requested.
+func (sh *Shell) addToCleanup(fn func()) error {
+	sh.cleanupMu.Lock()
+	defer sh.cleanupMu.Unlock()
+	if !sh.calledCleanup {
+		sh.cleanupFns = append(sh.cleanupFns, fn)
+		return nil
+	}
+	return errAlreadyCalledCleanup
+}
+
+// forEachRunningCmd applies fn to each running child process and reports
+// whether any was found. A command counts as running if it has been started,
+// has a non-nil Process, and the process group of its PID can be looked up and
+// equals this process's PID. fn may be nil, in which case the method only
+// reports whether any child is running.
+func (sh *Shell) forEachRunningCmd(fn func(*Cmd)) bool {
+	sawRunning := false
+	for _, c := range sh.cmds {
+		if !c.calledStart() || c.c.Process == nil {
+			continue // not started
+		}
+		pgid, err := syscall.Getpgid(c.c.Process.Pid)
+		if err != nil || pgid != os.Getpid() {
+			continue // not our child
+		}
+		sawRunning = true
+		if fn != nil {
+			fn(c)
+		}
+	}
+	return sawRunning
+}
+
+// terminateRunningCmds stops all running child processes, escalating in
+// steps: it sends os.Interrupt to each, waits 50ms, logs the ones that are
+// still running, waits one more second, and then sends SIGKILL to those that
+// remain. Failures to signal or kill are logged, not returned.
+//
+// Note: It is safe to run Shell.terminateRunningCmds() concurrently with
+// Cmd.wait(). In particular, Shell.terminateRunningCmds() only reads
+// c.c.Process.{Pid,Path} and calls c.c.Process.{Signal,Kill}, all of which are
+// thread-safe with Cmd.wait().
+func (sh *Shell) terminateRunningCmds() {
+	// Try SIGINT first; if that doesn't work, use SIGKILL.
+	anyRunning := sh.forEachRunningCmd(func(c *Cmd) {
+		if err := c.c.Process.Signal(os.Interrupt); err != nil {
+			sh.logf("%d.Signal(os.Interrupt) failed: %v\n", c.c.Process.Pid, err)
+		}
+	})
+	// If any child is still running, wait for 50ms.
+	if anyRunning {
+		time.Sleep(50 * time.Millisecond)
+		// Re-scan after the grace period, logging each child that is still alive.
+		anyRunning = sh.forEachRunningCmd(func(c *Cmd) {
+			sh.logf("%s (PID %d) did not die\n", c.c.Path, c.c.Process.Pid)
+		})
+	}
+	// If any child is still running, wait for another second, then send SIGKILL
+	// to all running children.
+	if anyRunning {
+		time.Sleep(time.Second)
+		sh.forEachRunningCmd(func(c *Cmd) {
+			if err := c.c.Process.Kill(); err != nil {
+				sh.logf("%d.Kill() failed: %v\n", c.c.Process.Pid, err)
+			}
+		})
+		sh.logf("Sent SIGKILL to all remaining child processes\n")
+	}
+}
+
+// cleanup releases everything the Shell owns, in this order: it terminates
+// running child processes (skipped, with a log message, if this process is not
+// its own process group leader), closes and removes temporary files, removes
+// temporary directories, and calls the registered cleanup functions in LIFO
+// order. Failures are logged rather than returned.
+func (sh *Shell) cleanup() {
+	// Terminate all children that are still running. Note, newShell() calls
+	// syscall.Setpgid().
+	if pgid, pid := syscall.Getpgrp(), os.Getpid(); pgid == pid {
+		sh.terminateRunningCmds()
+	} else {
+		sh.logf("PGID (%d) != PID (%d); skipping subprocess termination\n", pgid, pid)
+	}
+	// Close and delete all temporary files.
+	for _, f := range sh.tempFiles {
+		fileName := f.Name()
+		if err := f.Close(); err != nil {
+			sh.logf("%q.Close() failed: %v\n", fileName, err)
+		}
+		if err := os.RemoveAll(fileName); err != nil {
+			sh.logf("os.RemoveAll(%q) failed: %v\n", fileName, err)
+		}
+	}
+	// Delete all temporary directories.
+	for _, dir := range sh.tempDirs {
+		if err := os.RemoveAll(dir); err != nil {
+			sh.logf("os.RemoveAll(%q) failed: %v\n", dir, err)
+		}
+	}
+	// Call any registered cleanup functions in LIFO order.
+	for n := len(sh.cleanupFns); n > 0; n-- {
+		sh.cleanupFns[n-1]()
+	}
+}
+
+////////////////////////////////////////
+// Public utilities
+
+// calledMaybeRunFnAndExit records whether MaybeRunFnAndExit has been called in
+// this process. Shell.fn and Shell.main refuse to run while it is false.
+var calledMaybeRunFnAndExit = false
+
+// MaybeRunFnAndExit must be called first thing in main() or TestMain(), before
+// flags are parsed. It records that it has been called, then inspects the
+// envInvocation env var. If that var is empty (as in the parent process), it
+// returns with no further effect. Otherwise (a child process started for a
+// Shell.Fn() or Shell.Main() command), it decodes the invocation, runs the
+// named function, and exits: with status 0 on success, or via log.Fatal if
+// decoding or the function call fails.
+func MaybeRunFnAndExit() {
+	calledMaybeRunFnAndExit = true
+	encoded := os.Getenv(envInvocation)
+	if encoded == "" {
+		return
+	}
+	os.Unsetenv(envInvocation)
+	// Call MaybeWatchParent rather than WatchParent so that envSpawnedByShell
+	// gets cleared.
+	MaybeWatchParent()
+	name, args, err := decInvocation(encoded)
+	if err == nil {
+		err = Call(name, args...)
+	}
+	if err != nil {
+		log.Fatal(err)
+	}
+	os.Exit(0)
+}
+
+// Run calls MaybeRunFnAndExit() and then, if that returns (i.e. this is not a
+// child process running a registered function), calls run and returns its
+// result. Exported so that TestMain functions can simply call
+// os.Exit(gosh.Run(m.Run)).
+func Run(run func() int) int {
+	MaybeRunFnAndExit()
+	exitCode := run()
+	return exitCode
+}
diff --git a/gosh/shell_test.go b/gosh/shell_test.go
new file mode 100644
index 0000000..ec13332
--- /dev/null
+++ b/gosh/shell_test.go
@@ -0,0 +1,312 @@
+// Copyright 2015 The Vanadium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package gosh_test
+
+// TODO(sadovsky): Add more tests:
+// - variadic function registration and invocation
+// - shell cleanup
+// - Cmd.{Wait,Run}
+// - Shell.{Args,Wait,Rename,MakeTempFile,MakeTempDir}
+// - Opts (including defaulting behavior)
+// - {,Maybe}WatchParent
+
+import (
+	"errors"
+	"fmt"
+	"io"
+	"io/ioutil"
+	"os"
+	"path/filepath"
+	"reflect"
+	"runtime/debug"
+	"testing"
+	"time"
+
+	"v.io/x/lib/gosh"
+	"v.io/x/lib/gosh/internal/gosh_example_lib"
+)
+
+// fakeError is a placeholder error that tests assign to Shell.Err or pass to
+// Shell.HandleError to simulate a failure.
+var fakeError = errors.New("fake error")
+
+// fatal prints the current goroutine's stack trace, then fails the test
+// immediately via t.Fatal with the given values.
+func fatal(t *testing.T, v ...interface{}) {
+	debug.PrintStack()
+	t.Fatal(v...)
+}
+
+// fatalf prints the current goroutine's stack trace, then fails the test
+// immediately via t.Fatalf with the given format and values.
+func fatalf(t *testing.T, format string, v ...interface{}) {
+	debug.PrintStack()
+	t.Fatalf(format, v...)
+}
+
+// ok fails the test (via fatal) if err is non-nil.
+func ok(t *testing.T, err error) {
+	if err == nil {
+		return
+	}
+	fatal(t, err)
+}
+
+// nok fails the test (via fatal) if err is nil, i.e. when an expected error
+// did not occur.
+func nok(t *testing.T, err error) {
+	if err != nil {
+		return
+	}
+	fatal(t, "nil err")
+}
+
+// eq fails the test (via fatalf) unless got and want are deeply equal
+// according to reflect.DeepEqual.
+func eq(t *testing.T, got, want interface{}) {
+	if reflect.DeepEqual(got, want) {
+		return
+	}
+	fatalf(t, "got %v, want %v", got, want)
+}
+
+// neq fails the test (via fatalf) if got and notWant are deeply equal
+// according to reflect.DeepEqual.
+func neq(t *testing.T, got, notWant interface{}) {
+	if !reflect.DeepEqual(got, notWant) {
+		return
+	}
+	fatalf(t, "got %v", got)
+}
+
+// makeErrorf returns a function suitable for gosh.Opts.Errorf that prints the
+// current stack trace and then fails t immediately via t.Fatalf.
+func makeErrorf(t *testing.T) func(string, ...interface{}) {
+	errorf := func(format string, v ...interface{}) {
+		debug.PrintStack()
+		t.Fatalf(format, v...)
+	}
+	return errorf
+}
+
+func TestPushdPopd(t *testing.T) {
+	sh := gosh.NewShell(gosh.Opts{Errorf: makeErrorf(t), Logf: t.Logf})
+	defer sh.Cleanup()
+	startDir, err := os.Getwd()
+	ok(t, err)
+	parentDir := filepath.Dir(startDir)
+	neq(t, startDir, parentDir)
+	sh.Pushd(parentDir)
+	cwd, err := os.Getwd()
+	ok(t, err)
+	eq(t, cwd, parentDir)
+	sh.Pushd(startDir)
+	cwd, err = os.Getwd()
+	ok(t, err)
+	eq(t, cwd, startDir)
+	sh.Popd()
+	cwd, err = os.Getwd()
+	ok(t, err)
+	eq(t, cwd, parentDir)
+	sh.Popd()
+	cwd, err = os.Getwd()
+	ok(t, err)
+	eq(t, cwd, startDir)
+	// The next sh.Popd() will fail.
+	var calledErrorf bool
+	sh.Opts.Errorf = func(string, ...interface{}) { calledErrorf = true }
+	sh.Popd()
+	// Note, our deferred sh.Cleanup() should succeed despite this error.
+	nok(t, sh.Err)
+	eq(t, calledErrorf, true)
+}
+
+// TestCmds builds the example server and client binaries, starts the server,
+// waits for it to report its address, and checks that the client prints the
+// expected greeting when pointed at that address.
+func TestCmds(t *testing.T) {
+	sh := gosh.NewShell(gosh.Opts{Errorf: makeErrorf(t), Logf: t.Logf})
+	defer sh.Cleanup()
+
+	// Start server.
+	serverBin := sh.BuildGoPkg("v.io/x/lib/gosh/internal/gosh_example_server")
+	server := sh.Cmd(serverBin)
+	server.Start()
+	server.AwaitReady()
+	addr := server.AwaitVars("Addr")["Addr"]
+	neq(t, addr, "")
+
+	// Run client.
+	clientBin := sh.BuildGoPkg("v.io/x/lib/gosh/internal/gosh_example_client")
+	client := sh.Cmd(clientBin, "-addr="+addr)
+	stdout, _ := client.Output()
+	eq(t, string(stdout), "Hello, world!\n")
+}
+
+// Functions from the example library, registered so that tests can run them
+// in child processes via Shell.Fn.
+var (
+	get   = gosh.Register("get", lib.Get)
+	serve = gosh.Register("serve", lib.Serve)
+)
+
+// TestFns is the registered-function analogue of TestCmds: it runs the
+// registered serve function in a child process, waits for it to report its
+// address, and checks that the registered get function prints the expected
+// greeting when given that address.
+func TestFns(t *testing.T) {
+	sh := gosh.NewShell(gosh.Opts{Errorf: makeErrorf(t), Logf: t.Logf})
+	defer sh.Cleanup()
+
+	// Start server.
+	server := sh.Fn(serve)
+	server.Start()
+	server.AwaitReady()
+	addr := server.AwaitVars("Addr")["Addr"]
+	neq(t, addr, "")
+
+	// Run client.
+	client := sh.Fn(get, addr)
+	stdout, _ := client.Output()
+	eq(t, string(stdout), "Hello, world!\n")
+}
+
+// TestShellMain runs the example library's registered main function in a
+// child process via Shell.Main and checks its stdout.
+func TestShellMain(t *testing.T) {
+	sh := gosh.NewShell(gosh.Opts{Errorf: makeErrorf(t), Logf: t.Logf})
+	defer sh.Cleanup()
+	c := sh.Main(lib.HelloWorldMain)
+	stdout, _ := c.Output()
+	eq(t, string(stdout), "Hello, world!\n")
+}
+
+// write is a registered function that performs two rounds of output. In each
+// round it writes "A" to stdout (if stdout is true) and then "B" to stderr (if
+// stderr is true), sleeping for a millisecond before each write. It returns
+// the first write error encountered, if any.
+var write = gosh.Register("write", func(stdout, stderr bool) error {
+	for round := 0; round < 2; round++ {
+		if stdout {
+			time.Sleep(time.Millisecond)
+			if _, err := os.Stdout.Write([]byte("A")); err != nil {
+				return err
+			}
+		}
+		if stderr {
+			time.Sleep(time.Millisecond)
+			if _, err := os.Stderr.Write([]byte("B")); err != nil {
+				return err
+			}
+		}
+	}
+	return nil
+})
+
+// toString reads r until EOF and returns everything read as a string. It
+// panics if reading fails.
+func toString(r io.Reader) string {
+	data, err := ioutil.ReadAll(r)
+	if err != nil {
+		panic(err)
+	}
+	return string(data)
+}
+
+// TestStdoutStderr runs the registered write function with each combination
+// of output streams and checks what arrives via CombinedOutput, via the
+// stdout/stderr pipes, and via Output.
+func TestStdoutStderr(t *testing.T) {
+	sh := gosh.NewShell(gosh.Opts{Errorf: makeErrorf(t), Logf: t.Logf})
+	defer sh.Cleanup()
+
+	cases := []struct {
+		toStdout, toStderr       bool
+		combined, stdout, stderr string
+	}{
+		// Write to stdout only.
+		{true, false, "AA", "AA", ""},
+		// Write to stderr only.
+		{false, true, "BB", "", "BB"},
+		// Write to both stdout and stderr.
+		{true, true, "ABAB", "AA", "BB"},
+	}
+	for _, tc := range cases {
+		// The pipes are requested before the command runs, and read afterwards.
+		c := sh.Fn(write, tc.toStdout, tc.toStderr)
+		stdoutPipe, stderrPipe := c.StdoutPipe(), c.StderrPipe()
+		eq(t, string(c.CombinedOutput()), tc.combined)
+		eq(t, toString(stdoutPipe), tc.stdout)
+		eq(t, toString(stderrPipe), tc.stderr)
+		stdout, stderr := sh.Fn(write, tc.toStdout, tc.toStderr).Output()
+		eq(t, string(stdout), tc.stdout)
+		eq(t, string(stderr), tc.stderr)
+	}
+}
+
+// sleep is a registered function that sleeps for the given duration and then
+// returns.
+var sleep = gosh.Register("sleep", func(dur time.Duration) {
+	time.Sleep(dur)
+})
+
+// TestShutdown starts the registered sleep function with each combination of
+// sleep duration and signal, and calls Shutdown with that signal shortly after
+// starting the command. With the zero duration the child has presumably
+// already exited by the time Shutdown is called.
+func TestShutdown(t *testing.T) {
+	sh := gosh.NewShell(gosh.Opts{Errorf: makeErrorf(t), Logf: t.Logf})
+	defer sh.Cleanup()
+
+	delays := []time.Duration{0, time.Second}
+	signals := []os.Signal{os.Interrupt, os.Kill}
+	for _, delay := range delays {
+		for _, sig := range signals {
+			fmt.Println(delay, sig)
+			c := sh.Fn(sleep, delay)
+			c.Start()
+			time.Sleep(10 * time.Millisecond)
+			c.Shutdown(sig)
+		}
+	}
+}
+
+// Tests that sh.Ok panics under various conditions. Each case runs in its own
+// closure so that a deferred recover can observe the panic; the case fails if
+// recover returns nil.
+func TestOkPanics(t *testing.T) {
+	func() { // errDidNotCallNewShell
+		// A zero-valued Shell was not created via NewShell.
+		sh := gosh.Shell{}
+		defer func() { neq(t, recover(), nil) }()
+		sh.Ok()
+	}()
+	func() { // errShellErrIsNotNil
+		sh := gosh.NewShell(gosh.Opts{Errorf: t.Logf})
+		// Deferred first, so it runs last: after the panic has been recovered.
+		defer sh.Cleanup()
+		sh.Err = fakeError
+		defer func() { neq(t, recover(), nil) }()
+		sh.Ok()
+	}()
+	func() { // errAlreadyCalledCleanup
+		sh := gosh.NewShell(gosh.Opts{Errorf: t.Logf})
+		sh.Cleanup()
+		defer func() { neq(t, recover(), nil) }()
+		sh.Ok()
+	}()
+}
+
+// Tests that sh.HandleError panics under various conditions. These are the
+// same conditions as for sh.Ok, which HandleError calls first. Each case runs
+// in its own closure so that a deferred recover can observe the panic; the
+// case fails if recover returns nil.
+func TestHandleErrorPanics(t *testing.T) {
+	func() { // errDidNotCallNewShell
+		// A zero-valued Shell was not created via NewShell.
+		sh := gosh.Shell{}
+		defer func() { neq(t, recover(), nil) }()
+		sh.HandleError(fakeError)
+	}()
+	func() { // errShellErrIsNotNil
+		sh := gosh.NewShell(gosh.Opts{Errorf: t.Logf})
+		// Deferred first, so it runs last: after the panic has been recovered.
+		defer sh.Cleanup()
+		sh.Err = fakeError
+		defer func() { neq(t, recover(), nil) }()
+		sh.HandleError(fakeError)
+	}()
+	func() { // errAlreadyCalledCleanup
+		sh := gosh.NewShell(gosh.Opts{Errorf: t.Logf})
+		sh.Cleanup()
+		defer func() { neq(t, recover(), nil) }()
+		sh.HandleError(fakeError)
+	}()
+}
+
+// Tests that sh.Cleanup succeeds (does not panic) even if sh.Err is not nil.
+func TestCleanupAfterError(t *testing.T) {
+	opts := gosh.Opts{Errorf: t.Logf}
+	sh := gosh.NewShell(opts)
+	sh.Err = fakeError
+	sh.Cleanup()
+}
+
+// Tests that sh.Cleanup panics under various conditions. Each case runs in its
+// own closure so that a deferred recover can observe the panic; the case fails
+// if recover returns nil.
+func TestCleanupPanics(t *testing.T) {
+	func() { // errDidNotCallNewShell
+		// A zero-valued Shell was not created via NewShell.
+		sh := gosh.Shell{}
+		defer func() { neq(t, recover(), nil) }()
+		sh.Cleanup()
+	}()
+	func() { // errAlreadyCalledCleanup
+		sh := gosh.NewShell(gosh.Opts{Errorf: t.Logf})
+		sh.Cleanup()
+		defer func() { neq(t, recover(), nil) }()
+		// The second call to Cleanup is the one expected to panic.
+		sh.Cleanup()
+	}()
+}
+
+// TestMain routes test startup through gosh.Run so that a child process
+// spawned by Shell.Fn or Shell.Main runs its registered function instead of
+// the test suite. In the parent process it runs the tests and exits with
+// their status.
+func TestMain(m *testing.M) {
+	exitCode := gosh.Run(m.Run)
+	os.Exit(exitCode)
+}