gosh: Exit signal-handling goroutine on Cleanup, other cleanups.

I noticed while running tests that we had a bunch of termination
signal handler goroutines running.  It's better to clean up these
goroutines, since they do take up some resources, and more
importantly, clutter up the stack traces on failure.  The new
logic shuts down the goroutine when cleanup is called.  It's hard
to write a test to verify this, since Go doesn't provide a way to
detect that a goroutine has finished.  So I don't have a test.

I also did some minor reorganizing to get better stacktraces from
the goroutines spawned by gosh; basically making sure we had
named functions in the stacktrace to help debugging.

I also noticed that sh.dirStack is used in cleanup, so we need to
ensure sh.cleanupMu is locked when touching it.  I think I missed
this when I added the dirStack to the cleanup.

Finally, I noticed that TestSignal and TestTerminate are flaky,
because of a race between the child process installing the signal
handler, and the test process sending the signal.  I added some
synchronization to avoid this race, and also changed to children
to sleep for 1 hour rather than 1 second, since we're trying to
test the case where they haven't exited yet.

Change-Id: I73a080e781008a1fdcf3401b41d2d5b84e108560
diff --git a/gosh/cmd.go b/gosh/cmd.go
index 9fa6d1f..5aab1c7 100644
--- a/gosh/cmd.go
+++ b/gosh/cmd.go
@@ -483,6 +483,11 @@
 // that calls InitChildMain.
 
 func (c *Cmd) start() error {
+	defer func() {
+		if !c.started {
+			c.closeClosers()
+		}
+	}()
 	if c.calledStart {
 		return errAlreadyCalledStart
 	}
@@ -520,27 +525,28 @@
 		return err
 	}
 	// Start the command.
-	onExit := func(err error) {
+	if err = c.c.Start(); err != nil {
+		return err
+	}
+	c.started = true
+	c.startExitWaiter()
+	return nil
+}
+
+// startExitWaiter spawns a goroutine that calls exec.Cmd.Wait, waiting for the
+// process to exit. Calling exec.Cmd.Wait here rather than in gosh.Cmd.Wait
+// ensures that the child process is reaped once it exits. Note, gosh.Cmd.wait
+// blocks on waitChan.
+func (c *Cmd) startExitWaiter() {
+	go func() {
+		waitErr := c.c.Wait()
 		c.cond.L.Lock()
 		c.exited = true
 		c.cond.Signal()
 		c.cond.L.Unlock()
 		c.closeClosers()
-		c.waitChan <- err
-	}
-	if err = c.c.Start(); err != nil {
-		onExit(errors.New("gosh: start failed"))
-		return err
-	}
-	c.started = true
-	// Spawn a "waiter" goroutine that calls exec.Cmd.Wait and thus waits for the
-	// process to exit. Calling exec.Cmd.Wait here rather than in gosh.Cmd.Wait
-	// ensures that the child process is reaped once it exits. Note, gosh.Cmd.wait
-	// blocks on waitChan.
-	go func() {
-		onExit(c.c.Wait())
+		c.waitChan <- waitErr
 	}()
-	return nil
 }
 
 // TODO(sadovsky): Maybe add optional timeouts for
diff --git a/gosh/shell.go b/gosh/shell.go
index 59a3171..8e17747 100644
--- a/gosh/shell.go
+++ b/gosh/shell.go
@@ -56,12 +56,13 @@
 	Args []string
 	// Internal state.
 	calledNewShell bool
-	dirStack       []string   // for pushd/popd
+	cleanupDone    chan struct{}
 	cleanupMu      sync.Mutex // protects the fields below; held during cleanup
 	calledCleanup  bool
 	cmds           []*Cmd
 	tempFiles      []*os.File
 	tempDirs       []string
+	dirStack       []string // for pushd/popd
 	cleanupFuncs   []func()
 }
 
@@ -217,16 +218,6 @@
 ////////////////////////////////////////
 // Internals
 
-// onTerminationSignal starts a goroutine that listens for various termination
-// signals and calls the given function when such a signal is received.
-func onTerminationSignal(f func(os.Signal)) {
-	ch := make(chan os.Signal, 1)
-	signal.Notify(ch, syscall.SIGINT, syscall.SIGQUIT, syscall.SIGTERM)
-	go func() {
-		f(<-ch)
-	}()
-}
-
 // Note: On error, newShell returns a *Shell with Opts.Fatalf initialized to
 // simplify things for the caller.
 func newShell(opts Opts) (*Shell, error) {
@@ -253,6 +244,7 @@
 		Opts:           opts,
 		Vars:           shVars,
 		calledNewShell: true,
+		cleanupDone:    make(chan struct{}),
 	}
 	if sh.Opts.BinDir == "" {
 		sh.Opts.BinDir = osVars[envBinDir]
@@ -264,21 +256,36 @@
 			}
 		}
 	}
-	// Call sh.cleanup() if needed when a termination signal is received.
-	onTerminationSignal(func(sig os.Signal) {
-		sh.logf("Received signal: %v\n", sig)
-		sh.cleanupMu.Lock()
-		defer sh.cleanupMu.Unlock()
-		if !sh.calledCleanup {
-			sh.cleanup()
-		}
-		// Note: We hold cleanupMu during os.Exit(1) so that the main goroutine will
-		// not call Shell.Ok() and panic before we exit.
-		os.Exit(1)
-	})
+	sh.cleanupOnSignal()
 	return sh, nil
 }
 
+// cleanupOnSignal starts a goroutine that calls cleanup if a termination signal
+// is received.
+func (sh *Shell) cleanupOnSignal() {
+	ch := make(chan os.Signal, 1)
+	signal.Notify(ch, syscall.SIGINT, syscall.SIGQUIT, syscall.SIGTERM)
+	go func() {
+		select {
+		case sig := <-ch:
+			// A termination signal was received, the process will exit.
+			sh.logf("Received signal: %v\n", sig)
+			sh.cleanupMu.Lock()
+			defer sh.cleanupMu.Unlock()
+			if !sh.calledCleanup {
+				sh.cleanup()
+			}
+			// Note: We hold cleanupMu during os.Exit(1) so that the main goroutine will
+			// not call Shell.Ok() and panic before we exit.
+			os.Exit(1)
+		case <-sh.cleanupDone:
+			// The user called sh.Cleanup; stop listening for signals and exit this
+			// goroutine.
+		}
+		signal.Stop(ch)
+	}()
+}
+
 func (sh *Shell) logf(format string, v ...interface{}) {
 	if sh.Opts.Logf != nil {
 		sh.Opts.Logf(format, v...)
@@ -410,6 +417,11 @@
 }
 
 func (sh *Shell) pushd(dir string) error {
+	sh.cleanupMu.Lock()
+	defer sh.cleanupMu.Unlock()
+	if sh.calledCleanup {
+		return errAlreadyCalledCleanup
+	}
 	cwd, err := os.Getwd()
 	if err != nil {
 		return err
@@ -422,6 +434,11 @@
 }
 
 func (sh *Shell) popd() error {
+	sh.cleanupMu.Lock()
+	defer sh.cleanupMu.Unlock()
+	if sh.calledCleanup {
+		return errAlreadyCalledCleanup
+	}
 	if len(sh.dirStack) == 0 {
 		return errors.New("gosh: dir stack is empty")
 	}
@@ -519,6 +536,7 @@
 	for i := len(sh.cleanupFuncs) - 1; i >= 0; i-- {
 		sh.cleanupFuncs[i]()
 	}
+	close(sh.cleanupDone)
 }
 
 ////////////////////////////////////////
diff --git a/gosh/shell_test.go b/gosh/shell_test.go
index f8e1840..04b3510 100644
--- a/gosh/shell_test.go
+++ b/gosh/shell_test.go
@@ -118,6 +118,9 @@
 			<-ch
 			os.Exit(0)
 		}()
+		// The parent waits for this ready signal, to avoid the race where a signal
+		// is sent before the handler is installed.
+		gosh.SendReady()
 		time.Sleep(d)
 		os.Exit(code)
 	})
@@ -540,21 +543,23 @@
 	sh := gosh.NewShell(gosh.Opts{Fatalf: makeFatalf(t), Logf: t.Logf})
 	defer sh.Cleanup()
 
-	for _, d := range []time.Duration{0, time.Second} {
+	for _, d := range []time.Duration{0, time.Hour} {
 		for _, s := range []os.Signal{os.Interrupt, os.Kill} {
 			fmt.Println(d, s)
 			c := sh.FuncCmd(sleepFunc, d, 0)
 			c.Start()
+			c.AwaitReady()
 			// Wait for a bit to allow the zero-sleep commands to exit.
 			time.Sleep(100 * time.Millisecond)
 			c.Signal(s)
-			// Wait should succeed as long as the exit code was 0, regardless of
-			// whether the signal arrived or the process had already exited.
-			if s == os.Interrupt {
+			switch {
+			case s == os.Interrupt:
+				// Wait should succeed as long as the exit code was 0, regardless of
+				// whether the signal arrived or the process had already exited.
+				c.Wait()
+			case d != 0:
 				// Note: We don't call Wait in the {d: 0, s: os.Kill} case because doing
 				// so makes the test flaky on slow systems.
-				c.Wait()
-			} else if d == time.Second {
 				setsErr(t, sh, func() { c.Wait() })
 			}
 		}
@@ -570,11 +575,12 @@
 	sh := gosh.NewShell(gosh.Opts{Fatalf: makeFatalf(t), Logf: t.Logf})
 	defer sh.Cleanup()
 
-	for _, d := range []time.Duration{0, time.Second} {
+	for _, d := range []time.Duration{0, time.Hour} {
 		for _, s := range []os.Signal{os.Interrupt, os.Kill} {
 			fmt.Println(d, s)
 			c := sh.FuncCmd(sleepFunc, d, 0)
 			c.Start()
+			c.AwaitReady()
 			// Wait for a bit to allow the zero-sleep commands to exit.
 			time.Sleep(100 * time.Millisecond)
 			// Terminate should succeed regardless of the exit code, and regardless of