Merge "veyron/services/mgmt/device/impl,veyron/lib/unixfd: more close-on-exec stuff"
diff --git a/lib/modules/shell.go b/lib/modules/shell.go
index c6cd898..cc14137 100644
--- a/lib/modules/shell.go
+++ b/lib/modules/shell.go
@@ -136,7 +136,7 @@
 	return sh, nil
 }
 
-func (sh *Shell) getChildCredentials() (*os.File, error) {
+func (sh *Shell) getChildCredentials() (c *os.File, err error) {
 	if sh.ctx == nil {
 		return nil, nil
 	}
@@ -146,11 +146,16 @@
 	if err != nil {
 		return nil, err
 	}
+	defer func() {
+		if err != nil {
+			conn.Close()
+		}
+	}()
 	ctx, cancel := context.WithCancel(sh.ctx)
+	defer cancel()
 	if ctx, _, err = veyron2.SetNewStreamManager(ctx); err != nil {
 		return nil, err
 	}
-	defer cancel()
 	syscall.ForkLock.RLock()
 	fd, err := syscall.Dup(int(conn.Fd()))
 	if err != nil {
@@ -161,6 +166,7 @@
 	syscall.ForkLock.RUnlock()
 	p, err := agent.NewAgentPrincipal(ctx, fd, veyron2.GetClient(ctx))
 	if err != nil {
+		syscall.Close(fd)
 		return nil, err
 	}
 	blessingForChild, err := root.Bless(p.PublicKey(), rootBlessing, childBlessingExtension, security.UnconstrainedUse())
diff --git a/lib/unixfd/unixfd.go b/lib/unixfd/unixfd.go
index fd17a93..fa0fe85 100644
--- a/lib/unixfd/unixfd.go
+++ b/lib/unixfd/unixfd.go
@@ -231,12 +231,16 @@
 	// This is to work around a race on OS X where it appears we can close
 	// the file descriptor before it gets transfered over the socket.
 	f := local.releaseFile()
+	syscall.ForkLock.Lock()
 	fd, err := syscall.Dup(int(f.Fd()))
 	if err != nil {
+		syscall.ForkLock.Unlock()
 		f.Close()
 		rfile.Close()
 		return nil, err
 	}
+	syscall.CloseOnExec(fd)
+	syscall.ForkLock.Unlock()
 	newConn, err := net.FileConn(f)
 	f.Close()
 	if err != nil {
@@ -290,11 +294,15 @@
 		return nil, n, nil, nil
 	}
 	result := Addr(uintptr(fd))
+	syscall.ForkLock.Lock()
 	fd, err = syscall.Dup(fd)
 	if err != nil {
+		syscall.ForkLock.Unlock()
 		CloseUnixAddr(result)
 		return nil, n, nil, err
 	}
+	syscall.CloseOnExec(fd)
+	syscall.ForkLock.Unlock()
 	file := os.NewFile(uintptr(fd), "newconn")
 	newconn, err := net.FileConn(file)
 	file.Close()
diff --git a/security/agent/client.go b/security/agent/client.go
index f3128f9..77c6dde 100644
--- a/security/agent/client.go
+++ b/security/agent/client.go
@@ -57,7 +57,8 @@
 // NewAgentPrincipal returns a security.Pricipal using the PrivateKey held in a remote agent process.
 // 'fd' is the socket for connecting to the agent, typically obtained from
 // os.GetEnv(agent.FdVarName).
-// 'ctx' should not have a deadline, and should never be cancelled.
+// 'ctx' should not have a deadline, and should never be cancelled while the
+// principal is in use.
 func NewAgentPrincipal(ctx *context.T, fd int, insecureClient ipc.Client) (security.Principal, error) {
 	f := os.NewFile(uintptr(fd), "agent_client")
 	defer f.Close()
diff --git a/services/mgmt/device/impl/app_service.go b/services/mgmt/device/impl/app_service.go
index 1538c98..1a49e46 100644
--- a/services/mgmt/device/impl/app_service.go
+++ b/services/mgmt/device/impl/app_service.go
@@ -124,7 +124,6 @@
 	"reflect"
 	"strconv"
 	"strings"
-	"sync"
 	"time"
 
 	"v.io/core/veyron2"
@@ -189,12 +188,6 @@
 type securityAgentState struct {
 	// Security agent key manager client.
 	keyMgrAgent *keymgr.Agent
-	// Ensures only one security agent connection socket is created
-	// at any time, preventing fork/exec from potentially passing
-	// down sockets meant for other children (as per ribrdb@, Go's
-	// exec implementation does not prune the set of files passed
-	// down to only include those specified in cmd.ExtraFiles).
-	startLock sync.Mutex
 }
 
 // appService implements the Device manager's Application interface.
@@ -483,14 +476,23 @@
 			vlog.Errorf("NewPrincipal() failed %v", err)
 			return verror2.Make(ErrOperationFailed, nil)
 		}
-		defer conn.Close()
-
-		// TODO(caprita): release the socket created by NewAgentPrincipal.
-		if p, err = agent.NewAgentPrincipal(ctx, int(conn.Fd()), veyron2.GetClient(ctx)); err != nil {
+		agentctx, cancel := context.WithCancel(ctx)
+		if agentctx, _, err = veyron2.SetNewStreamManager(agentctx); err != nil {
+			cancel()
+			conn.Close()
+			vlog.Errorf("SetNewStreamManager failed: %v", err)
+			return verror2.Make(ErrOperationFailed, nil)
+		}
+		defer cancel()
+		if p, err = agent.NewAgentPrincipal(agentctx, int(conn.Fd()), veyron2.GetClient(agentctx)); err != nil {
+			conn.Close()
 			vlog.Errorf("NewAgentPrincipal() failed: %v", err)
 			return verror2.Make(ErrOperationFailed, nil)
 		}
 		info.SecurityAgentHandle = handle
+		// conn will be closed when the connection to the agent is shut
+		// down, as a result of cancel() shutting down the stream
+		// manager.  No need to call conn.Close().
 	} else {
 		credentialsDir := filepath.Join(instanceDir, "credentials")
 		// TODO(caprita): The app's system user id needs access to this dir.
@@ -771,24 +773,16 @@
 	cfg.Set(mgmt.ParentBlessingConfigKey, info.DeviceManagerPeerPattern)
 
 	// Set up any agent-specific state.
-	// NOTE(caprita): This ought to belong in genCmd, but we do it here
-	// to avoid holding on to the lock for too long.
-	//
-	// TODO(caprita): We need to take care to grab/release the lock
-	// excluding concurrent start operations.  See if we can make this more
-	// robust.
+	// NOTE(caprita): This ought to belong in genCmd.
 	var agentCleaner func()
 	if sa := i.securityAgent; sa != nil {
-		sa.startLock.Lock()
 		file, err := sa.keyMgrAgent.NewConnection(info.SecurityAgentHandle)
 		if err != nil {
-			sa.startLock.Unlock()
 			vlog.Errorf("NewConnection(%v) failed: %v", info.SecurityAgentHandle, err)
 			return err
 		}
 		agentCleaner = func() {
 			file.Close()
-			sa.startLock.Unlock()
 		}
 		// We need to account for the file descriptors corresponding to
 		// std{err|out|in} as well as the implementation-specific pipes