services/device/internal/impl: Use suidhelper kill functionality
Use killing via suidhelper to enforce app exit after a Stop() call.
While doing so, enable the deadline parameter to the Stop() call.
Also, use the suidhelper killing to kill off any newly-started app
that either fails to handshake properly or appears to lie about its pid
during the handshake.
In order to get this done without passing the suidhelper path around
everywhere, refactored the way that suidhelper is called to
centralize the knowledge of the suidhelper path and its flags in
helper_manager.go.
Change-Id: I84adcf874814bb040cfe8668da65e0193246c704
diff --git a/services/device/internal/impl/app_service.go b/services/device/internal/impl/app_service.go
index 9aa4316..d5fb7ed 100644
--- a/services/device/internal/impl/app_service.go
+++ b/services/device/internal/impl/app_service.go
@@ -736,7 +736,7 @@
return instanceDir, instanceID, nil
}
-func genCmd(ctx *context.T, instanceDir, helperPath, systemName string, nsRoot string) (*exec.Cmd, error) {
+func genCmd(ctx *context.T, instanceDir, systemName string, nsRoot string) (*exec.Cmd, error) {
versionLink := filepath.Join(instanceDir, "version")
versionDir, err := filepath.EvalSymlinks(versionLink)
if err != nil {
@@ -751,16 +751,7 @@
return nil, verror.New(ErrOperationFailed, ctx, fmt.Sprintf("Stat(%v) failed: %v", binPath, err))
}
- cmd := exec.Command(helperPath)
-
- switch yes, err := suidHelper.suidhelperEnabled(systemName, helperPath); {
- case err != nil:
- return nil, err
- case yes:
- cmd.Args = append(cmd.Args, "--username", systemName)
- case !yes:
- cmd.Args = append(cmd.Args, "--username", systemName, "--dryrun")
- }
+ saArgs := suidAppCmdArgs{targetUser: systemName, binpath: binPath}
// Pass the displayed name of the program (argv0 as seen in ps output)
// Envelope data comes from the user so we sanitize it for safety
@@ -774,40 +765,37 @@
}
}
appName := strings.Map(sanitize, rawAppName)
-
- cmd.Args = append(cmd.Args, "--progname", appName)
+ saArgs.progname = appName
// Set the app's default namespace root to the local namespace.
- cmd.Env = []string{envvar.NamespacePrefix + "=" + nsRoot}
- cmd.Env = append(cmd.Env, envelope.Env...)
+ saArgs.env = []string{envvar.NamespacePrefix + "=" + nsRoot}
+ saArgs.env = append(saArgs.env, envelope.Env...)
rootDir := filepath.Join(instanceDir, "root")
- cmd.Dir = rootDir
- cmd.Args = append(cmd.Args, "--workspace", rootDir)
+ saArgs.dir = rootDir
+ saArgs.workspace = rootDir
logDir := filepath.Join(instanceDir, "logs")
if err := mkdirPerm(logDir, 0755); err != nil {
return nil, err
}
- cmd.Args = append(cmd.Args, "--logdir", logDir)
+ saArgs.logdir = logDir
timestamp := time.Now().UnixNano()
stdoutLog := filepath.Join(logDir, fmt.Sprintf("STDOUT-%d", timestamp))
- if cmd.Stdout, err = openWriteFile(stdoutLog); err != nil {
+ if saArgs.stdout, err = openWriteFile(stdoutLog); err != nil {
return nil, verror.New(ErrOperationFailed, ctx, fmt.Sprintf("OpenFile(%v) failed: %v", stdoutLog, err))
}
stderrLog := filepath.Join(logDir, fmt.Sprintf("STDERR-%d", timestamp))
- if cmd.Stderr, err = openWriteFile(stderrLog); err != nil {
+ if saArgs.stderr, err = openWriteFile(stderrLog); err != nil {
return nil, verror.New(ErrOperationFailed, ctx, fmt.Sprintf("OpenFile(%v) failed: %v", stderrLog, err))
}
- cmd.Args = append(cmd.Args, "--run", binPath)
- cmd.Args = append(cmd.Args, "--")
// Args to be passed by helper to the app.
appArgs := []string{"--log_dir=../logs"}
appArgs = append(appArgs, envelope.Args...)
- cmd.Args = append(cmd.Args, appArgs...)
- return cmd, nil
+ saArgs.appArgs = appArgs
+ return suidHelper.getAppCmd(&saArgs)
}
func (i *appService) startCmd(ctx *context.T, instanceDir string, cmd *exec.Cmd) (int, error) {
@@ -916,7 +904,7 @@
}
var pid int
- cmd, err := genCmd(ctx, instanceDir, i.config.Helper, systemName, i.mtAddress)
+ cmd, err := genCmd(ctx, instanceDir, systemName, i.mtAddress)
if err == nil {
pid, err = i.startCmd(ctx, instanceDir, cmd)
}
@@ -995,9 +983,9 @@
return i.run(ctx, instanceDir, systemName)
}
-func stopAppRemotely(ctx *context.T, appVON string) error {
+func stopAppRemotely(ctx *context.T, appVON string, deadline time.Duration) error {
appStub := appcycle.AppCycleClient(appVON)
- ctx, cancel := context.WithTimeout(ctx, rpcContextLongTimeout)
+ ctx, cancel := context.WithTimeout(ctx, deadline)
defer cancel()
stream, err := appStub.Stop(ctx)
if err != nil {
@@ -1016,20 +1004,19 @@
return nil
}
-func stop(ctx *context.T, instanceDir string, reap reaper) error {
+func stop(ctx *context.T, instanceDir string, reap reaper, deadline time.Duration) error {
info, err := loadInstanceInfo(ctx, instanceDir)
if err != nil {
return err
}
- err = stopAppRemotely(ctx, info.AppCycleMgrName)
+ err = stopAppRemotely(ctx, info.AppCycleMgrName, deadline)
+ reap.forciblySuspend(instanceDir)
if err == nil {
reap.stopWatching(instanceDir)
}
return err
}
-// TODO(caprita): implement deadline for Stop.
-
func (i *appService) Stop(ctx *context.T, _ rpc.ServerCall, deadline time.Duration) error {
instanceDir, err := i.instanceDir()
if err != nil {
@@ -1041,13 +1028,15 @@
if err := transitionInstance(instanceDir, device.InstanceStateStarted, device.InstanceStateStopping); err != nil {
return err
}
- if err := stop(ctx, instanceDir, i.reap); err != nil {
+ if err := stop(ctx, instanceDir, i.reap, deadline); err != nil {
transitionInstance(instanceDir, device.InstanceStateStopping, device.InstanceStateStarted)
return err
}
return transitionInstance(instanceDir, device.InstanceStateStopping, device.InstanceStateStopped)
}
+// TODO(arup): It's weird that this doesn't take a deadline, whereas Stop() does. For now, we
+// are using a hardcoded deadline of rpcContextLongTimeout
func (i *appService) Suspend(ctx *context.T, _ rpc.ServerCall) error {
instanceDir, err := i.instanceDir()
if err != nil {
@@ -1056,7 +1045,7 @@
if err := transitionInstance(instanceDir, device.InstanceStateStarted, device.InstanceStateSuspending); err != nil {
return err
}
- if err := stop(ctx, instanceDir, i.reap); err != nil {
+ if err := stop(ctx, instanceDir, i.reap, rpcContextLongTimeout); err != nil {
transitionInstance(instanceDir, device.InstanceStateSuspending, device.InstanceStateStarted)
return err
}
@@ -1474,7 +1463,7 @@
} else {
debugInfo.StartSystemName = startSystemName
}
- if cmd, err := genCmd(ctx, instanceDir, i.config.Helper, debugInfo.SystemName, i.mtAddress); err != nil {
+ if cmd, err := genCmd(ctx, instanceDir, debugInfo.SystemName, i.mtAddress); err != nil {
return "", err
} else {
debugInfo.Cmd = cmd
diff --git a/services/device/internal/impl/app_starting_util.go b/services/device/internal/impl/app_starting_util.go
index 1170fe9..1c47e15 100644
--- a/services/device/internal/impl/app_starting_util.go
+++ b/services/device/internal/impl/app_starting_util.go
@@ -167,9 +167,8 @@
}
if pidFromHelper != pidFromChild {
- // Something nasty is going on and we should kill pidFromHelper.
- // TODO(arup): In future we'll extend suidhelper to support a kill
- // function, and invoke it here.
+ // Something nasty is going on (the child may be lying).
+ suidHelper.terminatePid(pidFromHelper, nil, nil)
return 0, "", verror.New(ErrOperationFailed, a.ctx,
fmt.Sprintf("Child pids do not match! (%d != %d)", pidFromHelper, pidFromChild))
}
@@ -177,7 +176,7 @@
// The appWatcher will stop the listener if the pid dies while waiting below
childName, err := listener.waitForValue(childReadyTimeout)
if err != nil {
- // TODO(arup) kill pidFromHelper here using suidhelper
+ suidHelper.terminatePid(pidFromHelper, nil, nil)
return 0, "", verror.New(ErrOperationFailed, a.ctx,
fmt.Sprintf("Waiting for child name: %v", err))
}
diff --git a/services/device/internal/impl/device_installer.go b/services/device/internal/impl/device_installer.go
index 4d66a49..689f09e 100644
--- a/services/device/internal/impl/device_installer.go
+++ b/services/device/internal/impl/device_installer.go
@@ -282,18 +282,9 @@
if _, err := initCommand(root, "uninstall", stdout, stderr); err != nil {
return err
}
- cmd := exec.Command(helperPath)
- cmd.Args = append(cmd.Args, "--rm", root)
- if stderr != nil {
- cmd.Stderr = stderr
- }
- if stdout != nil {
- cmd.Stdout = stdout
- }
- if err := cmd.Run(); err != nil {
- return fmt.Errorf("devicemanager's invocation of suidhelper to remove(%v) failed: %v", root, err)
- }
- return nil
+
+ initSuidHelper(helperPath)
+ return suidHelper.deleteFileTree(root, stdout, stderr)
}
// Start starts the device manager.
diff --git a/services/device/internal/impl/dispatcher.go b/services/device/internal/impl/dispatcher.go
index d4e3243..5aad5c1 100644
--- a/services/device/internal/impl/dispatcher.go
+++ b/services/device/internal/impl/dispatcher.go
@@ -122,6 +122,7 @@
if err != nil {
return nil, verror.New(errCantCreateAppWatcher, ctx, err)
}
+ initSuidHelper(config.Helper)
d := &dispatcher{
internal: &internalState{
callback: newCallbackState(config.Name),
diff --git a/services/device/internal/impl/helper_manager.go b/services/device/internal/impl/helper_manager.go
index 960c911..30a19b7 100644
--- a/services/device/internal/impl/helper_manager.go
+++ b/services/device/internal/impl/helper_manager.go
@@ -6,8 +6,11 @@
import (
"fmt"
+ "io"
"os"
+ "os/exec"
"os/user"
+ "strconv"
"v.io/v23/context"
"v.io/v23/security"
@@ -15,16 +18,106 @@
"v.io/x/lib/vlog"
)
-type suidHelperState string
+type suidHelperState struct {
+ dmUser string // user that the device manager is running as
+ helperPath string // path to the suidhelper binary
+}
-var suidHelper suidHelperState
+var suidHelper *suidHelperState
-func init() {
+func initSuidHelper(helperPath string) {
+ if suidHelper != nil || helperPath == "" {
+ return
+ }
+
u, err := user.Current()
if err != nil {
vlog.Panicf("devicemanager has no current user: %v", err)
}
- suidHelper = suidHelperState(u.Username)
+ suidHelper = &suidHelperState{
+ dmUser: u.Username,
+ helperPath: helperPath,
+ }
+}
+
+// terminatePid sends a SIGKILL to the target pid
+func (s suidHelperState) terminatePid(pid int, stdout, stderr io.Writer) error {
+ if err := s.internalModalOp(stdout, stderr, "--kill", strconv.Itoa(pid)); err != nil {
+ return fmt.Errorf("devicemanager's invocation of suidhelper to kill pid %v failed: %v", pid, err)
+ }
+ return nil
+}
+
+// deleteFileTree deletes a file or directory
+func (s suidHelperState) deleteFileTree(dirOrFile string, stdout, stderr io.Writer) error {
+ if err := s.internalModalOp(stdout, stderr, "--rm", dirOrFile); err != nil {
+ return fmt.Errorf("devicemanager's invocation of suidhelper delete %v failed: %v", dirOrFile, err)
+ }
+ return nil
+}
+
+type suidAppCmdArgs struct {
+ // args to helper
+ targetUser, progname, workspace, logdir, binpath string
+ // fields in exec.Cmd
+ env []string
+ stdout, stderr io.Writer
+ dir string
+ // arguments passed to app
+ appArgs []string
+}
+
+// getAppCmd produces an exec.Cmd that can be used to start an app
+func (s suidHelperState) getAppCmd(a *suidAppCmdArgs) (*exec.Cmd, error) {
+ if a.targetUser == "" || a.progname == "" || a.binpath == "" || a.workspace == "" || a.logdir == "" {
+ return nil, fmt.Errorf("Invalid args passed to getAppCmd: %+v", a)
+ }
+
+ cmd := exec.Command(s.helperPath)
+
+ switch yes, err := s.suidhelperEnabled(a.targetUser); {
+ case err != nil:
+ return nil, err
+ case yes:
+ cmd.Args = append(cmd.Args, "--username", a.targetUser)
+ case !yes:
+ cmd.Args = append(cmd.Args, "--username", a.targetUser, "--dryrun")
+ }
+
+ cmd.Args = append(cmd.Args, "--progname", a.progname)
+ cmd.Args = append(cmd.Args, "--workspace", a.workspace)
+ cmd.Args = append(cmd.Args, "--logdir", a.logdir)
+
+ cmd.Args = append(cmd.Args, "--run", a.binpath)
+ cmd.Args = append(cmd.Args, "--")
+
+ cmd.Args = append(cmd.Args, a.appArgs...)
+
+ cmd.Env = a.env
+ cmd.Stdout = a.stdout
+ cmd.Stderr = a.stderr
+ cmd.Dir = a.dir
+
+ return cmd, nil
+}
+
+// internalModalOp is a convenience routine containing the common part of all
+// modal operations. Only other routines implementing specific suidhelper operations
+// (like terminatePid and deleteFileTree) should call this directly.
+func (s suidHelperState) internalModalOp(stdout, stderr io.Writer, arg ...string) error {
+ cmd := exec.Command(s.helperPath)
+ cmd.Args = append(cmd.Args, arg...)
+ if stderr != nil {
+ cmd.Stderr = stderr
+ }
+ if stdout != nil {
+ cmd.Stdout = stdout
+ }
+
+ if err := cmd.Run(); err != nil {
+ vlog.Errorf("failed calling helper with args (%v):%v", arg, err)
+ }
+ return nil
}
// isSetuid is defined like this so we can override its
@@ -35,22 +128,22 @@
}
// suidhelperEnabled determines if the suidhelper must exist and be
-// setuid to run an application as system user un. If false, the
+// setuid to run an application as system user targetUser. If false, the
// setuidhelper must be invoked with the --dryrun flag to skip making
// system calls that will fail or provide apps with a trivial
// priviledge escalation.
-func (dn suidHelperState) suidhelperEnabled(un, helperPath string) (bool, error) {
- helperStat, err := os.Stat(helperPath)
+func (s suidHelperState) suidhelperEnabled(targetUser string) (bool, error) {
+ helperStat, err := os.Stat(s.helperPath)
if err != nil {
- return false, verror.New(ErrOperationFailed, nil, fmt.Sprintf("Stat(%v) failed: %v. helper is required.", helperPath, err))
+ return false, verror.New(ErrOperationFailed, nil, fmt.Sprintf("Stat(%v) failed: %v. helper is required.", s.helperPath, err))
}
haveHelper := isSetuid(helperStat)
switch {
- case haveHelper && string(dn) != un:
+ case haveHelper && s.dmUser != targetUser:
return true, nil
- case haveHelper && string(dn) == un:
- return false, verror.New(verror.ErrNoAccess, nil, fmt.Sprintf("suidhelperEnabled failed: %q == %q", string(dn), un))
+ case haveHelper && s.dmUser == targetUser:
+ return false, verror.New(verror.ErrNoAccess, nil, fmt.Sprintf("suidhelperEnabled failed: %q == %q", s.dmUser, targetUser))
default:
return false, nil
}
@@ -62,13 +155,13 @@
// devicemanager will use to invoke apps.
// TODO(rjkroege): This code assumes a desktop target and will need
// to be reconsidered for embedded contexts.
-func (i suidHelperState) usernameForPrincipal(ctx *context.T, uat BlessingSystemAssociationStore) string {
+func (s suidHelperState) usernameForPrincipal(ctx *context.T, uat BlessingSystemAssociationStore) string {
identityNames, _ := security.RemoteBlessingNames(ctx)
systemName, present := uat.SystemAccountForBlessings(identityNames)
if present {
return systemName
} else {
- return string(i)
+ return s.dmUser
}
}
diff --git a/services/device/internal/impl/impl_test.go b/services/device/internal/impl/impl_test.go
index 0659978..ab399ce 100644
--- a/services/device/internal/impl/impl_test.go
+++ b/services/device/internal/impl/impl_test.go
@@ -65,6 +65,7 @@
deviceManagerCmd = "deviceManager"
deviceManagerV10Cmd = "deviceManagerV10" // deviceManager with a different major version number
appCmd = "app"
+ hangingAppCmd = "hangingApp"
installerCmd = "installer"
uninstallerCmd = "uninstaller"
@@ -236,6 +237,7 @@
type pingArgs struct {
Username, FlagValue, EnvValue string
+ Pid int
}
func ping(ctx *context.T) {
@@ -251,6 +253,7 @@
Username: savedArgs.Uname,
FlagValue: *flagValue,
EnvValue: os.Getenv(testEnvVarName),
+ Pid: os.Getpid(),
}
client := v23.GetClient(ctx)
if call, err := client.StartCall(ctx, "pingserver", "Ping", []interface{}{args}); err != nil {
@@ -303,6 +306,13 @@
return nil
}
+// Same as app, except that it does not exit properly after being stopped
+func hangingApp(stdin io.Reader, stdout, stderr io.Writer, env map[string]string, args ...string) error {
+ err := app(stdin, stdout, stderr, env, args...)
+ time.Sleep(24 * time.Hour)
+ return err
+}
+
// TODO(rjkroege): generateDeviceManagerScript and generateSuidHelperScript have
// code similarity that might benefit from refactoring.
// generateDeviceManagerScript is very similar in behavior to generateScript in
@@ -592,17 +602,23 @@
// END HACK
}
-func verifyPingArgs(t *testing.T, pingCh <-chan pingArgs, username, flagValue, envValue string) {
+func receivePingArgs(t *testing.T, pingCh <-chan pingArgs) pingArgs {
var args pingArgs
select {
case args = <-pingCh:
case <-time.After(pingTimeout):
t.Fatalf(testutil.FormatLogLine(2, "failed to get ping"))
}
+ return args
+}
+
+func verifyPingArgs(t *testing.T, pingCh <-chan pingArgs, username, flagValue, envValue string) {
+ args := receivePingArgs(t, pingCh)
wantArgs := pingArgs{
Username: username,
FlagValue: flagValue,
EnvValue: envValue,
+ Pid: args.Pid, // We are not checking for a value of Pid
}
if !reflect.DeepEqual(args, wantArgs) {
t.Fatalf(testutil.FormatLogLine(2, "got ping args %q, expected %q", args, wantArgs))
@@ -854,6 +870,29 @@
// Starting new instances should no longer be allowed.
startAppExpectError(t, ctx, appID, impl.ErrInvalidOperation.ID)
+ // Make sure that Stop will actually kill an app that doesn't exit cleanly
+ // Do this by installing, starting, and stopping hangingApp, which sleeps (rather than
+ // exits) after being asked to Stop()
+ *envelope = envelopeFromShell(sh, nil, hangingAppCmd, "hanging ap", "hAppV1")
+ hAppID := installApp(t, ctx)
+ hInstanceID := startApp(t, ctx, hAppID)
+ hangingPid := receivePingArgs(t, pingCh).Pid
+ if err := syscall.Kill(hangingPid, 0); err != nil && err != syscall.EPERM {
+ t.Fatalf("Pid of hanging app (%v) is not live", hangingPid)
+ }
+ stopApp(t, ctx, hAppID, hInstanceID)
+ pidIsAlive := true
+ for i := 0; i < 10 && pidIsAlive; i++ {
+ if err := syscall.Kill(hangingPid, 0); err == nil || err == syscall.EPERM {
+ time.Sleep(time.Second) // pid is still alive
+ } else {
+ pidIsAlive = false
+ }
+ }
+ if pidIsAlive {
+ t.Fatalf("Pid of hanging app (%d) has not exited after Stop() call", hangingPid)
+ }
+
// Cleanly shut down the device manager.
defer verifyNoRunningProcesses(t)
syscall.Kill(dmh.Pid(), syscall.SIGINT)
diff --git a/services/device/internal/impl/instance_reaping.go b/services/device/internal/impl/instance_reaping.go
index f7e9672..6ff7e61 100644
--- a/services/device/internal/impl/instance_reaping.go
+++ b/services/device/internal/impl/instance_reaping.go
@@ -62,7 +62,7 @@
// TODO(rjkroege): There are nicer ways to provide this functionality.
// For example, use the kevent facility in darwin or replace init.
// See http://www.incenp.org/dvlpt/wait4.html for inspiration.
-func processStatusPolling(cq reaper, trackedPids map[string]int) {
+func processStatusPolling(r reaper, trackedPids map[string]int) {
poll := func() {
for idir, pid := range trackedPids {
switch err := syscall.Kill(pid, 0); err {
@@ -95,13 +95,21 @@
for {
select {
- case p, ok := <-cq:
- if !ok {
+ case p, ok := <-r:
+ switch {
+ case !ok:
return
- }
- if p.pid < 0 {
+ case p.pid == -1: // stop watching this instance
delete(trackedPids, p.instanceDir)
- } else {
+ case p.pid == -2: // kill the process
+ if pid, ok := trackedPids[p.instanceDir]; ok {
+ if err := suidHelper.terminatePid(pid, nil, nil); err != nil {
+ vlog.Errorf("Failure to kill: %v", err)
+ }
+ }
+ case p.pid < 0:
+ vlog.Panicf("invalid pid %v", p.pid)
+ default:
trackedPids[p.instanceDir] = p.pid
poll()
}
@@ -124,6 +132,11 @@
r <- pidInstanceDirPair{instanceDir: idir, pid: -1}
}
+// forciblySuspend terminates the process pid
+func (r reaper) forciblySuspend(idir string) {
+ r <- pidInstanceDirPair{instanceDir: idir, pid: -2}
+}
+
func (r reaper) shutdown() {
close(r)
}
diff --git a/services/device/internal/impl/util_test.go b/services/device/internal/impl/util_test.go
index 3a99927..4f35d91 100644
--- a/services/device/internal/impl/util_test.go
+++ b/services/device/internal/impl/util_test.go
@@ -42,7 +42,7 @@
const (
// TODO(caprita): Set the timeout in a more principled manner.
- stopTimeout = 20 // In seconds.
+ stopTimeout = 20 * time.Second
)
func envelopeFromShell(sh *modules.Shell, env []string, cmd, title string, args ...string) application.Envelope {
diff --git a/services/device/internal/impl/v23_test.go b/services/device/internal/impl/v23_test.go
index ddee652..1ccf6c1 100644
--- a/services/device/internal/impl/v23_test.go
+++ b/services/device/internal/impl/v23_test.go
@@ -15,4 +15,5 @@
specify device manager config settings.`, deviceManager)
modules.RegisterChild("deviceManagerV10", `This is the same as deviceManager above, except that it has a different major version number`, deviceManagerV10)
modules.RegisterChild("app", ``, app)
+ modules.RegisterChild("hangingApp", `Same as app, except that it does not exit properly after being stopped`, hangingApp)
}