Merge "services/device/internal/impl: daemon mode refactoring"
diff --git a/services/device/internal/impl/app_service.go b/services/device/internal/impl/app_service.go
index 8b1ce68..00ceb43 100644
--- a/services/device/internal/impl/app_service.go
+++ b/services/device/internal/impl/app_service.go
@@ -190,15 +190,15 @@
keyMgrAgent *keymgr.Agent
}
-// appStart is the subset of the appService object needed to
+// appRunner is the subset of the appService object needed to
// (re)start an application.
-type appStartState struct {
- systemName string
- instanceDir string
- callback *callbackState
+type appRunner struct {
+ callback *callbackState
// securityAgent holds state related to the security agent (nil if not
// using the agent).
securityAgent *securityAgentState
+ // reap is the app process monitoring subsystem.
+ reap *reaper
// mtAddress is the address of the local mounttable.
mtAddress string
}
@@ -214,9 +214,8 @@
permsStore *pathperms.PathStore
// Reference to the devicemanager top-level AccessList list.
deviceAccessList access.Permissions
- // reap is the app process monitoring subsystem.
- reap reaper
- appStart *appStartState
+ // State needed to (re)start an application.
+ runner *appRunner
}
func saveEnvelope(ctx *context.T, dir string, envelope *application.Envelope) error {
@@ -242,6 +241,15 @@
return envelope, nil
}
+func loadEnvelopeForInstance(ctx *context.T, instanceDir string) (*application.Envelope, error) {
+ versionLink := filepath.Join(instanceDir, "version")
+ versionDir, err := filepath.EvalSymlinks(versionLink)
+ if err != nil {
+ return nil, verror.New(ErrOperationFailed, ctx, fmt.Sprintf("EvalSymlinks(%v) failed: %v", versionLink, err))
+ }
+ return loadEnvelope(ctx, versionDir)
+}
+
func saveConfig(ctx *context.T, dir string, config device.Config) error {
jsonConfig, err := json.Marshal(config)
if err != nil {
@@ -712,7 +720,7 @@
return instanceDir, instanceID, verror.New(ErrOperationFailed, ctx, fmt.Sprintf("Symlink(%v, %v) failed: %v", packagesDir, packagesLink, err))
}
instanceInfo := new(instanceInfo)
- if err := setupPrincipal(ctx, instanceDir, call, i.appStart.securityAgent, instanceInfo); err != nil {
+ if err := setupPrincipal(ctx, instanceDir, call, i.runner.securityAgent, instanceInfo); err != nil {
return instanceDir, instanceID, err
}
if err := saveInstanceInfo(ctx, instanceDir, instanceInfo); err != nil {
@@ -735,10 +743,11 @@
return instanceDir, instanceID, nil
}
-func (i appStartState) genCmd(ctx *context.T) (*exec.Cmd, error) {
- instanceDir := i.instanceDir
- systemName := i.systemName
- nsRoot := i.mtAddress
+func genCmd(ctx *context.T, instanceDir string, nsRoot string) (*exec.Cmd, error) {
+ systemName, err := readSystemNameForInstance(instanceDir)
+ if err != nil {
+ return nil, err
+ }
versionLink := filepath.Join(instanceDir, "version")
versionDir, err := filepath.EvalSymlinks(versionLink)
@@ -802,8 +811,7 @@
return suidHelper.getAppCmd(&saArgs)
}
-func (i appStartState) startCmd(ctx *context.T, cmd *exec.Cmd) (int, error) {
- instanceDir := i.instanceDir
+func (i *appRunner) startCmd(ctx *context.T, instanceDir string, cmd *exec.Cmd) (int, error) {
info, err := loadInstanceInfo(ctx, instanceDir)
if err != nil {
return 0, err
@@ -905,27 +913,51 @@
return pid, nil
}
-func (i appStartState) run(ctx *context.T, reap reaper) error {
- if err := transitionInstance(i.instanceDir, device.InstanceStateNotRunning, device.InstanceStateLaunching); err != nil {
+func (i *appRunner) run(ctx *context.T, instanceDir string) error {
+ if err := transitionInstance(instanceDir, device.InstanceStateNotRunning, device.InstanceStateLaunching); err != nil {
return err
}
var pid int
- cmd, err := i.genCmd(ctx)
+ cmd, err := genCmd(ctx, instanceDir, i.mtAddress)
if err == nil {
- pid, err = i.startCmd(ctx, cmd)
+ pid, err = i.startCmd(ctx, instanceDir, cmd)
}
if err != nil {
- transitionInstance(i.instanceDir, device.InstanceStateLaunching, device.InstanceStateNotRunning)
+ transitionInstance(instanceDir, device.InstanceStateLaunching, device.InstanceStateNotRunning)
return err
}
- if err := transitionInstance(i.instanceDir, device.InstanceStateLaunching, device.InstanceStateRunning); err != nil {
+ if err := transitionInstance(instanceDir, device.InstanceStateLaunching, device.InstanceStateRunning); err != nil {
return err
}
- reap.startWatching(i.instanceDir, pid)
+ i.reap.startWatching(instanceDir, pid)
return nil
}
+func (i *appRunner) restartAppIfNecessary(instanceDir string) {
+ info, err := loadInstanceInfo(nil, instanceDir)
+ if err != nil {
+ vlog.Error(err)
+ return
+ }
+
+ envelope, err := loadEnvelopeForInstance(nil, instanceDir)
+ if err != nil {
+ vlog.Error(err)
+ return
+ }
+
+ // Determine if we should restart.
+ if !neverStart().decide(envelope, info) {
+ return
+ }
+
+ // TODO(rjkroege): Implement useful restart policy.
+ if err := i.run(nil, instanceDir); err != nil {
+ vlog.Error(err)
+ }
+}
+
func (i *appService) Instantiate(ctx *context.T, call device.ApplicationInstantiateServerCall) (string, error) {
helper := i.config.Helper
instanceDir, instanceID, err := i.newInstance(ctx, call)
@@ -976,10 +1008,7 @@
if startSystemName != systemName {
return verror.New(verror.ErrNoAccess, ctx, "Not allowed to resume an application under a different system name.")
}
-
- i.appStart.instanceDir = instanceDir
- i.appStart.systemName = systemName
- return i.appStart.run(ctx, i.reap)
+ return i.runner.run(ctx, instanceDir)
}
func stopAppRemotely(ctx *context.T, appVON string, deadline time.Duration) error {
@@ -1003,7 +1032,7 @@
return nil
}
-func stop(ctx *context.T, instanceDir string, reap reaper, deadline time.Duration) error {
+func stop(ctx *context.T, instanceDir string, reap *reaper, deadline time.Duration) error {
info, err := loadInstanceInfo(ctx, instanceDir)
if err != nil {
return err
@@ -1032,7 +1061,7 @@
if err := transitionInstance(instanceDir, device.InstanceStateRunning, device.InstanceStateDying); err != nil {
return err
}
- if err := stop(ctx, instanceDir, i.reap, deadline); err != nil {
+ if err := stop(ctx, instanceDir, i.runner.reap, deadline); err != nil {
transitionInstance(instanceDir, device.InstanceStateDying, device.InstanceStateRunning)
return err
}
@@ -1484,21 +1513,19 @@
} else {
debugInfo.StartSystemName = startSystemName
}
- as := *i.appStart
- as.instanceDir = instanceDir
- as.systemName = debugInfo.SystemName
- if cmd, err := as.genCmd(ctx); err != nil {
- return "", err
- } else {
- debugInfo.Cmd = cmd
- }
+
if info, err := loadInstanceInfo(ctx, instanceDir); err != nil {
return "", err
} else {
debugInfo.Info = info
}
+ if cmd, err := genCmd(ctx, instanceDir, i.runner.mtAddress); err != nil {
+ return "", err
+ } else {
+ debugInfo.Cmd = cmd
+ }
- if sa := i.appStart.securityAgent; sa != nil {
+ if sa := i.runner.securityAgent; sa != nil {
file, err := sa.keyMgrAgent.NewConnection(debugInfo.Info.SecurityAgentHandle)
if err != nil {
vlog.Errorf("NewConnection(%v) failed: %v", debugInfo.Info.SecurityAgentHandle, err)
diff --git a/services/device/internal/impl/dispatcher.go b/services/device/internal/impl/dispatcher.go
index 86f57f4..62d4787 100644
--- a/services/device/internal/impl/dispatcher.go
+++ b/services/device/internal/impl/dispatcher.go
@@ -41,6 +41,8 @@
securityAgent *securityAgentState
restartHandler func()
testMode bool
+ // reap is the app process monitoring subsystem.
+ reap *reaper
}
// dispatcher holds the state of the device manager dispatcher.
@@ -57,8 +59,6 @@
permsStore *pathperms.PathStore
// Namespace
mtAddress string // The address of the local mounttable.
- // reap is the app process monitoring subsystem.
- reap reaper
}
var _ rpc.Dispatcher = (*dispatcher)(nil)
@@ -118,10 +118,6 @@
if err != nil {
return nil, verror.New(errCantCreateAccountStore, ctx, err)
}
- reap, err := newReaper(ctx, config.Root)
- if err != nil {
- return nil, verror.New(errCantCreateAppWatcher, ctx, err)
- }
initSuidHelper(config.Helper)
d := &dispatcher{
internal: &internalState{
@@ -134,7 +130,6 @@
uat: uat,
permsStore: permStore,
mtAddress: mtAddress,
- reap: reap,
}
// If we're in 'security agent mode', set up the key manager agent.
@@ -147,6 +142,15 @@
}
}
}
+ reap, err := newReaper(ctx, config.Root, &appRunner{
+ callback: d.internal.callback,
+ securityAgent: d.internal.securityAgent,
+ })
+ if err != nil {
+ return nil, verror.New(errCantCreateAppWatcher, ctx, err)
+ }
+ d.internal.reap = reap
+
if testMode {
return &testModeDispatcher{d}, nil
}
@@ -157,7 +161,7 @@
func Shutdown(rpcd rpc.Dispatcher) {
switch d := rpcd.(type) {
case *dispatcher:
- d.reap.shutdown()
+ d.internal.reap.shutdown()
case *testModeDispatcher:
Shutdown(d.realDispatcher)
default:
@@ -326,8 +330,8 @@
suffix: components[1:],
uat: d.uat,
permsStore: d.permsStore,
- reap: d.reap,
- appStart: &appStartState{
+ runner: &appRunner{
+ reap: d.internal.reap,
callback: d.internal.callback,
securityAgent: d.internal.securityAgent,
mtAddress: d.mtAddress,
diff --git a/services/device/internal/impl/instance_reaping.go b/services/device/internal/impl/instance_reaping.go
index 59ae4b0..e300283 100644
--- a/services/device/internal/impl/instance_reaping.go
+++ b/services/device/internal/impl/instance_reaping.go
@@ -43,11 +43,14 @@
pid int
}
-type reaper chan pidInstanceDirPair
+type reaper struct {
+ c chan pidInstanceDirPair
+ startState *appRunner
+}
var stashedPidMap map[string]int
-func newReaper(ctx *context.T, root string) (reaper, error) {
+func newReaper(ctx *context.T, root string, startState *appRunner) (*reaper, error) {
pidMap, err := findAllTheInstances(ctx, root)
// Used only by the testing code that verifies that all processes
@@ -57,9 +60,13 @@
return nil, err
}
- c := make(reaper)
- go processStatusPolling(c, pidMap)
- return c, nil
+ r := &reaper{
+ c: make(chan pidInstanceDirPair),
+ startState: startState,
+ }
+ r.startState.reap = r
+ go r.processStatusPolling(pidMap)
+ return r, nil
}
func markNotRunning(idir string) {
@@ -77,7 +84,7 @@
// functionality. For example, use the kevent facility in darwin or
// replace init. See http://www.incenp.org/dvlpt/wait4.html for
// inspiration.
-func processStatusPolling(r reaper, trackedPids map[string]int) {
+func (r *reaper) processStatusPolling(trackedPids map[string]int) {
poll := func() {
for idir, pid := range trackedPids {
switch err := syscall.Kill(pid, 0); err {
@@ -85,6 +92,7 @@
// No such PID.
vlog.VI(2).Infof("processStatusPolling discovered pid %d ended", pid)
markNotRunning(idir)
+ r.startState.restartAppIfNecessary(idir)
delete(trackedPids, idir)
case nil, syscall.EPERM:
vlog.VI(2).Infof("processStatusPolling saw live pid: %d", pid)
@@ -99,6 +107,11 @@
// TODO(rjkroege): Probe the appcycle service of the app
// to confirm that its pid is valid iff v23PIDMgmt
// is false.
+
+ // TODO(rjkroege): if we can't connect to the app here via
+ // the appcycle manager, the app was probably started under
+ // a different agent and cannot be managed. Perhaps we should
+ // then kill the app and restart it?
default:
// The kill system call manpage says that this can only happen
// if the kernel claims that 0 is an invalid signal.
@@ -111,7 +124,7 @@
for {
select {
- case p, ok := <-r:
+ case p, ok := <-r.c:
switch {
case !ok:
return
@@ -139,22 +152,22 @@
// startWatching begins watching process pid's state. This routine
// assumes that pid already exists. Since pid is delivered to the device
// manager by RPC callback, this seems reasonable.
-func (r reaper) startWatching(idir string, pid int) {
- r <- pidInstanceDirPair{instanceDir: idir, pid: pid}
+func (r *reaper) startWatching(idir string, pid int) {
+ r.c <- pidInstanceDirPair{instanceDir: idir, pid: pid}
}
// stopWatching stops watching process pid's state.
-func (r reaper) stopWatching(idir string) {
- r <- pidInstanceDirPair{instanceDir: idir, pid: -1}
+func (r *reaper) stopWatching(idir string) {
+ r.c <- pidInstanceDirPair{instanceDir: idir, pid: -1}
}
// forciblySuspend terminates the process pid
-func (r reaper) forciblySuspend(idir string) {
- r <- pidInstanceDirPair{instanceDir: idir, pid: -2}
+func (r *reaper) forciblySuspend(idir string) {
+ r.c <- pidInstanceDirPair{instanceDir: idir, pid: -2}
}
-func (r reaper) shutdown() {
- close(r)
+func (r *reaper) shutdown() {
+ close(r.c)
}
type pidErrorTuple struct {
diff --git a/services/device/internal/impl/restart_policy.go b/services/device/internal/impl/restart_policy.go
new file mode 100644
index 0000000..57c6cff
--- /dev/null
+++ b/services/device/internal/impl/restart_policy.go
@@ -0,0 +1,36 @@
+// Copyright 2015 The Vanadium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package impl
+
+import (
+ "v.io/v23/services/application"
+)
+
+// RestartPolicy instances provide a policy for deciding if an
+// application should be restarted on failure.
+type restartPolicy interface {
+ // decide determines if this application instance should be (re)started, returning
+ // true if the application should be be (re)started.
+ decide(envelope *application.Envelope, instance *instanceInfo) bool
+}
+
+// startStub implements a stub RestartPolicy.
+type startStub struct {
+ start bool
+}
+
+// alwaysStart returns a RestartPolicy that always (re)starts the application.
+func alwaysStart() restartPolicy {
+ return startStub{true}
+}
+
+// neverStart returns a RestartPolicy that never (re)starts the application.
+func neverStart() restartPolicy {
+ return startStub{false}
+}
+
+func (s startStub) decide(envelope *application.Envelope, instance *instanceInfo) bool {
+ return s.start
+}