Merge "services/device/device/publish.go: specifiable envelope name"
diff --git a/services/device/internal/impl/app_service.go b/services/device/internal/impl/app_service.go
index 19b2970..b3163cb 100644
--- a/services/device/internal/impl/app_service.go
+++ b/services/device/internal/impl/app_service.go
@@ -160,6 +160,8 @@
Pid int
DeviceManagerPeerPattern string
SecurityAgentHandle []byte
+ Restarts int32
+ RestartWindowBegan time.Time
}
func saveInstanceInfo(ctx *context.T, dir string, info *instanceInfo) error {
@@ -957,25 +959,45 @@
return nil
}
-func (i *appRunner) restartAppIfNecessary(instanceDir string) {
+func synchronizedShouldRestart(instanceDir string) bool {
info, err := loadInstanceInfo(nil, instanceDir)
if err != nil {
vlog.Error(err)
- return
+ return false
}
envelope, err := loadEnvelopeForInstance(nil, instanceDir)
if err != nil {
vlog.Error(err)
+ return false
+ }
+
+ shouldRestart := newBasicRestartPolicy().decide(envelope, info)
+
+ if err := saveInstanceInfo(nil, instanceDir, info); err != nil {
+ vlog.Error(err)
+ return false
+ }
+ return shouldRestart
+}
+
+func (i *appRunner) restartAppIfNecessary(instanceDir string) {
+ if err := transitionInstance(instanceDir, device.InstanceStateNotRunning, device.InstanceStateLaunching); err != nil {
+ vlog.Error(err)
return
}
- // Determine if we should restart.
- if !neverStart().decide(envelope, info) {
+ shouldRestart := synchronizedShouldRestart(instanceDir)
+
+ if err := transitionInstance(instanceDir, device.InstanceStateLaunching, device.InstanceStateNotRunning); err != nil {
+ vlog.Error(err)
return
}
- // TODO(rjkroege): Implement useful restart policy.
+ if !shouldRestart {
+ return
+ }
+
if err := i.run(nil, instanceDir); err != nil {
vlog.Error(err)
}
diff --git a/services/device/internal/impl/daemonreap/daemon_reaping_test.go b/services/device/internal/impl/daemonreap/daemon_reaping_test.go
index bcf4fc9..765eb82 100644
--- a/services/device/internal/impl/daemonreap/daemon_reaping_test.go
+++ b/services/device/internal/impl/daemonreap/daemon_reaping_test.go
@@ -9,6 +9,7 @@
"testing"
"time"
+ "v.io/v23/context"
"v.io/v23/naming"
"v.io/v23/services/device"
"v.io/v23/services/stats"
@@ -35,7 +36,7 @@
utiltest.Resolve(t, ctx, "pingserver", 1, true)
// Create an envelope for a first version of the app that will be restarted once.
- *envelope = utiltest.EnvelopeFromShell(sh, nil, utiltest.App, "google naps", 1, 10*time.Second, "appV1")
+ *envelope = utiltest.EnvelopeFromShell(sh, nil, utiltest.App, "google naps", 1, 10*time.Minute, "appV1")
appID := utiltest.InstallApp(t, ctx)
// Start an instance of the app.
@@ -45,7 +46,58 @@
pingCh.VerifyPingArgs(t, utiltest.UserName(t), "default", "")
// Get application pid.
- name := naming.Join("dm", "apps/"+appID+"/"+instance1ID+"/stats/system/pid")
+ pid := getPid(t, ctx, appID, instance1ID)
+
+ utiltest.VerifyState(t, ctx, device.InstanceStateRunning, appID, instance1ID)
+ syscall.Kill(int(pid), 9)
+ pollingWait(t, int(pid))
+
+ // Start a second instance of the app which will force polling to happen.
+ // During this polling, the reaper will restart app instance1
+ instance2ID := utiltest.LaunchApp(t, ctx, appID)
+ pingCh.VerifyPingArgs(t, utiltest.UserName(t), "default", "")
+ utiltest.VerifyState(t, ctx, device.InstanceStateRunning, appID, instance2ID)
+
+ // Stop the second instance of the app which will also force polling. By this point,
+ // instance1 should be live.
+ utiltest.KillApp(t, ctx, appID, instance2ID)
+
+ // instance2ID is not running.
+ utiltest.VerifyState(t, ctx, device.InstanceStateNotRunning, appID, instance2ID)
+
+ // instance1ID was restarted automatically.
+ utiltest.VerifyState(t, ctx, device.InstanceStateRunning, appID, instance1ID)
+
+ // Get application pid.
+ pid = getPid(t, ctx, appID, instance1ID)
+ // Be sure to get the ping from the restarted application so that we block in
+ // RunApp below.
+ pingCh.WaitForPingArgs(t)
+ // Kill the application again.
+ syscall.Kill(int(pid), 9)
+ pollingWait(t, int(pid))
+
+ // Start and stop instance 2 again to force two polling cycles.
+ utiltest.RunApp(t, ctx, appID, instance2ID)
+ pingCh.VerifyPingArgs(t, utiltest.UserName(t), "default", "")
+ utiltest.KillApp(t, ctx, appID, instance2ID)
+
+ // instance1ID is not running because it exceeded its restart limit.
+ utiltest.VerifyState(t, ctx, device.InstanceStateNotRunning, appID, instance1ID)
+
+ // instance2ID is not running.
+ utiltest.VerifyState(t, ctx, device.InstanceStateNotRunning, appID, instance2ID)
+
+ // Cleanly shut down the device manager.
+ utiltest.VerifyNoRunningProcesses(t)
+ syscall.Kill(dmh.Pid(), syscall.SIGINT)
+ dmh.Expect("dm terminated")
+ dmh.ExpectEOF()
+}
+
+// getPid returns the application pid
+func getPid(t *testing.T, ctx *context.T, appID, instanceID string) int {
+ name := naming.Join("dm", "apps/"+appID+"/"+instanceID+"/stats/system/pid")
c := stats.StatsClient(name)
v, err := c.Value(ctx)
if err != nil {
@@ -55,33 +107,19 @@
if err := vdl.Convert(&pid, v); err != nil {
t.Fatalf("pid returned from stats interface is not an int: %v", err)
}
+ return pid
+}
- utiltest.VerifyState(t, ctx, device.InstanceStateRunning, appID, instance1ID)
- syscall.Kill(int(pid), 9)
-
- // Start a second instance of the app which will force polling to happen.
- instance2ID := utiltest.LaunchApp(t, ctx, appID)
- pingCh.VerifyPingArgs(t, utiltest.UserName(t), "default", "")
-
- // TODO(rjkroege): Because there is no daemon mode, instance1ID is not running even
- // though it should be.
- utiltest.VerifyState(t, ctx, device.InstanceStateNotRunning, appID, instance1ID)
-
- // TODO(rjkroege): Demonstrate that the device manager will only restart the app the
- // configured number of times (1)
-
- // instance2ID is still running though.
- utiltest.VerifyState(t, ctx, device.InstanceStateRunning, appID, instance2ID)
-
- // Cleanup.
- utiltest.TerminateApp(t, ctx, appID, instance2ID)
-
- // TODO(rjkroege): instance1ID isn't running but should be.
- // utiltest.TerminateApp(t, ctx, appID, instance1ID)
-
- // Cleanly shut down the device manager.
- utiltest.VerifyNoRunningProcesses(t)
- syscall.Kill(dmh.Pid(), syscall.SIGINT)
- dmh.Expect("dm terminated")
- dmh.ExpectEOF()
+// pollingWait waits until the specificed process is actually dead so that
+// that the test can be certain that the process is actually dead before
+// continuing.
+func pollingWait(t *testing.T, pid int) {
+ for {
+ switch err := syscall.Kill(pid, 0); {
+ case err == syscall.ESRCH:
+ return
+ case err != nil:
+ t.Fatalf("syscall.Kill not working as expected: %v", err)
+ }
+ }
}
diff --git a/services/device/internal/impl/instance_reaping.go b/services/device/internal/impl/instance_reaping.go
index e300283..16cee82 100644
--- a/services/device/internal/impl/instance_reaping.go
+++ b/services/device/internal/impl/instance_reaping.go
@@ -92,7 +92,7 @@
// No such PID.
vlog.VI(2).Infof("processStatusPolling discovered pid %d ended", pid)
markNotRunning(idir)
- r.startState.restartAppIfNecessary(idir)
+ go r.startState.restartAppIfNecessary(idir)
delete(trackedPids, idir)
case nil, syscall.EPERM:
vlog.VI(2).Infof("processStatusPolling saw live pid: %d", pid)
@@ -130,6 +130,7 @@
return
case p.pid == -1: // stop watching this instance
delete(trackedPids, p.instanceDir)
+ poll()
case p.pid == -2: // kill the process
if pid, ok := trackedPids[p.instanceDir]; ok {
if err := suidHelper.terminatePid(pid, nil, nil); err != nil {
diff --git a/services/device/internal/impl/restart_policy.go b/services/device/internal/impl/restart_policy.go
index 57c6cff..465d0e0 100644
--- a/services/device/internal/impl/restart_policy.go
+++ b/services/device/internal/impl/restart_policy.go
@@ -5,6 +5,8 @@
package impl
import (
+ "time"
+
"v.io/v23/services/application"
)
@@ -16,21 +18,37 @@
decide(envelope *application.Envelope, instance *instanceInfo) bool
}
-// startStub implements a stub RestartPolicy.
-type startStub struct {
- start bool
+type basicDecisionPolicy struct {
}
-// alwaysStart returns a RestartPolicy that always (re)starts the application.
-func alwaysStart() restartPolicy {
- return startStub{true}
+func newBasicRestartPolicy() restartPolicy {
+ return new(basicDecisionPolicy)
}
-// neverStart returns a RestartPolicy that never (re)starts the application.
-func neverStart() restartPolicy {
- return startStub{false}
-}
+func (rp *basicDecisionPolicy) decide(envelope *application.Envelope, instance *instanceInfo) bool {
+ if envelope.Restarts == 0 {
+ return false
+ }
+ if envelope.Restarts < 0 {
+ return true
+ }
-func (s startStub) decide(envelope *application.Envelope, instance *instanceInfo) bool {
- return s.start
+ if instance.Restarts == 0 {
+ instance.RestartWindowBegan = time.Now()
+ }
+
+ endOfWindow := instance.RestartWindowBegan.Add(envelope.RestartTimeWindow)
+ if time.Now().After(endOfWindow) {
+ instance.Restarts = 1
+ instance.RestartWindowBegan = time.Now()
+ return true
+ }
+
+ if instance.Restarts < envelope.Restarts {
+ instance.Restarts++
+ return true
+ }
+
+ return false
+
}