veyron/security/agent: Agent can restart child app upon exit.
Add a flag to instruct the agent to conditionally restart the child command,
based on the value of the command's exit code. To make this work, defer the
closing of the child unix socket ends in the agent until the agent is ready to
exit. While at it, also close the key mgr socket end (seems like we should).
Add test logic in the integration test to verify this works. Specifically,
ensure both the principal and key manager functions of the agent are usable upon
restart (the latter we verify using vrun).
Change-Id: If1672ebce626ab940464d2ec8ff7955d19d1d761
diff --git a/security/agent/agentd/main.go b/security/agent/agentd/main.go
index 97adc43..4c34c9e 100644
--- a/security/agent/agentd/main.go
+++ b/security/agent/agentd/main.go
@@ -7,6 +7,7 @@
"os"
"os/exec"
"os/signal"
+ "strconv"
"syscall"
"golang.org/x/crypto/ssh/terminal"
@@ -27,6 +28,11 @@
var (
keypath = flag.String("additional_principals", "", "If non-empty, allow for the creation of new principals and save them in this directory.")
noPassphrase = flag.Bool("no_passphrase", false, "If true, user will not be prompted for principal encryption passphrase.")
+
+ // TODO(caprita): We use the exit code of the child to determine if the
+ // agent should restart it. Consider changing this to use the unix
+ // socket for this purpose.
+ restartExitCode = flag.String("restart_exit_code", "", "If non-empty, will restart the command when it exits, provided that the command's exit code matches the value of this flag. The value must be an integer, or an integer preceded by '!' (in which case all exit codes except the flag will trigger a restart.")
)
func main() {
@@ -40,12 +46,25 @@
`, os.Args[0], consts.VeyronCredentials)
flag.PrintDefaults()
}
+ exitCode := 0
+ defer func() {
+ os.Exit(exitCode)
+ }()
flag.Parse()
if len(flag.Args()) < 1 {
fmt.Fprintln(os.Stderr, "Need at least one argument.")
flag.Usage()
- os.Exit(1)
+ exitCode = 1
+ return
}
+ var restartOpts restartOptions
+ if err := restartOpts.parse(); err != nil {
+ fmt.Fprintln(os.Stderr, err)
+ flag.Usage()
+ exitCode = 1
+ return
+ }
+
// TODO(ashankar,cnicolaou): Should flags.Parse be used instead? But that adds unnecessary
// flags like "--veyron.namespace.root", which has no meaning for this binary.
dir := os.Getenv(consts.VeyronCredentials)
@@ -92,40 +111,47 @@
}
}
- // Now run the client and wait for it to finish.
- cmd := exec.Command(flag.Args()[0], flag.Args()[1:]...)
- cmd.Stdin = os.Stdin
- cmd.Stdout = os.Stdout
- cmd.Stderr = os.Stderr
- cmd.ExtraFiles = []*os.File{sock}
+ for {
+ // Run the client and wait for it to finish.
+ cmd := exec.Command(flag.Args()[0], flag.Args()[1:]...)
+ cmd.Stdin = os.Stdin
+ cmd.Stdout = os.Stdout
+ cmd.Stderr = os.Stderr
+ cmd.ExtraFiles = []*os.File{sock}
- if mgrSock != nil {
- cmd.ExtraFiles = append(cmd.ExtraFiles, mgrSock)
- }
-
- err = cmd.Start()
- if err != nil {
- log.Fatalf("Error starting child: %v", err)
- }
- sock.Close()
- shutdown := make(chan struct{})
- go func() {
- select {
- case sig := <-vsignals.ShutdownOnSignals(runtime):
- // TODO(caprita): Should we also relay double signal to
- // the child? That currently just force exits the
- // current process.
- if sig == vsignals.STOP {
- sig = syscall.SIGTERM
- }
- cmd.Process.Signal(sig)
- case <-shutdown:
+ if mgrSock != nil {
+ cmd.ExtraFiles = append(cmd.ExtraFiles, mgrSock)
}
- }()
- cmd.Wait()
- close(shutdown)
- status := cmd.ProcessState.Sys().(syscall.WaitStatus)
- os.Exit(status.ExitStatus())
+
+ err = cmd.Start()
+ if err != nil {
+ log.Fatalf("Error starting child: %v", err)
+ }
+ shutdown := make(chan struct{})
+ go func() {
+ select {
+ case sig := <-vsignals.ShutdownOnSignals(runtime):
+ // TODO(caprita): Should we also relay double
+ // signal to the child? That currently just
+ // force exits the current process.
+ if sig == vsignals.STOP {
+ sig = syscall.SIGTERM
+ }
+ cmd.Process.Signal(sig)
+ case <-shutdown:
+ }
+ }()
+ cmd.Wait()
+ close(shutdown)
+ exitCode = cmd.ProcessState.Sys().(syscall.WaitStatus).ExitStatus()
+ if !restartOpts.restart(exitCode) {
+ break
+ }
+ }
+ // TODO(caprita): If restartOpts.enabled is false, we could close these
+ // right after cmd.Start().
+ sock.Close()
+ mgrSock.Close()
}
func newPrincipalFromDir(dir string) (security.Principal, []byte, error) {
@@ -236,3 +262,29 @@
signal.Stop(sig)
}
}
+
+type restartOptions struct {
+ enabled, unless bool
+ code int
+}
+
+func (opts *restartOptions) parse() error {
+ code := *restartExitCode
+ if code == "" {
+ return nil
+ }
+ opts.enabled = true
+ if code[0] == '!' {
+ opts.unless = true
+ code = code[1:]
+ }
+ var err error
+ if opts.code, err = strconv.Atoi(code); err != nil {
+ return fmt.Errorf("Failed to parse restart exit code: %v", err)
+ }
+ return nil
+}
+
+func (opts *restartOptions) restart(exitCode int) bool {
+ return opts.enabled && opts.unless != (exitCode == opts.code)
+}
diff --git a/security/agent/test.sh b/security/agent/test.sh
index 08164e1..c8af0b8 100755
--- a/security/agent/test.sh
+++ b/security/agent/test.sh
@@ -9,6 +9,7 @@
build() {
AGENTD_BIN="$(shell_test::build_go_binary 'veyron.io/veyron/veyron/security/agent/agentd')"
PINGPONG_BIN="$(shell_test::build_go_binary 'veyron.io/veyron/veyron/security/agent/pingpong')"
+ VRUN="$(shell_test::build_go_binary 'veyron.io/veyron/veyron/tools/vrun')"
}
main() {
@@ -26,7 +27,7 @@
"${AGENTD_BIN}" --v=4 "${PINGPONG_BIN}" || shell_test::fail "line ${LINENO}: failed to run pingpong"
local -r CREDENTIALS_UNDER_AGENT=$("${AGENTD_BIN}" bash -c 'echo ${VEYRON_CREDENTIALS}')
if [[ "${CREDENTIALS_UNDER_AGENT}" != "" ]]; then
- shell_test::fail "line ${LINENO}: VEYRON_CREDENTIALS should not be set when running under the agent(${CREDENTIALS_UNDER_AGENT})"
+ shell_test::fail "line ${LINENO}: VEYRON_CREDENTIALS should not be set when running under the agent(${CREDENTIALS_UNDER_AGENT})"
fi
# Test running multiple apps connecting to the same agent.
@@ -35,6 +36,36 @@
RESULT=$(shell::check_result "${AGENTD_BIN}" bash "$(go list -f {{.Dir}} veyron.io/veyron/veyron/security/agent)/testchild.sh")
shell_test::assert_eq "${RESULT}" "0" "${LINENO}"
+ # Verify the restart feature.
+ local -r COUNTER_FILE="${WORKDIR}/counter"
+ # This script increments a counter in $COUNTER_FILE and exits with exit code 0
+ # while the counter is < 5, and 1 otherwise.
+ local -r SCRIPT=$(cat <<EOF
+"${PINGPONG_BIN}" || exit 101
+"${VRUN}" "${PINGPONG_BIN}" || exit 102
+readonly COUNT=\$(expr \$(<"${COUNTER_FILE}") + 1)
+echo \$COUNT > "${COUNTER_FILE}"
+[[ \$COUNT -lt 5 ]]; exit \$?
+EOF
+ )
+ # Have the agent restart the child while its exit code is 0.
+ echo "0" > "${COUNTER_FILE}"
+ RESULT=$(shell::check_result "${AGENTD_BIN}" --additional_principals="$(shell::tmp_dir)" --restart_exit_code="0" bash -c "${SCRIPT}")
+ shell_test::assert_eq "${RESULT}" "1" "${LINENO}"
+ [[ $(<"${COUNTER_FILE}") -eq 5 ]] || shell_test::fail "Failed"
+
+ # Have the agent restart the child while its exit code is !0.
+ echo "0" > "${COUNTER_FILE}"
+ RESULT=$(shell::check_result "${AGENTD_BIN}" --additional_principals="$(shell::tmp_dir)" --restart_exit_code="!0" bash -c "${SCRIPT}")
+ shell_test::assert_eq "${RESULT}" "0" "${LINENO}"
+ [[ $(<"${COUNTER_FILE}") -eq 1 ]] || shell_test::fail "Failed"
+
+ # Have the agent restart the child while its exit code is != 1.
+ echo "0" > "${COUNTER_FILE}"
+ RESULT=$(shell::check_result "${AGENTD_BIN}" --additional_principals="$(shell::tmp_dir)" --restart_exit_code="!1" bash -c "${SCRIPT}")
+ shell_test::assert_eq "${RESULT}" "1" "${LINENO}"
+ [[ $(<"${COUNTER_FILE}") -eq 5 ]] || shell_test::fail "Failed"
+
shell_test::pass
}