veyron/services/mgmt/node/impl: fix flakiness in testNodeManager

Should fix https://github.com/veyron/release-issues/issues/477

testNodeManager runs a candidate node manager as a subprocess, as part of
Update(). The node manager subprocess is started via a shell script (created by
generateScript()). The old behavior of testNodeManager was to issue a Revert()
to the subprocess node manager. Revert() has the effect that the subprocess node
manager calls Stop() on itself, which eventually causes the subprocess to
exit. However, the Revert() call returns before the subprocess exits, and hence
the subprocess may still be running by the time Update() on the parent returns.
The parent eventually exits, but the child may still be running.  This causes a
race as far as the mount entry created by the subprocess node manager is
concerend: the mount entry may or may not still be present by the time the
parent has exited, depending on whether the subprocess may or may not still be
running. Hence, resolveExpectNotFound on the subprocess node manager's mount
name may or may not flake.  Note, the parent does run handle.Clean() on the
subprocess handle before returning from Update(), but that only has the effect
of killing the shell that runs the subprocess node manager, and not the node
manager process itself. Either way, killing the subprocess node manager is not
the right solution, since we want to give it a chance to shut itself down
cleanly, including unmounting its mount entries.

The fix is to Wait for the subprocess to exit cleanly (as a result of the Stop
call that it makes on itself as part of Revert). That ensures that the
subprocess is no longer running by the time Update on the parent returns, and in
particular, that all mount entries have been cleaned up.

While at it, also improve the error message from resolveExpectNotFound to reveal
the call site.

Change-Id: Icfa2a314f43ab78de542bbef07be139d62c5f033
diff --git a/services/mgmt/node/impl/node_service.go b/services/mgmt/node/impl/node_service.go
index 6d0f12d..ebb604f 100644
--- a/services/mgmt/node/impl/node_service.go
+++ b/services/mgmt/node/impl/node_service.go
@@ -238,7 +238,7 @@
 	if err != nil {
 		return verror2.Make(ErrOperationFailed, ctx)
 	}
-	// Check that invoking Update() succeeds.
+	// Check that invoking Revert() succeeds.
 	childName = naming.Join(childName, "nm")
 	nmClient := node.NodeClient(childName)
 	linkOld, pathOld, err := i.getCurrentFileInfo()
@@ -258,13 +258,17 @@
 	}
 	// Check that the new node manager updated the current symbolic link.
 	if !linkOld.ModTime().Before(linkNew.ModTime()) {
-		vlog.Errorf("new node manager test failed")
+		vlog.Errorf("New node manager test failed")
 		return verror2.Make(ErrOperationFailed, ctx)
 	}
 	// Ensure that the current symbolic link points to the same script.
 	if pathNew != pathOld {
 		updateLink(pathOld, i.config.CurrentLink)
-		vlog.Errorf("new node manager test failed")
+		vlog.Errorf("New node manager test failed")
+		return verror2.Make(ErrOperationFailed, ctx)
+	}
+	if err := handle.Wait(childWaitTimeout); err != nil {
+		vlog.Errorf("New node manager failed to exit cleanly: %v", err)
 		return verror2.Make(ErrOperationFailed, ctx)
 	}
 	return nil
diff --git a/services/mgmt/node/impl/util.go b/services/mgmt/node/impl/util.go
index c3c2bdc..feeb1c3 100644
--- a/services/mgmt/node/impl/util.go
+++ b/services/mgmt/node/impl/util.go
@@ -19,6 +19,7 @@
 // TODO(caprita): Set these timeout in a more principled manner.
 const (
 	childReadyTimeout = 20 * time.Second
+	childWaitTimeout  = 20 * time.Second
 	ipcContextTimeout = time.Minute
 )
 
diff --git a/services/mgmt/node/impl/util_test.go b/services/mgmt/node/impl/util_test.go
index 1c19f37..1d0e119 100644
--- a/services/mgmt/node/impl/util_test.go
+++ b/services/mgmt/node/impl/util_test.go
@@ -171,12 +171,12 @@
 	return server, endpoint.String()
 }
 
-// resolveExpectError verifies that the given name is not in the mounttable.
+// resolveExpectNotFound verifies that the given name is not in the mounttable.
 func resolveExpectNotFound(t *testing.T, name string) {
 	if results, err := globalRT.Namespace().Resolve(globalRT.NewContext(), name); err == nil {
-		t.Fatalf("Resolve(%v) succeeded with results %v when it was expected to fail", name, results)
+		t.Fatalf("%s: Resolve(%v) succeeded with results %v when it was expected to fail", loc(1), name, results)
 	} else if expectErr := naming.ErrNoSuchName.ID; !verror2.Is(err, expectErr) {
-		t.Fatalf("Resolve(%v) failed with error %v, expected error ID %v", name, err, expectErr)
+		t.Fatalf("%s: Resolve(%v) failed with error %v, expected error ID %v", loc(1), name, err, expectErr)
 	}
 }