veyron/tools/playground: Add script to monitor GCE replicas.

Change-Id: If0bc6b97f1dfd6fa8e1e5f40f0641a0426751dd4
diff --git a/tools/playground/monitor.py b/tools/playground/monitor.py
new file mode 100755
index 0000000..300ef85
--- /dev/null
+++ b/tools/playground/monitor.py
@@ -0,0 +1,64 @@
+#!/usr/bin/python2.7
+
+# This needs to run on a gce vm with the replica pool
+# service account scope (https://www.googleapis.com/auth/ndev.cloudman).
+#
+# You also need to enable preview in gcloud:
+# $ gcloud components update preview
+#
+# Then add it to your crontab, e.g.
+# */10 * * * * gcloud preview replica-pools --zone us-central1-a replicas --pool playground-pool list|monitor.py
+
+import os
+import datetime
+import subprocess
+import sys
+import yaml
+
+DESIRED = 2
+MAX_ALIVE_MIN = 60
+POOL = "playground-pool"
+
+def runCommand(*args):
+    cmd = ['gcloud', 'preview', 'replica-pools', '--zone', 'us-central1-a']
+    cmd.extend(args)
+    subprocess.check_call(cmd)
+
+def resizePool(size):
+    runCommand("resize", "--new-size", str(size), POOL)
+
+
+def shouldRestart(replica):
+    if replica['status']['state'] == 'PERMANENTLY_FAILING':
+        print "replica %s failed: %s" % (replica['name'], replica['status']['details'])
+        return True
+    return isTooOld(replica)
+
+
+def isTooOld(replica):
+    start_text = replica['status']['vmStartTime']
+    if start_text:
+        start = yaml.load(start_text)
+        uptime = datetime.datetime.now() - start
+        return uptime.seconds > MAX_ALIVE_MIN * 60
+
+
+def restartReplica(replica):
+    print "Restarting replica " + replica['name']
+    resizePool(DESIRED + 1)
+    runCommand("replicas", "--pool", POOL, "delete", replica['name'])
+
+
+def maybeRestartReplica(replica):
+    if shouldRestart(replica):
+        restartReplica(replica)
+
+
+def main():
+    replicas = yaml.load_all(sys.stdin.read())
+    for replica in replicas:
+        maybeRestartReplica(replica)
+
+
+if __name__ == "__main__":
+    main()