blob: 300ef851335e80b0aa4ca9ac8e7e6204111c62e5 [file] [log] [blame]
#!/usr/bin/python2.7
# This needs to run on a gce vm with the replica pool
# service account scope (https://www.googleapis.com/auth/ndev.cloudman).
#
# You also need to enable preview in gcloud:
# $ gcloud components update preview
#
# Then add it to your crontab, e.g.
# */10 * * * * gcloud preview replica-pools --zone us-central1-a replicas --pool playground-pool list|monitor.py
import os
import datetime
import subprocess
import sys
import yaml
DESIRED = 2
MAX_ALIVE_MIN = 60
POOL = "playground-pool"
def runCommand(*args):
cmd = ['gcloud', 'preview', 'replica-pools', '--zone', 'us-central1-a']
cmd.extend(args)
subprocess.check_call(cmd)
def resizePool(size):
runCommand("resize", "--new-size", str(size), POOL)
def shouldRestart(replica):
if replica['status']['state'] == 'PERMANENTLY_FAILING':
print "replica %s failed: %s" % (replica['name'], replica['status']['details'])
return True
return isTooOld(replica)
def isTooOld(replica):
start_text = replica['status']['vmStartTime']
if start_text:
start = yaml.load(start_text)
uptime = datetime.datetime.now() - start
return uptime.seconds > MAX_ALIVE_MIN * 60
def restartReplica(replica):
print "Restarting replica " + replica['name']
resizePool(DESIRED + 1)
runCommand("replicas", "--pool", POOL, "delete", replica['name'])
def maybeRestartReplica(replica):
if shouldRestart(replica):
restartReplica(replica)
def main():
replicas = yaml.load_all(sys.stdin.read())
for replica in replicas:
maybeRestartReplica(replica)
if __name__ == "__main__":
main()