blob: 300ef851335e80b0aa4ca9ac8e7e6204111c62e5 [file] [log] [blame]
Adam Sadovsky62e37f32014-10-23 18:14:20 -07001#!/usr/bin/python2.7
2
# This needs to run on a GCE VM with the replica pool
# service account scope (https://www.googleapis.com/auth/ndev.cloudman).
#
# You also need to enable the preview components in gcloud:
# $ gcloud components update preview
#
# Then add it to your crontab, e.g.
# */10 * * * * gcloud preview replica-pools --zone us-central1-a replicas --pool playground-pool list|monitor.py
11
12import os
13import datetime
14import subprocess
15import sys
16import yaml
17
# Target pool size; a restart resizes the pool to DESIRED + 1.
DESIRED = 2
# Maximum replica uptime, in minutes, before it is considered too old.
MAX_ALIVE_MIN = 60
# Name of the replica pool passed to the gcloud commands.
POOL = "playground-pool"
21
def runCommand(*args):
    """Run a `gcloud preview replica-pools` subcommand in zone us-central1-a.

    The positional arguments are appended verbatim after the fixed gcloud
    prefix. subprocess.check_call raises CalledProcessError if the command
    exits with a non-zero status.
    """
    prefix = ['gcloud', 'preview', 'replica-pools', '--zone', 'us-central1-a']
    subprocess.check_call(prefix + list(args))
26
def resizePool(size):
    """Ask gcloud to resize POOL to `size` replicas."""
    new_size = str(size)
    runCommand("resize", "--new-size", new_size, POOL)
29
30
def shouldRestart(replica):
    """Decide whether a replica should be restarted.

    A replica is restarted when its state is PERMANENTLY_FAILING, or when
    isTooOld() reports that it has been running for too long.

    Args:
        replica: dict describing one replica; must contain 'name' and a
            'status' mapping.

    Returns:
        True if the replica should be restarted, False otherwise.
    """
    status = replica['status']
    if status.get('state') == 'PERMANENTLY_FAILING':
        # 'details' is read defensively: a missing field must not crash the
        # monitor before the failed replica gets restarted.
        # Single-argument print(...) behaves the same on Python 2 and 3.
        print("replica %s failed: %s" % (replica['name'], status.get('details')))
        return True
    # Normalize to a real bool in case isTooOld() yields None.
    return bool(isTooOld(replica))
36
37
def isTooOld(replica):
    """Return True if the replica's VM has been up longer than MAX_ALIVE_MIN.

    Args:
        replica: dict describing one replica; replica['status'] may carry
            a 'vmStartTime' timestamp.

    Returns:
        True when the uptime exceeds MAX_ALIVE_MIN minutes; False when it
        does not, or when no start time is available.
    """
    start_text = replica['status'].get('vmStartTime')
    if not start_text:
        # No start time recorded: nothing to measure, so not too old.
        return False

    if isinstance(start_text, datetime.datetime):
        # The YAML parser may already have turned an unquoted timestamp
        # into a datetime; use it directly.
        start = start_text
    else:
        # safe_load parses timestamps just like load, without requiring an
        # explicit Loader (plain yaml.load(text) is deprecated and fails on
        # recent PyYAML releases) and without constructing arbitrary objects.
        start = yaml.safe_load(start_text)

    # Passing start.tzinfo keeps the subtraction valid if the parser
    # returned a timezone-aware value; for naive values (tzinfo None) this
    # is plain local datetime.now(), as before.
    # NOTE(review): naive parsed timestamps are presumably UTC; this is
    # only correct if the host clock is also UTC - confirm.
    uptime = datetime.datetime.now(start.tzinfo) - start

    # total_seconds() includes whole days; .seconds alone wraps every 24h
    # and would make a replica up for 1 day 5 min look 5 minutes old.
    return uptime.total_seconds() > MAX_ALIVE_MIN * 60
44
45
def restartReplica(replica):
    """Restart one replica.

    Resizes the pool to DESIRED + 1 and then deletes the named replica
    (presumably so a replacement is available while the old one goes away).
    """
    name = replica['name']
    # Single-argument print(...) emits the same text on Python 2 and 3.
    print("Restarting replica " + name)
    resizePool(DESIRED + 1)
    runCommand("replicas", "--pool", POOL, "delete", name)
50
51
def maybeRestartReplica(replica):
    """Restart `replica` only when shouldRestart() says it is needed."""
    if not shouldRestart(replica):
        return
    restartReplica(replica)
55
56
def main():
    """Read replica descriptions from stdin and restart those that need it.

    Stdin is expected to hold a stream of YAML documents, one per replica
    (the output of the gcloud replicas `list` command).
    """
    # safe_load_all needs no explicit Loader (plain yaml.load_all(stream) is
    # deprecated and fails on recent PyYAML releases) and does not construct
    # arbitrary Python objects from the input.
    for replica in yaml.safe_load_all(sys.stdin.read()):
        if replica is None:
            # An empty document (e.g. a stray '---') parses to None; skip it
            # rather than crash on None['status'].
            continue
        maybeRestartReplica(replica)
61
62
# Run the monitor only when executed as a script, not when imported.
if __name__ == "__main__":
    main()