blob: ce7b86badf9400d2e0520dff3c19e022d04f5954 [file] [log] [blame]
Adam Sadovsky62e37f32014-10-23 18:14:20 -07001#!/usr/bin/python2.7
2
Adam Sadovsky78f22452014-10-23 18:50:04 -07003"""Playground GCE monitoring script.
Adam Sadovsky62e37f32014-10-23 18:14:20 -07004
Adam Sadovsky78f22452014-10-23 18:50:04 -07005This needs to run on a GCE VM with the replica pool service account scope
6(https://www.googleapis.com/auth/ndev.cloudman).
7
8You also need to enable preview in gcloud:
9$ gcloud components update preview
10
11Then add it to your crontab, e.g.
12*/10 * * * * gcloud preview replica-pools --zone us-central1-a replicas --pool playground-pool list|monitor.py
13"""
14
Adam Sadovsky62e37f32014-10-23 18:14:20 -070015import datetime
16import subprocess
17import sys
18import yaml
19
# Steady-state replica count; RestartReplica temporarily resizes the pool to
# DESIRED + 1 before deleting a replica.
DESIRED = 2
# Uptime threshold, in minutes, beyond which IsTooOld reports a replica as old.
MAX_ALIVE_MIN = 60
# Name of the replica pool passed to the gcloud commands.
POOL = 'playground-pool'
Adam Sadovsky62e37f32014-10-23 18:14:20 -070023
24
def RunCommand(*args):
    """Run a `gcloud preview replica-pools` subcommand in zone us-central1-a.

    Args:
      *args: Command-line tokens appended after the fixed gcloud prefix.

    Raises:
      subprocess.CalledProcessError: If the command exits with non-zero status.
    """
    prefix = ['gcloud', 'preview', 'replica-pools', '--zone', 'us-central1-a']
    subprocess.check_call(prefix + list(args))
Adam Sadovsky62e37f32014-10-23 18:14:20 -070029
30
def ResizePool(size):
    """Resize the replica pool named by POOL to `size` replicas via gcloud.

    Args:
      size: Target replica count; converted to a string for the command line.
    """
    new_size = str(size)
    RunCommand('resize', '--new-size', new_size, POOL)
Adam Sadovsky62e37f32014-10-23 18:14:20 -070033
34
def ShouldRestart(replica):
    """Decide whether a replica needs to be restarted.

    A replica whose state is PERMANENTLY_FAILING is reported on stdout and
    always restarted; any other replica is restarted only if IsTooOld says so.

    Args:
      replica: Parsed replica description with 'name' and 'status' entries.

    Returns:
      True for a permanently failing replica, otherwise IsTooOld's result.
    """
    status = replica['status']
    if status['state'] != 'PERMANENTLY_FAILING':
        return IsTooOld(replica)
    print('Replica %s failed: %s' % (replica['name'], status['details']))
    return True
Adam Sadovsky62e37f32014-10-23 18:14:20 -070041
42
def IsTooOld(replica):
    """Report whether a replica's VM has been up longer than MAX_ALIVE_MIN.

    Args:
      replica: Parsed replica description; its 'status' mapping may carry a
        'vmStartTime' timestamp string.

    Returns:
      True if the VM start time is known and the uptime exceeds
      MAX_ALIVE_MIN minutes; False otherwise (including when no start time
      is reported).
    """
    start_text = replica['status'].get('vmStartTime')
    if not start_text:
        # No start time reported: nothing to measure.
        return False
    # NOTE(review): yaml.load without an explicit Loader can construct
    # arbitrary objects and is rejected by recent PyYAML releases; consider
    # yaml.safe_load, which also parses timestamps.
    start = yaml.load(start_text)
    # PyYAML yields either an aware datetime or a naive one normalised to UTC
    # (depending on version), so compare against "now" on the same basis
    # rather than against local wall-clock time.
    # NOTE(review): assumes vmStartTime carries a UTC offset -- confirm.
    start_tz = getattr(start, 'tzinfo', None)
    if start_tz is not None:
        now = datetime.datetime.now(start_tz)
    else:
        now = datetime.datetime.utcnow()
    uptime = now - start
    # total_seconds() includes whole days; .seconds alone wraps every 24h and
    # would let replicas older than a day escape recycling.
    return uptime.total_seconds() > MAX_ALIVE_MIN * 60
49
50
def RestartReplica(replica):
    """Announce the restart, resize the pool, then delete the replica.

    The pool is resized to DESIRED + 1 before the named replica is deleted.

    Args:
      replica: Parsed replica description with a 'name' entry.
    """
    name = replica['name']
    print('Restarting replica ' + name)
    ResizePool(DESIRED + 1)
    RunCommand('replicas', '--pool', POOL, 'delete', name)
55
56
def MaybeRestartReplica(replica):
    """Restart `replica` only when ShouldRestart approves it.

    Args:
      replica: Parsed replica description.
    """
    if not ShouldRestart(replica):
        return
    RestartReplica(replica)
Adam Sadovsky62e37f32014-10-23 18:14:20 -070060
61
def main():
    """Read a multi-document YAML replica listing from stdin and process it.

    Each document is passed to MaybeRestartReplica. Empty documents (for
    example one produced by a trailing '---' separator) are skipped.
    """
    # NOTE(review): yaml.load_all without an explicit Loader can construct
    # arbitrary objects and is rejected by recent PyYAML releases; consider
    # yaml.safe_load_all if the input is plain data.
    for replica in yaml.load_all(sys.stdin.read()):
        if replica is None:
            # An empty YAML document parses to None; there is no replica here.
            continue
        MaybeRestartReplica(replica)
Adam Sadovsky62e37f32014-10-23 18:14:20 -070066
67
# Run the monitor only when executed as a script, not when imported.
if __name__ == '__main__':
    main()