blob: 5a5a0f62d3dcd51650c8923617780da2bd8ec60a [file] [log] [blame]
#!/usr/bin/python2.7
# Copyright 2015 The Vanadium Authors. All rights reserved.
# Use of this source code is governed by a BSD-style
# license that can be found in the LICENSE file.
"""Playground GCE monitoring script.
This needs to run on a GCE VM with the replica pool service account scope
(https://www.googleapis.com/auth/ndev.cloudman).
You also need to enable preview in gcloud:
$ gcloud components update preview
Then add it to your crontab, e.g.
*/10 * * * * gcloud preview replica-pools --zone us-central1-a replicas --pool playground-pool list|monitor.py
"""
import datetime
import subprocess
import sys
import yaml
DESIRED = 2
MAX_ALIVE_MIN = 60
POOL = 'playground-pool'
def RunCommand(*args):
cmd = ['gcloud', 'preview', 'replica-pools', '--zone', 'us-central1-a']
cmd.extend(args)
subprocess.check_call(cmd)
def ResizePool(size):
RunCommand('resize', '--new-size', str(size), POOL)
def ShouldRestart(replica):
if replica['status']['state'] == 'PERMANENTLY_FAILING':
print 'Replica %s failed: %s' % (
replica['name'], replica['status']['details'])
return True
return IsTooOld(replica)
def IsTooOld(replica):
start_text = replica['status']['vmStartTime']
if start_text:
start = yaml.load(start_text)
uptime = datetime.datetime.now() - start
return uptime.seconds > MAX_ALIVE_MIN * 60
def RestartReplica(replica):
print 'Restarting replica ' + replica['name']
ResizePool(DESIRED + 1)
RunCommand('replicas', '--pool', POOL, 'delete', replica['name'])
def MaybeRestartReplica(replica):
if ShouldRestart(replica):
RestartReplica(replica)
def main():
replicas = yaml.load_all(sys.stdin.read())
for replica in replicas:
MaybeRestartReplica(replica)
if __name__ == '__main__':
main()