blob: 2fbbb53e0db4e487ad23b7d080670837497a9e29 [file] [log] [blame]
// Copyright 2015 The Vanadium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package monitoring
import (
"fmt"
"io"
"io/ioutil"
"net/http"
"strings"
"time"
"v.io/jiri/tool"
"v.io/v23"
"v.io/v23/context"
"v.io/v23/naming"
"v.io/v23/options"
"v.io/v23/rpc"
"v.io/v23/services/stats"
"v.io/v23/vdl"
"golang.org/x/oauth2"
"golang.org/x/oauth2/google"
cloudmonitoring "google.golang.org/api/monitoring/v3"
)
const (
defaultTimeout = 20 * time.Second
)
// Human-readable service names.
const (
SNMounttable = "mounttable"
SNIdentity = "identity service"
SNMacaroon = "macaroon service"
SNBinaryDischarger = "binary discharger"
SNRole = "role service"
SNProxy = "proxy service"
SNBenchmark = "benchmark service"
SNAllocator = "syncbase allocator"
hostnameStatSuffix = "__debug/stats/system/hostname"
zoneStatSuffix = "__debug/stats/system/gce/zone"
)
// Human-readable metric names.
const (
MNMounttableMountedServers = "mounttable mounted servers"
MNMounttableNodes = "mounttable nodes"
)
// serviceMountedNames is a map from human-readable service names to their
// relative mounted names in the global mounttable.
var serviceMountedNames = map[string]string{
SNMounttable: "mt",
SNIdentity: "identity/dev.v.io:u",
SNMacaroon: "identity/dev.v.io:u/macaroon",
SNBinaryDischarger: "identity/dev.v.io:u/discharger",
SNRole: "identity/role",
SNProxy: "proxy-mon",
SNBenchmark: "benchmarks",
SNAllocator: "syncbase-allocator",
}
// StatValue stores the name and the value returned from the GetStat function.
type StatValue struct {
Name string
Value interface{}
}
func (sv *StatValue) GetStringValue() string {
return fmt.Sprint(sv.Value)
}
func (sv *StatValue) GetFloat64Value() (float64, error) {
switch i := sv.Value.(type) {
case float64:
return i, nil
case int64:
return float64(i), nil
case uint64:
return float64(i), nil
default:
return 0, fmt.Errorf("invalid value: %v, %T", sv.Value, sv.Value)
}
}
type ServiceLocation struct {
Instance string
Zone string
}
// GetServiceMountedName gets the full mounted name for the given service.
func GetServiceMountedName(namespaceRoot, serviceName string) (string, error) {
relativeName, ok := serviceMountedNames[serviceName]
if !ok {
return "", fmt.Errorf("service %q doesn't exist", serviceName)
}
return fmt.Sprintf("%s/%s", namespaceRoot, relativeName), nil
}
// ResolveAndProcessServiceName resolves the given service name and groups the
// result entries by their routing ids.
func ResolveAndProcessServiceName(v23ctx *context.T, ctx *tool.Context, serviceName, serviceMountedName string) (map[string]naming.MountEntry, error) {
// Resolve the name.
v23ctx, cancel := context.WithTimeout(v23ctx, defaultTimeout)
defer cancel()
ns := v23.GetNamespace(v23ctx)
entry, err := ns.ShallowResolve(v23ctx, serviceMountedName)
if err != nil {
return nil, err
}
resolvedNames := []string{}
for _, server := range entry.Servers {
fullName := naming.JoinAddressName(server.Server, entry.Name)
resolvedNames = append(resolvedNames, fullName)
}
// Group resolved names by their routing ids.
groups := map[string]naming.MountEntry{}
for _, resolvedName := range resolvedNames {
serverName, relativeName := naming.SplitAddressName(resolvedName)
ep, err := naming.ParseEndpoint(serverName)
if err != nil {
return nil, err
}
routingId := ep.RoutingID.String()
if _, ok := groups[routingId]; !ok {
groups[routingId] = naming.MountEntry{}
}
curMountEntry := groups[routingId]
curMountEntry.Servers = append(curMountEntry.Servers, naming.MountedServer{Server: serverName})
// resolvedNames are resolved from the same service so they should have
// the same relative name.
curMountEntry.Name = relativeName
groups[routingId] = curMountEntry
}
return groups, nil
}
// GetServiceLocation returns the given service's location (instance and zone).
// If the service is replicated, the instance name is the pod name.
//
// To make it simpler and faster, we look up service's location in hard-coded "zone maps"
// for both non-replicated and replicated services.
func GetServiceLocation(v23ctx *context.T, ctx *tool.Context, me naming.MountEntry) (*ServiceLocation, error) {
// Check "__debug/stats/system/metadata/hostname" stat to get service's
// host name.
me.Name = ""
hostnameResult, err := GetStat(v23ctx, ctx, me, hostnameStatSuffix)
if err != nil {
return nil, err
}
hostname := hostnameResult[0].GetStringValue()
// Check "__debug/stats/system/gce/zone" stat to get service's
// zone name.
zoneResult, err := GetStat(v23ctx, ctx, me, zoneStatSuffix)
if err != nil {
return nil, err
}
zone := zoneResult[0].GetStringValue()
// The zone stat exported by services is in the form of:
// projects/632758215260/zones/us-central1-c
// We only need the last part.
parts := strings.Split(zone, "/")
zone = parts[len(parts)-1]
return &ServiceLocation{
Instance: hostname,
Zone: zone,
}, nil
}
// GetStat gets the given stat using rpc.
func GetStat(v23ctx *context.T, ctx *tool.Context, me naming.MountEntry, pattern string) ([]*StatValue, error) {
v23ctx, cancel := context.WithTimeout(v23ctx, defaultTimeout)
defer cancel()
call, err := v23.GetClient(v23ctx).StartCall(v23ctx, "", rpc.GlobMethod, []interface{}{pattern}, options.Preresolved{&me})
if err != nil {
return nil, err
}
hasErrors := false
ret := []*StatValue{}
mountEntryName := me.Name
for {
var gr naming.GlobReply
err := call.Recv(&gr)
if err == io.EOF {
break
}
if err != nil {
return nil, err
}
switch v := gr.(type) {
case naming.GlobReplyEntry:
me.Name = naming.Join(mountEntryName, v.Value.Name)
value, err := stats.StatsClient("").Value(v23ctx, options.Preresolved{&me})
if err != nil {
fmt.Fprintf(ctx.Stderr(), "Failed to get stat (pattern: %q, entry: %#v): %v\n%v\n", pattern, me, v.Value.Name, err)
hasErrors = true
continue
}
var convertedValue interface{}
if err := vdl.Convert(&convertedValue, value); err != nil {
fmt.Fprintf(ctx.Stderr(), "Failed to convert value for %v (pattern: %q, entry: %#v): %v\n", pattern, me, v.Value.Name, err)
hasErrors = true
continue
}
ret = append(ret, &StatValue{
Name: v.Value.Name,
Value: convertedValue,
})
case naming.GlobReplyError:
fmt.Fprintf(ctx.Stderr(), "Glob failed at %q: %v", v.Value.Name, v.Value.Error)
}
}
if hasErrors || len(ret) == 0 {
return nil, fmt.Errorf("failed to get stat (pattern: %q, entry: %#v)", pattern, me)
}
if err := call.Finish(); err != nil {
return nil, err
}
return ret, nil
}
func createClient(keyFilePath string) (*http.Client, error) {
if len(keyFilePath) > 0 {
data, err := ioutil.ReadFile(keyFilePath)
if err != nil {
return nil, err
}
conf, err := google.JWTConfigFromJSON(data, cloudmonitoring.MonitoringScope)
if err != nil {
return nil, fmt.Errorf("failed to create JWT config file: %v", err)
}
return conf.Client(oauth2.NoContext), nil
}
return google.DefaultClient(oauth2.NoContext, cloudmonitoring.MonitoringScope)
}