vmon: Fail only when all replicas fail
Fail data collection for a service only when it fails for *all* of the
service's replicas. Previously, it failed if *any* replica failed.
Change-Id: I4b3d5a3788dbec6fc4623606611040e2e0a1f2f7
diff --git a/vmon/servicecounters.go b/vmon/servicecounters.go
index 8320a85..deb8636 100644
--- a/vmon/servicecounters.go
+++ b/vmon/servicecounters.go
@@ -101,24 +101,31 @@
// Get counters for each group.
counters := []counterData{}
+ errors := []error{}
for _, group := range groups {
counterResult, err := getStat(v23ctx, ctx, group, counter.statSuffix)
if err != nil {
- return nil, err
+ errors = append(errors, err)
+ continue
}
value, err := counterResult[0].getFloat64Value()
if err != nil {
- return nil, err
+ errors = append(errors, err)
+ continue
}
location, err := getServiceLocation(v23ctx, ctx, group)
if err != nil {
- return nil, err
+ errors = append(errors, err)
+ continue
}
counters = append(counters, counterData{
location: location,
value: value,
})
}
+ if len(errors) == len(groups) {
+ return counters, fmt.Errorf("%v", errors)
+ }
return counters, nil
}
diff --git a/vmon/servicemetadata.go b/vmon/servicemetadata.go
index 15a050b..e6f1506 100644
--- a/vmon/servicemetadata.go
+++ b/vmon/servicemetadata.go
@@ -105,26 +105,33 @@
// Get metadata for each group.
metadata := []metadataData{}
+ errors := []error{}
for _, group := range groups {
// Query build time.
timeResult, err := getStat(v23ctx, ctx, group, buildTimeStatSuffix)
if err != nil {
- return nil, err
+ errors = append(errors, err)
+ continue
}
strTime := timeResult[0].getStringValue()
buildTime, err := time.Parse("2006-01-02T15:04:05Z", strTime)
if err != nil {
- return nil, fmt.Errorf("Parse(%v) failed: %v", strTime, err)
+ errors = append(errors, fmt.Errorf("Parse(%v) failed: %v", strTime, err))
+ continue
}
location, err := getServiceLocation(v23ctx, ctx, group)
if err != nil {
- return nil, err
+ errors = append(errors, err)
+ continue
}
metadata = append(metadata, metadataData{
location: location,
buildTime: buildTime,
})
}
+ if len(errors) == len(groups) {
+ return nil, fmt.Errorf("%v", errors)
+ }
return metadata, nil
}
diff --git a/vmon/servicemethodlatency.go b/vmon/servicemethodlatency.go
index 454bb31..34ff880 100644
--- a/vmon/servicemethodlatency.go
+++ b/vmon/servicemethodlatency.go
@@ -112,12 +112,14 @@
// Get per-method latency for each group.
latencies := []perMethodLatencyData{}
+ errors := []error{}
for _, group := range groups {
latency := map[string]float64{}
// Run "debug stats read" for the corresponding object.
statsResult, err := getStat(v23ctx, ctx, group, statsSuffix)
if err != nil {
- return nil, err
+ errors = append(errors, err)
+ continue
}
// Parse output.
latPerMethod := map[string]float64{}
@@ -139,17 +141,22 @@
}
latency = latPerMethod
if len(latency) == 0 {
- return nil, fmt.Errorf("failed to check latency for service %q", serviceName)
+ errors = append(errors, fmt.Errorf("failed to check latency for service %q", serviceName))
+ continue
}
location, err := getServiceLocation(v23ctx, ctx, group)
if err != nil {
- return nil, err
+ errors = append(errors, err)
+ continue
}
latencies = append(latencies, perMethodLatencyData{
location: location,
latency: latency,
})
}
+ if len(errors) == len(groups) {
+ return nil, fmt.Errorf("%v", errors)
+ }
return latencies, nil
}
diff --git a/vmon/serviceqps.go b/vmon/serviceqps.go
index 3dc4c7a..4fa0c9c 100644
--- a/vmon/serviceqps.go
+++ b/vmon/serviceqps.go
@@ -137,12 +137,14 @@
// Get qps for each group.
qps := []qpsData{}
+ errors := []error{}
for _, group := range groups {
perMethodQPS := map[string]float64{}
totalQPS := 0.0
qpsResults, err := getStat(v23ctx, ctx, group, qpsSuffix)
if err != nil {
- return nil, err
+ errors = append(errors, err)
+ continue
}
curPerMethodQPS := map[string]float64{}
curTotalQPS := 0.0
@@ -163,11 +165,13 @@
perMethodQPS = curPerMethodQPS
totalQPS = curTotalQPS
if len(perMethodQPS) == 0 {
- return nil, fmt.Errorf("failed to check qps for service %q", serviceName)
+ errors = append(errors, fmt.Errorf("failed to check qps for service %q", serviceName))
+ continue
}
location, err := getServiceLocation(v23ctx, ctx, group)
if err != nil {
- return nil, err
+ errors = append(errors, err)
+ continue
}
qps = append(qps, qpsData{
location: location,
@@ -176,5 +180,9 @@
})
}
+ if len(errors) == len(groups) {
+ return nil, fmt.Errorf("%v", errors)
+ }
+
return qps, nil
}