vmon: Fail only when all replicas fail

Fail the data collection only when it fails for *all* the replicas of
a service. The previous behavior was to fail if *any* of them failed.

Change-Id: I4b3d5a3788dbec6fc4623606611040e2e0a1f2f7
diff --git a/vmon/servicecounters.go b/vmon/servicecounters.go
index 8320a85..deb8636 100644
--- a/vmon/servicecounters.go
+++ b/vmon/servicecounters.go
@@ -101,24 +101,31 @@
 
 	// Get counters for each group.
 	counters := []counterData{}
+	errors := []error{}
 	for _, group := range groups {
 		counterResult, err := getStat(v23ctx, ctx, group, counter.statSuffix)
 		if err != nil {
-			return nil, err
+			errors = append(errors, err)
+			continue
 		}
 		value, err := counterResult[0].getFloat64Value()
 		if err != nil {
-			return nil, err
+			errors = append(errors, err)
+			continue
 		}
 		location, err := getServiceLocation(v23ctx, ctx, group)
 		if err != nil {
-			return nil, err
+			errors = append(errors, err)
+			continue
 		}
 		counters = append(counters, counterData{
 			location: location,
 			value:    value,
 		})
 	}
+	if len(errors) == len(groups) {
+		return counters, fmt.Errorf("%v", errors)
+	}
 
 	return counters, nil
 }
diff --git a/vmon/servicemetadata.go b/vmon/servicemetadata.go
index 15a050b..e6f1506 100644
--- a/vmon/servicemetadata.go
+++ b/vmon/servicemetadata.go
@@ -105,26 +105,33 @@
 
 	// Get metadata for each group.
 	metadata := []metadataData{}
+	errors := []error{}
 	for _, group := range groups {
 		// Query build time.
 		timeResult, err := getStat(v23ctx, ctx, group, buildTimeStatSuffix)
 		if err != nil {
-			return nil, err
+			errors = append(errors, err)
+			continue
 		}
 		strTime := timeResult[0].getStringValue()
 		buildTime, err := time.Parse("2006-01-02T15:04:05Z", strTime)
 		if err != nil {
-			return nil, fmt.Errorf("Parse(%v) failed: %v", strTime, err)
+			errors = append(errors, fmt.Errorf("Parse(%v) failed: %v", strTime, err))
+			continue
 		}
 		location, err := getServiceLocation(v23ctx, ctx, group)
 		if err != nil {
-			return nil, err
+			errors = append(errors, err)
+			continue
 		}
 		metadata = append(metadata, metadataData{
 			location:  location,
 			buildTime: buildTime,
 		})
 	}
+	if len(errors) == len(groups) {
+		return nil, fmt.Errorf("%v", errors)
+	}
 
 	return metadata, nil
 }
diff --git a/vmon/servicemethodlatency.go b/vmon/servicemethodlatency.go
index 454bb31..34ff880 100644
--- a/vmon/servicemethodlatency.go
+++ b/vmon/servicemethodlatency.go
@@ -112,12 +112,14 @@
 
 	// Get per-method latency for each group.
 	latencies := []perMethodLatencyData{}
+	errors := []error{}
 	for _, group := range groups {
 		latency := map[string]float64{}
 		// Run "debug stats read" for the corresponding object.
 		statsResult, err := getStat(v23ctx, ctx, group, statsSuffix)
 		if err != nil {
-			return nil, err
+			errors = append(errors, err)
+			continue
 		}
 		// Parse output.
 		latPerMethod := map[string]float64{}
@@ -139,17 +141,22 @@
 		}
 		latency = latPerMethod
 		if len(latency) == 0 {
-			return nil, fmt.Errorf("failed to check latency for service %q", serviceName)
+			errors = append(errors, fmt.Errorf("failed to check latency for service %q", serviceName))
+			continue
 		}
 		location, err := getServiceLocation(v23ctx, ctx, group)
 		if err != nil {
-			return nil, err
+			errors = append(errors, err)
+			continue
 		}
 		latencies = append(latencies, perMethodLatencyData{
 			location: location,
 			latency:  latency,
 		})
 	}
+	if len(errors) == len(groups) {
+		return nil, fmt.Errorf("%v", errors)
+	}
 
 	return latencies, nil
 }
diff --git a/vmon/serviceqps.go b/vmon/serviceqps.go
index 3dc4c7a..4fa0c9c 100644
--- a/vmon/serviceqps.go
+++ b/vmon/serviceqps.go
@@ -137,12 +137,14 @@
 
 	// Get qps for each group.
 	qps := []qpsData{}
+	errors := []error{}
 	for _, group := range groups {
 		perMethodQPS := map[string]float64{}
 		totalQPS := 0.0
 		qpsResults, err := getStat(v23ctx, ctx, group, qpsSuffix)
 		if err != nil {
-			return nil, err
+			errors = append(errors, err)
+			continue
 		}
 		curPerMethodQPS := map[string]float64{}
 		curTotalQPS := 0.0
@@ -163,11 +165,13 @@
 		perMethodQPS = curPerMethodQPS
 		totalQPS = curTotalQPS
 		if len(perMethodQPS) == 0 {
-			return nil, fmt.Errorf("failed to check qps for service %q", serviceName)
+			errors = append(errors, fmt.Errorf("failed to check qps for service %q", serviceName))
+			continue
 		}
 		location, err := getServiceLocation(v23ctx, ctx, group)
 		if err != nil {
-			return nil, err
+			errors = append(errors, err)
+			continue
 		}
 		qps = append(qps, qpsData{
 			location:     location,
@@ -176,5 +180,9 @@
 		})
 	}
 
+	if len(errors) == len(groups) {
+		return nil, fmt.Errorf("%v", errors)
+	}
+
 	return qps, nil
 }