Fix overly long test run time caused by c/4467
Change-Id: I1cd48f23fb73009824541a601e8fe04e8d2ea846
diff --git a/runtimes/google/ipc/client.go b/runtimes/google/ipc/client.go
index 2c078c3..8554995 100644
--- a/runtimes/google/ipc/client.go
+++ b/runtimes/google/ipc/client.go
@@ -3,6 +3,7 @@
import (
"fmt"
"io"
+ "math"
"math/rand"
"strings"
"sync"
@@ -114,31 +115,78 @@
return flow, suffix, nil
}
+// backoff sleeps for a randomized exponential delay before retry n (capped at maxBackoff and at just under the time left before deadline) and returns true; it returns false, without sleeping, when a millisecond or less remains. The randomness deters error convoys from forming.
+func backoff(n int, deadline time.Time) bool {
+	// Clamp in floating point before converting: for large n the product is outside the int64 range and converting it to a time.Duration gives an implementation-dependent value.
+	b := time.Duration(maxBackoff)
+	if f := math.Pow(1.5+(rand.Float64()/2.0), float64(n)) * float64(time.Second); f < float64(maxBackoff) {
+		b = time.Duration(f)
+	}
+	r := deadline.Sub(time.Now())
+	if b > r {
+		// Leave a little time for the call to start or we'll just time out in startCall
+		// before we actually do anything. If we just have a millisecond left, give up.
+		if r <= time.Millisecond {
+			return false
+		}
+		b = r - time.Millisecond
+	}
+	time.Sleep(b)
+	return true
+}
+
+// retriable reports whether err is worth retrying, judged purely by substrings of its message.
+// TODO(p): replace these checks with m3b's retry bit when it exists. This is currently a colossal hack.
+func retriable(err error) bool {
+	msg := err.Error()
+	switch {
+	case strings.Contains(msg, "authorized"):
+		// Authentication errors are permanent; this check takes precedence over the ones below.
+		return false
+	case strings.Contains(msg, "ipc: Resolve"):
+		// Resolution errors are retriable.
+		return true
+	case strings.Contains(msg, "errno"):
+		// Kernel level errors are retriable.
+		return true
+	}
+	return false
+}
+
func (c *client) StartCall(ctx context.T, name, method string, args []interface{}, opts ...ipc.CallOpt) (ipc.Call, error) {
+ var retry = true // cleared by RetryTimeoutOpt(0) below: make a single attempt only
deadline, hasDeadline := ctx.Deadline()
if !hasDeadline {
- // If no deadline is set, use a long but finite one.
+ // If no deadline is set, use the default call timeout.
deadline = time.Now().Add(defaultCallTimeout)
}
+ for _, o := range opts { // look for a RetryTimeoutOpt among the call options
+ r, ok := o.(veyron2.RetryTimeoutOpt)
+ if !ok {
+ continue
+ }
+ if r == 0 {
+ retry = false
+ } else {
+ deadline = time.Now().Add(time.Duration(r)) // non-zero: replaces the deadline chosen above
+ }
+ break // only the first RetryTimeoutOpt is considered
+ }
var lastErr verror.E
- b := time.Duration(1.5 + (rand.Float32() / 2.0))
- backoff := b
- for deadline.After(time.Now()) {
+ for retries := 0; deadline.After(time.Now()); retries++ { // NOTE(review): if the deadline has already passed the body never runs and lastErr is still its zero value when returned - confirm callers handle that
+ if retries != 0 { // no backoff before the first attempt
+ if !backoff(retries, deadline) {
+ break // too little time left before the deadline to try again
+ }
+ }
call, err := c.startCall(ctx, name, method, args, opts...)
if err == nil {
return call, nil
}
lastErr = err
- // TODO(p): replace these checks with m3b's retry bit when it exists.
- if !strings.Contains(err.Error(), "ipc: Resolve") &&
- !(strings.Contains(err.Error(), "ipc: couldn't connect") && strings.Contains(err.Error(), "errno")) {
+ if !retry || !retriable(err) { // stop when retries are disabled or the error is not retriable
break
}
- time.Sleep(backoff)
- backoff = backoff * b
- if backoff > maxBackoff {
- backoff = maxBackoff
- }
}
return nil, lastErr
}
diff --git a/services/mounttable/lib/mounttable_test.go b/services/mounttable/lib/mounttable_test.go
index 76ed43d..dc9894c 100644
--- a/services/mounttable/lib/mounttable_test.go
+++ b/services/mounttable/lib/mounttable_test.go
@@ -123,7 +123,7 @@
if err != nil {
boom(t, "Failed to BindMountTable: %s", err)
}
- if err := mtpt.Mount(rt.R().NewContext(), service, uint32(ttlSecs)); err != nil {
+ if err := mtpt.Mount(rt.R().NewContext(), service, uint32(ttlSecs), veyron2.RetryTimeoutOpt(0)); err != nil {
if shouldSucceed {
boom(t, "Failed to Mount %s onto %s: %s", service, name, err)
}
@@ -137,7 +137,7 @@
if err != nil {
boom(t, "Failed to BindMountTable: %s", err)
}
- if err := mtpt.Unmount(rt.R().NewContext(), service); err != nil {
+ if err := mtpt.Unmount(rt.R().NewContext(), service, veyron2.RetryTimeoutOpt(0)); err != nil {
if shouldSucceed {
boom(t, "Failed to Unmount %s onto %s: %s", service, name, err)
}
@@ -161,7 +161,7 @@
if err != nil {
boom(t, "Failed to BindCollection: %s", err)
}
- contents, err := objectPtr.Lookup(rt.R().NewContext())
+ contents, err := objectPtr.Lookup(rt.R().NewContext(), veyron2.RetryTimeoutOpt(0))
if err != nil {
if shouldSucceed {
boom(t, "Failed to Lookup %s: %s", name, err)