mojo/syncbase: Retry Syncgroup Join
This bug appeared soon after submitting my benchmark.
When running on Android, the syncgroup join occasionally fails. It is
not clear to me why, but to avoid random crashes, the join is now
retried in a while loop with a try/catch block.
This seems to stabilize the benchmark, but the underlying bug remains. It is tracked at:
https://github.com/vanadium/issues/issues/1162
Croupier does not run into this, so a recent change may be causing it.
It may also be related to the syncgroup create bug.
Change-Id: I66e454c9164c50a90d11c848e226277b7c015813
diff --git a/Makefile b/Makefile
index 3dc4a2e..e7f3013 100644
--- a/Makefile
+++ b/Makefile
@@ -30,7 +30,7 @@
# directory inside APP_HOME_DIR.) We set syncbase root-dir inside
# APP_HOME_DIR for the same reason.
APP_HOME_DIR = /data/data/org.chromium.mojo.shell/app_home
- SYNCBASE_ROOT_DIR=$(APP_HOME_DIR)/syncbase_data
+ SYNCBASE_ROOT_DIR=$(APP_HOME_DIR)/mojo_syncbase_data
ANDROID_CREDS_DIR := /sdcard/v23creds
V23_MOJO_FLAGS += --logtostderr=true --root-dir=$(SYNCBASE_ROOT_DIR) --v23.credentials=$(ANDROID_CREDS_DIR)
@@ -152,6 +152,9 @@
# Note: The Syncbase Creator (peer 0) should be created first; it may even take a minute to be ready.
.PHONY: benchmark-pingpong
benchmark-pingpong: packages $(SYNCBASE_BUILD_DIR)/syncbase_server.mojo gen-mojom test-preparation | mojo-env-check creds
+ifdef ANDROID
+ adb -s $(DEVICE_ID) push -p $(PWD)/creds $(ANDROID_CREDS_DIR)
+endif
#$(MOJO_DEVTOOLS)/mojo_run --config-file $(PWD)/mojoconfig --shell-path $(MOJO_SHELL) \
# $(MOJO_SHELL_FLAGS) $(MOJO_ANDROID_FLAGS) "https://benchmark.mojo.v.io/pingpong/pingpong.dart $(ARGS)"
$(call MOJO_RUN,"https://benchmark.mojo.v.io/pingpong/pingpong.dart $(ARGS)")
diff --git a/benchmark/pingpong/pingpong_test.dart b/benchmark/pingpong/pingpong_test.dart
index 7ff3b38..6b7d235 100644
--- a/benchmark/pingpong/pingpong_test.dart
+++ b/benchmark/pingpong/pingpong_test.dart
@@ -91,6 +91,11 @@
await tb.create(openPerms);
}
+ // TODO(alexfandrianto): We should add a now resume marker constant.
+ // https://github.com/vanadium/issues/issues/1155
+ Stream<sc.WatchChange> watchStream =
+ db.watch(tbName, syncPrefix, UTF8.encode('now'));
+
// Devices will find each other at this sgName.
// Note: We can also accomplish this via discovery, but for simplicity, we
// will use a common location on the global mount table.
@@ -116,15 +121,26 @@
await sg.create(syncSpec, syncInfo);
} else {
print('Joining Syncgroup ${sgName} as peer ${peerID}.');
- await sg.join(syncInfo);
+
+ // TODO(alexfandrianto): Why does the first join fail so often?
+ // Same problem as create (except that join doesn't retry)?
+ // Is something slowly trying to mount? Or is it something else?
+ // https://github.com/vanadium/issues/issues/1162
+ bool success = false;
+ while (!success) {
+ try {
+ await sg.join(syncInfo);
+ success = true;
+ } catch(e) {
+ print('Failed to join. Waiting...');
+ await new Future.delayed(new Duration(seconds: 5));
+ print('Trying again...');
+ }
+ }
}
// After entering the syncgroup, we should watch the table.
print('Ready to time sync!');
- // TODO(alexfandrianto): We should add a now resume marker constant.
- // https://github.com/vanadium/issues/issues/1155
- Stream<sc.WatchChange> watchStream =
- db.watch(tbName, syncPrefix, UTF8.encode('now'));
// During this phase, there will be numPeers * (numPeers - 1) watch updates.
// Everybody will write a value to everyone else.