todosapp: pull benchmarking code into module; add logLatency helper; add scan benchmark and vrpc example

Change-Id: I05f1eb9e1575c3aae20dbba068140342e6bf7fbc
diff --git a/Makefile b/Makefile
index 2999237..ea49bb4 100644
--- a/Makefile
+++ b/Makefile
@@ -44,7 +44,7 @@
 	v23 go build -a -o $@/principal v.io/x/ref/cmd/principal
 	v23 go build -a -o $@/syncbased v.io/syncbase/x/ref/services/syncbase/syncbased
 
-node_modules: package.json $(shell $(FIND) $(V23_ROOT)/roadmap/javascript/syncbase/{package.json,src})
+node_modules: package.json $(shell $(FIND) $(V23_ROOT)/roadmap/javascript/syncbase/{package.json,src} $(V23_ROOT)/release/javascript/core/{package.json,src})
 	npm prune
 	npm install
 	touch $@
diff --git a/README.md b/README.md
index b8dae45..00169cd 100644
--- a/README.md
+++ b/README.md
@@ -24,6 +24,9 @@
 
     TMPDIR=tmp ./tools/start_syncbased.sh
 
+    # Or, start from a clean slate.
+    rm -rf tmp/syncbase* && TMPDIR=tmp ./tools/start_syncbased.sh
+
 Finally, start the web app.
 
     DEBUG=1 make serve
@@ -78,24 +81,29 @@
 changes (currently via polling; soon, via watch) and emits a `'change'` event,
 triggering the same redraw procedure as described above.
 
-## Resources for debugging
+## Debugging notes
+
+### Links
 
 - https://sites.google.com/a/google.com/v-prod/
 - https://sites.google.com/a/google.com/v-prod/vanadium-services/how-to
 
-### Signature
+### Commands
+
+Signature
 
     $V23_ROOT/release/go/bin/vrpc -v23.credentials=tmp/creds signature /localhost:8200
 
-### Method call
+Method call
 
     $V23_ROOT/release/go/bin/vrpc -v23.credentials=tmp/creds call /localhost:8200 GetPermissions
+    $V23_ROOT/release/go/bin/vrpc -v23.credentials=tmp/creds call /localhost:8200/todos/db/tb Scan '""' '""'
 
-### Glob
+Glob
 
     $V23_ROOT/release/go/bin/namespace -v23.credentials=tmp/creds glob "/localhost:8200/..."
 
-### Debug
+Debug
 
     $V23_ROOT/release/go/bin/debug -v23.credentials=tmp/creds glob "/localhost:8200/__debug/stats/rpc/server/routing-id/..."
     $V23_ROOT/release/go/bin/debug -v23.credentials=tmp/creds stats read "/localhost:8200/__debug/stats/rpc/server/routing-id/c61964ab4c72ee522067eb6d5ddd22fc/methods/BeginBatch/latency-ms"
@@ -105,6 +113,7 @@
 For debugging performance issues, it can be helpful to use the JS integration
 test configuration. To do so, first run the integration test as follows.
 
+    cd $V23_ROOT/roadmap/javascript/syncbase
     NOQUIT=1 NOHEADLESS=1 make test-integration-browser
 
 This command starts a local mount table, identityd, and Syncbase mounted at
@@ -120,14 +129,31 @@
 Visit `http://localhost:4000/?d=syncbase&n=test/syncbased` in the launched
 Chrome instance to talk to your test syncbase.
 
-To run a simple benchmark (100 puts), specify query param `bm=1`.
+To run a simple benchmark (100 puts, followed by a scan of those rows), specify
+query param `bm=1`.
 
-TODO(sadovsky): Understand the following:
-- Why can test browser talk to (run benchmark on) todosapp syncbase and not to
-  test syncbase? This is the opposite of what I'd expect given the blessings.
-- Why does benchmark take 2s on test browser vs. 5.5s on normal browser?
-- Why is scan so slow? (Note, latency appears to be proportional to data size,
-  with some small fixed RPC overhead.)
+### Open questions
+
+- Why can test browser talk to normal syncbase and not to test syncbase? This is
+  the opposite of what I'd expect given the blessings.
+  - Glob from test browser to test syncbase (service.listApps) fails with "does
+    not have Resolve access".
+  - RPCs from test browser to normal syncbase should fail with "untrusted root",
+    but instead they succeed.
+
+- Why do test and normal browsers have different performance talking to the
+  same (normal) syncbase?
+  - Test browser: 100 puts takes 2s, scan takes 3.5s.
+  - Normal browser: 100 puts takes 5s, scan takes 9s.
+
+  With dev console closed, scan takes roughly 0.6s on both (see below), but 100
+  puts still takes 2s in test browser vs. 4s in normal browser.
+
+- Why is JS scan so slow? Note, latency appears to be proportional to data size,
+  with some small fixed overhead. Also note, vrpc scan takes less than 0.3s.
+
+  ANSWER: Turns out if the dev console is closed, scan is much faster (0.6s).
+  Issue filed: https://github.com/vanadium/issues/issues/610
 
 [syncbase]: https://docs.google.com/document/d/12wS_IEPf8HTE7598fcmlN-Y692OWMSneoe2tvyBEpi0/edit#
 [crx]: https://v.io/tools/vanadium-chrome-extension.html
diff --git a/browser/benchmark.js b/browser/benchmark.js
new file mode 100644
index 0000000..f1be681
--- /dev/null
+++ b/browser/benchmark.js
@@ -0,0 +1,91 @@
+// Code for benchmarking.
+
+'use strict';
+
+var async = require('async');
+var moment = require('moment');
+var syncbase = require('syncbase');
+var nosql = syncbase.nosql;
+
+exports.logLatency = logLatency;
+exports.runBenchmark = runBenchmark;
+
+var LOG_EVERYTHING = false;
+
+// Returns a string timestamp, useful for logging.
+function timestamp(t) {
+  t = t || Date.now();
+  return moment(t).format('HH:mm:ss.SSS');
+}
+
+function logStart(name) {
+  var t = Date.now();
+  console.log(timestamp(t) + ' ' + name + ' start');
+  return t;
+}
+
+function logStop(name, start) {
+  var t = Date.now();
+  console.log(timestamp(t) + ' ' + name + ' took ' + (t - start) + 'ms');
+}
+
+function logLatency(name, cb) {
+  var start = logStart(name);
+  return function() {
+    logStop(name, start);
+    cb.apply(null, arguments);
+  };
+}
+
+// Does n parallel puts with a common prefix, then returns the prefix.
+// TODO(sadovsky): According to Shyam, since these puts are being done in
+// parallel, it may be the case that each one is setting up its own VC (doing
+// auth handshake, fetching discharge, etc.), rather than all puts sharing a
+// single VC. Ali or Suharsh should know the state of VC sharing. Possible
+// workaround would be to do something that forces VC creation before starting
+// the parallel puts. (OTOH, we always run service.listApps before the puts,
+// which should create the VC.)
+function doPuts(ctx, tb, n, cb) {
+  cb = logLatency('doPuts', cb);
+  var prefix = timestamp() + '.';
+  async.times(100, function(n, cb) {
+    // TODO(sadovsky): Remove this once we loosen Syncbase's naming rules.
+    prefix = prefix.replace(/:/g, '.');
+    var key = prefix + n;
+    var value = '';
+    if (LOG_EVERYTHING) console.log('put: ' + key);
+    tb.put(ctx, key, value, function(err) {
+      if (LOG_EVERYTHING) console.log('put done: ' + key);
+      cb(err);
+    });
+  }, function(err) {
+    return cb(err, prefix);
+  });
+}
+
+// Scans (and logs) all records with the given prefix.
+function doScan(ctx, tb, prefix, cb) {
+  cb = logLatency('doScan(' + prefix + ')', cb);
+  var bytes = 0, streamErr = null;
+  tb.scan(ctx, nosql.rowrange.prefix(prefix), function(err) {
+    err = err || streamErr;
+    if (err) return cb(err);
+    console.log('scanned ' + bytes + ' bytes');
+    cb();
+  }).on('data', function(row) {
+    bytes += row.key.length + row.value.length;
+    if (LOG_EVERYTHING) console.log('scan: ' + JSON.stringify(row));
+  }).on('error', function(err) {
+    streamErr = streamErr || err.error;
+  });
+}
+
+// Assumes table 'tb' exists.
+function runBenchmark(ctx, db, cb) {
+  cb = logLatency('runBenchmark', cb);
+  var tb = db.table('tb');
+  doPuts(ctx, tb, 100, function(err, prefix) {
+    if (err) return cb(err);
+    doScan(ctx, tb, prefix, cb);
+  });
+}
diff --git a/browser/defaults.js b/browser/defaults.js
index d13714f..e9dfea4 100644
--- a/browser/defaults.js
+++ b/browser/defaults.js
@@ -3,6 +3,7 @@
 var async = require('async');
 var syncbase = require('syncbase');
 
+var bm = require('./benchmark');
 var CollectionDispatcher = require('./collection_dispatcher');
 var MemCollection = require('./mem_collection');
 var SyncbaseDispatcher = require('./syncbase_dispatcher');
@@ -63,23 +64,6 @@
   }, cb);
 }
 
-function runBenchmark(disp, cb) {
-  var start = Date.now();
-  async.times(100, function(n, cb) {
-    var key = '' + n + '.' + Date.now();
-    var value = '';
-    console.log('putting ' + key);
-    disp.tb_.put(disp.ctx_, key, value, function(err) {
-      console.log('done putting ' + key);
-      cb(err);
-    });
-  }, function(err) {
-    var end = Date.now();
-    console.log('runBenchmark done, took ' + (end - start) + 'ms');
-    return cb(err);
-  });
-}
-
 // Returns a new Vanadium context object with a timeout.
 function wt(ctx, timeout) {
   return ctx.withTimeout(timeout || 5000);
@@ -93,6 +77,7 @@
 }
 
 exports.initSyncbaseDispatcher = function(rt, name, benchmark, cb) {
+  cb = bm.logLatency('initSyncbaseDispatcher', cb);
   var service = syncbase.newService(name);
   // TODO(sadovsky): Instead of appExists, simply check for ErrExist in the
   // app.create response.
@@ -102,24 +87,23 @@
     var app = service.app('todos'), db = app.noSqlDatabase('db');
     var disp = new SyncbaseDispatcher(rt, db);
     if (exists) {
-      console.log('app exists; assuming everything has been initialized');
       if (benchmark) {
-        return runBenchmark(disp, cb);
+        return bm.runBenchmark(ctx, db, cb);
       }
+      console.log('app exists; assuming everything has been initialized');
       return cb(null, disp);
     }
     console.log('app does not exist; initializing everything');
-    console.log('-----> creating hierarchy');
     app.create(wt(ctx), {}, function(err) {
       if (err) return cb(err);
       db.create(wt(ctx), {}, function(err) {
         if (err) return cb(err);
         db.createTable(wt(ctx), 'tb', {}, function(err) {
           if (err) return cb(err);
-          console.log('-----> hierarchy created; writing rows');
           if (benchmark) {
-            return runBenchmark(disp, cb);
+            return bm.runBenchmark(ctx, db, cb);
           }
+          console.log('hierarchy created; writing rows');
           initData(disp, function(err) {
             if (err) return cb(err);
             cb(null, disp);
@@ -131,6 +115,7 @@
 };
 
 exports.initCollectionDispatcher = function(cb) {
+  cb = bm.logLatency('initCollectionDispatcher', cb);
   var lists = new MemCollection('lists'), todos = new MemCollection('todos');
   var disp = new CollectionDispatcher(lists, todos);
   initData(disp, function(err) {
diff --git a/browser/index.js b/browser/index.js
index 0e6a49b..17eb9a1 100644
--- a/browser/index.js
+++ b/browser/index.js
@@ -529,10 +529,7 @@
   };
   initDispatcher(dispType, syncbaseName, benchmark, function(err, resDisp) {
     if (err) throw err;
-    if (benchmark) {
-      console.log('benchmark done');
-      return;
-    }
+    if (benchmark) return;
     disp = resDisp;
     // TODO(sadovsky): initDispatcher with DISP_TYPE_SYNCBASE is slow. We should
     // show a "loading" message in the UI.
diff --git a/browser/syncbase_dispatcher.js b/browser/syncbase_dispatcher.js
index 9109501..e7cc62c 100644
--- a/browser/syncbase_dispatcher.js
+++ b/browser/syncbase_dispatcher.js
@@ -6,19 +6,30 @@
 // TODO(sadovsky): Currently, list and todo order are not preserved. We should
 // make the app always order these lexicographically, or better yet, use a
 // Dewey-Decimal-like scheme (with randomization).
+//
+// NOTE: For now, when an item is deleted, any sub-items that were added
+// concurrently (on some other device) are orphaned. Eventually, we'll GC
+// orphaned records; for now, we don't bother. This orphaning-based approach
+// enables us to use simple last-one-wins conflict resolution for all records
+// stored in Syncbase.
+//
+// TODO(sadovsky): Unfortunately, orphaning degrades performance, because scan
+// RPCs (e.g. scan to get all lists) read (and discard) orphaned records. If we
+// switch from scans to queries, performance should improve since all row
+// filtering will happen on the server side.
 
 'use strict';
 
 var _ = require('lodash');
 var async = require('async');
 var inherits = require('inherits');
-var moment = require('moment');
 var randomBytes = require('randombytes');
 var syncbase = require('syncbase');
 var nosql = syncbase.nosql;
 var vanadium = require('vanadium');
 var vtrace = vanadium.vtrace;
 
+var bm = require('./benchmark');
 var Dispatcher = require('./dispatcher');
 
 inherits(SyncbaseDispatcher, Dispatcher);
@@ -84,11 +95,6 @@
   return vtrace.withNewSpan(ctx, name);
 }
 
-// Returns a string timestamp, for logging.
-function timestamp(t) {
-  return moment(t || Date.now()).format('hh:mm:ss.SSS');
-}
-
 // Defines a SyncbaseDispatcher method. If the first argument to fn is not a
 // context, creates a new context with a timeout.
 function define(name, fn) {
@@ -105,17 +111,10 @@
     if (typeof args[args.length - 1] !== 'function') {
       args.push(noop);
     }
-    // Log the invocation.
-    var cb = args[args.length - 1];
-    var start = Date.now();
-    args[args.length - 1] = function() {
-      var end = Date.now();
-      console.log(name + ' done, took ' + (end - start) + 'ms');
-      cb.apply(null, arguments);
-    };
     // Drop ctx and cb, convert to JSON, drop square brackets.
+    var cb = args[args.length - 1];
     var argsStr = JSON.stringify(args.slice(1, -1)).slice(1, -1);
-    console.log(timestamp(start) + ' ' + name + '(' + argsStr + ')');
+    args[args.length - 1] = bm.logLatency(name + '(' + argsStr + ')', cb);
     return fn.apply(this, args);
   };
 }
@@ -265,8 +264,8 @@
 
 // Writes the given tag into the given table.
 define('addTagImpl_', function(ctx, tb, todoId, tag, cb) {
-  // NOTE: Syncbase currently disallows whitespace in keys, so as a quick hack
-  // we drop all whitespace before storing tags.
+  // TODO(sadovsky): Syncbase currently disallows whitespace in keys, so as a
+  // quick hack we drop all whitespace before storing tags.
   tag = tag.replace(/\s+/g, '');
   tb.put(ctx, tagKey(todoId, tag), null, cb);
 });