Merge branch 'master' of /tmp/staging2/roadmap.go.syncbase into sb
diff --git a/cmd/sb51/doc.go b/cmd/sb51/doc.go
new file mode 100644
index 0000000..0cd9c36
--- /dev/null
+++ b/cmd/sb51/doc.go
@@ -0,0 +1,55 @@
+// Copyright 2015 The Vanadium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// Antimony (sb51) is a Syncbase general-purpose client and management utility.
+// It currently supports experimenting with the Syncbase query language.
+//
+// The 'sh' command connects to a specified database on a Syncbase instance,
+// creating it if it does not yet exist when -create-missing is specified.
+// The user can then enter the following at the command line:
+// 1. dump - to get a dump of the database
+// 2. a syncbase select statement - which is executed, with results printed to stdout
+// 3. make-demo - to create demo tables in the database to experiment with, equivalent to the -make-demo flag
+// 4. exit (or quit) - to exit the program
+//
+// When the shell is running non-interactively (stdin not connected to a tty),
+// errors cause the shell to exit with a non-zero status.
+//
+// To build client:
+// v23 go install v.io/syncbase/x/ref/syncbase/sb51
+//
+// To run client:
+// $V23_ROOT/roadmap/go/bin/sb51 sh <appname> <dbname>
+//
+// Sample run (assuming a syncbase service is mounted at '/:8101/syncbase',
+// otherwise specify using -service flag):
+// > $V23_ROOT/roadmap/go/bin/sb51 sh -create-missing -make-demo -format=csv demoapp demodb
+// ? select v.Name, v.Address.State from DemoCustomers where Type(v) = "Customer";
+// v.Name,v.Address.State
+// John Smith,CA
+// Bat Masterson,IA
+// ? select v.CustId, v.InvoiceNum, v.ShipTo.Zip, v.Amount from DemoCustomers where Type(v) = "Invoice" and v.Amount > 100;
+// v.CustId,v.InvoiceNum,v.ShipTo.Zip,v.Amount
+// 2,1001,50055,166
+// 2,1002,50055,243
+// 2,1004,50055,787
+// ? select k, v fro DemoCustomers;
+// Error:
+// select k, v fro DemoCustomers
+// ^
+// 13: Expected 'from', found fro.
+// ? select k, v from DemoCustomers;
+// k,v
+// 001,"{Name: ""John Smith"", Id: 1, Active: true, Address: {Street: ""1 Main St."", City: ""Palo Alto"", State: ""CA"", Zip: ""94303""}, Credit: {Agency: Equifax, Report: EquifaxReport: {Rating: 65}}}"
+// 001001,"{CustId: 1, InvoiceNum: 1000, Amount: 42, ShipTo: {Street: ""1 Main St."", City: ""Palo Alto"", State: ""CA"", Zip: ""94303""}}"
+// 001002,"{CustId: 1, InvoiceNum: 1003, Amount: 7, ShipTo: {Street: ""2 Main St."", City: ""Palo Alto"", State: ""CA"", Zip: ""94303""}}"
+// 001003,"{CustId: 1, InvoiceNum: 1005, Amount: 88, ShipTo: {Street: ""3 Main St."", City: ""Palo Alto"", State: ""CA"", Zip: ""94303""}}"
+// 002,"{Name: ""Bat Masterson"", Id: 2, Active: true, Address: {Street: ""777 Any St."", City: ""Collins"", State: ""IA"", Zip: ""50055""}, Credit: {Agency: TransUnion, Report: TransUnionReport: {Rating: 80}}}"
+// 002001,"{CustId: 2, InvoiceNum: 1001, Amount: 166, ShipTo: {Street: ""777 Any St."", City: ""Collins"", State: ""IA"", Zip: ""50055""}}"
+// 002002,"{CustId: 2, InvoiceNum: 1002, Amount: 243, ShipTo: {Street: ""888 Any St."", City: ""Collins"", State: ""IA"", Zip: ""50055""}}"
+// 002003,"{CustId: 2, InvoiceNum: 1004, Amount: 787, ShipTo: {Street: ""999 Any St."", City: ""Collins"", State: ""IA"", Zip: ""50055""}}"
+// 002004,"{CustId: 2, InvoiceNum: 1006, Amount: 88, ShipTo: {Street: ""101010 Any St."", City: ""Collins"", State: ""IA"", Zip: ""50055""}}"
+// ? exit;
+// >
+package main
diff --git a/cmd/sb51/internal/demodb/db.go b/cmd/sb51/internal/demodb/db.go
new file mode 100644
index 0000000..2674361
--- /dev/null
+++ b/cmd/sb51/internal/demodb/db.go
@@ -0,0 +1,138 @@
+// Copyright 2015 The Vanadium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package demodb
+
+import (
+ "fmt"
+ "time"
+
+ wire "v.io/syncbase/v23/services/syncbase/nosql"
+ "v.io/syncbase/v23/syncbase/nosql"
+ "v.io/v23/context"
+ "v.io/v23/vdl"
+)
+
+type kv struct {
+ key string
+ value *vdl.Value
+}
+
+type table struct {
+ name string
+ rows []kv
+}
+
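+// demoPrefix is prepended to each table name below (e.g. "Customers" becomes
+// "DemoCustomers", as referenced in the sb51 documentation examples).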
+const demoPrefix = "Demo"
+
+var demoTables = []table{
+ table{
+ name: "Customers",
+ rows: []kv{
+ kv{
+ "001",
+ vdl.ValueOf(Customer{"John Smith", 1, true, AddressInfo{"1 Main St.", "Palo Alto", "CA", "94303"}, CreditReport{Agency: CreditAgencyEquifax, Report: AgencyReportEquifaxReport{EquifaxCreditReport{'A'}}}}),
+ },
+ kv{
+ "001001",
+ vdl.ValueOf(Invoice{1, 1000, 42, AddressInfo{"1 Main St.", "Palo Alto", "CA", "94303"}}),
+ },
+ kv{
+ "001002",
+ vdl.ValueOf(Invoice{1, 1003, 7, AddressInfo{"2 Main St.", "Palo Alto", "CA", "94303"}}),
+ },
+ kv{
+ "001003",
+ vdl.ValueOf(Invoice{1, 1005, 88, AddressInfo{"3 Main St.", "Palo Alto", "CA", "94303"}}),
+ },
+ kv{
+ "002",
+ vdl.ValueOf(Customer{"Bat Masterson", 2, true, AddressInfo{"777 Any St.", "Collins", "IA", "50055"}, CreditReport{Agency: CreditAgencyTransUnion, Report: AgencyReportTransUnionReport{TransUnionCreditReport{80}}}}),
+ },
+ kv{
+ "002001",
+ vdl.ValueOf(Invoice{2, 1001, 166, AddressInfo{"777 Any St.", "Collins", "IA", "50055"}}),
+ },
+ kv{
+ "002002",
+ vdl.ValueOf(Invoice{2, 1002, 243, AddressInfo{"888 Any St.", "Collins", "IA", "50055"}}),
+ },
+ kv{
+ "002003",
+ vdl.ValueOf(Invoice{2, 1004, 787, AddressInfo{"999 Any St.", "Collins", "IA", "50055"}}),
+ },
+ kv{
+ "002004",
+ vdl.ValueOf(Invoice{2, 1006, 88, AddressInfo{"101010 Any St.", "Collins", "IA", "50055"}}),
+ },
+ },
+ },
+ table{
+ name: "Numbers",
+ rows: []kv{
+ kv{
+ "001",
+ vdl.ValueOf(Numbers{byte(12), uint16(1234), uint32(5678), uint64(999888777666), int16(9876), int32(876543), int64(128), float32(3.14159), float64(2.71828182846), complex64(123.0 + 7.0i), complex128(456.789 + 10.1112i)}),
+ },
+ kv{
+ "002",
+ vdl.ValueOf(Numbers{byte(9), uint16(99), uint32(999), uint64(9999999), int16(9), int32(99), int64(88), float32(1.41421356237), float64(1.73205080757), complex64(9.87 + 7.65i), complex128(4.32 + 1.0i)}),
+ },
+ kv{
+ "003",
+ vdl.ValueOf(Numbers{byte(210), uint16(210), uint32(210), uint64(210), int16(210), int32(210), int64(210), float32(210.0), float64(210.0), complex64(210.0 + 0.0i), complex128(210.0 + 0.0i)}),
+ },
+ },
+ },
+ table{
+ name: "Composites",
+ rows: []kv{
+ kv{
+ "uno",
+ vdl.ValueOf(Composite{Array2String{"foo", "bar"}, []int32{1, 2}, map[int32]struct{}{1: struct{}{}, 2: struct{}{}}, map[string]int32{"foo": 1, "bar": 2}}),
+ },
+ },
+ },
+ table{
+ name: "Recursives",
+ rows: []kv{
+ kv{
+ "alpha",
+ vdl.ValueOf(Recursive{nil, &Times{time.Unix(123456789, 42244224), time.Duration(1337)}, map[Array2String]Recursive{
+ Array2String{"a", "b"}: Recursive{},
+ Array2String{"x", "y"}: Recursive{vdl.ValueOf(CreditReport{Agency: CreditAgencyExperian, Report: AgencyReportExperianReport{ExperianCreditReport{ExperianRatingGood}}}), nil, map[Array2String]Recursive{
+ Array2String{"alpha", "beta"}: Recursive{vdl.ValueOf(FooType{Bar: BarType{Baz: BazType{Name: "hello", TitleOrValue: TitleOrValueTypeValue{Value: 42}}}}), nil, nil},
+ }},
+ Array2String{"u", "v"}: Recursive{vdl.ValueOf(vdl.TypeOf(Recursive{})), nil, nil},
+ }}),
+ },
+ },
+ },
+}
+
+// PopulateDemoDB creates the demo tables in the provided database, deleting
+// and recreating any that already exist.
+func PopulateDemoDB(ctx *context.T, db nosql.Database) error {
+ for i, t := range demoTables {
+ tn := demoPrefix + t.name
+ if err := db.DeleteTable(ctx, tn); err != nil {
+ return fmt.Errorf("failed deleting table %s (%d/%d): %v", tn, i+1, len(demoTables), err)
+ }
+ if err := db.CreateTable(ctx, tn, nil); err != nil {
+ return fmt.Errorf("failed creating table %s (%d/%d): %v", tn, i+1, len(demoTables), err)
+ }
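+ // Write all rows for this table within a single batch.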
+ if err := nosql.RunInBatch(ctx, db, wire.BatchOptions{}, func(db nosql.BatchDatabase) error {
+ dt := db.Table(tn)
+ for _, kv := range t.rows {
+ if err := dt.Put(ctx, kv.key, kv.value); err != nil {
+ return err
+ }
+ }
+ return nil
+ }); err != nil {
+ return fmt.Errorf("failed populating table %s (%d/%d): %v", tn, i+1, len(demoTables), err)
+ }
+ }
+ return nil
+}
diff --git a/cmd/sb51/internal/demodb/db_objects.vdl b/cmd/sb51/internal/demodb/db_objects.vdl
new file mode 100644
index 0000000..cbf119a
--- /dev/null
+++ b/cmd/sb51/internal/demodb/db_objects.vdl
@@ -0,0 +1,115 @@
+// Copyright 2015 The Vanadium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package demodb
+
+import "time"
+
+type AddressInfo struct {
+ Street string
+ City string
+ State string
+ Zip string
+}
+
+type CreditAgency enum {
+ Equifax
+ Experian
+ TransUnion
+}
+
+type ExperianRating enum {
+ Good
+ Bad
+}
+
+type EquifaxCreditReport struct {
+ Rating byte
+}
+
+type ExperianCreditReport struct {
+ Rating ExperianRating
+}
+
+type TransUnionCreditReport struct {
+ Rating int16
+}
+
+type AgencyReport union {
+ EquifaxReport EquifaxCreditReport
+ ExperianReport ExperianCreditReport
+ TransUnionReport TransUnionCreditReport
+}
+
+type CreditReport struct {
+ Agency CreditAgency
+ Report AgencyReport
+}
+
+type Customer struct {
+ Name string
+ Id int64
+ Active bool
+ Address AddressInfo
+ Credit CreditReport
+}
+
+type Invoice struct {
+ CustId int64
+ InvoiceNum int64
+ Amount int64
+ ShipTo AddressInfo
+}
+
+type Numbers struct {
+ B byte
+ Ui16 uint16
+ Ui32 uint32
+ Ui64 uint64
+ I16 int16
+ I32 int32
+ I64 int64
+ F32 float32
+ F64 float64
+ C64 complex64
+ C128 complex128
+}
+
+type FooType struct {
+ Bar BarType
+}
+
+type BarType struct {
+ Baz BazType
+}
+
+type TitleOrValueType union {
+ Title string
+ Value int64
+}
+
+type BazType struct {
+ Name string
+ TitleOrValue TitleOrValueType
+}
+
+type Array2String [2]string
+
+type Composite struct {
+ Arr Array2String
+ ListInt []int32
+ MySet set[int32]
+ Map map[string]int32
+}
+
+type Times struct {
+ Stamp time.Time
+ Interval time.Duration
+}
+
+type Recursive struct {
+ Any any
+ Maybe ?Times
+ Rec map[Array2String]Recursive
+}
diff --git a/cmd/sb51/internal/demodb/db_objects.vdl.go b/cmd/sb51/internal/demodb/db_objects.vdl.go
new file mode 100644
index 0000000..23481f8
--- /dev/null
+++ b/cmd/sb51/internal/demodb/db_objects.vdl.go
@@ -0,0 +1,383 @@
+// Copyright 2015 The Vanadium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// This file was auto-generated by the vanadium vdl tool.
+// Source: db_objects.vdl
+
+package demodb
+
+import (
+ // VDL system imports
+ "fmt"
+ "v.io/v23/vdl"
+
+ // VDL user imports
+ "time"
+ _ "v.io/v23/vdlroot/time"
+)
+
+type AddressInfo struct {
+ Street string
+ City string
+ State string
+ Zip string
+}
+
+func (AddressInfo) __VDLReflect(struct {
+ Name string `vdl:"v.io/syncbase/x/ref/syncbase/sb51/internal/demodb.AddressInfo"`
+}) {
+}
+
+type CreditAgency int
+
+const (
+ CreditAgencyEquifax CreditAgency = iota
+ CreditAgencyExperian
+ CreditAgencyTransUnion
+)
+
+// CreditAgencyAll holds all labels for CreditAgency.
+var CreditAgencyAll = [...]CreditAgency{CreditAgencyEquifax, CreditAgencyExperian, CreditAgencyTransUnion}
+
+// CreditAgencyFromString creates a CreditAgency from a string label.
+func CreditAgencyFromString(label string) (x CreditAgency, err error) {
+ err = x.Set(label)
+ return
+}
+
+// Set assigns label to x.
+func (x *CreditAgency) Set(label string) error {
+ switch label {
+ case "Equifax", "equifax":
+ *x = CreditAgencyEquifax
+ return nil
+ case "Experian", "experian":
+ *x = CreditAgencyExperian
+ return nil
+ case "TransUnion", "transunion":
+ *x = CreditAgencyTransUnion
+ return nil
+ }
+ *x = -1
+ return fmt.Errorf("unknown label %q in demodb.CreditAgency", label)
+}
+
+// String returns the string label of x.
+func (x CreditAgency) String() string {
+ switch x {
+ case CreditAgencyEquifax:
+ return "Equifax"
+ case CreditAgencyExperian:
+ return "Experian"
+ case CreditAgencyTransUnion:
+ return "TransUnion"
+ }
+ return ""
+}
+
+func (CreditAgency) __VDLReflect(struct {
+ Name string `vdl:"v.io/syncbase/x/ref/syncbase/sb51/internal/demodb.CreditAgency"`
+ Enum struct{ Equifax, Experian, TransUnion string }
+}) {
+}
+
+type ExperianRating int
+
+const (
+ ExperianRatingGood ExperianRating = iota
+ ExperianRatingBad
+)
+
+// ExperianRatingAll holds all labels for ExperianRating.
+var ExperianRatingAll = [...]ExperianRating{ExperianRatingGood, ExperianRatingBad}
+
+// ExperianRatingFromString creates a ExperianRating from a string label.
+func ExperianRatingFromString(label string) (x ExperianRating, err error) {
+ err = x.Set(label)
+ return
+}
+
+// Set assigns label to x.
+func (x *ExperianRating) Set(label string) error {
+ switch label {
+ case "Good", "good":
+ *x = ExperianRatingGood
+ return nil
+ case "Bad", "bad":
+ *x = ExperianRatingBad
+ return nil
+ }
+ *x = -1
+ return fmt.Errorf("unknown label %q in demodb.ExperianRating", label)
+}
+
+// String returns the string label of x.
+func (x ExperianRating) String() string {
+ switch x {
+ case ExperianRatingGood:
+ return "Good"
+ case ExperianRatingBad:
+ return "Bad"
+ }
+ return ""
+}
+
+func (ExperianRating) __VDLReflect(struct {
+ Name string `vdl:"v.io/syncbase/x/ref/syncbase/sb51/internal/demodb.ExperianRating"`
+ Enum struct{ Good, Bad string }
+}) {
+}
+
+type EquifaxCreditReport struct {
+ Rating byte
+}
+
+func (EquifaxCreditReport) __VDLReflect(struct {
+ Name string `vdl:"v.io/syncbase/x/ref/syncbase/sb51/internal/demodb.EquifaxCreditReport"`
+}) {
+}
+
+type ExperianCreditReport struct {
+ Rating ExperianRating
+}
+
+func (ExperianCreditReport) __VDLReflect(struct {
+ Name string `vdl:"v.io/syncbase/x/ref/syncbase/sb51/internal/demodb.ExperianCreditReport"`
+}) {
+}
+
+type TransUnionCreditReport struct {
+ Rating int16
+}
+
+func (TransUnionCreditReport) __VDLReflect(struct {
+ Name string `vdl:"v.io/syncbase/x/ref/syncbase/sb51/internal/demodb.TransUnionCreditReport"`
+}) {
+}
+
+type (
+ // AgencyReport represents any single field of the AgencyReport union type.
+ AgencyReport interface {
+ // Index returns the field index.
+ Index() int
+ // Interface returns the field value as an interface.
+ Interface() interface{}
+ // Name returns the field name.
+ Name() string
+ // __VDLReflect describes the AgencyReport union type.
+ __VDLReflect(__AgencyReportReflect)
+ }
+ // AgencyReportEquifaxReport represents field EquifaxReport of the AgencyReport union type.
+ AgencyReportEquifaxReport struct{ Value EquifaxCreditReport }
+ // AgencyReportExperianReport represents field ExperianReport of the AgencyReport union type.
+ AgencyReportExperianReport struct{ Value ExperianCreditReport }
+ // AgencyReportTransUnionReport represents field TransUnionReport of the AgencyReport union type.
+ AgencyReportTransUnionReport struct{ Value TransUnionCreditReport }
+ // __AgencyReportReflect describes the AgencyReport union type.
+ __AgencyReportReflect struct {
+ Name string `vdl:"v.io/syncbase/x/ref/syncbase/sb51/internal/demodb.AgencyReport"`
+ Type AgencyReport
+ Union struct {
+ EquifaxReport AgencyReportEquifaxReport
+ ExperianReport AgencyReportExperianReport
+ TransUnionReport AgencyReportTransUnionReport
+ }
+ }
+)
+
+func (x AgencyReportEquifaxReport) Index() int { return 0 }
+func (x AgencyReportEquifaxReport) Interface() interface{} { return x.Value }
+func (x AgencyReportEquifaxReport) Name() string { return "EquifaxReport" }
+func (x AgencyReportEquifaxReport) __VDLReflect(__AgencyReportReflect) {}
+
+func (x AgencyReportExperianReport) Index() int { return 1 }
+func (x AgencyReportExperianReport) Interface() interface{} { return x.Value }
+func (x AgencyReportExperianReport) Name() string { return "ExperianReport" }
+func (x AgencyReportExperianReport) __VDLReflect(__AgencyReportReflect) {}
+
+func (x AgencyReportTransUnionReport) Index() int { return 2 }
+func (x AgencyReportTransUnionReport) Interface() interface{} { return x.Value }
+func (x AgencyReportTransUnionReport) Name() string { return "TransUnionReport" }
+func (x AgencyReportTransUnionReport) __VDLReflect(__AgencyReportReflect) {}
+
+type CreditReport struct {
+ Agency CreditAgency
+ Report AgencyReport
+}
+
+func (CreditReport) __VDLReflect(struct {
+ Name string `vdl:"v.io/syncbase/x/ref/syncbase/sb51/internal/demodb.CreditReport"`
+}) {
+}
+
+type Customer struct {
+ Name string
+ Id int64
+ Active bool
+ Address AddressInfo
+ Credit CreditReport
+}
+
+func (Customer) __VDLReflect(struct {
+ Name string `vdl:"v.io/syncbase/x/ref/syncbase/sb51/internal/demodb.Customer"`
+}) {
+}
+
+type Invoice struct {
+ CustId int64
+ InvoiceNum int64
+ Amount int64
+ ShipTo AddressInfo
+}
+
+func (Invoice) __VDLReflect(struct {
+ Name string `vdl:"v.io/syncbase/x/ref/syncbase/sb51/internal/demodb.Invoice"`
+}) {
+}
+
+type Numbers struct {
+ B byte
+ Ui16 uint16
+ Ui32 uint32
+ Ui64 uint64
+ I16 int16
+ I32 int32
+ I64 int64
+ F32 float32
+ F64 float64
+ C64 complex64
+ C128 complex128
+}
+
+func (Numbers) __VDLReflect(struct {
+ Name string `vdl:"v.io/syncbase/x/ref/syncbase/sb51/internal/demodb.Numbers"`
+}) {
+}
+
+type FooType struct {
+ Bar BarType
+}
+
+func (FooType) __VDLReflect(struct {
+ Name string `vdl:"v.io/syncbase/x/ref/syncbase/sb51/internal/demodb.FooType"`
+}) {
+}
+
+type BarType struct {
+ Baz BazType
+}
+
+func (BarType) __VDLReflect(struct {
+ Name string `vdl:"v.io/syncbase/x/ref/syncbase/sb51/internal/demodb.BarType"`
+}) {
+}
+
+type (
+ // TitleOrValueType represents any single field of the TitleOrValueType union type.
+ TitleOrValueType interface {
+ // Index returns the field index.
+ Index() int
+ // Interface returns the field value as an interface.
+ Interface() interface{}
+ // Name returns the field name.
+ Name() string
+ // __VDLReflect describes the TitleOrValueType union type.
+ __VDLReflect(__TitleOrValueTypeReflect)
+ }
+ // TitleOrValueTypeTitle represents field Title of the TitleOrValueType union type.
+ TitleOrValueTypeTitle struct{ Value string }
+ // TitleOrValueTypeValue represents field Value of the TitleOrValueType union type.
+ TitleOrValueTypeValue struct{ Value int64 }
+ // __TitleOrValueTypeReflect describes the TitleOrValueType union type.
+ __TitleOrValueTypeReflect struct {
+ Name string `vdl:"v.io/syncbase/x/ref/syncbase/sb51/internal/demodb.TitleOrValueType"`
+ Type TitleOrValueType
+ Union struct {
+ Title TitleOrValueTypeTitle
+ Value TitleOrValueTypeValue
+ }
+ }
+)
+
+func (x TitleOrValueTypeTitle) Index() int { return 0 }
+func (x TitleOrValueTypeTitle) Interface() interface{} { return x.Value }
+func (x TitleOrValueTypeTitle) Name() string { return "Title" }
+func (x TitleOrValueTypeTitle) __VDLReflect(__TitleOrValueTypeReflect) {}
+
+func (x TitleOrValueTypeValue) Index() int { return 1 }
+func (x TitleOrValueTypeValue) Interface() interface{} { return x.Value }
+func (x TitleOrValueTypeValue) Name() string { return "Value" }
+func (x TitleOrValueTypeValue) __VDLReflect(__TitleOrValueTypeReflect) {}
+
+type BazType struct {
+ Name string
+ TitleOrValue TitleOrValueType
+}
+
+func (BazType) __VDLReflect(struct {
+ Name string `vdl:"v.io/syncbase/x/ref/syncbase/sb51/internal/demodb.BazType"`
+}) {
+}
+
+type Array2String [2]string
+
+func (Array2String) __VDLReflect(struct {
+ Name string `vdl:"v.io/syncbase/x/ref/syncbase/sb51/internal/demodb.Array2String"`
+}) {
+}
+
+type Composite struct {
+ Arr Array2String
+ ListInt []int32
+ MySet map[int32]struct{}
+ Map map[string]int32
+}
+
+func (Composite) __VDLReflect(struct {
+ Name string `vdl:"v.io/syncbase/x/ref/syncbase/sb51/internal/demodb.Composite"`
+}) {
+}
+
+type Times struct {
+ Stamp time.Time
+ Interval time.Duration
+}
+
+func (Times) __VDLReflect(struct {
+ Name string `vdl:"v.io/syncbase/x/ref/syncbase/sb51/internal/demodb.Times"`
+}) {
+}
+
+type Recursive struct {
+ Any *vdl.Value
+ Maybe *Times
+ Rec map[Array2String]Recursive
+}
+
+func (Recursive) __VDLReflect(struct {
+ Name string `vdl:"v.io/syncbase/x/ref/syncbase/sb51/internal/demodb.Recursive"`
+}) {
+}
+
+func init() {
+ vdl.Register((*AddressInfo)(nil))
+ vdl.Register((*CreditAgency)(nil))
+ vdl.Register((*ExperianRating)(nil))
+ vdl.Register((*EquifaxCreditReport)(nil))
+ vdl.Register((*ExperianCreditReport)(nil))
+ vdl.Register((*TransUnionCreditReport)(nil))
+ vdl.Register((*AgencyReport)(nil))
+ vdl.Register((*CreditReport)(nil))
+ vdl.Register((*Customer)(nil))
+ vdl.Register((*Invoice)(nil))
+ vdl.Register((*Numbers)(nil))
+ vdl.Register((*FooType)(nil))
+ vdl.Register((*BarType)(nil))
+ vdl.Register((*TitleOrValueType)(nil))
+ vdl.Register((*BazType)(nil))
+ vdl.Register((*Array2String)(nil))
+ vdl.Register((*Composite)(nil))
+ vdl.Register((*Times)(nil))
+ vdl.Register((*Recursive)(nil))
+}
diff --git a/cmd/sb51/internal/demodb/doc.go b/cmd/sb51/internal/demodb/doc.go
new file mode 100644
index 0000000..3daf52f
--- /dev/null
+++ b/cmd/sb51/internal/demodb/doc.go
@@ -0,0 +1,7 @@
+// Copyright 2015 The Vanadium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// Package demodb supports loading an example database into Syncbase for
+// experimentation and testing purposes.
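+//
+// A minimal usage sketch (assuming a valid *context.T and an open
+// nosql.Database handle):
+//
+//   if err := demodb.PopulateDemoDB(ctx, db); err != nil {
+//     // handle error
+//   }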
+package demodb
diff --git a/cmd/sb51/internal/reader/reader.go b/cmd/sb51/internal/reader/reader.go
new file mode 100644
index 0000000..930c140
--- /dev/null
+++ b/cmd/sb51/internal/reader/reader.go
@@ -0,0 +1,130 @@
+// Copyright 2015 The Vanadium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// Package reader provides an object that reads queries from various input
+// sources (e.g. stdin, pipe).
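+//
+// A minimal usage sketch:
+//
+//   r := reader.NewInteractive()
+//   defer r.Close()
+//   q, err := r.GetQuery() // reads input until a ';' (or io.EOF)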
+package reader
+
+import (
+ "bufio"
+ "os"
+ "strings"
+ "text/scanner"
+
+ "github.com/peterh/liner"
+)
+
+type T struct {
+ s scanner.Scanner
+ prompt prompter
+}
+
+func newT(prompt prompter) *T {
+ t := &T{prompt: prompt}
+ t.initScanner("")
+ return t
+}
+
+// Close frees any resources acquired by this reader.
+func (t *T) Close() {
+ t.prompt.Close()
+}
+
+func (t *T) initScanner(input string) {
+ t.s.Init(strings.NewReader(input))
+ // Keep all whitespace.
+ t.s.Whitespace = 0
+}
+
+// GetQuery returns the next query, where queries are delimited by semicolons.
+// It returns the error io.EOF when there is no more input.
+func (t *T) GetQuery() (string, error) {
+ if t.s.Peek() == scanner.EOF {
+ input, err := t.prompt.InitialPrompt()
+ if err != nil {
+ return "", err
+ }
+ t.initScanner(input)
+ }
+ var query string
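+ // Accumulate tokens (preserving whitespace) until a terminating ';',
+ // prompting for continuation lines as needed.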
+WholeQuery:
+ for {
+ for tok := t.s.Scan(); tok != scanner.EOF; tok = t.s.Scan() {
+ if tok == ';' {
+ break WholeQuery
+ }
+ query += t.s.TokenText()
+ }
+ input, err := t.prompt.ContinuePrompt()
+ if err != nil {
+ return "", err
+ }
+ t.initScanner(input)
+ query += "\n" // User started a new line.
+ }
+ t.prompt.AppendHistory(query + ";")
+ return query, nil
+}
+
+type prompter interface {
+ Close()
+ InitialPrompt() (string, error)
+ ContinuePrompt() (string, error)
+ AppendHistory(query string)
+}
+
+// noninteractive prompter just blindly reads from stdin.
+type noninteractive struct {
+ input *bufio.Reader
+}
+
+// NewNonInteractive returns a T that simply reads input from stdin. Useful
+// when input is piped from a file or another program.
+func NewNonInteractive() *T {
+ return newT(&noninteractive{bufio.NewReader(os.Stdin)})
+}
+
+func (i *noninteractive) Close() {
+}
+
+func (i *noninteractive) InitialPrompt() (string, error) {
+ return i.input.ReadString('\n')
+}
+
+func (i *noninteractive) ContinuePrompt() (string, error) {
+ return i.input.ReadString('\n')
+}
+
+func (i *noninteractive) AppendHistory(query string) {
+}
+
+// interactive prompter provides a nice prompt for a user to input queries.
+type interactive struct {
+ line *liner.State
+}
+
+// NewInteractive returns a T that prompts the user for input.
+func NewInteractive() *T {
+ i := &interactive{
+ line: liner.NewLiner(),
+ }
+ i.line.SetCtrlCAborts(true)
+ return newT(i)
+}
+
+func (i *interactive) Close() {
+ i.line.Close()
+}
+
+func (i *interactive) InitialPrompt() (string, error) {
+ return i.line.Prompt("? ")
+}
+
+func (i *interactive) ContinuePrompt() (string, error) {
+ return i.line.Prompt(" > ")
+}
+
+func (i *interactive) AppendHistory(query string) {
+ i.line.AppendHistory(query)
+}
diff --git a/cmd/sb51/internal/reader/reader_test.go b/cmd/sb51/internal/reader/reader_test.go
new file mode 100644
index 0000000..de4332c
--- /dev/null
+++ b/cmd/sb51/internal/reader/reader_test.go
@@ -0,0 +1,87 @@
+// Copyright 2015 The Vanadium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package reader
+
+import (
+ "io"
+ "reflect"
+ "testing"
+)
+
+type stringPrompter struct {
+ lines []string
+ curr int
+}
+
+func (s *stringPrompter) Close() {
+}
+
+func (s *stringPrompter) InitialPrompt() (string, error) {
+ if s.curr >= len(s.lines) {
+ return "", io.EOF
+ }
+ q := s.lines[s.curr]
+ s.curr++
+ return q, nil
+}
+
+func (s *stringPrompter) ContinuePrompt() (string, error) {
+ return s.InitialPrompt()
+}
+
+func (s *stringPrompter) AppendHistory(query string) {
+}
+
+func TestGetQuery(t *testing.T) {
+ type testCase struct {
+ lines []string
+ queries []string
+ }
+
+ tests := []testCase{
+ { // Single query.
+ []string{"select k from C;"},
+ []string{"select k from C"},
+ },
+ { // Multiple queries.
+ []string{"select k from C;", "select bar from C;"},
+ []string{"select k from C", "select bar from C"},
+ },
+ { // Multiple queries on one line.
+ []string{"select k from C; select bar from C;"},
+ []string{"select k from C", " select bar from C"},
+ },
+ { // Multiple queries without a ; are just one query.
+ []string{"select k from C select bar from C;"},
+ []string{"select k from C select bar from C"},
+ },
+ { // A query may span multiple input lines; it ends only at a ';'.
+ []string{"select k from C", "select bar from C;"},
+ []string{"select k from C\nselect bar from C"},
+ },
+ {
+ []string{"select\tfoo.bar from\nC;"},
+ []string{"select\tfoo.bar from\nC"},
+ },
+ }
+ for _, test := range tests {
+ r := newT(&stringPrompter{lines: test.lines})
+ var queries []string
+ for {
+ if q, err := r.GetQuery(); err != nil {
+ if err == io.EOF {
+ break
+ }
+ t.Errorf("test %v: unexpected error: %v", test.lines, err)
+ break
+ } else {
+ queries = append(queries, q)
+ }
+ }
+ if got, want := queries, test.queries; !reflect.DeepEqual(got, want) {
+ t.Errorf("test %#v: got %#v, want %#v", test.lines, got, want)
+ }
+ }
+}
diff --git a/cmd/sb51/internal/writer/doc.go b/cmd/sb51/internal/writer/doc.go
new file mode 100644
index 0000000..315ba57
--- /dev/null
+++ b/cmd/sb51/internal/writer/doc.go
@@ -0,0 +1,8 @@
+// Copyright 2015 The Vanadium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// Package writer provides functions for formatting query results.
+//
+// TODO(ivanpi): Export as VDL formatter library.
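+//
+// A minimal usage sketch (rs being a nosql.ResultStream obtained from a
+// query):
+//
+//   if err := writer.WriteTable(os.Stdout, columnNames, rs); err != nil {
+//     // handle error
+//   }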
+package writer
diff --git a/cmd/sb51/internal/writer/writer.go b/cmd/sb51/internal/writer/writer.go
new file mode 100644
index 0000000..c7900b6
--- /dev/null
+++ b/cmd/sb51/internal/writer/writer.go
@@ -0,0 +1,404 @@
+// Copyright 2015 The Vanadium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package writer
+
+import (
+ "encoding/json"
+ "errors"
+ "fmt"
+ "io"
+ "strconv"
+ "strings"
+ "unicode/utf8"
+
+ "v.io/syncbase/v23/syncbase/nosql"
+ "v.io/v23/vdl"
+ vtime "v.io/v23/vdlroot/time"
+)
+
+type Justification int
+
+const (
+ Unknown Justification = iota
+ Left
+ Right
+)
+
+// WriteTable formats the results as an ASCII table.
+func WriteTable(out io.Writer, columnNames []string, rs nosql.ResultStream) error {
+ // Buffer the results so we can compute the column widths.
+ columnWidths := make([]int, len(columnNames))
+ for i, cName := range columnNames {
+ columnWidths[i] = utf8.RuneCountInString(cName)
+ }
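+ // Each column's justification is decided by the kind of the first value
+ // seen in it: bools and numbers are right-aligned, everything else is
+ // left-aligned.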
+ justification := make([]Justification, len(columnNames))
+ var results [][]string
+ for rs.Advance() {
+ row := make([]string, len(columnNames))
+ for i, column := range rs.Result() {
+ if i >= len(columnNames) {
+ return errors.New("more columns in result than in columnNames")
+ }
+ if justification[i] == Unknown {
+ justification[i] = getJustification(column)
+ }
+ columnStr := toString(column, false)
+ row[i] = columnStr
+ columnLen := utf8.RuneCountInString(columnStr)
+ if columnLen > columnWidths[i] {
+ columnWidths[i] = columnLen
+ }
+ }
+ results = append(results, row)
+ }
+ if rs.Err() != nil {
+ return rs.Err()
+ }
+
+ writeBorder(out, columnWidths)
+ sep := "| "
+ for i, cName := range columnNames {
+ io.WriteString(out, fmt.Sprintf("%s%*s", sep, columnWidths[i], cName))
+ sep = " | "
+ }
+ io.WriteString(out, " |\n")
+ writeBorder(out, columnWidths)
+ for _, result := range results {
+ sep = "| "
+ for i, column := range result {
+ if justification[i] == Right {
+ io.WriteString(out, fmt.Sprintf("%s%*s", sep, columnWidths[i], column))
+ } else {
+ io.WriteString(out, fmt.Sprintf("%s%-*s", sep, columnWidths[i], column))
+ }
+ sep = " | "
+ }
+ io.WriteString(out, " |\n")
+ }
+ writeBorder(out, columnWidths)
+ return nil
+}
+
+func writeBorder(out io.Writer, columnWidths []int) {
+ sep := "+-"
+ for _, width := range columnWidths {
+ io.WriteString(out, fmt.Sprintf("%s%s", sep, strings.Repeat("-", width)))
+ sep = "-+-"
+ }
+ io.WriteString(out, "-+\n")
+}
+
+func getJustification(val *vdl.Value) Justification {
+ switch val.Kind() {
+ // TODO(kash): Floating point numbers should have the decimal point line up.
+ case vdl.Bool, vdl.Byte, vdl.Uint16, vdl.Uint32, vdl.Uint64, vdl.Int16, vdl.Int32, vdl.Int64,
+ vdl.Float32, vdl.Float64, vdl.Complex64, vdl.Complex128:
+ return Right
+ // TODO(kash): Leave nil values as unknown.
+ default:
+ return Left
+ }
+}
+
+// WriteCSV formats the results as CSV as specified by https://tools.ietf.org/html/rfc4180.
+func WriteCSV(out io.Writer, columnNames []string, rs nosql.ResultStream, delimiter string) error {
+ delim := ""
+ for _, cName := range columnNames {
+ str := doubleQuoteForCSV(cName, delimiter)
+ io.WriteString(out, fmt.Sprintf("%s%s", delim, str))
+ delim = delimiter
+ }
+ io.WriteString(out, "\n")
+ for rs.Advance() {
+ delim := ""
+ for _, column := range rs.Result() {
+ str := doubleQuoteForCSV(toString(column, false), delimiter)
+ io.WriteString(out, fmt.Sprintf("%s%s", delim, str))
+ delim = delimiter
+ }
+ io.WriteString(out, "\n")
+ }
+ return rs.Err()
+}
+
+// doubleQuoteForCSV follows the escaping rules from
+// https://tools.ietf.org/html/rfc4180. In particular, any value containing a
+// newline, a double quote, or the delimiter must be enclosed in double
+// quotes.
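+// For example, with a "," delimiter, `a,b` becomes `"a,b"` and `a"b` becomes
+// `"a""b"`.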
+func doubleQuoteForCSV(str, delimiter string) string {
+ doubleQuote := strings.Contains(str, delimiter) || strings.Contains(str, "\n")
+ if strings.Contains(str, "\"") {
+ str = strings.Replace(str, "\"", "\"\"", -1)
+ doubleQuote = true
+ }
+ if doubleQuote {
+ str = "\"" + str + "\""
+ }
+ return str
+}
+
+// WriteJson formats the results as a JSON array of objects (one per row),
+// mapping column names to values.
+func WriteJson(out io.Writer, columnNames []string, rs nosql.ResultStream) error {
+ io.WriteString(out, "[")
+ jsonColNames := make([][]byte, len(columnNames))
+ for i, cName := range columnNames {
+ jsonCName, err := json.Marshal(cName)
+ if err != nil {
+ panic(fmt.Sprintf("JSON marshalling failed for column name: %v", err))
+ }
+ jsonColNames[i] = jsonCName
+ }
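+ // bOpen separates row objects: "{" for the first row, ", {" thereafter.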
+ bOpen := "{"
+ for rs.Advance() {
+ io.WriteString(out, bOpen)
+ linestart := "\n "
+ for i, column := range rs.Result() {
+ str := toJson(column)
+ io.WriteString(out, fmt.Sprintf("%s%s: %s", linestart, jsonColNames[i], str))
+ linestart = ",\n "
+ }
+ io.WriteString(out, "\n}")
+ bOpen = ", {"
+ }
+ io.WriteString(out, "]\n")
+ return rs.Err()
+}
+
+// Converts VDL value to readable yet parseable string representation.
+// If nested is not set, strings outside composites are left unquoted.
+// TODO(ivanpi): Handle cycles and improve non-tree DAG handling.
+func toString(val *vdl.Value, nested bool) string {
+ switch val.Type() {
+ case vdl.TypeOf(vtime.Time{}), vdl.TypeOf(vtime.Duration{}):
+ s, err := toStringNative(val)
+ if err != nil {
+ panic(fmt.Sprintf("toStringNative failed for builtin time type: %v", err))
+ }
+ if nested {
+ s = strconv.Quote(s)
+ }
+ return s
+ default:
+ // fall through to Kind switch
+ }
+ switch val.Kind() {
+ case vdl.Bool:
+ return fmt.Sprint(val.Bool())
+ case vdl.Byte:
+ return fmt.Sprint(val.Byte())
+ case vdl.Uint16, vdl.Uint32, vdl.Uint64:
+ return fmt.Sprint(val.Uint())
+ case vdl.Int16, vdl.Int32, vdl.Int64:
+ return fmt.Sprint(val.Int())
+ case vdl.Float32, vdl.Float64:
+ return fmt.Sprint(val.Float())
+ case vdl.Complex64, vdl.Complex128:
+ c := val.Complex()
+ return fmt.Sprintf("%v+%vi", real(c), imag(c))
+ case vdl.String:
+ s := val.RawString()
+ if nested {
+ s = strconv.Quote(s)
+ }
+ return s
+ case vdl.Enum:
+ return val.EnumLabel()
+ case vdl.Array, vdl.List:
+ return listToString("[", ", ", "]", val.Len(), func(i int) string {
+ return toString(val.Index(i), true)
+ })
+ case vdl.Any, vdl.Optional:
+ if val.IsNil() {
+ if nested {
+ return "nil"
+ }
+ // TODO(ivanpi): Blank is better for CSV, but <nil> might be better for table and TSV.
+ return ""
+ }
+ return toString(val.Elem(), nested)
+ case vdl.Struct:
+ return listToString("{", ", ", "}", val.Type().NumField(), func(i int) string {
+ field := toString(val.StructField(i), true)
+ return fmt.Sprintf("%s: %s", val.Type().Field(i).Name, field)
+ })
+ case vdl.Union:
+ ui, uv := val.UnionField()
+ field := toString(uv, true)
+ return fmt.Sprintf("%s: %s", val.Type().Field(ui).Name, field)
+ case vdl.Set:
+ // TODO(ivanpi): vdl.SortValuesAsString() used for predictable output ordering.
+ // Use a more sensible sort for numbers etc.
+ keys := vdl.SortValuesAsString(val.Keys())
+ return listToString("{", ", ", "}", len(keys), func(i int) string {
+ return toString(keys[i], true)
+ })
+ case vdl.Map:
+ // TODO(ivanpi): vdl.SortValuesAsString() used for predictable output ordering.
+ // Use a more sensible sort for numbers etc.
+ keys := vdl.SortValuesAsString(val.Keys())
+ return listToString("{", ", ", "}", len(keys), func(i int) string {
+ k := toString(keys[i], true)
+ v := toString(val.MapIndex(keys[i]), true)
+ return fmt.Sprintf("%s: %s", k, v)
+ })
+ case vdl.TypeObject:
+ return val.String()
+ default:
+ panic(fmt.Sprintf("unknown Kind %s", val.Kind()))
+ }
+}
+
+// Converts a VDL value to string using the corresponding native type String()
+// method.
+func toStringNative(val *vdl.Value) (string, error) {
+ var natVal interface{}
+ if err := vdl.Convert(&natVal, val); err != nil {
+ return "", fmt.Errorf("failed converting %s to native value: %v", val.Type().String(), err)
+ }
+ if _, ok := natVal.(*vdl.Value); ok {
+ return "", fmt.Errorf("failed converting %s to native value: got vdl.Value", val.Type().String())
+ }
+ if strNatVal, ok := natVal.(fmt.Stringer); !ok {
+ return "", fmt.Errorf("native value of %s doesn't implement String()", val.Type().String())
+ } else {
+ return strNatVal.String(), nil
+ }
+}
+
+// listToString stringifies a sequence of n elements, where the string
+// representation of element i is obtained from elemToString(i).
+func listToString(begin, sep, end string, n int, elemToString func(i int) string) string {
+ elems := make([]string, n)
+ for i := range elems {
+ elems[i] = elemToString(i)
+ }
+ return begin + strings.Join(elems, sep) + end
+}
+
+// Converts VDL value to JSON representation.
+func toJson(val *vdl.Value) string {
+ jf := toJsonFriendly(val)
+ jOut, err := json.Marshal(jf)
+ if err != nil {
+ panic(fmt.Sprintf("JSON marshalling failed: %v", err))
+ }
+ return string(jOut)
+}
+
+// Converts VDL value to Go type compatible with json.Marshal().
+func toJsonFriendly(val *vdl.Value) interface{} {
+ switch val.Type() {
+ case vdl.TypeOf(vtime.Time{}), vdl.TypeOf(vtime.Duration{}):
+ s, err := toStringNative(val)
+ if err != nil {
+ panic(fmt.Sprintf("toStringNative failed for builtin time type: %v", err))
+ }
+ return s
+ default:
+ // fall through to Kind switch
+ }
+ switch val.Kind() {
+ case vdl.Bool:
+ return val.Bool()
+ case vdl.Byte:
+ return val.Byte()
+ case vdl.Uint16, vdl.Uint32, vdl.Uint64:
+ return val.Uint()
+ case vdl.Int16, vdl.Int32, vdl.Int64:
+ return val.Int()
+ case vdl.Float32, vdl.Float64:
+ return val.Float()
+ case vdl.Complex64, vdl.Complex128:
+ // Go doesn't support marshalling complex values, so we stringify them.
+ c := val.Complex()
+ return fmt.Sprintf("%v+%vi", real(c), imag(c))
+ case vdl.String:
+ return val.RawString()
+ case vdl.Enum:
+ return val.EnumLabel()
+ case vdl.Array, vdl.List:
+ arr := make([]interface{}, val.Len())
+ for i := range arr {
+ arr[i] = toJsonFriendly(val.Index(i))
+ }
+ return arr
+ case vdl.Any, vdl.Optional:
+ if val.IsNil() {
+ return nil
+ }
+ return toJsonFriendly(val.Elem())
+ case vdl.Struct:
+ // TODO(ivanpi): Consider lowercasing field names.
+ return toOrderedMap(val.Type().NumField(), func(i int) (string, interface{}) {
+ return val.Type().Field(i).Name, toJsonFriendly(val.StructField(i))
+ })
+ case vdl.Union:
+ // TODO(ivanpi): Consider lowercasing field name.
+ ui, uv := val.UnionField()
+ return toOrderedMap(1, func(_ int) (string, interface{}) {
+ return val.Type().Field(ui).Name, toJsonFriendly(uv)
+ })
+ case vdl.Set:
+ // TODO(ivanpi): vdl.SortValuesAsString() used for predictable output ordering.
+ // Use a more sensible sort for numbers etc.
+ keys := vdl.SortValuesAsString(val.Keys())
+ return toOrderedMap(len(keys), func(i int) (string, interface{}) {
+ return toString(keys[i], false), true
+ })
+ case vdl.Map:
+ // TODO(ivanpi): vdl.SortValuesAsString() used for predictable output ordering.
+ // Use a more sensible sort for numbers etc.
+ keys := vdl.SortValuesAsString(val.Keys())
+ return toOrderedMap(len(keys), func(i int) (string, interface{}) {
+ return toString(keys[i], false), toJsonFriendly(val.MapIndex(keys[i]))
+ })
+ case vdl.TypeObject:
+ return val.String()
+ default:
+ panic(fmt.Sprintf("unknown Kind %s", val.Kind()))
+ }
+}
+
+// orderedMap serializes to a JSON object, preserving key order.
+// A native Go map would serialize to a JSON object with sorted keys, which is
+// unexpected behavior for a struct.
+type orderedMap []orderedMapElem
+
+type orderedMapElem struct {
+ Key string
+ Val interface{}
+}
+
+var _ json.Marshaler = (*orderedMap)(nil)
+
+// Builds an orderedMap with n elements, obtaining the key and value of element
+// i using elemToKeyVal(i).
+func toOrderedMap(n int, elemToKeyVal func(i int) (string, interface{})) orderedMap {
+ om := make(orderedMap, n)
+ for i := range om {
+ om[i].Key, om[i].Val = elemToKeyVal(i)
+ }
+ return om
+}
+
+// Serializes orderedMap to JSON object, preserving key order.
+func (om orderedMap) MarshalJSON() (_ []byte, rerr error) {
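+ // The per-element marshalling callbacks below panic on error; recover
+ // converts such a panic into a returned error.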
+ defer func() {
+ if r := recover(); r != nil {
+ rerr = fmt.Errorf("orderedMap: %v", r)
+ }
+ }()
+ return []byte(listToString("{", ",", "}", len(om), func(i int) string {
+ keyJson, err := json.Marshal(om[i].Key)
+ if err != nil {
+ panic(err)
+ }
+ valJson, err := json.Marshal(om[i].Val)
+ if err != nil {
+ panic(err)
+ }
+ return fmt.Sprintf("%s:%s", keyJson, valJson)
+ })), nil
+}
diff --git a/cmd/sb51/internal/writer/writer_test.go b/cmd/sb51/internal/writer/writer_test.go
new file mode 100644
index 0000000..3d7c1f2
--- /dev/null
+++ b/cmd/sb51/internal/writer/writer_test.go
@@ -0,0 +1,564 @@
+// Copyright 2015 The Vanadium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package writer_test
+
+import (
+ "bytes"
+ "encoding/json"
+ "testing"
+ "time"
+
+ "v.io/syncbase/v23/syncbase/nosql"
+ db "v.io/syncbase/x/ref/syncbase/sb51/internal/demodb"
+ "v.io/syncbase/x/ref/syncbase/sb51/internal/writer"
+ "v.io/v23/vdl"
+)
+
+type fakeResultStream struct {
+ rows [][]*vdl.Value
+ curr int
+}
+
+var (
+ customer = db.Customer{
+ Name: "John Smith",
+ Id: 1,
+ Active: true,
+ Address: db.AddressInfo{
+ Street: "1 Main St.",
+ City: "Palo Alto",
+ State: "CA",
+ Zip: "94303",
+ },
+ Credit: db.CreditReport{
+ Agency: db.CreditAgencyEquifax,
+ Report: db.AgencyReportEquifaxReport{Value: db.EquifaxCreditReport{Rating: 'A'}},
+ },
+ }
+ invoice = db.Invoice{
+ CustId: 1,
+ InvoiceNum: 1000,
+ Amount: 42,
+ ShipTo: db.AddressInfo{
+ Street: "1 Main St.",
+ City: "Palo Alto",
+ State: "CA",
+ Zip: "94303",
+ },
+ }
+)
+
+func array2String(s1, s2 string) db.Array2String {
+ a := [2]string{s1, s2}
+ return db.Array2String(a)
+}
+
+func newResultStream(iRows [][]interface{}) nosql.ResultStream {
+ vRows := make([][]*vdl.Value, len(iRows))
+ for i, iRow := range iRows {
+ vRow := make([]*vdl.Value, len(iRow))
+ for j, iCol := range iRow {
+ vRow[j] = vdl.ValueOf(iCol)
+ }
+ vRows[i] = vRow
+ }
+ return &fakeResultStream{
+ rows: vRows,
+ curr: -1,
+ }
+}
+
+func (f *fakeResultStream) Advance() bool {
+ f.curr++
+ return f.curr < len(f.rows)
+}
+
+func (f *fakeResultStream) Result() []*vdl.Value {
+ if f.curr == -1 {
+ panic("call advance first")
+ }
+ return f.rows[f.curr]
+}
+
+func (f *fakeResultStream) Err() error {
+ return nil
+}
+
+func (f *fakeResultStream) Cancel() {
+ // Nothing to do.
+}
+
+func TestWriteTable(t *testing.T) {
+ type testCase struct {
+ columns []string
+ rows [][]interface{}
+ // To make the test cases easier to read, output should have a leading
+ // newline.
+ output string
+ }
+ tests := []testCase{
+ {
+ []string{"c1", "c2"},
+ [][]interface{}{
+ {5, "foo"},
+ {6, "bar"},
+ },
+ `
++----+-----+
+| c1 | c2 |
++----+-----+
+| 5 | foo |
+| 6 | bar |
++----+-----+
+`,
+ },
+ {
+ []string{"c1", "c2"},
+ [][]interface{}{
+ {500, "foo"},
+ {6, "barbaz"},
+ },
+ `
++-----+--------+
+| c1 | c2 |
++-----+--------+
+| 500 | foo |
+| 6 | barbaz |
++-----+--------+
+`,
+ },
+ {
+ []string{"c1", "reallylongcolumnheader"},
+ [][]interface{}{
+ {5, "foo"},
+ {6, "bar"},
+ },
+ `
++----+------------------------+
+| c1 | reallylongcolumnheader |
++----+------------------------+
+| 5 | foo |
+| 6 | bar |
++----+------------------------+
+`,
+ },
+ { // Numbers.
+ []string{"byte", "uint16", "uint32", "uint64", "int16", "int32", "int64",
+ "float32", "float64", "complex64", "complex128"},
+ [][]interface{}{
+ {
+ byte(12), uint16(1234), uint32(5678), uint64(999888777666), int16(9876), int32(876543), int64(128),
+ float32(3.14159), float64(2.71828182846), complex64(123.0 + 7.0i), complex128(456.789 + 10.1112i),
+ },
+ {
+ byte(9), uint16(99), uint32(999), uint64(9999999), int16(9), int32(99), int64(88),
+ float32(1.41421356237), float64(1.73205080757), complex64(9.87 + 7.65i), complex128(4.32 + 1.0i),
+ },
+ },
+ `
++------+--------+--------+--------------+-------+--------+-------+--------------------+---------------+--------------------------------------+------------------+
+| byte | uint16 | uint32 | uint64 | int16 | int32 | int64 | float32 | float64 | complex64 | complex128 |
++------+--------+--------+--------------+-------+--------+-------+--------------------+---------------+--------------------------------------+------------------+
+| 12 | 1234 | 5678 | 999888777666 | 9876 | 876543 | 128 | 3.141590118408203 | 2.71828182846 | 123+7i | 456.789+10.1112i |
+| 9 | 99 | 999 | 9999999 | 9 | 99 | 88 | 1.4142135381698608 | 1.73205080757 | 9.869999885559082+7.650000095367432i | 4.32+1i |
++------+--------+--------+--------------+-------+--------+-------+--------------------+---------------+--------------------------------------+------------------+
+`,
+ },
+ { // Strings with whitespace should be printed literally.
+ []string{"c1", "c2"},
+ [][]interface{}{
+ {"foo\tbar", "foo\nbar"},
+ },
+ `
++---------+---------+
+| c1 | c2 |
++---------+---------+
+| foo bar | foo
+bar |
++---------+---------+
+`,
+ },
+ { // nil is shown as blank.
+ []string{"c1"},
+ [][]interface{}{
+ {nil},
+ },
+ `
++----+
+| c1 |
++----+
+| |
++----+
+`,
+ },
+ {
+ []string{"c1"},
+ [][]interface{}{{customer}, {invoice}},
+ `
++------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
+| c1 |
++------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
+| {Name: "John Smith", Id: 1, Active: true, Address: {Street: "1 Main St.", City: "Palo Alto", State: "CA", Zip: "94303"}, Credit: {Agency: Equifax, Report: EquifaxReport: {Rating: 65}}} |
+| {CustId: 1, InvoiceNum: 1000, Amount: 42, ShipTo: {Street: "1 Main St.", City: "Palo Alto", State: "CA", Zip: "94303"}} |
++------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
+`,
+ },
+ {
+ []string{"c1"},
+ [][]interface{}{
+ {db.Composite{array2String("foo", "棎鶊鵱"), []int32{1, 2}, map[int32]struct{}{1: struct{}{}, 2: struct{}{}}, map[string]int32{"foo": 1, "bar": 2}}},
+ },
+ `
++----------------------------------------------------------------------------------+
+| c1 |
++----------------------------------------------------------------------------------+
+| {Arr: ["foo", "棎鶊鵱"], ListInt: [1, 2], MySet: {1, 2}, Map: {"bar": 2, "foo": 1}} |
++----------------------------------------------------------------------------------+
+`,
+ },
+ { // Types not built in to Go.
+ []string{"time", "type", "union", "enum", "set"},
+ [][]interface{}{
+ {time.Unix(13377331, 0), vdl.TypeOf(map[float32]struct{ B bool }{}), db.TitleOrValueTypeTitle{"dahar master"}, db.ExperianRatingBad, map[int32]struct{}{47: struct{}{}}},
+ },
+ `
++-------------------------------+----------------------------------------+-----------------------+------+------+
+| time | type | union | enum | set |
++-------------------------------+----------------------------------------+-----------------------+------+------+
+| 1970-06-04 19:55:31 +0000 UTC | typeobject(map[float32]struct{B bool}) | Title: "dahar master" | Bad | {47} |
++-------------------------------+----------------------------------------+-----------------------+------+------+
+`,
+ },
+ {
+ []string{"c1"},
+ [][]interface{}{
+ {
+ db.Recursive{
+ Any: nil,
+ Maybe: &db.Times{
+ Stamp: time.Unix(123456789, 42244224),
+ Interval: time.Duration(13377331),
+ },
+ Rec: map[db.Array2String]db.Recursive{
+ array2String("a", "b"): db.Recursive{},
+ array2String("x\nx", "y\"y"): db.Recursive{
+ Any: vdl.ValueOf(db.AgencyReportExperianReport{Value: db.ExperianCreditReport{Rating: db.ExperianRatingGood}}),
+ Maybe: nil,
+ Rec: nil,
+ },
+ },
+ },
+ },
+ },
+ `
++----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
+| c1 |
++----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
+| {Any: nil, Maybe: {Stamp: "1973-11-29 21:33:09.042244224 +0000 UTC", Interval: "13.377331ms"}, Rec: {["a", "b"]: {Any: nil, Maybe: nil, Rec: {}}, ["x\nx", "y\"y"]: {Any: ExperianReport: {Rating: Good}, Maybe: nil, Rec: {}}}} |
++----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
+`,
+ },
+ }
+ for _, test := range tests {
+ var b bytes.Buffer
+ if err := writer.WriteTable(&b, test.columns, newResultStream(test.rows)); err != nil {
+ t.Errorf("Unexpected error: %v", err)
+ continue
+ }
+ // Add a leading newline to the output to match the leading newline
+ // in our test cases.
+ if got, want := "\n"+b.String(), test.output; got != want {
+ t.Errorf("Wrong output:\nGOT:%s\nWANT:%s", got, want)
+ }
+ }
+}
+
+func TestWriteCSV(t *testing.T) {
+ type testCase struct {
+ columns []string
+ rows [][]interface{}
+ delimiter string
+ // To make the test cases easier to read, output should have a leading
+ // newline.
+ output string
+ }
+ tests := []testCase{
+ { // Basic.
+ []string{"c1", "c2"},
+ [][]interface{}{
+ {5, "foo"},
+ {6, "bar"},
+ },
+ ",",
+ `
+c1,c2
+5,foo
+6,bar
+`,
+ },
+ { // Numbers.
+ []string{"byte", "uint16", "uint32", "uint64", "int16", "int32", "int64",
+ "float32", "float64", "complex64", "complex128"},
+ [][]interface{}{
+ {
+ byte(12), uint16(1234), uint32(5678), uint64(999888777666), int16(9876), int32(876543), int64(128),
+ float32(3.14159), float64(2.71828182846), complex64(123.0 + 7.0i), complex128(456.789 + 10.1112i),
+ },
+ {
+ byte(9), uint16(99), uint32(999), uint64(9999999), int16(9), int32(99), int64(88),
+ float32(1.41421356237), float64(1.73205080757), complex64(9.87 + 7.65i), complex128(4.32 + 1.0i),
+ },
+ },
+ ",",
+ `
+byte,uint16,uint32,uint64,int16,int32,int64,float32,float64,complex64,complex128
+12,1234,5678,999888777666,9876,876543,128,3.141590118408203,2.71828182846,123+7i,456.789+10.1112i
+9,99,999,9999999,9,99,88,1.4142135381698608,1.73205080757,9.869999885559082+7.650000095367432i,4.32+1i
+`,
+ },
+ {
+ // Values containing newlines, double quotes, and the delimiter must be
+ // enclosed in double quotes.
+ []string{"c1", "c2"},
+ [][]interface{}{
+ {"foo\tbar", "foo\nbar"},
+ {"foo\"bar\"", "foo,bar"},
+ },
+ ",",
+ `
+c1,c2
+foo bar,"foo
+bar"
+"foo""bar""","foo,bar"
+`,
+ },
+ { // Delimiters other than comma should be supported.
+ []string{"c1", "c2"},
+ [][]interface{}{
+ {"foo\tbar", "foo\nbar"},
+ {"foo\"bar\"", "foo,bar"},
+ },
+ "\t",
+ `
+c1 c2
+"foo bar" "foo
+bar"
+"foo""bar""" foo,bar
+`,
+ },
+ { // Column names should be escaped properly.
+ []string{"foo\tbar", "foo,bar"},
+ [][]interface{}{},
+ ",",
+ `
+foo bar,"foo,bar"
+`,
+ },
+ { // Same as above but use a non-default delimiter.
+ []string{"foo\tbar", "foo,棎鶊鵱"},
+ [][]interface{}{},
+ "\t",
+ `
+"foo bar" foo,棎鶊鵱
+`,
+ },
+ {
+ []string{"c1"},
+ [][]interface{}{{customer}, {invoice}},
+ ",",
+ `
+c1
+"{Name: ""John Smith"", Id: 1, Active: true, Address: {Street: ""1 Main St."", City: ""Palo Alto"", State: ""CA"", Zip: ""94303""}, Credit: {Agency: Equifax, Report: EquifaxReport: {Rating: 65}}}"
+"{CustId: 1, InvoiceNum: 1000, Amount: 42, ShipTo: {Street: ""1 Main St."", City: ""Palo Alto"", State: ""CA"", Zip: ""94303""}}"
+`,
+ },
+ }
+ for _, test := range tests {
+ var b bytes.Buffer
+ if err := writer.WriteCSV(&b, test.columns, newResultStream(test.rows), test.delimiter); err != nil {
+ t.Errorf("Unexpected error: %v", err)
+ continue
+ }
+ // Add a leading newline to the output to match the leading newline
+ // in our test cases.
+ if got, want := "\n"+b.String(), test.output; got != want {
+ t.Errorf("Wrong output:\nGOT: %q\nWANT:%q", got, want)
+ }
+ }
+}
+
+func TestWriteJson(t *testing.T) {
+ type testCase struct {
+ columns []string
+ rows [][]interface{}
+ // To make the test cases easier to read, output should have a leading
+ // newline.
+ output string
+ }
+ tests := []testCase{
+ { // Basic.
+ []string{"c\n1", "c鶊2"},
+ [][]interface{}{
+ {5, "foo\nbar"},
+ {6, "bar\tfoo"},
+ },
+ `
+[{
+ "c\n1": 5,
+ "c鶊2": "foo\nbar"
+}, {
+ "c\n1": 6,
+ "c鶊2": "bar\tfoo"
+}]
+`,
+ },
+ { // Numbers.
+ []string{"byte", "uint16", "uint32", "uint64", "int16", "int32", "int64",
+ "float32", "float64", "complex64", "complex128"},
+ [][]interface{}{
+ {
+ byte(12), uint16(1234), uint32(5678), uint64(999888777666), int16(9876), int32(876543), int64(128),
+ float32(3.14159), float64(2.71828182846), complex64(123.0 + 7.0i), complex128(456.789 + 10.1112i),
+ },
+ {
+ byte(9), uint16(99), uint32(999), uint64(9999999), int16(9), int32(99), int64(88),
+ float32(1.41421356237), float64(1.73205080757), complex64(9.87 + 7.65i), complex128(4.32 + 1.0i),
+ },
+ },
+ `
+[{
+ "byte": 12,
+ "uint16": 1234,
+ "uint32": 5678,
+ "uint64": 999888777666,
+ "int16": 9876,
+ "int32": 876543,
+ "int64": 128,
+ "float32": 3.141590118408203,
+ "float64": 2.71828182846,
+ "complex64": "123+7i",
+ "complex128": "456.789+10.1112i"
+}, {
+ "byte": 9,
+ "uint16": 99,
+ "uint32": 999,
+ "uint64": 9999999,
+ "int16": 9,
+ "int32": 99,
+ "int64": 88,
+ "float32": 1.4142135381698608,
+ "float64": 1.73205080757,
+ "complex64": "9.869999885559082+7.650000095367432i",
+ "complex128": "4.32+1i"
+}]
+`,
+ },
+ { // Empty result.
+ []string{"nothing", "nada", "zilch"},
+ [][]interface{}{},
+ `
+[]
+`,
+ },
+ { // Empty column set.
+ []string{},
+ [][]interface{}{
+ {},
+ {},
+ },
+ `
+[{
+}, {
+}]
+`,
+ },
+ { // Empty values.
+ []string{"blank", "empty", "nil"},
+ [][]interface{}{
+ {struct{}{}, []string{}, nil},
+ },
+ `
+[{
+ "blank": {},
+ "empty": [],
+ "nil": null
+}]
+`,
+ },
+ {
+ []string{"c1"},
+ [][]interface{}{{customer}, {invoice}},
+ `
+[{
+ "c1": {"Name":"John Smith","Id":1,"Active":true,"Address":{"Street":"1 Main St.","City":"Palo Alto","State":"CA","Zip":"94303"},"Credit":{"Agency":"Equifax","Report":{"EquifaxReport":{"Rating":65}}}}
+}, {
+ "c1": {"CustId":1,"InvoiceNum":1000,"Amount":42,"ShipTo":{"Street":"1 Main St.","City":"Palo Alto","State":"CA","Zip":"94303"}}
+}]
+`,
+ },
+ {
+ []string{"nil", "composite", "typeobj"},
+ [][]interface{}{
+ {
+ nil,
+ db.Composite{array2String("foo", "bar"), []int32{1, 2}, map[int32]struct{}{1: struct{}{}, 2: struct{}{}}, map[string]int32{"foo": 1, "bar": 2}},
+ vdl.TypeOf(map[string]struct{}{}),
+ },
+ },
+ `
+[{
+ "nil": null,
+ "composite": {"Arr":["foo","bar"],"ListInt":[1,2],"MySet":{"1":true,"2":true},"Map":{"bar":2,"foo":1}},
+ "typeobj": "typeobject(set[string])"
+}]
+`,
+ },
+ {
+ []string{"c1"},
+ [][]interface{}{
+ {
+ db.Recursive{
+ Any: nil,
+ Maybe: &db.Times{
+ Stamp: time.Unix(123456789, 42244224),
+ Interval: time.Duration(1337),
+ },
+ Rec: map[db.Array2String]db.Recursive{
+ array2String("a", "棎鶊鵱"): db.Recursive{},
+ array2String("x", "y"): db.Recursive{
+ Any: vdl.ValueOf(db.CreditReport{
+ Agency: db.CreditAgencyExperian,
+ Report: db.AgencyReportExperianReport{Value: db.ExperianCreditReport{Rating: db.ExperianRatingGood}},
+ }),
+ Maybe: nil,
+ Rec: nil,
+ },
+ }},
+ },
+ },
+ `
+[{
+ "c1": {"Any":null,"Maybe":{"Stamp":"1973-11-29 21:33:09.042244224 +0000 UTC","Interval":"1.337µs"},"Rec":{"[\"a\", \"棎鶊鵱\"]":{"Any":null,"Maybe":null,"Rec":{}},"[\"x\", \"y\"]":{"Any":{"Agency":"Experian","Report":{"ExperianReport":{"Rating":"Good"}}},"Maybe":null,"Rec":{}}}}
+}]
+`,
+ },
+ }
+ for _, test := range tests {
+ var b bytes.Buffer
+ if err := writer.WriteJson(&b, test.columns, newResultStream(test.rows)); err != nil {
+ t.Errorf("Unexpected error: %v", err)
+ continue
+ }
+ var decoded interface{}
+ if err := json.Unmarshal(b.Bytes(), &decoded); err != nil {
+ t.Errorf("Got invalid JSON: %v", err)
+ }
+ // Add a leading newline to the output to match the leading newline
+ // in our test cases.
+ if got, want := "\n"+b.String(), test.output; got != want {
+ t.Errorf("Wrong output:\nGOT: %q\nWANT:%q", got, want)
+ }
+ }
+}
diff --git a/cmd/sb51/main.go b/cmd/sb51/main.go
new file mode 100644
index 0000000..9968722
--- /dev/null
+++ b/cmd/sb51/main.go
@@ -0,0 +1,34 @@
+// Copyright 2015 The Vanadium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// Antimony (sb51) - Syncbase general-purpose client and management utility.
+// Currently supports SyncQL select queries.
+
+package main
+
+import (
+ "flag"
+
+ "v.io/x/lib/cmdline"
+ _ "v.io/x/ref/runtime/factories/generic"
+)
+
+func main() {
+ cmdline.Main(cmdSb51)
+}
+
+var cmdSb51 = &cmdline.Command{
+ Name: "sb51",
+ Short: "Antimony - Vanadium Syncbase client and management utility",
+ Long: `
+Syncbase general-purpose client and management utility.
+Currently supports starting a SyncQL shell.
+`,
+ Children: []*cmdline.Command{cmdSbShell},
+}
+
+var (
+ // TODO(ivanpi): Decide on convention for local syncbase service name.
+ flagSbService = flag.String("service", "/:8101/syncbase", "Location of the Syncbase service to connect to. Can be absolute or relative to the namespace root.")
+)
diff --git a/cmd/sb51/shell.go b/cmd/sb51/shell.go
new file mode 100644
index 0000000..b5f18c0
--- /dev/null
+++ b/cmd/sb51/shell.go
@@ -0,0 +1,240 @@
+// Copyright 2015 The Vanadium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// Syncbase client shell. Currently supports SyncQL select queries.
+
+package main
+
+import (
+ "fmt"
+ "io"
+ "os"
+ "strconv"
+ "strings"
+
+ isatty "github.com/mattn/go-isatty"
+
+ "v.io/syncbase/v23/syncbase"
+ "v.io/syncbase/v23/syncbase/nosql"
+ "v.io/syncbase/x/ref/syncbase/sb51/internal/demodb"
+ "v.io/syncbase/x/ref/syncbase/sb51/internal/reader"
+ "v.io/syncbase/x/ref/syncbase/sb51/internal/writer"
+ "v.io/v23/context"
+ "v.io/x/lib/cmdline"
+ "v.io/x/ref/lib/v23cmd"
+)
+
+var cmdSbShell = &cmdline.Command{
+ Runner: v23cmd.RunnerFunc(runSbShell),
+ Name: "sh",
+ Short: "Start a SyncQL shell",
+ Long: `
+Connect to a database on the Syncbase service and start a SyncQL shell.
+`,
+ ArgsName: "<app_name> <db_name>",
+ ArgsLong: `
+<app_name> and <db_name> specify the database to execute queries against.
+The database must exist unless -create-missing is specified.
+`,
+}
+
+var (
+ flagFormat string
+ flagCSVDelimiter string
+ flagCreateIfNotExists bool
+ flagMakeDemoTables bool
+)
+
+func init() {
+ cmdSbShell.Flags.StringVar(&flagFormat, "format", "table", "Output format. 'table': human-readable table; 'csv': comma-separated values, use -csv-delimiter to control the delimiter; 'json': JSON objects.")
+ cmdSbShell.Flags.StringVar(&flagCSVDelimiter, "csv-delimiter", ",", "Delimiter to use when printing data as CSV (e.g. \"\t\", \",\")")
+ cmdSbShell.Flags.BoolVar(&flagCreateIfNotExists, "create-missing", false, "Create the app and/or database if they do not exist yet.")
+ cmdSbShell.Flags.BoolVar(&flagMakeDemoTables, "make-demo", false, "(Re)create demo tables in the database.")
+}
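+
+// Example invocation (flags defined above; app/db names are placeholders):
+//   sb51 sh -create-missing -format=json <app_name> <db_name>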
+
+func validateFlags() error {
+ if flagFormat != "table" && flagFormat != "csv" && flagFormat != "json" {
+ return fmt.Errorf("Unsupported -format %q. Must be one of 'table', 'csv', or 'json'.", flagFormat)
+ }
+ if len(flagCSVDelimiter) == 0 {
+ return fmt.Errorf("-csv-delimiter cannot be empty.")
+ }
+ return nil
+}
+
+// runSbShell starts a SyncQL shell against the specified database.
+// It runs in interactive or batch mode depending on whether stdin is a terminal.
+func runSbShell(ctx *context.T, env *cmdline.Env, args []string) error {
+ // TODO(ivanpi): Add 'use' statement, default to no app/database selected.
+ if len(args) != 2 {
+ return env.UsageErrorf("exactly two arguments expected")
+ }
+ appName, dbName := args[0], args[1]
+ if err := validateFlags(); err != nil {
+ return env.UsageErrorf("%v", err)
+ }
+
+ sbs := syncbase.NewService(*flagSbService)
+ d, err := openAppDB(ctx, sbs, appName, dbName, flagCreateIfNotExists)
+ if err != nil {
+ return err
+ }
+
+ if flagMakeDemoTables {
+ if err := makeDemoDB(ctx, d); err != nil {
+ return err
+ }
+ }
+
+ var input *reader.T
+ // TODO(ivanpi): This is hacky; it would be better for lib/cmdline to support IsTerminal.
+ stdinFile, ok := env.Stdin.(*os.File)
+ isTerminal := ok && isatty.IsTerminal(stdinFile.Fd())
+ if isTerminal {
+ input = reader.NewInteractive()
+ } else {
+ input = reader.NewNonInteractive()
+ }
+ defer input.Close()
+
+stmtLoop:
+ for {
+ if q, err := input.GetQuery(); err != nil {
+ if err == io.EOF {
+ if isTerminal {
+ // ctrl-d
+ fmt.Println()
+ }
+ break
+ } else {
+ // ctrl-c
+ break
+ }
+ } else {
+ var err error
+ tq := strings.Fields(q)
+ if len(tq) > 0 {
+ switch strings.ToLower(tq[0]) {
+ case "exit", "quit":
+ break stmtLoop
+ case "dump":
+ err = dumpDB(ctx, env.Stdout, d)
+ case "make-demo":
+ err = makeDemoDB(ctx, d)
+ case "select":
+ err = queryExec(ctx, env.Stdout, d, q)
+ default:
+ err = fmt.Errorf("unknown statement: '%s'; expected one of: 'select', 'make-demo', 'dump', 'exit', 'quit'", strings.ToLower(tq[0]))
+ }
+ }
+ if err != nil {
+ if isTerminal {
+ fmt.Fprintln(env.Stderr, "Error:", err)
+ } else {
+ // If running non-interactively, errors stop execution.
+ return err
+ }
+ }
+ }
+ }
+
+ return nil
+}
+
+func openAppDB(ctx *context.T, sbs syncbase.Service, appName, dbName string, createIfNotExists bool) (nosql.Database, error) {
+ app := sbs.App(appName)
+ if exists, err := app.Exists(ctx); err != nil {
+ return nil, fmt.Errorf("failed checking for app %q: %v", app.FullName(), err)
+ } else if !exists {
+ if !createIfNotExists {
+ return nil, fmt.Errorf("app %q does not exist", app.FullName())
+ }
+ if err := app.Create(ctx, nil); err != nil {
+ return nil, err
+ }
+ }
+ d := app.NoSQLDatabase(dbName, nil)
+ if exists, err := d.Exists(ctx); err != nil {
+ return nil, fmt.Errorf("failed checking for db %q: %v", d.FullName(), err)
+ } else if !exists {
+ if !createIfNotExists {
+ return nil, fmt.Errorf("db %q does not exist", d.FullName())
+ }
+ if err := d.Create(ctx, nil); err != nil {
+ return nil, err
+ }
+ }
+ return d, nil
+}
+
+func dumpDB(ctx *context.T, w io.Writer, d nosql.Database) error {
+ tables, err := d.ListTables(ctx)
+ if err != nil {
+ return fmt.Errorf("failed listing tables: %v", err)
+ }
+ var errs []error
+ for _, table := range tables {
+ fmt.Fprintf(w, "table: %s\n", table)
+ if err := queryExec(ctx, w, d, fmt.Sprintf("select k, v from %s", table)); err != nil {
+ errs = append(errs, fmt.Errorf("> %s: %v", table, err))
+ }
+ }
+ if len(errs) > 0 {
+ err := fmt.Errorf("failed dumping %d of %d tables:", len(errs), len(tables))
+ for _, e := range errs {
+ err = fmt.Errorf("%v\n%v", err, e)
+ }
+ return err
+ }
+ return nil
+}
+
+func makeDemoDB(ctx *context.T, d nosql.Database) error {
+ if err := demodb.PopulateDemoDB(ctx, d); err != nil {
+ return fmt.Errorf("failed making demo tables: %v", err)
+ }
+ return nil
+}
+
+// splitError splits an error message into an offset and the remaining message
+// (i.e., the part to the right of the offset).
+// The convention for syncql is "<module><optional-rpc>[offset]<remaining-message>".
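+// For example (hypothetical error string, for illustration only), an error
+// "syncql[13]Expected 'from', found fro." splits into offset 13 and the
+// message "Expected 'from', found fro.".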
+func splitError(err error) (int64, string) {
+ errMsg := err.Error()
+ idx1 := strings.Index(errMsg, "[")
+ idx2 := strings.Index(errMsg, "]")
+ if idx1 == -1 || idx2 == -1 {
+ return 0, errMsg
+ }
+ offsetString := errMsg[idx1+1 : idx2]
+ offset, err := strconv.ParseInt(offsetString, 10, 64)
+ if err != nil {
+ return 0, errMsg
+ }
+ return offset, errMsg[idx2+1:]
+}
+
+func queryExec(ctx *context.T, w io.Writer, d nosql.Database, q string) error {
+ if columnNames, rs, err := d.Exec(ctx, q); err != nil {
+ off, msg := splitError(err)
+ return fmt.Errorf("\n%s\n%s^\n%d: %s", q, strings.Repeat(" ", int(off)), off+1, msg)
+ } else {
+ switch flagFormat {
+ case "table":
+ if err := writer.WriteTable(w, columnNames, rs); err != nil {
+ return err
+ }
+ case "csv":
+ if err := writer.WriteCSV(w, columnNames, rs, flagCSVDelimiter); err != nil {
+ return err
+ }
+ case "json":
+ if err := writer.WriteJson(w, columnNames, rs); err != nil {
+ return err
+ }
+ default:
+ panic(fmt.Sprintf("invalid format flag value: %v", flagFormat))
+ }
+ }
+ return nil
+}
diff --git a/services/syncbase/clock/clock_darwin.go b/services/syncbase/clock/clock_darwin.go
new file mode 100644
index 0000000..4c801cb
--- /dev/null
+++ b/services/syncbase/clock/clock_darwin.go
@@ -0,0 +1,51 @@
+// Copyright 2015 The Vanadium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package clock
+
+import (
+ "bytes"
+ "encoding/binary"
+ "syscall"
+ "time"
+ "unsafe"
+)
+
+// This file contains Darwin-specific implementations of functions for the
+// clock package.
+
+// ElapsedTime returns the time elapsed since last boot.
+// Darwin provides the sysctl "kern.boottime", which returns a Timeval32
+// object containing the boot time for the system. Darwin calculates this
+// boottime based on the current clock and the internal tracking of elapsed
+// time since boot. Hence if the clock is changed, the boot time changes along
+// with it. So the difference between the current time and boot time will always
+// give us the correct elapsed time since boot.
+func (sc *systemClockImpl) ElapsedTime() (time.Duration, error) {
+ tv := syscall.Timeval32{}
+
+ if err := sysctlbyname("kern.boottime", &tv); err != nil {
+ return 0, err
+ }
+ return time.Since(time.Unix(int64(tv.Sec), int64(tv.Usec)*1000)), nil
+}
+
+// Generic Sysctl buffer unmarshalling.
+func sysctlbyname(name string, data interface{}) (err error) {
+ val, err := syscall.Sysctl(name)
+ if err != nil {
+ return err
+ }
+
+ buf := []byte(val)
+
+ switch v := data.(type) {
+ case *uint64:
+ *v = *(*uint64)(unsafe.Pointer(&buf[0]))
+ return
+ }
+
+ bbuf := bytes.NewBuffer([]byte(val))
+ return binary.Read(bbuf, binary.LittleEndian, data)
+}
diff --git a/services/syncbase/clock/clock_linux.go b/services/syncbase/clock/clock_linux.go
new file mode 100644
index 0000000..dabaab4
--- /dev/null
+++ b/services/syncbase/clock/clock_linux.go
@@ -0,0 +1,26 @@
+// Copyright 2015 The Vanadium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package clock
+
+import (
+ "syscall"
+ "time"
+)
+
+// This file contains Linux-specific implementations of functions for the
+// clock package.
+
+// ElapsedTime returns the time elapsed since last boot.
+// Linux stores this information in /proc/uptime as seconds since boot, with a
+// precision of up to 2 decimal places.
+// NOTE: The Go syscall returns the elapsed time in whole seconds, rounding to
+// the closest second. Be careful when using this value, as it can introduce a
+// compounding error.
+func (sc *systemClockImpl) ElapsedTime() (time.Duration, error) {
+ var sysInfo syscall.Sysinfo_t
+ if err := syscall.Sysinfo(&sysInfo); err != nil {
+ return 0, err
+ }
+ return time.Duration(sysInfo.Uptime) * time.Second, nil
+}
diff --git a/services/syncbase/clock/clockservice.go b/services/syncbase/clock/clockservice.go
new file mode 100644
index 0000000..fed9a02
--- /dev/null
+++ b/services/syncbase/clock/clockservice.go
@@ -0,0 +1,127 @@
+// Copyright 2015 The Vanadium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package clock
+
+import (
+ "math"
+ "time"
+
+ "v.io/syncbase/x/ref/services/syncbase/server/util"
+ "v.io/v23/context"
+ "v.io/v23/verror"
+ "v.io/x/lib/vlog"
+)
+
+// This file contains code that checks the current system clock to detect
+// whether it has been changed by an external action.
+
+// runClockCheck estimates the current system time based on saved boottime
+// and elapsed time since boot and checks if the system clock shows the same
+// time. This involves the following steps:
+// 1) Check if system was rebooted since last run. If so update the saved
+// ClockData.
+// 2) Fetch stored ClockData. If none exists, this is the first time
+// runClockCheck has been run. Write new ClockData.
+// 3) Estimate current system clock time and check if the actual system clock
+// agrees with the estimation. If not update the skew value appropriately.
+// 4) Update saved elapsed time since boot. This is used to check if the system
+// was rebooted or not. TODO(jlodhia): work with device manager to provide a
+// way to notify syncbase if the system was just rebooted.
+func (c *VClock) runClockCheck(ctx *context.T) {
+ checkSystemRebooted(ctx, c)
+
+ clockData := &ClockData{}
+ if err := c.sa.GetClockData(ctx, clockData); err != nil {
+ if verror.ErrorID(err) == verror.ErrNoExist.ID {
+ // VClock's cron job to set up UTC time at boot is being run for the
+ // first time. The skew is not yet known, hence 0 is assigned.
+ writeNewClockData(ctx, c, 0)
+ } else {
+ vlog.Errorf("Error while fetching clock data: %v", err)
+ }
+ return
+ }
+
+ systemTime := c.clock.Now()
+ elapsedTime, err := c.clock.ElapsedTime()
+ if err != nil {
+ vlog.Errorf("Error while fetching elapsed time: %v", err)
+ return
+ }
+
+ newClockData := &ClockData{
+ SystemTimeAtBoot: clockData.SystemTimeAtBoot,
+ Skew: clockData.Skew,
+ ElapsedTimeSinceBoot: elapsedTime.Nanoseconds(),
+ }
+
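+ // If the estimated clock time (boot time + elapsed time) disagrees with the
+ // actual system clock by more than the drift threshold, fold the difference
+ // into the skew and recompute the boot time accordingly.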
+ estimatedClockTime := clockData.SystemBootTime().Add(elapsedTime)
+ diff := estimatedClockTime.Sub(systemTime)
+ if math.Abs(float64(diff.Nanoseconds())) > util.LocalClockDriftThreshold {
+ newClockData.Skew = newClockData.Skew + diff.Nanoseconds()
+ newSystemTimeAtBoot := systemTime.Add(-elapsedTime)
+ newClockData.SystemTimeAtBoot = newSystemTimeAtBoot.UnixNano()
+ }
+
+ if err := c.sa.SetClockData(ctx, newClockData); err != nil {
+ vlog.Errorf("Error while setting clock data: %v", err)
+ }
+}
+
+func writeNewClockData(ctx *context.T, c *VClock, skew time.Duration) {
+ systemTime := c.clock.Now()
+ elapsedTime, err := c.clock.ElapsedTime()
+ if err != nil {
+ vlog.Errorf("Error while fetching elapsed time: %v", err)
+ return
+ }
+ systemTimeAtBoot := systemTime.Add(-elapsedTime)
+ clockData := &ClockData{
+ SystemTimeAtBoot: systemTimeAtBoot.UnixNano(),
+ Skew: skew.Nanoseconds(),
+ ElapsedTimeSinceBoot: elapsedTime.Nanoseconds(),
+ }
+ if err := c.sa.SetClockData(ctx, clockData); err != nil {
+ vlog.Errorf("Error while setting clock data: %v", err)
+ }
+}
+
+// checkSystemRebooted compares the elapsed time stored during the last run
+// of runClockCheck() to the current elapsed time since boot provided by the
+// system clock. Since elapsed time is monotonically increasing and cannot be
+// changed unless a reboot happens, if the current value is lower than the
+// previous value then a reboot has happened since the last run. If so, update
+// the boot time and elapsed time since boot appropriately.
+func checkSystemRebooted(ctx *context.T, c *VClock) bool {
+ currentSysTime := c.clock.Now()
+ elapsedTime, err := c.clock.ElapsedTime()
+ if err != nil {
+ vlog.Errorf("Error while fetching elapsed time: %v", err)
+ return false
+ }
+
+ clockData := &ClockData{}
+ if err := c.sa.GetClockData(ctx, clockData); err != nil {
+ if verror.ErrorID(err) != verror.ErrNoExist.ID {
+ vlog.Errorf("Error while fetching clock delta: %v", err)
+ }
+ // In case of verror.ErrNoExist, no clock data is present and nothing needs
+ // to be done here; writeNewClockData() will write new clock data to storage.
+ return false
+ }
+
+ if elapsedTime.Nanoseconds() < clockData.ElapsedTimeSinceBoot {
+ // Since the elapsed time since boot provided by the system is less than
+ // the elapsed time since boot seen the last time the clockservice ran,
+ // the system must have rebooted in between.
+ clockData.SystemTimeAtBoot = currentSysTime.Add(-elapsedTime).UnixNano()
+ clockData.ElapsedTimeSinceBoot = elapsedTime.Nanoseconds()
+ if err := c.sa.SetClockData(ctx, clockData); err != nil {
+ vlog.Errorf("Error while setting clock data: %v", err)
+ }
+ return true
+ }
+ return false
+}
diff --git a/services/syncbase/clock/clockservice_test.go b/services/syncbase/clock/clockservice_test.go
new file mode 100644
index 0000000..86650c1
--- /dev/null
+++ b/services/syncbase/clock/clockservice_test.go
@@ -0,0 +1,191 @@
+// Copyright 2015 The Vanadium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package clock
+
+import (
+ "testing"
+ "time"
+)
+
+const (
+ constElapsedTime int64 = 50
+)
+
+func TestWriteNewClockData(t *testing.T) {
+ sysTs := time.Now()
+ sysClock := MockSystemClock(sysTs, time.Duration(constElapsedTime))
+ stAdapter := MockStorageAdapter()
+
+ clock := NewVClockWithMockServices(stAdapter, sysClock, nil)
+
+ writeNewClockData(nil, clock, 0)
+
+ expectedSystemTimeAtBoot := sysTs.UnixNano() - constElapsedTime
+ verifyClockData(t, stAdapter, 0, expectedSystemTimeAtBoot, constElapsedTime)
+}
+
+// This test runs the following scenarios:
+// 1) Run checkSystemRebooted() with no ClockData stored.
+// Result: no op.
+// 2) Run checkSystemRebooted() with ClockData whose stored elapsed time since
+// boot is higher than the current elapsed time.
+// Result: A new ClockData is written with updated SystemTimeAtBoot and
+// elapsed time.
+// 3) Run checkSystemRebooted() again after moving the sysClock forward
+// Result: no op.
+func TestCheckSystemRebooted(t *testing.T) {
+ sysTs := time.Now()
+ sysClock := MockSystemClock(sysTs, time.Duration(constElapsedTime))
+ stAdapter := MockStorageAdapter()
+
+ clock := NewVClockWithMockServices(stAdapter, sysClock, nil)
+
+ // stAdapter will return ErrNoExist while fetching ClockData
+ // checkSystemRebooted should return false.
+ if checkSystemRebooted(nil, clock) {
+ t.Error("Unexpected return value")
+ }
+
+ // Set clock data with elapsed time greater than constElapsedTime
+ clockData := &ClockData{25003, 25, 34569}
+ stAdapter.SetClockData(nil, clockData)
+
+ if !checkSystemRebooted(nil, clock) {
+ t.Error("Unexpected return value")
+ }
+ expectedSystemTimeAtBoot := sysTs.UnixNano() - constElapsedTime
+ verifyClockData(t, stAdapter, 25, expectedSystemTimeAtBoot, constElapsedTime)
+
+ // move clock forward without reboot and run checkSystemRebooted again
+ var timePassed int64 = 200
+ newSysTs := sysTs.Add(time.Duration(timePassed))
+ sysClock.SetNow(newSysTs)
+ sysClock.SetElapsedTime(time.Duration(constElapsedTime + timePassed))
+
+ if checkSystemRebooted(nil, clock) {
+ t.Error("Unexpected return value")
+ }
+ expectedSystemTimeAtBoot = sysTs.UnixNano() - constElapsedTime
+ verifyClockData(t, stAdapter, 25, expectedSystemTimeAtBoot, constElapsedTime)
+}
+
+// Setup: No prior ClockData present.
+// Result: A new ClockData value gets set.
+func TestRunClockCheck1(t *testing.T) {
+ sysTs := time.Now()
+ sysClock := MockSystemClock(sysTs, time.Duration(constElapsedTime))
+ stAdapter := MockStorageAdapter()
+
+ clock := NewVClockWithMockServices(stAdapter, sysClock, nil)
+
+ clock.runClockCheck(nil)
+ expectedSystemTimeAtBoot := sysTs.UnixNano() - constElapsedTime
+ verifyClockData(t, stAdapter, 0, expectedSystemTimeAtBoot, constElapsedTime)
+}
+
+// Setup: ClockData present, system clock elapsed time is lower than what is
+// stored in the clock data.
+// Result: A new ClockData value gets set with new system boot time and elapsed
+// time, skew remains the same.
+func TestRunClockCheck2(t *testing.T) {
+ sysTs := time.Now()
+ sysClock := MockSystemClock(sysTs, time.Duration(constElapsedTime))
+ stAdapter := MockStorageAdapter()
+ // Set clock data with elapsed time greater than constElapsedTime
+ clockData := &ClockData{25003, 25, 34569}
+ stAdapter.SetClockData(nil, clockData)
+
+ clock := NewVClockWithMockServices(stAdapter, sysClock, nil)
+
+ clock.runClockCheck(nil)
+ expectedSystemTimeAtBoot := sysTs.UnixNano() - constElapsedTime
+ verifyClockData(t, stAdapter, 25, expectedSystemTimeAtBoot, constElapsedTime)
+}
+
+// Setup: ClockData present, system clock gets a skew of 10 seconds
+// Result: A new ClockData value gets set with new elapsed time and skew,
+// system boot time remains the same.
+func TestRunClockCheck3(t *testing.T) {
+ sysTs := time.Now()
+ sysClock := MockSystemClock(sysTs, time.Duration(constElapsedTime))
+ stAdapter := MockStorageAdapter()
+
+ bootTs := sysTs.Add(time.Duration(-constElapsedTime))
+ oldSkew := 25 * time.Second
+ clockData := &ClockData{bootTs.UnixNano(), oldSkew.Nanoseconds(), 40}
+ stAdapter.SetClockData(nil, clockData)
+
+ clock := NewVClockWithMockServices(stAdapter, sysClock, nil)
+
+ // introduce a change in sys clock
+ extraSkew := 10 * time.Second // moves clock closer to UTC
+ changedSysTs := sysTs.Add(extraSkew)
+ sysClock.SetNow(changedSysTs)
+ newSkew := 15 * time.Second
+
+ clock.runClockCheck(nil)
+ expectedSystemTimeAtBoot := bootTs.UnixNano() + extraSkew.Nanoseconds()
+ verifyClockData(t, stAdapter, newSkew.Nanoseconds(), expectedSystemTimeAtBoot, constElapsedTime)
+}
+
+func TestWithRealSysClock(t *testing.T) {
+ stAdapter := MockStorageAdapter()
+ clock := NewVClockWithMockServices(stAdapter, nil, nil)
+
+ writeNewClockData(nil, clock, 0)
+
+ // Verify if clock data was written to StorageAdapter
+ clockData := &ClockData{}
+ if err := stAdapter.GetClockData(nil, clockData); err != nil {
+ t.Errorf("Expected to find clockData, received error: %v", err)
+ }
+
+ // Verify that calling checkSystemRebooted() does nothing
+ if checkSystemRebooted(nil, clock) {
+ t.Error("Unexpected return value")
+ }
+
+ // sleep for 1 second more than the skew threshold
+ time.Sleep(1800 * time.Millisecond)
+
+ // Verify that calling runClockCheck() only updates elapsed time
+ clock.runClockCheck(nil)
+ newClockData := &ClockData{}
+ if err := stAdapter.GetClockData(nil, newClockData); err != nil {
+ t.Errorf("Expected to find clockData, received error: %v", err)
+ }
+ if newClockData.Skew != clockData.Skew {
+ t.Errorf("Unexpected value for skew: %d", newClockData.Skew)
+ }
+ if newClockData.ElapsedTimeSinceBoot <= clockData.ElapsedTimeSinceBoot {
+ t.Errorf("Unexpected value for elapsed time: %d",
+ newClockData.ElapsedTimeSinceBoot)
+ }
+ if newClockData.SystemTimeAtBoot != clockData.SystemTimeAtBoot {
+ t.Errorf("SystemTimeAtBoot expected: %d, found: %d",
+ clockData.SystemTimeAtBoot, newClockData.SystemTimeAtBoot)
+ }
+}
+
+func verifyClockData(t *testing.T, stAdapter StorageAdapter, skew int64,
+ sysTimeAtBoot int64, elapsedTime int64) {
+ // verify ClockData
+ clockData := &ClockData{}
+ if err := stAdapter.GetClockData(nil, clockData); err != nil {
+ t.Errorf("Expected to find clockData, found error: %v", err)
+ }
+
+ if clockData.Skew != skew {
+ t.Errorf("Expected value for skew: %d, found: %d", skew, clockData.Skew)
+ }
+ if clockData.ElapsedTimeSinceBoot != elapsedTime {
+ t.Errorf("Expected value for elapsed time: %d, found: %d", elapsedTime,
+ clockData.ElapsedTimeSinceBoot)
+ }
+ if clockData.SystemTimeAtBoot != sysTimeAtBoot {
+ t.Errorf("Expected value for SystemTimeAtBoot: %d, found: %d",
+ sysTimeAtBoot, clockData.SystemTimeAtBoot)
+ }
+}
diff --git a/services/syncbase/clock/ntp.go b/services/syncbase/clock/ntp.go
new file mode 100644
index 0000000..ba55322
--- /dev/null
+++ b/services/syncbase/clock/ntp.go
@@ -0,0 +1,151 @@
+// Copyright 2015 The Vanadium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package clock
+
+import (
+ "fmt"
+ "net"
+ "time"
+
+ "v.io/syncbase/x/ref/services/syncbase/server/util"
+)
+
+const (
+ udp = "udp"
+ port = "123"
+)
+
+var _ NtpSource = (*ntpSourceImpl)(nil)
+
+func NewNtpSource(clock SystemClock) NtpSource {
+ return &ntpSourceImpl{util.NtpServerPool, clock}
+}
+
+type ntpSourceImpl struct {
+ ntpHost string
+ sc SystemClock
+}
+
+// NtpSync samples data from the NTP server and returns the sample with the
+// lowest network delay. The sample with the lowest network delay will have
+// the least error in the computation of the offset.
+// Param sampleCount is the number of samples this method will fetch.
+func (ns *ntpSourceImpl) NtpSync(sampleCount int) (*NtpData, error) {
+ var canonicalSample *NtpData
+ for i := 0; i < sampleCount; i++ {
+ if sample, err := ns.sample(); err == nil {
+ if (canonicalSample == nil) || (sample.delay < canonicalSample.delay) {
+ canonicalSample = sample
+ }
+ }
+ }
+ if canonicalSample == nil {
+ err := fmt.Errorf("Failed to get any sample from NTP server: %s", ns.ntpHost)
+ return nil, err
+ }
+ return canonicalSample, nil
+}
+
+// sample connects to an NTP server and returns NtpData containing the clock
+// offset and the network delay experienced while talking to the server.
+//
+// NTP protocol involves sending a request of size 48 bytes with the first
+// byte containing protocol version and mode and the last 8 bytes containing
+// transmit timestamp. The current NTP version is 4. A response from NTP server
+// contains original timestamp (client's transmit timestamp from request) from
+// bytes 24 to 31, server's receive timestamp from byte 32 to 39 and server's
+// transmit time from byte 40 to 47. The client can record the response receive
+// time as soon as it receives a response from the server.
+// Based on these four timestamps the client can compute the offset between the
+// two clocks and the roundtrip network delay for the request.
+func (ns *ntpSourceImpl) sample() (*NtpData, error) {
+ raddr, err := net.ResolveUDPAddr(udp, ns.ntpHost+":"+port)
+ if err != nil {
+ return nil, err
+ }
+
+ con, err := net.DialUDP("udp", nil, raddr)
+ if err != nil {
+ return nil, err
+ }
+ defer con.Close()
+
+ msg := ns.createRequest()
+ _, err = con.Write(msg)
+ if err != nil {
+ return nil, err
+ }
+
+ con.SetDeadline(time.Now().Add(5 * time.Second))
+ _, err = con.Read(msg)
+ if err != nil {
+ return nil, err
+ }
+
+ clientReceiveTs := ns.sc.Now()
+ clientTransmitTs := extractTime(msg[24:32])
+ serverReceiveTs := extractTime(msg[32:40])
+ serverTransmitTs := extractTime(msg[40:48])
+
+ // Following code extracts the clock offset and network delay based on the
+ // transmit and receive timestamps on the client and the server as per
+ // the formula explained at http://www.eecis.udel.edu/~mills/time.html
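+ // With T1 = clientTransmitTs, T2 = serverReceiveTs, T3 = serverTransmitTs,
+ // and T4 = clientReceiveTs:
+ //   offset = ((T2 - T1) + (T3 - T4)) / 2
+ //   delay  = (T4 - T1) - (T3 - T2)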
+ data := NtpData{}
+ data.offset = (serverReceiveTs.Sub(clientTransmitTs) + serverTransmitTs.Sub(clientReceiveTs)) / 2
+ data.delay = clientReceiveTs.Sub(clientTransmitTs) - serverTransmitTs.Sub(serverReceiveTs)
+
+ return &data, nil
+}
+
+func (ns *ntpSourceImpl) createRequest() []byte {
+ data := make([]byte, 48)
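+ // 0x23 is 0b00100011: leap indicator 0, version 4, mode 3 (client).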
+ data[0] = 0x23 // protocol version = 4, mode = 3 (Client)
+
+ // For NTP the prime epoch, or base date of era 0, is 0 h 1 January 1900 UTC
+ t0 := time.Date(1900, 1, 1, 0, 0, 0, 0, time.UTC)
+ tnow := ns.sc.Now()
+ d := tnow.Sub(t0)
+ nsec := d.Nanoseconds()
+
+ // The encoding of the timestamp below is the exact inverse of the decoding
+ // done in extractTime(). Refer to extractTime() for more explanation.
+ sec := nsec / 1e9 // Integer part of seconds since epoch
+ frac := ((nsec % 1e9) << 32) / 1e9 // fractional part of seconds since epoch
+
+ // write the timestamp to Transmit Timestamp section of request.
+ data[43] = byte(sec)
+ data[42] = byte(sec >> 8)
+ data[41] = byte(sec >> 16)
+ data[40] = byte(sec >> 24)
+
+ data[47] = byte(frac)
+ data[46] = byte(frac >> 8)
+ data[45] = byte(frac >> 16)
+ data[44] = byte(frac >> 24)
+ return data
+}
+
+// extractTime takes a byte slice which contains an encoded timestamp from the
+// NTP server, starting at the 0th byte and 8 bytes long. The encoded timestamp
+// is in seconds since 1900. The first 4 bytes contain the integer part of the
+// seconds while the last 4 bytes contain the fractional part of the seconds,
+// where (FFFFFFFF + 1) represents 1 second while 00000001 represents 2^(-32)
+// of a second.
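+// For example, a fractional field of 0x80000000 represents half a second and
+// converts to 500000000 ns.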
+func extractTime(data []byte) time.Time {
+ var sec, frac uint64
+ sec = uint64(data[3]) | uint64(data[2])<<8 | uint64(data[1])<<16 | uint64(data[0])<<24
+ frac = uint64(data[7]) | uint64(data[6])<<8 | uint64(data[5])<<16 | uint64(data[4])<<24
+
+ // Multiply the integral seconds part by 1 billion to convert to nanoseconds.
+ nsec := sec * 1e9
+ // Multiply the frac part by 2^(-32) to get the correct value in seconds and
+ // then multiply by 1 billion to convert to nanoseconds. The multiplication by
+ // 1 billion is done first to make sure that we don't lose precision.
+ nsec += (frac * 1e9) >> 32
+
+ t := time.Date(1900, 1, 1, 0, 0, 0, 0, time.UTC).Add(time.Duration(nsec)).Local()
+
+ return t
+}
diff --git a/services/syncbase/clock/ntpservice.go b/services/syncbase/clock/ntpservice.go
new file mode 100644
index 0000000..79045f3
--- /dev/null
+++ b/services/syncbase/clock/ntpservice.go
@@ -0,0 +1,46 @@
+// Copyright 2015 The Vanadium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package clock
+
+import (
+ "math"
+
+ "v.io/syncbase/x/ref/services/syncbase/server/util"
+ "v.io/v23/context"
+ "v.io/v23/verror"
+ "v.io/x/lib/vlog"
+)
+
+// runNtpCheck talks to an NTP server, fetches the current UTC time from it,
+// and corrects the VClock time.
+func (c *VClock) runNtpCheck(ctx *context.T) error {
+ ntpData, err := c.ntpSource.NtpSync(util.NtpSampleCount)
+ if err != nil {
+ vlog.Errorf("Error while fetching ntp time: %v", err)
+ return err
+ }
+ offset := ntpData.offset
+
+ data := &ClockData{}
+ if err := c.sa.GetClockData(ctx, data); err != nil {
+ if verror.ErrorID(err) == verror.ErrNoExist.ID {
+ // No ClockData found, write a new one.
+ writeNewClockData(ctx, c, offset)
+ return nil
+ }
+ vlog.Info("Error while fetching clock data: %v", err)
+ vlog.Info("Overwriting clock data with NTP")
+ writeNewClockData(ctx, c, offset)
+ return nil
+ }
+
+ // Update clock skew if the difference between offset and skew is larger
+ // than NtpDiffThreshold. NtpDiffThreshold helps avoid constant tweaking of
+ // the syncbase clock.
+ if math.Abs(float64(offset.Nanoseconds() - data.Skew)) > util.NtpDiffThreshold {
+ writeNewClockData(ctx, c, offset)
+ }
+ return nil
+}
diff --git a/services/syncbase/clock/ntpservice_test.go b/services/syncbase/clock/ntpservice_test.go
new file mode 100644
index 0000000..e505e01
--- /dev/null
+++ b/services/syncbase/clock/ntpservice_test.go
@@ -0,0 +1,192 @@
+// Copyright 2015 The Vanadium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package clock
+
+import (
+ "net"
+ "testing"
+ "time"
+)
+
+func TestWithMockNtpForErr(t *testing.T) {
+ sysClock := MockSystemClock(time.Now(), 0)
+ stAdapter := MockStorageAdapter()
+ ntpSource := MockNtpSource()
+ ntpSource.Err = net.UnknownNetworkError("network err")
+
+ vclock := NewVClockWithMockServices(stAdapter, sysClock, ntpSource)
+
+ if err := vclock.runNtpCheck(nil); err == nil {
+ t.Error("Network error expected but not found")
+ }
+
+ if stAdapter.clockData != nil {
+ t.Error("Non-nil clock data found.")
+ }
+}
+
+func TestWithMockNtpForDiffBelowThreshold(t *testing.T) {
+ sysClock := MockSystemClock(time.Now(), 0) // not used
+ stAdapter := MockStorageAdapter()
+ originalData := NewClockData(0)
+ stAdapter.SetClockData(nil, &originalData)
+
+ ntpSource := MockNtpSource()
+ offset := 1800 * time.Millisecond // error threshold is 2 seconds
+ ntpSource.Data = &NtpData{offset: offset, delay: 5 * time.Millisecond}
+
+ vclock := NewVClockWithMockServices(stAdapter, sysClock, ntpSource)
+ if err := vclock.runNtpCheck(nil); err != nil {
+ t.Errorf("Unexpected err: %v", err)
+ }
+ if isClockDataChanged(stAdapter, &originalData) {
+ t.Error("ClockData expected to be unchanged but found updated")
+ }
+}
+
+func TestWithMockNtpForDiffAboveThreshold(t *testing.T) {
+ sysTs := time.Now()
+ elapsedTime := 10 * time.Minute
+ sysClock := MockSystemClock(sysTs, elapsedTime)
+
+ stAdapter := MockStorageAdapter()
+ originalData := NewClockData(0)
+ stAdapter.SetClockData(nil, &originalData)
+
+ ntpSource := MockNtpSource()
+ skew := 2100 * time.Millisecond // error threshold is 2 seconds
+ ntpSource.Data = &NtpData{offset: skew, delay: 5 * time.Millisecond}
+
+ vclock := NewVClockWithMockServices(stAdapter, sysClock, ntpSource)
+ if err := vclock.runNtpCheck(nil); err != nil {
+ t.Errorf("Unexpected err: %v", err)
+ }
+ if !isClockDataChanged(stAdapter, &originalData) {
+ t.Error("ClockData expected to be updated but found unchanged")
+ }
+ expectedBootTime := sysTs.Add(-elapsedTime).UnixNano()
+ if stAdapter.clockData.Skew != skew.Nanoseconds() {
+ t.Errorf("Skew expected to be %d but found %d",
+ skew.Nanoseconds(), stAdapter.clockData.Skew)
+ }
+ if stAdapter.clockData.ElapsedTimeSinceBoot != elapsedTime.Nanoseconds() {
+ t.Errorf("ElapsedTime expected to be %d but found %d",
+ elapsedTime.Nanoseconds(), stAdapter.clockData.ElapsedTimeSinceBoot)
+ }
+ if stAdapter.clockData.SystemTimeAtBoot != expectedBootTime {
+ t.Errorf("Skew expected to be %d but found %d",
+ expectedBootTime, stAdapter.clockData.SystemTimeAtBoot)
+ }
+}
+
+func TestWithMockNtpForDiffBelowThresholdAndExistingLargeSkew(t *testing.T) {
+ sysTs := time.Now()
+ elapsedTime := 10 * time.Minute
+ sysClock := MockSystemClock(sysTs, elapsedTime)
+
+ stAdapter := MockStorageAdapter()
+ originalData := NewClockData(2300 * time.Millisecond.Nanoseconds()) // large skew
+ stAdapter.SetClockData(nil, &originalData)
+
+ ntpSource := MockNtpSource()
+ skew := 200 * time.Millisecond // error threshold is 2 seconds
+ ntpSource.Data = &NtpData{offset: skew, delay: 5 * time.Millisecond}
+
+ vclock := NewVClockWithMockServices(stAdapter, sysClock, ntpSource)
+ if err := vclock.runNtpCheck(nil); err != nil {
+ t.Errorf("Unexpected err: %v", err)
+ }
+ if !isClockDataChanged(stAdapter, &originalData) {
+ t.Error("ClockData expected to be updated but found unchanged")
+ }
+ expectedBootTime := sysTs.Add(-elapsedTime).UnixNano()
+ if stAdapter.clockData.Skew != skew.Nanoseconds() {
+ t.Errorf("Skew expected to be %d but found %d",
+ skew.Nanoseconds(), stAdapter.clockData.Skew)
+ }
+ if stAdapter.clockData.ElapsedTimeSinceBoot != elapsedTime.Nanoseconds() {
+ t.Errorf("ElapsedTime expected to be %d but found %d",
+ elapsedTime.Nanoseconds(), stAdapter.clockData.ElapsedTimeSinceBoot)
+ }
+ if stAdapter.clockData.SystemTimeAtBoot != expectedBootTime {
+ t.Errorf("Skew expected to be %d but found %d",
+ expectedBootTime, stAdapter.clockData.SystemTimeAtBoot)
+ }
+}
+
+func TestWithMockNtpForDiffBelowThresholdWithNoStoredClockData(t *testing.T) {
+ sysTs := time.Now()
+ elapsedTime := 10 * time.Minute
+ sysClock := MockSystemClock(sysTs, elapsedTime)
+
+ stAdapter := MockStorageAdapter() // no skew data stored
+
+ ntpSource := MockNtpSource()
+ skew := 200 * time.Millisecond // error threshold is 2 seconds
+ ntpSource.Data = &NtpData{offset: skew, delay: 5 * time.Millisecond}
+
+ vclock := NewVClockWithMockServices(stAdapter, sysClock, ntpSource)
+ if err := vclock.runNtpCheck(nil); err != nil {
+ t.Errorf("Unexpected err: %v", err)
+ }
+ if !isClockDataChanged(stAdapter, nil) {
+ t.Error("ClockData expected to be updated but found unchanged")
+ }
+ expectedBootTime := sysTs.Add(-elapsedTime).UnixNano()
+ if stAdapter.clockData.Skew != skew.Nanoseconds() {
+ t.Errorf("Skew expected to be %d but found %d",
+ skew.Nanoseconds(), stAdapter.clockData.Skew)
+ }
+ if stAdapter.clockData.ElapsedTimeSinceBoot != elapsedTime.Nanoseconds() {
+ t.Errorf("ElapsedTime expected to be %d but found %d",
+ elapsedTime.Nanoseconds(), stAdapter.clockData.ElapsedTimeSinceBoot)
+ }
+ if stAdapter.clockData.SystemTimeAtBoot != expectedBootTime {
+ t.Errorf("Skew expected to be %d but found %d",
+ expectedBootTime, stAdapter.clockData.SystemTimeAtBoot)
+ }
+}
+
+/*
+The following two tests are commented out as they hit real NTP servers
+and can result in flakiness if the clock of the machine running the continuous
+test has a skew of more than 2 seconds.
+
+func TestWithRealNtp(t *testing.T) {
+ stAdapter := MockStorageAdapter()
+ originalData := NewClockData(100 * time.Millisecond.Nanoseconds()) // small skew
+ stAdapter.SetClockData(nil, &originalData)
+ vclock := NewVClockWithMockServices(stAdapter, nil, nil)
+ if err := vclock.runNtpCheck(nil); err != nil {
+ t.Errorf("Unexpected err: %v", err)
+ }
+ if isClockDataChanged(stAdapter, &originalData) {
+ t.Error("ClockData expected to be unchanged but found updated")
+ }
+}
+
+func TestWithRealNtpForNoClockData(t *testing.T) {
+ stAdapter := MockStorageAdapter()
+ vclock := NewVClockWithMockServices(stAdapter, nil, nil)
+ if err := vclock.runNtpCheck(nil); err != nil {
+ t.Errorf("Unexpected err: %v", err)
+ }
+ if !isClockDataChanged(stAdapter, nil) {
+ t.Error("ClockData expected to be updated but found unchanged")
+ }
+}
+*/
+
+func NewClockData(skew int64) ClockData {
+ return ClockData{
+ SystemTimeAtBoot: 0,
+ Skew: skew,
+ ElapsedTimeSinceBoot: 0,
+ }
+}
+
+func isClockDataChanged(stAdapter *storageAdapterMockImpl, originalData *ClockData) bool {
+ return stAdapter.clockData != originalData // check for same pointer
+}
diff --git a/services/syncbase/clock/storage_adapter.go b/services/syncbase/clock/storage_adapter.go
new file mode 100644
index 0000000..33b4cda
--- /dev/null
+++ b/services/syncbase/clock/storage_adapter.go
@@ -0,0 +1,33 @@
+// Copyright 2015 The Vanadium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package clock
+
+import (
+ "v.io/syncbase/x/ref/services/syncbase/server/util"
+ "v.io/syncbase/x/ref/services/syncbase/store"
+ "v.io/v23/context"
+)
+
+var _ StorageAdapter = (*storageAdapterImpl)(nil)
+
+func NewStorageAdapter(st store.Store) StorageAdapter {
+ return &storageAdapterImpl{st}
+}
+
+type storageAdapterImpl struct {
+ st store.Store
+}
+
+func (sa *storageAdapterImpl) GetClockData(ctx *context.T, data *ClockData) error {
+ return util.Get(ctx, sa.st, clockDataKey(), data)
+}
+
+func (sa *storageAdapterImpl) SetClockData(ctx *context.T, data *ClockData) error {
+ return util.Put(ctx, sa.st, clockDataKey(), data)
+}
+
+func clockDataKey() string {
+ return util.ClockPrefix
+}
diff --git a/services/syncbase/clock/test_util.go b/services/syncbase/clock/test_util.go
new file mode 100644
index 0000000..6027f49
--- /dev/null
+++ b/services/syncbase/clock/test_util.go
@@ -0,0 +1,119 @@
+// Copyright 2015 The Vanadium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package clock
+
+// Utilities for testing the clock package.
+
+import (
+ "time"
+
+ "v.io/v23/context"
+ "v.io/v23/verror"
+)
+
+/////////////////////////////////////////////////
+// Mock for StorageAdapter
+
+var _ StorageAdapter = (*storageAdapterMockImpl)(nil)
+
+func MockStorageAdapter() *storageAdapterMockImpl {
+ return &storageAdapterMockImpl{}
+}
+
+type storageAdapterMockImpl struct {
+ clockData *ClockData
+ err error
+}
+
+func (sa *storageAdapterMockImpl) GetClockData(ctx *context.T, data *ClockData) error {
+ if sa.err != nil {
+ return sa.err
+ }
+ if sa.clockData == nil {
+ return verror.NewErrNoExist(ctx)
+ }
+ *data = *sa.clockData
+ return nil
+}
+
+func (sa *storageAdapterMockImpl) SetClockData(ctx *context.T, data *ClockData) error {
+ if sa.err != nil {
+ return sa.err
+ }
+ sa.clockData = data
+ return nil
+}
+
+func (sa *storageAdapterMockImpl) SetError(err error) {
+ sa.err = err
+}
+
+/////////////////////////////////////////////////
+// Mock for SystemClock
+
+var _ SystemClock = (*systemClockMockImpl)(nil)
+
+func MockSystemClock(now time.Time, elapsedTime time.Duration) *systemClockMockImpl {
+ return &systemClockMockImpl{
+ now: now,
+ elapsedTime: elapsedTime,
+ }
+}
+
+type systemClockMockImpl struct {
+ now time.Time
+ elapsedTime time.Duration
+}
+
+func (sc *systemClockMockImpl) Now() time.Time {
+ return sc.now
+}
+
+func (sc *systemClockMockImpl) SetNow(now time.Time) {
+ sc.now = now
+}
+
+func (sc *systemClockMockImpl) ElapsedTime() (time.Duration, error) {
+ return sc.elapsedTime, nil
+}
+
+func (sc *systemClockMockImpl) SetElapsedTime(elapsed time.Duration) {
+ sc.elapsedTime = elapsed
+}
+
+/////////////////////////////////////////////////
+// Mock for NtpSource
+
+var _ NtpSource = (*ntpSourceMockImpl)(nil)
+
+func MockNtpSource() *ntpSourceMockImpl {
+ return &ntpSourceMockImpl{}
+}
+
+type ntpSourceMockImpl struct {
+ Err error
+ Data *NtpData
+}
+
+func (ns *ntpSourceMockImpl) NtpSync(sampleCount int) (*NtpData, error) {
+ if ns.Err != nil {
+ return nil, ns.Err
+ }
+ return ns.Data, nil
+}
+
+func NewVClockWithMockServices(sa StorageAdapter, sc SystemClock, ns NtpSource) *VClock {
+ if sc == nil {
+ sc = newSystemClock()
+ }
+ if ns == nil {
+ ns = NewNtpSource(sc)
+ }
+ return &VClock{
+ clock: sc,
+ sa: sa,
+ ntpSource: ns,
+ }
+}
diff --git a/services/syncbase/clock/types.go b/services/syncbase/clock/types.go
new file mode 100644
index 0000000..6934f11
--- /dev/null
+++ b/services/syncbase/clock/types.go
@@ -0,0 +1,53 @@
+// Copyright 2015 The Vanadium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package clock
+
+import (
+ "time"
+
+ "v.io/v23/context"
+)
+
+// SystemClock is a wrapper over the system clock that allows easy testing of
+// VClock and other code that uses timestamps.
+type SystemClock interface {
+ // Now returns the current UTC time as known by the system.
+ // This may not reflect the NTP time if the system clock is out of
+ // sync with NTP.
+ Now() time.Time
+
+ // ElapsedTime returns a duration representing the time elapsed since the device
+ // rebooted.
+ ElapsedTime() (time.Duration, error)
+}
+
+type StorageAdapter interface {
+ GetClockData(ctx *context.T, data *ClockData) error
+ SetClockData(ctx *context.T, data *ClockData) error
+}
+
+type NtpSource interface {
+ // NtpSync obtains NtpData samples from an NTP server and returns the one
+ // which has the lowest network delay.
+ // Param sampleCount is the number of samples this method will fetch.
+ // NtpData contains the clock offset and the network delay experienced while
+ // talking to the server.
+ NtpSync(sampleCount int) (*NtpData, error)
+}
+
+type NtpData struct {
+ // Offset is the difference between the NTP time and the system clock.
+ // Adding the offset to the system clock gives the estimated NTP time.
+ offset time.Duration
+
+ // Delay is the round trip network delay experienced while talking to NTP
+ // server. The smaller the delay, the more accurate the offset is.
+ delay time.Duration
+}
+
+func (cd *ClockData) SystemBootTime() time.Time {
+ ns := time.Second.Nanoseconds()
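+ // Split the nanosecond timestamp into whole seconds and leftover
+ // nanoseconds, as expected by time.Unix.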
+ return time.Unix(cd.SystemTimeAtBoot/ns, cd.SystemTimeAtBoot%ns)
+}
diff --git a/services/syncbase/clock/types.vdl b/services/syncbase/clock/types.vdl
new file mode 100644
index 0000000..b12f8f9
--- /dev/null
+++ b/services/syncbase/clock/types.vdl
@@ -0,0 +1,20 @@
+// Copyright 2015 The Vanadium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package clock
+
+// ClockData is the persistent state of syncbase clock used to estimate current
+// NTP time and catch any unexpected changes to system clock.
+type ClockData struct {
+ // UTC time in Unix nanoseconds obtained from the system clock at boot.
+ SystemTimeAtBoot int64
+
+ // Skew between the system clock and NTP time.
+ Skew int64
+
+ // The elapsed time since boot as last seen during a run of clockservice.
+ // This is used to determine if the device rebooted since the last run of
+ // clockservice.
+ ElapsedTimeSinceBoot int64
+}
\ No newline at end of file
diff --git a/services/syncbase/clock/types.vdl.go b/services/syncbase/clock/types.vdl.go
new file mode 100644
index 0000000..4749d42
--- /dev/null
+++ b/services/syncbase/clock/types.vdl.go
@@ -0,0 +1,35 @@
+// Copyright 2015 The Vanadium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// This file was auto-generated by the vanadium vdl tool.
+// Source: types.vdl
+
+package clock
+
+import (
+ // VDL system imports
+ "v.io/v23/vdl"
+)
+
+// ClockData is the persistent state of syncbase clock used to estimate current
+// NTP time and catch any unexpected changes to system clock.
+type ClockData struct {
+ // UTC time in Unix nanoseconds obtained from the system clock at boot.
+ SystemTimeAtBoot int64
+ // Skew between the system clock and NTP time.
+ Skew int64
+ // The elapsed time since boot as last seen during a run of clockservice.
+ // This is used to determine if the device rebooted since the last run of
+ // clockservice.
+ ElapsedTimeSinceBoot int64
+}
+
+func (ClockData) __VDLReflect(struct {
+ Name string `vdl:"v.io/syncbase/x/ref/services/syncbase/clock.ClockData"`
+}) {
+}
+
+func init() {
+ vdl.Register((*ClockData)(nil))
+}
diff --git a/services/syncbase/clock/vclock.go b/services/syncbase/clock/vclock.go
new file mode 100644
index 0000000..95719f4
--- /dev/null
+++ b/services/syncbase/clock/vclock.go
@@ -0,0 +1,73 @@
+// Copyright 2015 The Vanadium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package clock
+
+import (
+ "time"
+
+ "v.io/syncbase/x/ref/services/syncbase/store"
+ "v.io/v23/context"
+ "v.io/v23/verror"
+ "v.io/x/lib/vlog"
+)
+
+// VClock holds data required to provide an estimate of the UTC time at any
+// given point. The fields contained here are:
+// - systemTimeAtBoot : the time shown by the system clock at boot.
+// - skew : the difference between the system clock and UTC time.
+// - clock : Instance of clock.SystemClock interface providing access
+// to the system time.
+// - sa : adapter for storage of clock data.
+// - ntpSource : source for fetching NTP data.
+type VClock struct {
+ systemTimeAtBoot time.Time
+ skew time.Duration
+ clock SystemClock
+ sa StorageAdapter
+ ntpSource NtpSource
+}
+
+func NewVClock(st store.Store) *VClock {
+ sysClock := newSystemClock()
+ return &VClock{
+ clock: sysClock,
+ sa: NewStorageAdapter(st),
+ ntpSource: NewNtpSource(sysClock),
+ }
+}
+
+// Now returns the current UTC time based on the estimate of the skew that
+// the system clock has with respect to NTP time.
+func (c *VClock) Now(ctx *context.T) time.Time {
+ clockData := &ClockData{}
+ if err := c.sa.GetClockData(ctx, clockData); err != nil {
+ if verror.ErrorID(err) == verror.ErrNoExist.ID {
+ // VClock's cron job to set up UTC time at boot has not been run yet.
+ vlog.Error("No ClockData found while creating a timestamp")
+ } else {
+ vlog.Errorf("Error while fetching clock data: %v", err)
+ }
+ vlog.Error("Returning current system clock time")
+ return c.clock.Now()
+ }
+ skew := time.Duration(clockData.Skew)
+ return c.clock.Now().Add(skew)
+}
+
+///////////////////////////////////////////////////
+// Implementation for SystemClock.
+
+type systemClockImpl struct{}
+
+// Now returns the system time in UTC.
+func (sc *systemClockImpl) Now() time.Time {
+ return time.Now().UTC()
+}
+
+var _ SystemClock = (*systemClockImpl)(nil)
+
+func newSystemClock() SystemClock {
+ return &systemClockImpl{}
+}
diff --git a/services/syncbase/clock/vclock_test.go b/services/syncbase/clock/vclock_test.go
new file mode 100644
index 0000000..bf92f5c
--- /dev/null
+++ b/services/syncbase/clock/vclock_test.go
@@ -0,0 +1,69 @@
+// Copyright 2015 The Vanadium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package clock
+
+import (
+ "testing"
+ "time"
+
+ "v.io/v23/verror"
+)
+
+func TestVClock(t *testing.T) {
+ sysTs := time.Now()
+ sysClock := MockSystemClock(sysTs, 0)
+ stAdapter := MockStorageAdapter()
+ stAdapter.SetClockData(nil, &ClockData{0, 0, 0})
+ clock := NewVClockWithMockServices(stAdapter, sysClock, nil)
+
+ ts := clock.Now(nil)
+ if ts != sysTs {
+ t.Errorf("timestamp expected to be %q but found to be %q", sysTs, ts)
+ }
+}
+
+func TestVClockWithSkew(t *testing.T) {
+ // test with positive skew
+ checkSkew(t, 5)
+ // test with negative skew
+ checkSkew(t, -5)
+}
+
+func checkSkew(t *testing.T, skew int64) {
+ sysTs := time.Now()
+ sysClock := MockSystemClock(sysTs, 0)
+
+ var elapsedTime int64 = 100
+ stAdapter := MockStorageAdapter()
+ bootTime := sysTs.UnixNano() - elapsedTime
+ clockData := ClockData{bootTime, skew, elapsedTime}
+ stAdapter.SetClockData(nil, &clockData)
+
+ clock := NewVClockWithMockServices(stAdapter, sysClock, nil)
+
+ ts := clock.Now(nil)
+ if ts == sysTs {
+ t.Errorf("timestamp expected to be %q but found to be %q", sysTs, ts)
+ }
+ if ts.UnixNano() != (sysTs.UnixNano() + skew) {
+ t.Errorf("Unexpected vclock timestamp. vclock: %v, sysclock: %v, skew: %v", ts, sysTs, skew)
+ }
+}
+
+func TestVClockWithInternalErr(t *testing.T) {
+ sysTs := time.Now()
+ sysClock := MockSystemClock(sysTs, 0)
+
+ stAdapter := MockStorageAdapter()
+ stAdapter.SetError(verror.NewErrInternal(nil))
+
+ clock := NewVClockWithMockServices(stAdapter, sysClock, nil)
+
+ // Internal err should result in vclock falling back to the system clock.
+ ts := clock.Now(nil)
+ if ts != sysTs {
+ t.Errorf("timestamp expected to be %q but found to be %q", sysTs, ts)
+ }
+}
diff --git a/services/syncbase/localblobstore/blobmap/blobmap.go b/services/syncbase/localblobstore/blobmap/blobmap.go
new file mode 100644
index 0000000..c674f68
--- /dev/null
+++ b/services/syncbase/localblobstore/blobmap/blobmap.go
@@ -0,0 +1,480 @@
+// Copyright 2015 The Vanadium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// Package blobmap implements a map from chunk checksums to chunk locations
+// and vice versa, using a store.Store (currently, one implemented with
+// leveldb).
+package blobmap
+
+import "encoding/binary"
+import "sync"
+
+import "v.io/syncbase/x/ref/services/syncbase/store"
+import "v.io/syncbase/x/ref/services/syncbase/store/leveldb"
+import "v.io/v23/context"
+import "v.io/v23/verror"
+
+const pkgPath = "v.io/syncbase/x/ref/services/syncbase/localblobstore/blobmap"
+
+var (
+ errBadBlobIDLen = verror.Register(pkgPath+".errBadBlobIDLen", verror.NoRetry, "{1:}{2:} blobmap {3}: bad blob length {4} should be {5}{:_}")
+ errBadChunkHashLen = verror.Register(pkgPath+".errBadChunkHashLen", verror.NoRetry, "{1:}{2:} blobmap {3}: bad chunk hash length {4} should be {5}{:_}")
+ errNoSuchBlob = verror.Register(pkgPath+".errNoSuchBlob", verror.NoRetry, "{1:}{2:} blobmap {3}: no such blob{:_}")
+ errMalformedChunkEntry = verror.Register(pkgPath+".errMalformedChunkEntry", verror.NoRetry, "{1:}{2:} blobmap {3}: malformed chunk entry{:_}")
+ errNoSuchChunk = verror.Register(pkgPath+".errNoSuchChunk", verror.NoRetry, "{1:}{2:} blobmap {3}: no such chunk{:_}")
+ errMalformedBlobEntry = verror.Register(pkgPath+".errMalformedBlobEntry", verror.NoRetry, "{1:}{2:} blobmap {3}: malformed blob entry{:_}")
+)
+
+// There are two tables: chunk-to-location, and blob-to-chunk.
+// Each chunk is represented by one entry in each table.
+// On deletion, the latter is used to find the former, so the latter is added
+// first, and deleted last.
+//
+// chunk-to-location:
+// Key: 1-byte containing chunkPrefix, 16-byte chunk hash, 16-byte blob ID
+// Value: Varint offset, Varint length.
+// The chunk with the specified 16-byte hash had the specified length, and is
+// (or was) found at the specified offset in the blob.
+//
+// blob-to-chunk:
+// Key: 1-byte containing blobPrefix, 16-byte blob ID, 8-byte bigendian offset
+// Value: 16-byte chunk hash, Varint length.
+//
+// The varint encoded fields are written/read with
+// encoding/binary.{Put,Read}Varint. The blob-to-chunk keys encode the offset
+// as raw big-endian (encoding/binary.{Put,}Uint64) so that it will sort in
+// increasing offset order.
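+//
+// For example, a chunk with hash H at offset 1024 and length 4096 in blob B
+// would be recorded as:
+//   chunk-to-location: key = chunkPrefix + H + B, value = varint(1024) varint(4096)
+//   blob-to-chunk:     key = blobPrefix + B + bigendian64(1024), value = H varint(4096)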
+
+const chunkHashLen = 16 // length of chunk hash
+const blobIDLen = 16 // length of blob ID
+const offsetLen = 8 // length of offset in blob-to-chunk key
+
+const maxKeyLen = 64 // conservative maximum key length
+const maxValLen = 64 // conservative maximum value length
+
+var chunkPrefix []byte = []byte{0} // key prefix for chunk-to-location
+var blobPrefix []byte = []byte{1} // key prefix for blob-to-chunk
+
+// offsetLimit is an offset that's greater than, and one byte longer than, any
+// real offset.
+var offsetLimit []byte = []byte{
+ 0xff, 0xff, 0xff, 0xff,
+ 0xff, 0xff, 0xff, 0xff,
+ 0xff,
+}
+
+// blobLimit is a blobID that's greater than, and one byte longer than, any
+// real blob ID
+var blobLimit []byte = []byte{
+ 0xff, 0xff, 0xff, 0xff,
+ 0xff, 0xff, 0xff, 0xff,
+ 0xff, 0xff, 0xff, 0xff,
+ 0xff, 0xff, 0xff, 0xff,
+ 0xff,
+}
+
+// A Location describes a chunk's location within a blob.
+type Location struct {
+ BlobID []byte // ID of blob
+ Offset int64 // byte offset of chunk within blob
+ Size int64 // size of chunk
+}
+
+// A BlobMap maps chunk checksums to Locations, and vice versa.
+type BlobMap struct {
+ dir string // the directory where the store is held
+ st store.Store // private store that holds the mapping.
+}
+
+// New() returns a pointer to a BlobMap, backed by storage in directory dir.
+func New(ctx *context.T, dir string) (bm *BlobMap, err error) {
+ bm = new(BlobMap)
+ bm.dir = dir
+ bm.st, err = leveldb.Open(dir, leveldb.OpenOptions{CreateIfMissing: true, ErrorIfExists: false})
+ return bm, err
+}
+
+// Close() closes any files or other resources associated with *bm.
+// No other methods on bm may be called after Close().
+func (bm *BlobMap) Close() error {
+ return bm.st.Close()
+}
+
+// AssociateChunkWithLocation() remembers that the specified chunk hash is
+// associated with the specified Location.
+func (bm *BlobMap) AssociateChunkWithLocation(ctx *context.T, chunk []byte, loc Location) (err error) {
+ // Check expected lengths explicitly in routines that modify the database.
+ if len(loc.BlobID) != blobIDLen {
+ err = verror.New(errBadBlobIDLen, ctx, bm.dir, len(loc.BlobID), blobIDLen)
+ } else if len(chunk) != chunkHashLen {
+ err = verror.New(errBadChunkHashLen, ctx, bm.dir, len(chunk), chunkHashLen)
+ } else {
+ var key [maxKeyLen]byte
+ var val [maxValLen]byte
+
+ // Put the blob-to-chunk entry first, since it's used
+ // to garbage collect the other.
+ keyLen := copy(key[:], blobPrefix)
+ keyLen += copy(key[keyLen:], loc.BlobID)
+ binary.BigEndian.PutUint64(key[keyLen:], uint64(loc.Offset))
+ keyLen += offsetLen
+
+ valLen := copy(val[:], chunk)
+ valLen += binary.PutVarint(val[valLen:], loc.Size)
+ err = bm.st.Put(key[:keyLen], val[:valLen])
+
+ if err == nil {
+ keyLen = copy(key[:], chunkPrefix)
+ keyLen += copy(key[keyLen:], chunk)
+ keyLen += copy(key[keyLen:], loc.BlobID)
+
+ valLen = binary.PutVarint(val[:], loc.Offset)
+ valLen += binary.PutVarint(val[valLen:], loc.Size)
+
+ err = bm.st.Put(key[:keyLen], val[:valLen])
+ }
+ }
+
+ return err
+}
+
+// DeleteBlob() deletes any chunk associations for the specified blob that were
+// previously added with AssociateChunkWithLocation().
+func (bm *BlobMap) DeleteBlob(ctx *context.T, blob []byte) (err error) {
+ // Check expected lengths explicitly in routines that modify the database.
+ if len(blob) != blobIDLen {
+ err = verror.New(errBadBlobIDLen, ctx, bm.dir, len(blob), blobIDLen)
+ } else {
+ var start [maxKeyLen]byte
+ var limit [maxKeyLen]byte
+
+ startLen := copy(start[:], blobPrefix)
+ startLen += copy(start[startLen:], blob)
+
+ limitLen := copy(limit[:], start[:startLen])
+ limitLen += copy(limit[limitLen:], offsetLimit)
+
+ var keyBuf [maxKeyLen]byte // buffer for keys returned by stream
+ var valBuf [maxValLen]byte // buffer for values returned by stream
+ var deleteKey [maxKeyLen]byte // buffer to construct chunk-to-location keys to delete
+
+ deletePrefixLen := copy(deleteKey[:], chunkPrefix)
+
+ seenAValue := false
+
+ s := bm.st.Scan(start[:startLen], limit[:limitLen])
+ for s.Advance() && err == nil {
+ seenAValue = true
+
+ key := s.Key(keyBuf[:])
+ value := s.Value(valBuf[:])
+
+ if len(value) >= chunkHashLen {
+ deleteKeyLen := deletePrefixLen
+ deleteKeyLen += copy(deleteKey[deleteKeyLen:], value[:chunkHashLen])
+ deleteKeyLen += copy(deleteKey[deleteKeyLen:], blob)
+ err = bm.st.Delete(deleteKey[:deleteKeyLen])
+ }
+
+ if err == nil {
+ // Delete the blob-to-chunk entry last, as it's
+ // used to find the chunk-to-location entry.
+ err = bm.st.Delete(key)
+ }
+ }
+
+ if err != nil {
+ s.Cancel()
+ } else {
+ err = s.Err()
+ if err == nil && !seenAValue {
+ err = verror.New(errNoSuchBlob, ctx, bm.dir, blob)
+ }
+ }
+ }
+
+ return err
+}
+
+// LookupChunk() returns a Location for the specified chunk. Only one Location
+// is returned, even if several are available in the database. If the client
+// finds that the Location is not available, perhaps because its blob has
+// been deleted, the client should remove the blob from the BlobMap using
+// DeleteBlob(loc.Blob), and try again. (The client may also wish to
+// arrange at some point to call GC() on the blob store.)
+func (bm *BlobMap) LookupChunk(ctx *context.T, chunkHash []byte) (loc Location, err error) {
+ var start [maxKeyLen]byte
+ var limit [maxKeyLen]byte
+
+ startLen := copy(start[:], chunkPrefix)
+ startLen += copy(start[startLen:], chunkHash)
+
+ limitLen := copy(limit[:], start[:startLen])
+ limitLen += copy(limit[limitLen:], blobLimit)
+
+ var keyBuf [maxKeyLen]byte // buffer for keys returned by stream
+ var valBuf [maxValLen]byte // buffer for values returned by stream
+
+ s := bm.st.Scan(start[:startLen], limit[:limitLen])
+ if s.Advance() {
+ var n int
+ key := s.Key(keyBuf[:])
+ value := s.Value(valBuf[:])
+ loc.BlobID = key[len(chunkPrefix)+chunkHashLen:]
+ loc.Offset, n = binary.Varint(value)
+ if n > 0 {
+ loc.Size, n = binary.Varint(value[n:])
+ }
+ if n <= 0 {
+ err = verror.New(errMalformedChunkEntry, ctx, bm.dir, chunkHash, key, value)
+ }
+ s.Cancel()
+ } else {
+ if err == nil {
+ err = s.Err()
+ }
+ if err == nil {
+ err = verror.New(errNoSuchChunk, ctx, bm.dir, chunkHash)
+ }
+ }
+
+ return loc, err
+}
+
+// A ChunkStream allows the client to iterate over the chunks in a blob:
+// cs := bm.NewChunkStream(ctx, blob)
+// for cs.Advance() {
+// chunkHash := cs.Value()
+// ...process chunkHash...
+// }
+// if cs.Err() != nil {
+// ...there was an error...
+// }
+type ChunkStream struct {
+ bm *BlobMap
+ ctx *context.T
+ stream store.Stream
+
+ keyBuf [maxKeyLen]byte // buffer for keys
+ valBuf [maxValLen]byte // buffer for values
+ key []byte // key for current element
+ value []byte // value of current element
+ loc Location // location of current element
+ err error // error encountered.
+ more bool // whether stream may be consulted again
+}
+
+// NewChunkStream() returns a pointer to a new ChunkStream that allows the client
+// to enumerate the chunk hashes in a blob, in order.
+func (bm *BlobMap) NewChunkStream(ctx *context.T, blob []byte) *ChunkStream {
+ var start [maxKeyLen]byte
+ var limit [maxKeyLen]byte
+
+ startLen := copy(start[:], blobPrefix)
+ startLen += copy(start[startLen:], blob)
+
+ limitLen := copy(limit[:], start[:startLen])
+ limitLen += copy(limit[limitLen:], offsetLimit)
+
+ cs := new(ChunkStream)
+ cs.bm = bm
+ cs.ctx = ctx
+ cs.stream = bm.st.Scan(start[:startLen], limit[:limitLen])
+ cs.more = true
+
+ return cs
+}
+
+// Advance() stages an element so the client can retrieve the chunk hash with
+// Value(), or its Location with Location(). Advance() returns true iff there
+// is an element to retrieve. The client must call Advance() before calling
+// Value() or Location(). The client must call Cancel() if it does not iterate
+// through all elements (i.e. until Advance() returns false). Advance() may
+// block if an element is not immediately available.
+func (cs *ChunkStream) Advance() (ok bool) {
+ if cs.more && cs.err == nil {
+ if !cs.stream.Advance() {
+ cs.err = cs.stream.Err()
+ cs.more = false // no more stream, even if no error
+ } else {
+ cs.key = cs.stream.Key(cs.keyBuf[:])
+ cs.value = cs.stream.Value(cs.valBuf[:])
+ ok = (len(cs.value) >= chunkHashLen) &&
+ (len(cs.key) == len(blobPrefix)+blobIDLen+offsetLen)
+ if ok {
+ var n int
+ cs.loc.BlobID = make([]byte, blobIDLen)
+ copy(cs.loc.BlobID, cs.key[len(blobPrefix):len(blobPrefix)+blobIDLen])
+ cs.loc.Offset = int64(binary.BigEndian.Uint64(cs.key[len(blobPrefix)+blobIDLen:]))
+ cs.loc.Size, n = binary.Varint(cs.value[chunkHashLen:])
+ ok = (n > 0)
+ }
+ if !ok {
+ cs.err = verror.New(errMalformedBlobEntry, cs.ctx, cs.bm.dir, cs.key, cs.value)
+ cs.stream.Cancel()
+ }
+ }
+ }
+ return ok
+}
+
+// Value() returns the content hash of the chunk staged by
+// Advance(). The returned slice may be a sub-slice of buf if buf is large
+// enough to hold the entire value. Otherwise, a newly allocated slice will be
+// returned. It is valid to pass a nil buf. Value() may panic if Advance()
+// returned false or was not called at all. Value() does not block.
+func (cs *ChunkStream) Value(buf []byte) (result []byte) {
+ if len(buf) < chunkHashLen {
+ buf = make([]byte, chunkHashLen)
+ }
+ copy(buf, cs.value[:chunkHashLen])
+ return buf[:chunkHashLen]
+}
+
+// Location() returns the Location associated with the chunk staged by
+// Advance(). Location() may panic if Advance() returned false or was not
+// called at all. Location() does not block.
+func (cs *ChunkStream) Location() Location {
+ return cs.loc
+}
+
+// Err() returns a non-nil error iff the stream encountered any errors. Err()
+// does not block.
+func (cs *ChunkStream) Err() error {
+ return cs.err
+}
+
+// Cancel() notifies the stream provider that it can stop producing elements.
+// The client must call Cancel() if it does not iterate through all elements
+// (i.e. until Advance() returns false). Cancel() is idempotent and can be
+// called concurrently with a goroutine that is iterating via Advance() and
+// Value(). Cancel() causes Advance() to subsequently return false.
+// Cancel() does not block.
+func (cs *ChunkStream) Cancel() {
+ cs.stream.Cancel()
+}
+
+// A BlobStream allows the client to iterate over the blobs in BlobMap:
+// bs := bm.NewBlobStream(ctx)
+// for bs.Advance() {
+// blobID := bs.Value()
+// ...process blobID...
+// }
+// if bs.Err() != nil {
+// ...there was an error...
+// }
+type BlobStream struct {
+ bm *BlobMap
+ ctx *context.T
+
+ key []byte // key for current element
+ keyBuf [maxKeyLen]byte // buffer for keys
+ err error // error encountered.
+ mu sync.Mutex // protects "more", which may be written in Cancel()
+ more bool // whether stream may be consulted again
+}
+
+// keyLimit is the limit key used in store.Scan() calls within a BlobStream.
+var keyLimit []byte
+
+func init() {
+ // The limit key is the maximum length key, all ones after the blobPrefix.
+ keyLimit = make([]byte, maxKeyLen)
+ for i := copy(keyLimit, blobPrefix); i != len(keyLimit); i++ {
+ keyLimit[i] = 0xff
+ }
+}
+
+// NewBlobStream() returns a pointer to a new BlobStream that allows the client
+// to enumerate the blobs in the BlobMap, in lexicographic order.
+func (bm *BlobMap) NewBlobStream(ctx *context.T) *BlobStream {
+ bs := new(BlobStream)
+ bs.bm = bm
+ bs.ctx = ctx
+ bs.more = true
+ return bs
+}
+
+// Advance() stages an element so the client can retrieve the next blob ID with
+// Value(). Advance() returns true iff there is an element to retrieve. The
+// client must call Advance() before calling Value(). The client must call
+// Cancel if it does not iterate through all elements (i.e. until Advance()
+// returns false). Advance() may block if an element is not immediately
+// available.
+func (bs *BlobStream) Advance() (ok bool) {
+ bs.mu.Lock()
+ ok = bs.more
+ bs.mu.Unlock()
+ if ok {
+ prefixAndKeyLen := len(blobPrefix) + blobIDLen
+ // Compute the next key to search for.
+ if len(bs.key) == 0 { // First time through: anything starting with blobPrefix.
+ n := copy(bs.keyBuf[:], blobPrefix)
+ bs.key = bs.keyBuf[:n]
+ } else {
+ // Increment the blobID to form the next possible key.
+ i := prefixAndKeyLen - 1
+ for ; i != len(blobPrefix)-1 && bs.keyBuf[i] == 0xff; i-- {
+ bs.keyBuf[i] = 0
+ }
+ if i == len(blobPrefix)-1 { // End of database
+ ok = false
+ } else {
+ bs.keyBuf[i]++
+ }
+ bs.key = bs.keyBuf[:prefixAndKeyLen]
+ }
+ if ok {
+ stream := bs.bm.st.Scan(bs.key, keyLimit)
+ if !stream.Advance() {
+ bs.err = stream.Err()
+ ok = false // no more stream, even if no error
+ } else {
+ bs.key = stream.Key(bs.keyBuf[:])
+ if len(bs.key) < prefixAndKeyLen {
+ bs.err = verror.New(errMalformedBlobEntry, bs.ctx, bs.bm.dir, bs.key, stream.Value(nil))
+ ok = false
+ }
+ stream.Cancel() // We get at most one element from each stream.
+ }
+ }
+ if !ok {
+ bs.mu.Lock()
+ bs.more = false
+ bs.mu.Unlock()
+ }
+ }
+ return ok
+}
+
+// Value() returns the blob ID staged by Advance(). The returned slice may be
+// a sub-slice of buf if buf is large enough to hold the entire value.
+// Otherwise, a newly allocated slice will be returned. It is valid to pass a
+// nil buf. Value() may panic if Advance() returned false or was not called at
+// all. Value() does not block.
+func (bs *BlobStream) Value(buf []byte) (result []byte) {
+ if len(buf) < blobIDLen {
+ buf = make([]byte, blobIDLen)
+ }
+ copy(buf, bs.key[len(blobPrefix):len(blobPrefix)+blobIDLen])
+ return buf[:blobIDLen]
+}
+
+// Err() returns a non-nil error iff the stream encountered any errors. Err()
+// does not block.
+func (bs *BlobStream) Err() error {
+ return bs.err
+}
+
+// Cancel() notifies the stream provider that it can stop producing elements.
+// The client must call Cancel() if it does not iterate through all elements
+// (i.e. until Advance() returns false). Cancel() is idempotent and can be
+// called concurrently with a goroutine that is iterating via Advance() and
+// Value(). Cancel() causes Advance() to subsequently return false.
+// Cancel() does not block.
+func (bs *BlobStream) Cancel() {
+ bs.mu.Lock()
+ bs.more = false
+ bs.mu.Unlock()
+}
diff --git a/services/syncbase/localblobstore/blobmap/blobmap_test.go b/services/syncbase/localblobstore/blobmap/blobmap_test.go
new file mode 100644
index 0000000..450049a
--- /dev/null
+++ b/services/syncbase/localblobstore/blobmap/blobmap_test.go
@@ -0,0 +1,278 @@
+// Copyright 2015 The Vanadium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// A test for blobmap.
+package blobmap_test
+
+import "bytes"
+import "io/ioutil"
+import "math/rand"
+import "os"
+import "runtime"
+import "testing"
+
+import "v.io/syncbase/x/ref/services/syncbase/localblobstore/blobmap"
+import "v.io/v23/context"
+
+import "v.io/x/ref/test"
+import _ "v.io/x/ref/runtime/factories/generic"
+
+// id() returns a new random 16-byte byte vector.
+func id() []byte {
+ v := make([]byte, 16)
+ for i := 0; i != len(v); i++ {
+ v[i] = byte(rand.Int31n(256))
+ }
+ return v
+}
+
+// verifyBlobs() tests that the blobs in *bm are those in b[], as revealed via
+// the BlobStream() interface.
+func verifyBlobs(t *testing.T, ctx *context.T, bm *blobmap.BlobMap, b [][]byte) {
+ _, _, callerLine, _ := runtime.Caller(1)
+ seen := make([]bool, len(b)) // seen[i] == whether b[i] seen in *bm
+ bs := bm.NewBlobStream(ctx)
+ var i int
+ for i = 0; bs.Advance(); i++ {
+ blob := bs.Value(nil)
+ var j int
+ for j = 0; j != len(b) && bytes.Compare(b[j], blob) != 0; j++ {
+ }
+ if j == len(b) {
+ t.Errorf("blobmap_test: line %d: unexpected blob %v present in BlobMap",
+ callerLine, blob)
+ } else if seen[j] {
+ t.Errorf("blobmap_test: line %d: blob %v seen twice in BlobMap",
+ callerLine, blob)
+ } else {
+ seen[j] = true
+ }
+ }
+ if i != len(b) {
+ t.Errorf("blobmap_test: line %d: found %d blobs in BlobMap, but expected %d",
+ callerLine, i, len(b))
+ }
+ for j := range seen {
+ if !seen[j] {
+ t.Errorf("blobmap_test: line %d: blob %v not seen un BlobMap",
+ callerLine, b[j])
+ }
+ }
+ if bs.Err() != nil {
+ t.Errorf("blobmap_test: line %d: BlobStream.Advance: unexpected error %v",
+ callerLine, bs.Err())
+ }
+}
+
+// verifyNoChunksInBlob() tests that blob b[blobi] has no chunks in *bm, as
+// revealed by the ChunkStream interface.
+func verifyNoChunksInBlob(t *testing.T, ctx *context.T, bm *blobmap.BlobMap, blobi int, b [][]byte) {
+ _, _, callerLine, _ := runtime.Caller(1)
+ cs := bm.NewChunkStream(ctx, b[blobi])
+ for i := 0; cs.Advance(); i++ {
+ t.Errorf("blobmap_test: line %d: blob %d: chunk %d: %v",
+ callerLine, blobi, i, cs.Value(nil))
+ }
+ if cs.Err() != nil {
+ t.Errorf("blobmap_test: line %d: blob %d: ChunkStream.Advance: unexpected error %v",
+ callerLine, blobi, cs.Err())
+ }
+}
+
+// verifyChunksInBlob() tests that blob b[blobi] in *bm contains the expected
+// chunks from c[]. Each blob is expected to have 8 chunks, 0...7, except that
+// b[1] has c[8] instead of c[4] for chunk 4.
+func verifyChunksInBlob(t *testing.T, ctx *context.T, bm *blobmap.BlobMap, blobi int, b [][]byte, c [][]byte) {
+ _, _, callerLine, _ := runtime.Caller(1)
+ var err error
+ var i int
+ cs := bm.NewChunkStream(ctx, b[blobi])
+ for i = 0; cs.Advance(); i++ {
+ chunk := cs.Value(nil)
+ chunki := i
+ if blobi == 1 && i == 4 { // In blob 1, c[4] is replaced by c[8]
+ chunki = 8
+ }
+ if bytes.Compare(c[chunki], chunk) != 0 {
+ t.Errorf("blobmap_test: line %d: blob %d: chunk %d: got %v, expected %v",
+ callerLine, blobi, i, chunk, c[chunki])
+ }
+
+ var loc blobmap.Location
+ loc, err = bm.LookupChunk(ctx, chunk)
+ if err != nil {
+ t.Errorf("blobmap_test: line %d: blob %d: chunk %d: LookupChunk got unexpected error: %v",
+ callerLine, blobi, i, err)
+ } else {
+ if i == 4 {
+ if bytes.Compare(loc.BlobID, b[blobi]) != 0 {
+ t.Errorf("blobmap_test: line %d: blob %d: chunk %d: Location.BlobID got %v, expected %v",
+ callerLine, blobi, i, loc.BlobID, b[blobi])
+ }
+ } else {
+ if bytes.Compare(loc.BlobID, b[0]) != 0 && bytes.Compare(loc.BlobID, b[1]) != 0 {
+ t.Errorf("blobmap_test: line %d: blob %d: chunk %d: Location.BlobID got %v, expected %v",
+ callerLine, blobi, i, loc.BlobID, b[blobi])
+ }
+ }
+ if loc.Offset != int64(i) {
+ t.Errorf("blobmap_test: line %d: blob %d: chunk %d: Location.Offset got %d, expected %d",
+ callerLine, blobi, i, loc.Offset, i)
+ }
+ if loc.Size != 1 {
+ t.Errorf("blobmap_test: line %d: blob %d: chunk %d: Location.Size got %d, expected 1",
+ callerLine, blobi, i, loc.Size)
+ }
+
+ // The offsets and sizes will match, between the result
+ // from the stream and the result from LookupChunk(),
+ // because for all chunks written to both, they are
+ // written to the same places. However, the blob need
+ // not match, since LookupChunk() will return an
+ // arbitrary Location in the store that contains the
+ // chunk.
+ loc2 := cs.Location()
+ if loc.Offset != loc2.Offset || loc.Size != loc2.Size {
+ t.Errorf("blobmap_test: line %d: blob %d: chunk %d: disagreement about location: LookupChunk %v vs ChunkStream %v",
+ callerLine, blobi, i, loc, loc2)
+ }
+ }
+ }
+ if cs.Err() != nil {
+ t.Errorf("blobmap_test: line %d: blob %d: ChunkStream.Err() unepxected error %v",
+ callerLine, blobi, cs.Err())
+ }
+ if i != 8 {
+ t.Errorf("blobmap_test: line %d: blob %d: ChunkStream.Advance unexpectedly saw %d chunks, expected 8",
+ callerLine, blobi, i)
+ }
+}
+
+// TestAddRetrieveAndDelete() tests insertion, retrieval, and deletion of blobs
+// from a BlobMap. It's all done in one test case, because one cannot retrieve
+// or delete blobs that have not been inserted.
+func TestAddRetrieveAndDelete(t *testing.T) {
+ ctx, shutdown := test.V23Init()
+ defer shutdown()
+
+ // Make a temporary directory.
+ var err error
+ var testDirName string
+ testDirName, err = ioutil.TempDir("", "blobmap_test")
+ if err != nil {
+ t.Fatalf("blobmap_test: can't make tmp directory: %v", err)
+ }
+ defer os.RemoveAll(testDirName)
+
+ // Create a blobmap.
+ var bm *blobmap.BlobMap
+ bm, err = blobmap.New(ctx, testDirName)
+ if err != nil {
+ t.Fatalf("blobmap_test: blobmap.New failed: %v", err)
+ }
+
+ // Two blobs: b[0] and b[1].
+ b := [][]byte{id(), id()}
+
+ // Nine chunks: c[0 .. 8]
+ c := [][]byte{id(), id(), id(), id(), id(), id(), id(), id(), id()}
+
+ // Verify that there are no blobs, or chunks in blobs initially.
+ verifyBlobs(t, ctx, bm, nil)
+ verifyNoChunksInBlob(t, ctx, bm, 0, b)
+ verifyNoChunksInBlob(t, ctx, bm, 1, b)
+
+ // Verify that all chunks have no locations initially.
+ for chunki := range c {
+ _, err = bm.LookupChunk(ctx, c[chunki])
+ if err == nil {
+ t.Errorf("blobmap_test: chunk %d: LookupChunk: unexpected lack of error", chunki)
+ }
+ }
+
+ // Put chunks 0..7 into blob 0, and chunks 0..3, 8, 5..7 into blob 1.
+ // Each chunk is treated as having size 1.
+ for blobi := 0; blobi != 2; blobi++ {
+ for i := 0; i != 8; i++ {
+ chunki := i
+ if blobi == 1 && i == 4 { // In blob 1, c[4] 4 is replaced by c[8]
+ chunki = 8
+ }
+ err = bm.AssociateChunkWithLocation(ctx, c[chunki],
+ blobmap.Location{BlobID: b[blobi], Offset: int64(i), Size: 1})
+ if err != nil {
+ t.Errorf("blobmap_test: blob %d: AssociateChunkWithLocation: unexpected error: %v",
+ blobi, err)
+ }
+ }
+ }
+
+ // Verify that the blobs are present, with the chunks specified.
+ verifyBlobs(t, ctx, bm, b)
+ verifyChunksInBlob(t, ctx, bm, 0, b, c)
+ verifyChunksInBlob(t, ctx, bm, 1, b, c)
+
+ // Verify that all chunks now have locations.
+ for chunki := range c {
+ _, err = bm.LookupChunk(ctx, c[chunki])
+ if err != nil {
+ t.Errorf("blobmap_test: chunk %d: LookupChunk: unexpected error: %v",
+ chunki, err)
+ }
+ }
+
+ // Delete b[0].
+ err = bm.DeleteBlob(ctx, b[0])
+ if err != nil {
+ t.Errorf("blobmap_test: blob 0: DeleteBlob: unexpected error: %v", err)
+ }
+
+ // Verify that all chunks except chunk 4 (which was in only blob 0)
+ // still have locations.
+ for chunki := range c {
+ _, err = bm.LookupChunk(ctx, c[chunki])
+ if chunki == 4 {
+ if err == nil {
+ t.Errorf("blobmap_test: chunk %d: LookupChunk: expected lack of error",
+ chunki)
+ }
+ } else {
+ if err != nil {
+ t.Errorf("blobmap_test: chunk %d: LookupChunk: unexpected error: %v",
+ chunki, err)
+ }
+ }
+ }
+
+ // Verify that blob 0 is gone, but blob 1 remains.
+ verifyBlobs(t, ctx, bm, b[1:])
+ verifyNoChunksInBlob(t, ctx, bm, 0, b)
+ verifyChunksInBlob(t, ctx, bm, 1, b, c)
+
+ // Delete b[1].
+ err = bm.DeleteBlob(ctx, b[1])
+ if err != nil {
+ t.Errorf("blobmap_test: blob 1: DeleteBlob: unexpected error: %v",
+ err)
+ }
+
+ // Verify that there are no blobs, or chunks in blobs once more.
+ verifyBlobs(t, ctx, bm, nil)
+ verifyNoChunksInBlob(t, ctx, bm, 0, b)
+ verifyNoChunksInBlob(t, ctx, bm, 1, b)
+
+ // Verify that all chunks have no locations once more.
+ for chunki := range c {
+ _, err = bm.LookupChunk(ctx, c[chunki])
+ if err == nil {
+ t.Errorf("blobmap_test: chunk %d: LookupChunk: unexpected lack of error",
+ chunki)
+ }
+ }
+
+ err = bm.Close()
+ if err != nil {
+ t.Errorf("blobmap_test: unexpected error closing BlobMap: %v", err)
+ }
+}
diff --git a/services/syncbase/localblobstore/chunker/chunker.go b/services/syncbase/localblobstore/chunker/chunker.go
new file mode 100644
index 0000000..cb07533
--- /dev/null
+++ b/services/syncbase/localblobstore/chunker/chunker.go
@@ -0,0 +1,284 @@
+// Copyright 2015 The Vanadium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// Package chunker breaks a stream of bytes into context-defined chunks whose
+// boundaries are chosen based on content checksums of a window that slides
+// over the data. An edited sequence with insertions and removals can share
+// many chunks with the original sequence.
+//
+// The intent is that when a sequence of bytes is to be transmitted to a
+// recipient that may have much of the data, the sequence can be broken down
+// into chunks. The checksums of the resulting chunks may then be transmitted
+// to the recipient, which can then discover which of the chunks it has, and
+// which it needs.
+//
+// Example:
+// var s *chunker.Stream = chunker.NewStream(ctx, &chunker.DefaultParam, anIOReader)
+// for s.Advance() {
+// chunk := s.Value()
+// // process chunk
+// }
+// if s.Err() != nil {
+// // anIOReader generated an error.
+// }
+package chunker
+
+// The design is from:
+// "A Framework for Analyzing and Improving Content-Based Chunking Algorithms";
+// Kave Eshghi, Hsiu Khuern Tang; HPL-2005-30(R.1); Sep, 2005;
+// http://www.hpl.hp.com/techreports/2005/HPL-2005-30R1.pdf
+
+import "io"
+import "sync"
+
+import "v.io/syncbase/x/ref/services/syncbase/localblobstore/crc64window"
+import "v.io/v23/context"
+import "v.io/v23/verror"
+
+const pkgPath = "v.io/syncbase/x/ref/services/syncbase/localblobstore/chunker"
+
+var (
+ errStreamCancelled = verror.Register(pkgPath+".errStreamCancelled", verror.NoRetry, "{1:}{2:} Advance() called on cancelled stream{:_}")
+)
+
+// A Param contains the parameters for chunking.
+//
+// Chunks are broken based on a hash of a sliding window of width WindowWidth
+// bytes.
+// Each chunk is at most MaxChunk bytes long, and, unless end-of-file or an
+// error is reached, at least MinChunk bytes long.
+//
+// Subject to those constraints, a chunk boundary is introduced at the first point
+// where the hash of the sliding window is 1 mod Primary, or if that doesn't
+// occur before MaxChunk bytes, at the last position where the hash is 1 mod
+// Secondary, or if that does not occur, after MaxChunk bytes.
+// Normally, MinChunk < Primary < MaxChunk.
+// Primary is the expected chunk size.
+// The Secondary divisor exists to make it more likely that a chunk boundary is
+// selected based on the local data when the Primary divisor by chance does not
+// find a match for a long distance. It should be a few times smaller than
+// Primary.
+//
+// Using primes for Primary and Secondary is not essential, but recommended
+// because it guarantees mixing of the checksum bits should their distribution
+// be non-uniform.
+type Param struct {
+ WindowWidth int // the window size to use when looking for chunk boundaries
+ MinChunk int64 // minimum chunk size
+ MaxChunk int64 // maximum chunk size
+ Primary uint64 // primary divisor; the expected chunk size
+ Secondary uint64 // secondary divisor
+}
+
+// DefaultParam contains default chunking parameters.
+var DefaultParam Param = Param{WindowWidth: 48, MinChunk: 512, MaxChunk: 3072, Primary: 601, Secondary: 307}
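+
+// A client that wants larger chunks than DefaultParam produces can supply its
+// own Param to NewStream() (a sketch only; ctx and rd are assumed to exist,
+// and these particular values are illustrative, not tuned):
+// param := chunker.Param{WindowWidth: 48, MinChunk: 2048, MaxChunk: 12288, Primary: 2053, Secondary: 1021}
+// s := chunker.NewStream(ctx, &param, rd)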
+
+// A Stream allows a client to iterate over the chunks within an io.Reader byte
+// stream.
+type Stream struct {
+ param Param // chunking parameters
+ ctx *context.T // context of creator
+ window *crc64window.Window // sliding window for computing the hash
+ buf []byte // buffer of data
+ rd io.Reader // source of data
+ err error // error from rd
+ mu sync.Mutex // protects cancelled
+ cancelled bool // whether the stream has been cancelled
+ bufferChunks bool // whether to buffer entire chunks
+ // Invariant: bufStart <= chunkStart <= chunkEnd <= bufEnd
+ bufStart int64 // offset in rd of first byte in buf[]
+ bufEnd int64 // offset in rd of next byte after those in buf[]
+ chunkStart int64 // offset in rd of first byte of current chunk
+ chunkEnd int64 // offset in rd of next byte after current chunk
+ windowEnd int64 // offset in rd of next byte to be given to window
+ hash uint64 // hash of sliding window
+}
+
+// newStream() returns a pointer to a new Stream instance, with the
+// parameters in *param. This internal version of NewStream() allows the caller
+// to specify via bufferChunks whether entire chunks should be buffered.
+func newStream(ctx *context.T, param *Param, rd io.Reader, bufferChunks bool) *Stream {
+ s := new(Stream)
+ s.param = *param
+ s.ctx = ctx
+ s.window = crc64window.New(crc64window.ECMA, s.param.WindowWidth)
+ bufSize := int64(8192)
+ if bufferChunks {
+ // If we must buffer entire chunks, arrange that the buffer
+ // size is considerably larger than the max chunk size to avoid
+ // copying data repeatedly.
+ for bufSize < 4*s.param.MaxChunk {
+ bufSize *= 2
+ }
+ }
+ s.buf = make([]byte, bufSize)
+ s.rd = rd
+ s.bufferChunks = bufferChunks
+ return s
+}
+
+// NewStream() returns a pointer to a new Stream instance, with the
+// parameters in *param.
+func NewStream(ctx *context.T, param *Param, rd io.Reader) *Stream {
+ return newStream(ctx, param, rd, true)
+}
+
+// isCancelled() returns whether s.Cancel() has been called.
+func (s *Stream) isCancelled() (cancelled bool) {
+ s.mu.Lock()
+ cancelled = s.cancelled
+ s.mu.Unlock()
+ return cancelled
+}
+
+// Advance() stages the next chunk so that it may be retrieved via Value().
+// Returns true iff there is an item to retrieve. Advance() must be called
+// before Value() is called.
+func (s *Stream) Advance() bool {
+ // Remember that s.{bufStart,bufEnd,chunkStart,chunkEnd,windowEnd}
+ // are all offsets within s.rd, not indices into s.buf.
+ // Therefore, these starts and ends can easily be compared
+ // with each other, but we must subtract bufStart when
+ // indexing into buf. (Other schemes were considered, but
+ // nothing seems uniformly better.)
+
+ // If buffering entire chunks, ensure there's enough data in the buffer
+ // for the next chunk.
+ if s.bufferChunks && s.bufEnd < s.chunkEnd+s.param.MaxChunk && s.err == nil {
+ // Next chunk might need more data.
+ if s.bufStart < s.chunkEnd {
+ // Move any remaining buffered data to start of buffer.
+ copy(s.buf, s.buf[s.chunkEnd-s.bufStart:s.bufEnd-s.bufStart])
+ s.bufStart = s.chunkEnd
+ }
+ // Fill buffer with data, unless error/EOF.
+ for s.err == nil && s.bufEnd < s.bufStart+int64(len(s.buf)) && !s.isCancelled() {
+ var n int
+ n, s.err = s.rd.Read(s.buf[s.bufEnd-s.bufStart:])
+ s.bufEnd += int64(n)
+ }
+ }
+
+ // Make the next chunk current.
+ s.chunkStart = s.chunkEnd
+ minChunk := s.chunkStart + s.param.MinChunk
+ maxChunk := s.chunkStart + s.param.MaxChunk
+ lastSecondaryBreak := maxChunk
+
+ // While not end of chunk...
+ for s.windowEnd != maxChunk &&
+ (s.windowEnd < minChunk || (s.hash%s.param.Primary) != 1) &&
+ (s.windowEnd != s.bufEnd || s.err == nil) && !s.isCancelled() {
+
+ // Fill the buffer if empty, and there's more data to read.
+ if s.windowEnd == s.bufEnd && s.err == nil {
+ if s.bufferChunks {
+ panic("chunker.Advance had to fill buffer in bufferChunks mode")
+ }
+ s.bufStart = s.bufEnd
+ var n int
+ n, s.err = s.rd.Read(s.buf)
+ s.bufEnd += int64(n)
+ }
+
+ // bufLimit is the lesser of the maximum possible chunk end (maxChunk) and the end of the buffered data (s.bufEnd).
+ bufLimit := maxChunk
+ if s.bufEnd < bufLimit {
+ bufLimit = s.bufEnd
+ }
+ // Advance window until both MinChunk reached and primary boundary found.
+ for s.windowEnd != bufLimit &&
+ (s.windowEnd < minChunk || (s.hash%s.param.Primary) != 1) &&
+ !s.isCancelled() {
+
+ // Advance the window by one byte.
+ s.hash = s.window.Advance(s.buf[s.windowEnd-s.bufStart])
+ s.windowEnd++
+ if (s.hash % s.param.Secondary) == 1 {
+ lastSecondaryBreak = s.windowEnd
+ }
+ }
+ }
+
+ if s.windowEnd == maxChunk && (s.hash%s.param.Primary) != 1 && lastSecondaryBreak != maxChunk {
+ // The primary break point was not found in the maximum chunk
+ // size, and a secondary break point was found; use it.
+ s.chunkEnd = lastSecondaryBreak
+ } else {
+ s.chunkEnd = s.windowEnd
+ }
+
+ return !s.isCancelled() && s.chunkStart != s.chunkEnd // We have a non-empty chunk to return.
+}
+
+// Value() returns the chunk that was staged by Advance(). May panic if
+// Advance() returned false or was not called. Never blocks.
+func (s *Stream) Value() []byte {
+ return s.buf[s.chunkStart-s.bufStart : s.chunkEnd-s.bufStart]
+}
+
+// Err() returns any error encountered by Advance(). Never blocks.
+func (s *Stream) Err() (err error) {
+ s.mu.Lock()
+ if s.cancelled && (s.err == nil || s.err == io.EOF) {
+ s.err = verror.New(errStreamCancelled, s.ctx)
+ }
+ s.mu.Unlock()
+ if s.err != io.EOF { // Do not consider EOF to be an error.
+ err = s.err
+ }
+ return err
+}
+
+// Cancel() causes the next call to Advance() to return false.
+// It should be used when the client does not wish to iterate to the end of the stream.
+// Never blocks. May be called concurrently with other method calls on s.
+func (s *Stream) Cancel() {
+ s.mu.Lock()
+ s.cancelled = true
+ s.mu.Unlock()
+}
+
+// ----------------------------------
+
+// A PosStream is just like a Stream, except that the Value() method returns only
+// the byte offsets of the ends of chunks, rather than the chunks themselves.
+// It can be used when chunks are too large for even a small number of them to
+// be buffered comfortably in memory.
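+// A PosStream is used like a Stream (a sketch; ctx and anIOReader are assumed
+// to exist):
+// ps := chunker.NewPosStream(ctx, &chunker.DefaultParam, anIOReader)
+// for ps.Advance() {
+//   end := ps.Value() // offset just past the current chunk
+//   // ...use end...
+// }
+// if ps.Err() != nil {
+//   // anIOReader generated an error.
+// }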
+type PosStream struct {
+ s *Stream
+}
+
+// NewPosStream() returns a pointer to a new PosStream instance, with the
+// parameters in *param.
+func NewPosStream(ctx *context.T, param *Param, rd io.Reader) *PosStream {
+ ps := new(PosStream)
+ ps.s = newStream(ctx, param, rd, false)
+ return ps
+}
+
+// Advance() stages the offset of the end of the next chunk so that it may be
+// retrieved via Value(). Returns true iff there is an item to retrieve.
+// Advance() must be called before Value() is called.
+func (ps *PosStream) Advance() bool {
+ return ps.s.Advance()
+}
+
+// Value() returns the offset just past the end of the chunk staged by
+// Advance(). May panic if Advance() returned false or was not called. Never blocks.
+func (ps *PosStream) Value() int64 {
+ return ps.s.chunkEnd
+}
+
+// Err() returns any error encountered by Advance(). Never blocks.
+func (ps *PosStream) Err() error {
+ return ps.s.Err()
+}
+
+// Cancel() causes the next call to Advance() to return false.
+// It should be used when the client does not wish to iterate to the end of the stream.
+// Never blocks. May be called concurrently with other method calls on ps.
+func (ps *PosStream) Cancel() {
+ ps.s.Cancel()
+}
diff --git a/services/syncbase/localblobstore/chunker/chunker_test.go b/services/syncbase/localblobstore/chunker/chunker_test.go
new file mode 100644
index 0000000..57c6fed
--- /dev/null
+++ b/services/syncbase/localblobstore/chunker/chunker_test.go
@@ -0,0 +1,197 @@
+// Copyright 2015 The Vanadium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// A test for the chunker package.
+package chunker_test
+
+import "bytes"
+import "crypto/md5"
+import "fmt"
+import "io"
+import "testing"
+
+import "v.io/syncbase/x/ref/services/syncbase/localblobstore/chunker"
+import "v.io/syncbase/x/ref/services/syncbase/localblobstore/localblobstore_testlib"
+import "v.io/v23/context"
+import "v.io/x/ref/test"
+import _ "v.io/x/ref/runtime/factories/generic"
+
+// TestChunksPartitionStream() tests that the chunker partitions its input
+// stream into reasonable sized chunks, which when concatenated form the
+// original stream.
+func TestChunksPartitionStream(t *testing.T) {
+ ctx, shutdown := test.V23Init()
+ defer shutdown()
+
+ var err error
+ totalLength := 1024 * 1024
+
+ // Compute the md5 of an arbitrary stream. We will later compare this
+ // with the md5 of the concatenation of chunks from an equivalent
+ // stream.
+ r := localblobstore_testlib.NewRandReader(1, totalLength, 0, io.EOF)
+ hStream := md5.New()
+ buf := make([]byte, 8192)
+ for err == nil {
+ var n int
+ n, err = r.Read(buf)
+ hStream.Write(buf[0:n])
+ }
+ checksumStream := hStream.Sum(nil)
+
+ // Using an equivalent stream, break it into chunks.
+ r = localblobstore_testlib.NewRandReader(1, totalLength, 0, io.EOF)
+ param := &chunker.DefaultParam
+ hChunked := md5.New()
+
+ length := 0
+ s := chunker.NewStream(ctx, param, r)
+ for s.Advance() {
+ chunk := s.Value()
+ length += len(chunk)
+ // The last chunk is permitted to be short, hence the second
+ // conjunct in the following predicate.
+ if int64(len(chunk)) < param.MinChunk && length != totalLength {
+ t.Errorf("chunker_test: chunk length %d below minimum %d", len(chunk), param.MinChunk)
+ }
+ if int64(len(chunk)) > param.MaxChunk {
+ t.Errorf("chunker_test: chunk length %d above maximum %d", len(chunk), param.MaxChunk)
+ }
+ hChunked.Write(chunk)
+ }
+ if s.Err() != nil {
+ t.Errorf("chunker_test: got error from chunker: %v\n", err)
+ }
+
+ if length != totalLength {
+ t.Errorf("chunker_test: chunk lengths summed to %d, expected %d", length, totalLength)
+ }
+
+ checksumChunked := hChunked.Sum(nil)
+ if bytes.Compare(checksumStream, checksumChunked) != 0 {
+ t.Errorf("chunker_test: md5 of stream is %v, but md5 of chunks is %v", checksumStream, checksumChunked)
+ }
+}
+
+// TestPosStream() tests that a PosStream leads to the same chunks as a Stream.
+func TestPosStream(t *testing.T) {
+ ctx, shutdown := test.V23Init()
+ defer shutdown()
+
+ totalLength := 1024 * 1024
+
+ s := chunker.NewStream(ctx, &chunker.DefaultParam,
+ localblobstore_testlib.NewRandReader(1, totalLength, 0, io.EOF))
+ ps := chunker.NewPosStream(ctx, &chunker.DefaultParam,
+ localblobstore_testlib.NewRandReader(1, totalLength, 0, io.EOF))
+
+ itReady := s.Advance()
+ pitReady := ps.Advance()
+ itPos := 0
+ chunkCount := 0
+ for itReady && pitReady {
+ itPos += len(s.Value())
+ if int64(itPos) != ps.Value() {
+ t.Fatalf("chunker_test: Stream and PosStream positions diverged at chunk %d: %d vs %d", chunkCount, itPos, ps.Value())
+ }
+ chunkCount++
+ itReady = s.Advance()
+ pitReady = ps.Advance()
+ }
+ if itReady {
+ t.Error("chunker_test: Stream ended before PosStream")
+ }
+ if pitReady {
+ t.Error("chunker_test: PosStream ended before Stream")
+ }
+ if s.Err() != nil {
+ t.Errorf("chunker_test: Stream got unexpected error: %v", s.Err())
+ }
+ if ps.Err() != nil {
+ t.Errorf("chunker_test: PosStream got unexpected error: %v", ps.Err())
+ }
+}
+
+// chunkSums() returns a vector of md5 checksums for the chunks of the
+// specified Reader, using the default chunking parameters.
+func chunkSums(ctx *context.T, r io.Reader) (sums [][md5.Size]byte) {
+ s := chunker.NewStream(ctx, &chunker.DefaultParam, r)
+ for s.Advance() {
+ sums = append(sums, md5.Sum(s.Value()))
+ }
+ return sums
+}
+
+// TestInsertions() tests how chunk sequences differ when bytes are
+// periodically inserted into a stream.
+func TestInsertions(t *testing.T) {
+ ctx, shutdown := test.V23Init()
+ defer shutdown()
+
+ totalLength := 1024 * 1024
+ insertionInterval := 20 * 1024
+ bytesInserted := totalLength / insertionInterval
+
+ // Get the md5 sums of the chunks of two similar streams, where the
+ // second has an extra byte every 20k bytes.
+ sums0 := chunkSums(ctx, localblobstore_testlib.NewRandReader(1, totalLength, 0, io.EOF))
+ sums1 := chunkSums(ctx, localblobstore_testlib.NewRandReader(1, totalLength, insertionInterval, io.EOF))
+
+ // Iterate over chunks of second stream, counting which are in common
+ // with first stream. We expect to find common chunks within 10 of the
+ // last chunk in common, since insertions are single bytes, widely
+ // separated.
+ same := 0 // Number of chunks in sums1 that are the same as chunks in sums0.
+ i0 := 0 // Where to search for a match in sums0.
+ for i1 := 0; i1 != len(sums1); i1++ {
+ // Be prepared to search up to the next 10 elements of sums0 from the most recent match.
+ limit := len(sums0) - i0
+ if limit > 10 {
+ limit = 10
+ }
+ var d int
+ for d = 0; d != limit && bytes.Compare(sums0[i0+d][:], sums1[i1][:]) != 0; d++ {
+ }
+ if d != limit { // found
+ same++
+ i0 += d // Advance i0 to the most recent match.
+ }
+ }
+ // The number of chunks that aren't the same as one in the original stream should be at least as large
+ // as the number of bytes inserted, and not too many more.
+ different := len(sums1) - same
+ if different < bytesInserted {
+ t.Errorf("chunker_test: saw %d different chunks, but expected at least %d", different, bytesInserted)
+ }
+ if bytesInserted+(bytesInserted/2) < different {
+ t.Errorf("chunker_test: saw %d different chunks, but expected at most %d", different, bytesInserted+(bytesInserted/2))
+ }
+ // Require that most chunks are the same, by a substantial margin.
+ if same < 5*different {
+ t.Errorf("chunker_test: saw %d different chunks, and %d same, but expected at least a factor of 5 more same than different", different, same)
+ }
+}
+
+// TestError() tests the behaviour of a chunker when given an error by its
+// reader.
+func TestError(t *testing.T) {
+ ctx, shutdown := test.V23Init()
+ defer shutdown()
+
+ notEOF := fmt.Errorf("not EOF")
+ totalLength := 50 * 1024
+ r := localblobstore_testlib.NewRandReader(1, totalLength, 0, notEOF)
+ s := chunker.NewStream(ctx, &chunker.DefaultParam, r)
+ length := 0
+ for s.Advance() {
+ chunk := s.Value()
+ length += len(chunk)
+ }
+ if s.Err() != notEOF {
+ t.Errorf("chunker_test: chunk stream ended with error %v, expected %v", s.Err(), notEOF)
+ }
+ if length != totalLength {
+ t.Errorf("chunker_test: chunk lengths summed to %d, expected %d", length, totalLength)
+ }
+}
diff --git a/services/syncbase/localblobstore/crc64window/crc64window.go b/services/syncbase/localblobstore/crc64window/crc64window.go
new file mode 100644
index 0000000..b27dbb9
--- /dev/null
+++ b/services/syncbase/localblobstore/crc64window/crc64window.go
@@ -0,0 +1,153 @@
+// Copyright 2015 The Vanadium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// Package crc64window provides CRCs over fixed-sized, rolling windows of bytes.
+//
+// It uses the same polynomial representation and CRC conditioning as
+// hash/crc64, so the results are the same as computing hash/crc64 over the
+// window of the last sz bytes added, where sz is the window size. Thus, in
+// the example below, rolling and nonRolling receive the same value.
+// w := crc64window.New(crc64window.ECMA, 3) // Window size is 3 bytes.
+// w.Advance(0x17)
+// w.Advance(0x92)
+// w.Advance(0x04)
+// rolling := w.Advance(0x28) // Rolls 0x17 out, and 0x28 in.
+//
+// nonRolling := crc64.Update(0, crc64.MakeTable(crc64.ECMA), []byte{0x92, 0x04, 0x28})
+//
+// Strangely, hash/crc64's specification does not mention which of the many
+// possible bit representations and conditioning choices it uses. We assume it
+// will not change from the following, which was gleaned from the hash/crc64
+// source code:
+//
+// - All messages to be processed, CRC values, and CRC polynomials are
+// polynomials in x whose coefficients are in Z(2).
+// - CRC values are represented by uint64 values in which bit i of the integer
+// represents the coefficient of x**(63-i) in the polynomial.
+// - CRC polynomials are represented like CRC values, except that the x**64
+// coefficient of the CRC polynomial is implicitly 1, and not stored.
+// - Messages to be processed are represented by byte vectors in which the
+// lowest-order bit of the first byte is the highest-degree polynomial
+// coefficient.
+// - For a CRC polynomial p and a message m, the CRC value:
+// CRC(p, m) = c + ((c * (x**len(m)) + (m * x**64)) mod p)
+// where the conditioning constant c = x**63 + x**62 + x**61 + ... + x + 1,
+// and len(m) is the number of bits in m.
+package crc64window
+
+import "sync"
+
+// The ECMA-64 polynomial, defined in ECMA 182.
+// This polynomial is recommended for use with this package, though other
+// polynomials found in hash/crc64 will also work.
+const ECMA = 0xc96c5795d7870f42
+
+// A Window contains the state needed to compute a CRC over a fixed-sized,
+// rolling window of data.
+type Window struct {
+ crc uint64 // CRC of window, unconditioned (i.e., just the window mod the CRC polynomial).
+ window []byte // The bytes in the window.
+ pos int // Index in window[] of first byte, which is the next byte to be overwritten.
+ crcData *crcData // Pointer to the immutable CRC tables for the CRC.
+}
+
+// A crcData is immutable after initialization, and contains tables for
+// computing a particular CRC over a particular window size. Pre-computed
+// copies of crcData are stored in tables[] so that CRC tables need be computed
+// only once per (polynomial, window size) pair.
+type crcData struct {
+ conditioning uint64
+ crcTableFront [256]uint64
+ crcTableRear [256]uint64
+}
+
+var mu sync.Mutex // Protects "tables", the cache of CRC tables already computed.
+
+// A polySize represents a pair of a CRC polynomial and a window size.
+type polySize struct {
+ poly uint64
+ size int
+}
+
+// tables[] maps (polynomial,window size) pairs to computed tables, so tables
+// are computed only once. It's accessed only under mu.
+var tables map[polySize]*crcData
+
+// getCRCData() returns a pointer to a crcData for the given CRC polynomial
+// and window size, either by cache lookup or by calculating it. Requires
+// size > 0.
+func getCRCData(poly uint64, size int) *crcData {
+ mu.Lock()
+ // Use cached CRC tables if available.
+ if tables == nil {
+ tables = make(map[polySize]*crcData)
+ }
+ ps := polySize{poly: poly, size: size}
+ c, found := tables[ps]
+ if !found { // Compute and save the CRC tables.
+ c = new(crcData)
+ // Loop ensures: c.crcTableFront[m & 0xff] ^ (m >> 8)==CRC(m * x**8)
+ zeroOrPoly := []uint64{0, poly}
+ for i := 1; i != 256; i <<= 1 {
+ crc := uint64(i)
+ for j := 0; j != 8; j++ {
+ crc = (crc >> 1) ^ zeroOrPoly[crc&1]
+ }
+ for j := 0; j != i; j++ {
+ c.crcTableFront[j+i] = crc ^ c.crcTableFront[j]
+ }
+ }
+ // Loop ensures: c.crcTableRear[b] == CRC(b * x**(size*8))
+ for i := 1; i != 256; i <<= 1 {
+ crc := c.crcTableFront[i]
+ for j := 1; j != size; j++ {
+ crc = c.crcTableFront[byte(crc)] ^ (crc >> 8)
+ }
+ for j := 0; j != i; j++ {
+ c.crcTableRear[j+i] = crc ^ c.crcTableRear[j]
+ }
+ }
+
+ // Loop ensures: c.conditioning == CRC(all-ones * x**(size*8))
+ conditioning := ^uint64(0)
+ for i := 0; i != size; i++ {
+ conditioning = c.crcTableFront[byte(conditioning)] ^ (conditioning >> 8)
+ }
+ c.conditioning = conditioning
+
+ tables[ps] = c
+ }
+ mu.Unlock()
+ return c
+}
+
+// New() returns a Window with the given size and CRC polynomial.
+// Initially, all the bytes in the window are zero. Requires size > 0.
+func New(poly uint64, size int) *Window {
+ if size <= 0 {
+ panic("crc64window.New() called with size <= 0")
+ }
+ w := new(Window)
+ w.window = make([]byte, size)
+ w.crc = 0
+ w.crcData = getCRCData(poly, size)
+ return w
+}
+
+// Advance() removes the first byte from window *w, adds b as the new last
+// byte, and returns the CRC of the window.
+func (w *Window) Advance(b byte) uint64 {
+ c := w.crcData
+ pos := w.pos
+ crc := w.crc
+ crc ^= c.crcTableRear[w.window[pos]]
+ w.crc = c.crcTableFront[byte(crc)^b] ^ (crc >> 8)
+ w.window[pos] = b
+ pos++
+ if pos == len(w.window) {
+ pos = 0
+ }
+ w.pos = pos
+ return ^(c.conditioning ^ w.crc)
+}
diff --git a/services/syncbase/localblobstore/crc64window/crc64window_test.go b/services/syncbase/localblobstore/crc64window/crc64window_test.go
new file mode 100644
index 0000000..9969125
--- /dev/null
+++ b/services/syncbase/localblobstore/crc64window/crc64window_test.go
@@ -0,0 +1,51 @@
+// Copyright 2015 The Vanadium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// A test for crc64window.
+package crc64window_test
+
+import "hash/crc64"
+import "math/rand"
+import "testing"
+
+import "v.io/syncbase/x/ref/services/syncbase/localblobstore/crc64window"
+
+// A test for the example given in the package's specification.
+func TestCRC64WindowExample(t *testing.T) {
+ w := crc64window.New(crc64.ECMA, 3)
+ w.Advance(0x17)
+ w.Advance(0x92)
+ w.Advance(0x04)
+ rolling := w.Advance(0x28) // Rolls 0x17 out, and 0x28 in.
+ nonRolling := crc64.Update(0, crc64.MakeTable(crc64.ECMA), []byte{0x92, 0x04, 0x28})
+ if rolling != nonRolling {
+ t.Errorf("crc64window: rolling(0x92, 0x04, 0x28)==%x nonRolling(0x92, 0x04, 0x28)==%x\n", rolling, nonRolling)
+ }
+}
+
+func TestCRC64Window(t *testing.T) {
+ winSize := 16
+ iterations := 1000
+
+ w := crc64window.New(crc64.ECMA, winSize)
+
+ table := crc64.MakeTable(crc64.ECMA)
+ block := make([]byte, winSize-1+iterations)
+
+ for i := 0; i != len(block); i++ {
+ block[i] = byte(rand.Int31n(256))
+ }
+
+ i := 0
+ for ; i != winSize-1; i++ {
+ w.Advance(block[i])
+ }
+ for ; i != len(block); i++ {
+ expect := crc64.Update(0, table, block[i+1-winSize:i+1])
+ got := w.Advance(block[i])
+ if expect != got {
+ t.Errorf("crc64window: i %d winSize %d got %x, expect %x\n", i, winSize, got, expect)
+ }
+ }
+}
diff --git a/services/syncbase/localblobstore/fs_cablobstore/fs_cablobstore.go b/services/syncbase/localblobstore/fs_cablobstore/fs_cablobstore.go
new file mode 100644
index 0000000..4fb7500
--- /dev/null
+++ b/services/syncbase/localblobstore/fs_cablobstore/fs_cablobstore.go
@@ -0,0 +1,1521 @@
+// Copyright 2015 The Vanadium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// Package fs_cablobstore implements a content addressable blob store
+// on top of a file system. It assumes that either os.Link() or
+// os.Rename() is available.
+package fs_cablobstore
+
+// Internals:
+// Blobs are partitioned into two types of unit: "fragments" and "chunks".
+// A fragment is stored in a single file on disc. A chunk is a unit of network
+// transmission.
+//
+// The blobstore consists of a directory with "blob", "cas", "chunk", and
+// "tmp" subdirectories.
+// - "tmp" is used for temporary files that are moved into place via
+// link()/unlink() or rename(), depending on what's available.
+// - "cas" contains files whose names are content hashes of the files being
+// named. A few slashes are thrown into the name near the front so that no
+// single directory gets too large. These files are called "fragments".
+// - "blob" contains files whose names are random numbers. These names are
+// visible externally as "blob names". Again, a few slashes are thrown
+// into the name near the front so that no single directory gets too large.
+// Each of these files contains a series of lines of the form:
+// d <size> <offset> <cas-fragment>
+// followed optionally by a line of the form:
+// f <md5-hash>
+// Each "d" line indicates that the next <size> bytes of the blob appear at
+// <offset> bytes into <cas-fragment>, which is in the "cas" subtree. The
+// "f" line indicates that the blob is "finalized" and gives its complete
+// md5 hash. No fragments may be appended to a finalized blob.
+// - "chunk" contains a store (currently implemented with leveldb) that
+// maps chunks of blobs to content hashes and vice versa.
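+//
+// For illustration only (these hashes, sizes, and offsets are made up), a
+// finalized blob file might contain:
+// d 4096 0 cas/d4/1d/8c/d98f00b204e9800998ecf8427e
+// d 1024 0 cas/9e/10/7d/9d372bb6826bd81d3542a419d6
+// f 0cc175b9c0f1b6a831c399e269772661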
+
+import "bufio"
+import "bytes"
+import "crypto/md5"
+import "fmt"
+import "hash"
+import "io"
+import "io/ioutil"
+import "math"
+import "math/rand"
+import "os"
+import "path/filepath"
+import "strconv"
+import "strings"
+import "sync"
+import "time"
+
+import "v.io/syncbase/x/ref/services/syncbase/localblobstore"
+import "v.io/syncbase/x/ref/services/syncbase/localblobstore/chunker"
+import "v.io/syncbase/x/ref/services/syncbase/localblobstore/blobmap"
+import "v.io/v23/context"
+import "v.io/v23/verror"
+
+const pkgPath = "v.io/syncbase/x/ref/services/syncbase/localblobstore/fs_cablobstore"
+
+var (
+ errNotADir = verror.Register(pkgPath+".errNotADir", verror.NoRetry, "{1:}{2:} Not a directory{:_}")
+ errAppendFailed = verror.Register(pkgPath+".errAppendFailed", verror.NoRetry, "{1:}{2:} fs_cablobstore.Append failed{:_}")
+ errMalformedField = verror.Register(pkgPath+".errMalformedField", verror.NoRetry, "{1:}{2:} Malformed field in blob specification{:_}")
+ errAlreadyClosed = verror.Register(pkgPath+".errAlreadyClosed", verror.NoRetry, "{1:}{2:} BlobWriter is already closed{:_}")
+ errBlobAlreadyFinalized = verror.Register(pkgPath+".errBlobAlreadyFinalized", verror.NoRetry, "{1:}{2:} Blob is already finalized{:_}")
+ errIllegalPositionForRead = verror.Register(pkgPath+".errIllegalPositionForRead", verror.NoRetry, "{1:}{2:} BlobReader: illegal position {3} on Blob of size {4}{:_}")
+ errBadSeekWhence = verror.Register(pkgPath+".errBadSeekWhence", verror.NoRetry, "{1:}{2:} BlobReader: Bad value for 'whence' in Seek{:_}")
+ errNegativeSeekPosition = verror.Register(pkgPath+".errNegativeSeekPosition", verror.NoRetry, "{1:}{2:} BlobReader: negative position for Seek: offset {3}, whence {4}{:_}")
+ errBadSizeOrOffset = verror.Register(pkgPath+".errBadSizeOrOffset", verror.NoRetry, "{1:}{2:} Bad size ({3}) or offset ({4}) in blob {5} (size {6}){:_}")
+ errMalformedBlobHash = verror.Register(pkgPath+".errMalformedBlobHash", verror.NoRetry, "{1:}{2:} Blob {3} has malformed hash{:_}")
+ errInvalidBlobName = verror.Register(pkgPath+".errInvalidBlobName", verror.NoRetry, "{1:}{2:} Invalid blob name {3}{:_}")
+ errCantDeleteBlob = verror.Register(pkgPath+".errCantDeleteBlob", verror.NoRetry, "{1:}{2:} Can't delete blob {3}{:_}")
+ errBlobDeleted = verror.Register(pkgPath+".errBlobDeleted", verror.NoRetry, "{1:}{2:} Blob is deleted{:_}")
+ errSizeTooBigForFragment = verror.Register(pkgPath+".errSizeTooBigForFragment", verror.NoRetry, "{1:}{2:} writing blob {3}, size too big for fragment{:_}")
+ errStreamCancelled = verror.Register(pkgPath+".errStreamCancelled", verror.NoRetry, "{1:}{2:} Advance() called on cancelled stream{:_}")
+)
+
+// For the moment, we disallow others from accessing the tree where blobs are
+// stored. We could in the future relax this to 0711/0755, and 0644.
+const dirPermissions = 0700
+const filePermissions = 0600
+
+// Subdirectories of the blobstore's tree
+const (
+ blobDir = "blob" // Subdirectory where blobs are indexed by blob id.
+ casDir = "cas" // Subdirectory where fragments are indexed by content hash.
+ chunkDir = "chunk" // Subdirectory where chunks are indexed by content hash.
+ tmpDir = "tmp" // Subdirectory where temporary files are created.
+)
+
+// An FsCaBlobStore represents a simple, content-addressable store.
+type FsCaBlobStore struct {
+ rootName string // The name of the root of the store.
+ bm *blobmap.BlobMap // Mapping from chunks to blob locations and vice versa.
+
+ // mu protects fields below, plus most fields in each blobDesc when used from a BlobWriter.
+ mu sync.Mutex
+ activeDesc []*blobDesc // The blob descriptors in use by active BlobReaders and BlobWriters.
+ toDelete []*map[string]bool // Sets of items that active GC threads are about to delete. (Pointers to maps, to allow pointer comparison.)
+}
+
+// hashToFileName() returns the relative file name used to store the given
+// binary hash under the specified prefix directory. Requires len(hash)==16.
+// An md5 hash is suitable.
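+// For illustration (an arbitrary, made-up hash), the 16-byte hash
+// 0a1b2c3d4e5f60718293a4b5c6d7e8f9 with prefix "cas" yields (on Unix):
+// cas/0a/1b/2c/3d4e5f60718293a4b5c6d7e8f9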
+func hashToFileName(prefix string, hash []byte) string {
+ return filepath.Join(prefix,
+ fmt.Sprintf("%02x", hash[0]),
+ fmt.Sprintf("%02x", hash[1]),
+ fmt.Sprintf("%02x", hash[2]),
+ fmt.Sprintf("%02x%02x%02x%02x%02x%02x%02x%02x%02x%02x%02x%02x%02x",
+ hash[3],
+ hash[4], hash[5], hash[6], hash[7],
+ hash[8], hash[9], hash[10], hash[11],
+ hash[12], hash[13], hash[14], hash[15]))
+}
+
+// fileNameToHash() converts a file name in the format generated by
+// hashToFileName(prefix, ...) to a vector of 16 bytes. If the string is
+// malformed, the nil slice is returned.
+func fileNameToHash(prefix string, s string) []byte {
+ idStr := strings.TrimPrefix(filepath.ToSlash(s), prefix+"/")
+ hash := make([]byte, 16, 16)
+ n, err := fmt.Sscanf(idStr, "%02x/%02x/%02x/%02x%02x%02x%02x%02x%02x%02x%02x%02x%02x%02x%02x%02x",
+ &hash[0], &hash[1], &hash[2], &hash[3],
+ &hash[4], &hash[5], &hash[6], &hash[7],
+ &hash[8], &hash[9], &hash[10], &hash[11],
+ &hash[12], &hash[13], &hash[14], &hash[15])
+ if n != 16 || err != nil {
+ hash = nil
+ }
+ return hash
+}
+
+// newBlobName() returns a new random name for a blob.
+func newBlobName() string {
+ return filepath.Join(blobDir,
+ fmt.Sprintf("%02x", rand.Int31n(256)),
+ fmt.Sprintf("%02x", rand.Int31n(256)),
+ fmt.Sprintf("%02x", rand.Int31n(256)),
+ fmt.Sprintf("%02x%02x%02x%02x%02x%02x%02x%02x%02x%02x%02x%02x%02x",
+ rand.Int31n(256),
+ rand.Int31n(256), rand.Int31n(256), rand.Int31n(256), rand.Int31n(256),
+ rand.Int31n(256), rand.Int31n(256), rand.Int31n(256), rand.Int31n(256),
+ rand.Int31n(256), rand.Int31n(256), rand.Int31n(256), rand.Int31n(256)))
+}
+
+// hashToString() returns a string representation of the hash.
+// Requires len(hash)==16. An md5 hash is suitable.
+func hashToString(hash []byte) string {
+ return fmt.Sprintf("%02x%02x%02x%02x%02x%02x%02x%02x%02x%02x%02x%02x%02x%02x%02x%02x",
+ hash[0], hash[1], hash[2], hash[3],
+ hash[4], hash[5], hash[6], hash[7],
+ hash[8], hash[9], hash[10], hash[11],
+ hash[12], hash[13], hash[14], hash[15])
+}
+
+// stringToHash() converts a string in the format generated by hashToString()
+// to a vector of 16 bytes. If the string is malformed, the nil slice is
+// returned.
+func stringToHash(s string) []byte {
+ hash := make([]byte, 16, 16)
+ n, err := fmt.Sscanf(s, "%02x%02x%02x%02x%02x%02x%02x%02x%02x%02x%02x%02x%02x%02x%02x%02x",
+ &hash[0], &hash[1], &hash[2], &hash[3],
+ &hash[4], &hash[5], &hash[6], &hash[7],
+ &hash[8], &hash[9], &hash[10], &hash[11],
+ &hash[12], &hash[13], &hash[14], &hash[15])
+ if n != 16 || err != nil {
+ hash = nil
+ }
+ return hash
+}
+
+// Create() returns a pointer to an FsCaBlobStore stored in the file system at
+// "rootName". If the directory rootName does not exist, it is created.
+func Create(ctx *context.T, rootName string) (fscabs *FsCaBlobStore, err error) {
+ dir := []string{tmpDir, casDir, chunkDir, blobDir}
+ for i := 0; i != len(dir) && err == nil; i++ {
+ fullName := filepath.Join(rootName, dir[i])
+ os.MkdirAll(fullName, dirPermissions)
+ var fi os.FileInfo
+ fi, err = os.Stat(fullName)
+ if err == nil && !fi.IsDir() {
+ err = verror.New(errNotADir, ctx, fullName)
+ }
+ }
+ var bm *blobmap.BlobMap
+ if err == nil {
+ bm, err = blobmap.New(ctx, filepath.Join(rootName, chunkDir))
+ }
+ if err == nil {
+ fscabs = new(FsCaBlobStore)
+ fscabs.rootName = rootName
+ fscabs.bm = bm
+ }
+ return fscabs, err
+}
+
+// Close() closes the FsCaBlobStore.
+func (fscabs *FsCaBlobStore) Close() error {
+ return fscabs.bm.Close()
+}
+
+// Root() returns the name of the root directory where *fscabs is stored.
+func (fscabs *FsCaBlobStore) Root() string {
+ return fscabs.rootName
+}
+
+// DeleteBlob() deletes the named blob from *fscabs.
+func (fscabs *FsCaBlobStore) DeleteBlob(ctx *context.T, blobName string) (err error) {
+ // Disallow deletions of things outside the blob tree, or that may contain "..".
+ // For simplicity, the code currently disallows '.'.
+ blobID := fileNameToHash(blobDir, blobName)
+ if blobID == nil || strings.IndexByte(blobName, '.') != -1 {
+ err = verror.New(errInvalidBlobName, ctx, blobName)
+ } else {
+ err = os.Remove(filepath.Join(fscabs.rootName, blobName))
+ if err != nil {
+ err = verror.New(errCantDeleteBlob, ctx, blobName, err)
+ } else {
+ err = fscabs.bm.DeleteBlob(ctx, blobID)
+ }
+ }
+ return err
+}
+
+// -----------------------------------------------------------
+
+// A file encapsulates both an os.File and a bufio.Writer on that file.
+type file struct {
+ fh *os.File
+ writer *bufio.Writer
+}
+
+// newFile() returns a *file containing fh and a bufio.Writer on that file, if
+// err is nil.
+func newFile(fh *os.File, err error) (*file, error) {
+ var f *file
+ if err == nil {
+ f = new(file)
+ f.fh = fh
+ f.writer = bufio.NewWriter(f.fh)
+ }
+ return f, err
+}
+
+// newTempFile() returns a *file on a new temporary file created in the
+// directory dir.
+func newTempFile(ctx *context.T, dir string) (*file, error) {
+ return newFile(ioutil.TempFile(dir, "newfile"))
+}
+
+// close() flushes buffers (if err==nil initially) and closes the file,
+// returning its name.
+func (f *file) close(ctx *context.T, err error) (string, error) {
+ name := f.fh.Name()
+ // Flush the data out to disc and close the file.
+ if err == nil {
+ err = f.writer.Flush()
+ }
+ if err == nil {
+ err = f.fh.Sync()
+ }
+ err2 := f.fh.Close()
+ if err == nil {
+ err = err2
+ }
+ return name, err
+}
+
+// closeAndRename() calls f.close(), and if err==nil initially and no new
+// errors are seen, renames the file to newName.
+func (f *file) closeAndRename(ctx *context.T, newName string, err error) error {
+ var oldName string
+ oldName, err = f.close(ctx, err)
+ if err == nil { // if temp file written successfully...
+ // Link or rename the file into place, hoping at least one is
+ // supported on this file system.
+ os.MkdirAll(filepath.Dir(newName), dirPermissions)
+ err = os.Link(oldName, newName)
+ if err == nil {
+ os.Remove(oldName)
+ } else {
+ err = os.Rename(oldName, newName)
+ }
+ }
+ if err != nil {
+ os.Remove(oldName)
+ }
+ return err
+}
+
+// -----------------------------------------------------------
+
+// addFragment() ensures that the store *fscabs contains a fragment comprising
+// the catenation of the byte vectors named by item[..].Block and the contents
+// of the files named by item[..].FileName. The Block field is ignored if
+// FileName!="". The fragment is not physically added if already present.
+// The fragment is added to the fragment list of the descriptor *desc.
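+// For example (a sketch; the values are illustrative, and FileName is
+// interpreted relative to the store's root directory), a caller might pass:
+// item := []localblobstore.BlockOrFile{
+//   {Block: []byte("some bytes")},
+//   {FileName: "cas/00/01/02/030405060708090a0b0c0d0e0f", Offset: 0, Size: 1024},
+// }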
+func (fscabs *FsCaBlobStore) addFragment(ctx *context.T, extHasher hash.Hash,
+ desc *blobDesc, item ...localblobstore.BlockOrFile) (fileName string, size int64, err error) {
+
+ hasher := md5.New()
+ var buf []byte
+ var fileHandleList []*os.File
+
+ // Hash the inputs.
+ for i := 0; i != len(item) && err == nil; i++ {
+ if len(item[i].FileName) != 0 {
+ if buf == nil {
+ buf = make([]byte, 8192, 8192)
+ fileHandleList = make([]*os.File, 0, len(item))
+ }
+ var fileHandle *os.File
+ fileHandle, err = os.Open(filepath.Join(fscabs.rootName, item[i].FileName))
+ if err == nil {
+ fileHandleList = append(fileHandleList, fileHandle)
+ at := item[i].Offset
+ toRead := item[i].Size
+ var haveRead int64
+ for err == nil && (toRead == -1 || haveRead < toRead) {
+ var n int
+ n, err = fileHandle.ReadAt(buf, at)
+ if err == nil {
+ if toRead != -1 && int64(n)+haveRead > toRead {
+ n = int(toRead - haveRead)
+ }
+ haveRead += int64(n)
+ at += int64(n)
+ size += int64(n)
+ hasher.Write(buf[0:n]) // Cannot fail; see Hash interface.
+ extHasher.Write(buf[0:n])
+ }
+ }
+ if err == io.EOF {
+ if toRead == -1 || haveRead == toRead {
+ err = nil // The loop read all that was asked; EOF is a possible outcome.
+ } else { // The loop read less than was asked; request must have been too big.
+ err = verror.New(errSizeTooBigForFragment, ctx, desc.name, item[i].FileName)
+ }
+ }
+ }
+ } else {
+ hasher.Write(item[i].Block) // Cannot fail; see Hash interface.
+ extHasher.Write(item[i].Block)
+ size += int64(len(item[i].Block))
+ }
+ }
+
+ // Compute the hash, and form the file name in the repository.
+ hash := hasher.Sum(nil)
+ relFileName := hashToFileName(casDir, hash)
+ absFileName := filepath.Join(fscabs.rootName, relFileName)
+
+ // Add the fragment's name to *desc's fragments so the garbage
+ // collector will not delete it.
+ fscabs.mu.Lock()
+ desc.fragment = append(desc.fragment, blobFragment{
+ pos: desc.size,
+ size: size,
+ offset: 0,
+ fileName: relFileName})
+ fscabs.mu.Unlock()
+
+ // If the file does not already exist, ...
+ if _, statErr := os.Stat(absFileName); err == nil && os.IsNotExist(statErr) {
+ // ... try to create it by writing to a temp file and renaming.
+ var t *file
+ t, err = newTempFile(ctx, filepath.Join(fscabs.rootName, tmpDir))
+ if err == nil {
+ // Copy the byte-sequences and input files to the temp file.
+ j := 0
+ for i := 0; i != len(item) && err == nil; i++ {
+ if len(item[i].FileName) != 0 {
+ at := item[i].Offset
+ toRead := item[i].Size
+ var haveRead int64
+ for err == nil && (toRead == -1 || haveRead < toRead) {
+ var n int
+ n, err = fileHandleList[j].ReadAt(buf, at)
+ if err == nil {
+ if toRead != -1 && int64(n)+haveRead > toRead {
+ n = int(toRead - haveRead)
+ }
+ haveRead += int64(n)
+ at += int64(n)
+ _, err = t.writer.Write(buf[0:n])
+ }
+ }
+ if err == io.EOF { // EOF is the expected outcome.
+ err = nil
+ }
+ j++
+ } else {
+ _, err = t.writer.Write(item[i].Block)
+ }
+ }
+ err = t.closeAndRename(ctx, absFileName, err)
+ }
+ } // else file already exists, nothing more to do.
+
+ for i := 0; i != len(fileHandleList); i++ {
+ fileHandleList[i].Close()
+ }
+
+ if err != nil {
+ err = verror.New(errAppendFailed, ctx, fscabs.rootName, err)
+ // Remove the entry added to fragment list above.
+ fscabs.mu.Lock()
+ desc.fragment = desc.fragment[0 : len(desc.fragment)-1]
+ fscabs.mu.Unlock()
+ } else { // commit the change by updating the size
+ fscabs.mu.Lock()
+ desc.size += size
+ desc.cv.Broadcast() // Tell blobmap BlobReader there's more to read.
+ fscabs.mu.Unlock()
+ }
+
+ return relFileName, size, err
+}
+
+// A blobFragment represents a vector of bytes and its position within a blob.
+type blobFragment struct {
+ pos int64 // position of this fragment within its containing blob.
+ size int64 // size of this fragment.
+ offset int64 // offset within fileName.
+ fileName string // name of file describing this fragment.
+}
+
+// A blobDesc is the in-memory representation of a blob.
+type blobDesc struct {
+ activeDescIndex int // Index into fscabs.activeDesc if refCount>0; under fscabs.mu.
+ refCount int // Reference count; under fscabs.mu.
+
+ name string // Name of the blob.
+
+ // The following fields are modified under fscabs.mu and in BlobWriter
+ // owner's thread; they may be read by GC (when obtained from
+ // fscabs.activeDesc) and the chunk writer under fscabs.mu. In the
+ // BlobWriter owner's thread, reading does not require a lock, but
+ // writing does. In other contexts (BlobReader, or a desc that has
+ // just been allocated by getBlob()), no locking is needed.
+
+ fragment []blobFragment // All the fragments in this blob.
+ size int64 // Total size of the blob.
+ finalized bool // Whether the blob has been finalized.
+ // A finalized blob has a valid hash field, and no new bytes may be added
+ // to it. A well-formed hash has 16 bytes.
+ hash []byte
+
+ openWriter bool // Whether this descriptor is being written by an open BlobWriter.
+ cv *sync.Cond // signalled when a BlobWriter writes or closes.
+}
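+// For illustration only: a blob assembled from two fragments of 10 and 5
+// bytes would carry
+// fragment[0] = blobFragment{pos: 0, size: 10, offset: 0, fileName: "cas/..."}
+// fragment[1] = blobFragment{pos: 10, size: 5, offset: 0, fileName: "cas/..."}
+// and size == 15; the fileName values shown are placeholders.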
+
+// isBeingDeleted() returns whether fragment fragName is about to be deleted
+// by the garbage collector. Requires fscabs.mu held.
+func (fscabs *FsCaBlobStore) isBeingDeleted(fragName string) (beingDeleted bool) {
+ for i := 0; i != len(fscabs.toDelete) && !beingDeleted; i++ {
+ _, beingDeleted = (*(fscabs.toDelete[i]))[fragName]
+ }
+ return beingDeleted
+}
+
+// descRef() increments the reference count of *desc and returns whether
+// successful. It may fail if the fragments referenced by the descriptor are
+// being deleted by the garbage collector.
+func (fscabs *FsCaBlobStore) descRef(desc *blobDesc) bool {
+ beingDeleted := false
+ fscabs.mu.Lock()
+ if desc.refCount == 0 {
+ // On the first reference, check whether the fragments are
+ // being deleted, and if not, add *desc to the
+ // fscabs.activeDesc vector.
+ for i := 0; i != len(desc.fragment) && !beingDeleted; i++ {
+ beingDeleted = fscabs.isBeingDeleted(desc.fragment[i].fileName)
+ }
+ if !beingDeleted {
+ desc.activeDescIndex = len(fscabs.activeDesc)
+ fscabs.activeDesc = append(fscabs.activeDesc, desc)
+ }
+ }
+ if !beingDeleted {
+ desc.refCount++
+ }
+ fscabs.mu.Unlock()
+ return !beingDeleted
+}
+
+// descUnref() decrements the reference count of *desc if desc!=nil; if that
+// removes the last reference, *desc is removed from the fscabs.activeDesc
+// vector.
+func (fscabs *FsCaBlobStore) descUnref(desc *blobDesc) {
+ if desc != nil {
+ fscabs.mu.Lock()
+ desc.refCount--
+ if desc.refCount < 0 {
+ panic("negative reference count")
+ } else if desc.refCount == 0 {
+ // Remove desc from fscabs.activeDesc by moving the
+ // last entry in fscabs.activeDesc to desc's slot.
+ n := len(fscabs.activeDesc)
+ lastDesc := fscabs.activeDesc[n-1]
+ lastDesc.activeDescIndex = desc.activeDescIndex
+ fscabs.activeDesc[desc.activeDescIndex] = lastDesc
+ fscabs.activeDesc = fscabs.activeDesc[0 : n-1]
+ desc.activeDescIndex = -1
+ }
+ fscabs.mu.Unlock()
+ }
+}
+
+// getBlob() returns the in-memory blob descriptor for the named blob.
+func (fscabs *FsCaBlobStore) getBlob(ctx *context.T, blobName string) (desc *blobDesc, err error) {
+ slashBlobName := filepath.ToSlash(blobName)
+ if !strings.HasPrefix(slashBlobName, blobDir+"/") || strings.IndexByte(blobName, '.') != -1 {
+ err = verror.New(errInvalidBlobName, ctx, blobName)
+ } else {
+ absBlobName := filepath.Join(fscabs.rootName, blobName)
+ var fh *os.File
+ fh, err = os.Open(absBlobName)
+ if err == nil {
+ var line string
+ desc = new(blobDesc)
+ desc.activeDescIndex = -1
+ desc.name = blobName
+ desc.cv = sync.NewCond(&fscabs.mu)
+ scanner := bufio.NewScanner(fh)
+ for scanner.Scan() {
+ line = scanner.Text() // Keep the raw line for error reporting below.
+ field := strings.Split(line, " ")
+ if len(field) == 4 && field[0] == "d" {
+ var fragSize int64
+ var fragOffset int64
+ fragSize, err = strconv.ParseInt(field[1], 0, 64)
+ if err == nil {
+ fragOffset, err = strconv.ParseInt(field[2], 0, 64)
+ }
+ if err == nil {
+ // No locking needed here because desc
+ // is newly allocated and not yet passed to descRef().
+ desc.fragment = append(desc.fragment,
+ blobFragment{
+ fileName: field[3],
+ pos: desc.size,
+ size: fragSize,
+ offset: fragOffset})
+ }
+ desc.size += fragSize
+ } else if len(field) == 2 && field[0] == "f" {
+ desc.hash = stringToHash(field[1])
+ desc.finalized = true
+ if desc.hash == nil {
+ err = verror.New(errMalformedBlobHash, ctx, blobName, field[1])
+ }
+ } else if len(field) > 0 && len(field[0]) == 1 && "a" <= field[0] && field[0] <= "z" {
+ // unrecognized line, reserved for extensions: ignore.
+ } else {
+ err = verror.New(errMalformedField, ctx, line)
+ }
+ }
+ err = scanner.Err()
+ fh.Close()
+ }
+ }
+ // Ensure that we return either a properly referenced desc, or nil.
+ if err != nil {
+ desc = nil
+ } else if !fscabs.descRef(desc) {
+ err = verror.New(errBlobDeleted, ctx, blobName)
+ desc = nil
+ }
+ return desc, err
+}
+
+// -----------------------------------------------------------
+
+// A BlobWriter allows a blob to be written. If a blob has not yet been
+// finalized, it also allows that blob to be extended. A BlobWriter may be
+// created with NewBlobWriter(), and should be closed with Close() or
+// CloseWithoutFinalize().
+type BlobWriter struct {
+ // The BlobWriter exists within a particular FsCaBlobStore and context.T
+ fscabs *FsCaBlobStore
+ ctx *context.T
+
+ desc *blobDesc // Description of the blob being written.
+ f *file // The file being written.
+ hasher hash.Hash // Running hash of blob.
+
+ // Fields to allow the BlobMap to be written.
+ csBr *BlobReader // Reader over the blob that's currently being written.
+ cs *chunker.Stream // Stream of chunks derived from csBr
+ csErr chan error // writeBlobMap() sends its result here; Close/CloseWithoutFinalize receives it.
+}
+
+// NewBlobWriter() returns a pointer to a newly allocated BlobWriter on
+// a newly created blob. If "name" is non-empty, it is used to name
+// the blob, and it must be in the format of a name returned by this
+// interface (probably by another instance on another device).
+// Otherwise, a new name is created, which can be found using
+// the Name() method. It is an error to attempt to overwrite a blob
+// that already exists in this blob store. BlobWriters should not be
+// used concurrently by multiple threads. The returned handle should
+// be closed with either the Close() or CloseWithoutFinalize() method
+// to avoid leaking file handles.
+func (fscabs *FsCaBlobStore) NewBlobWriter(ctx *context.T, name string) (localblobstore.BlobWriter, error) {
+ var bw *BlobWriter
+ if name == "" {
+ name = newBlobName()
+ }
+ fileName := filepath.Join(fscabs.rootName, name)
+ os.MkdirAll(filepath.Dir(fileName), dirPermissions)
+ f, err := newFile(os.OpenFile(fileName, os.O_RDWR|os.O_CREATE|os.O_EXCL, filePermissions))
+ if err == nil {
+ bw = new(BlobWriter)
+ bw.fscabs = fscabs
+ bw.ctx = ctx
+ bw.desc = new(blobDesc)
+ bw.desc.activeDescIndex = -1
+ bw.desc.name = name
+ bw.desc.cv = sync.NewCond(&fscabs.mu)
+ bw.desc.openWriter = true
+ bw.f = f
+ bw.hasher = md5.New()
+ if !fscabs.descRef(bw.desc) {
+ // Can't happen; descriptor refers to no fragments.
+ panic(verror.New(errBlobDeleted, ctx, bw.desc.name))
+ }
+ // Write the chunks of this blob into the BlobMap, as they are
+ // written by this writer.
+ bw.forkWriteBlobMap()
+ }
+ return bw, err
+}
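+// Expected use (a sketch; error handling elided, and "hello" is just an
+// arbitrary payload):
+// bw, err := fscabs.NewBlobWriter(ctx, "")
+// if err == nil {
+// err = bw.AppendFragment(localblobstore.BlockOrFile{Block: []byte("hello")})
+// }
+// if err == nil {
+// err = bw.Close() // Finalizes the blob; bw.Name() identifies it thereafter.
+// }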
+
+// ResumeBlobWriter() returns a pointer to a newly allocated BlobWriter for an
+// existing, but not yet finalized, blob with the given name.
+func (fscabs *FsCaBlobStore) ResumeBlobWriter(ctx *context.T, blobName string) (localblobstore.BlobWriter, error) {
+ var err error
+ var bw *BlobWriter
+ var desc *blobDesc
+ desc, err = fscabs.getBlob(ctx, blobName)
+ if err == nil && desc.finalized {
+ err = verror.New(errBlobAlreadyFinalized, ctx, blobName)
+ } else if err == nil {
+ bw = new(BlobWriter)
+ bw.fscabs = fscabs
+ bw.ctx = ctx
+ bw.desc = desc
+ bw.desc.openWriter = true
+ fileName := filepath.Join(fscabs.rootName, bw.desc.name)
+ bw.f, err = newFile(os.OpenFile(fileName, os.O_WRONLY|os.O_APPEND, 0666))
+ bw.hasher = md5.New()
+ // Add the existing fragments to the running hash.
+ // The descRef's ref count is incremented here to compensate
+ // for the decrement it will receive in br.Close(), below.
+ if !fscabs.descRef(bw.desc) {
+ // Can't happen; descriptor's ref count was already
+ // non-zero.
+ panic(verror.New(errBlobDeleted, ctx, fileName))
+ }
+ br := fscabs.blobReaderFromDesc(ctx, bw.desc, dontWaitForWriter)
+ buf := make([]byte, 8192, 8192)
+ for err == nil {
+ var n int
+ n, err = br.Read(buf)
+ bw.hasher.Write(buf[0:n])
+ }
+ br.Close()
+ if err == io.EOF { // EOF is expected.
+ err = nil
+ }
+ if err == nil {
+ // Write the chunks of this blob into the BlobMap, as
+ // they are written by this writer.
+ bw.forkWriteBlobMap()
+ }
+ }
+ return bw, err
+}
+
+// forkWriteBlobMap() creates a new thread to run writeBlobMap(). It adds
+// the chunks written to *bw to the blob store's BlobMap. The caller is
+// expected to call joinWriteBlobMap() at some later point.
+func (bw *BlobWriter) forkWriteBlobMap() {
+ // The descRef's ref count is incremented here to compensate
+ // for the decrement it will receive in br.Close() in joinWriteBlobMap.
+ if !bw.fscabs.descRef(bw.desc) {
+ // Can't happen; descriptor's ref count was already non-zero.
+ panic(verror.New(errBlobDeleted, bw.ctx, bw.desc.name))
+ }
+ bw.csBr = bw.fscabs.blobReaderFromDesc(bw.ctx, bw.desc, waitForWriter)
+ bw.cs = chunker.NewStream(bw.ctx, &chunker.DefaultParam, bw.csBr)
+ bw.csErr = make(chan error)
+ go bw.writeBlobMap()
+}
+
+// insertChunk() inserts chunk into the blob store's BlobMap, associating it
+// with the specified byte offset in the blob blobID being written by *bw. The byte
+// offset of the next chunk is returned.
+func (bw *BlobWriter) insertChunk(blobID []byte, chunkHash []byte, offset int64, size int64) (int64, error) {
+ err := bw.fscabs.bm.AssociateChunkWithLocation(bw.ctx, chunkHash[:],
+ blobmap.Location{BlobID: blobID, Offset: offset, Size: size})
+ if err != nil {
+ bw.cs.Cancel()
+ }
+ return offset + size, err
+}
+
+// writeBlobMap() iterates over the chunks in stream bw.cs, and associates each
+// one with the blob being written.
+func (bw *BlobWriter) writeBlobMap() {
+ var err error
+ var offset int64
+ blobID := fileNameToHash(blobDir, bw.desc.name)
+ // Associate each chunk only after the next chunk has been seen (or
+ // the blob finalized), to avoid recording an artificially short chunk
+ // at the end of a partial transfer.
+ var chunkHash [md5.Size]byte
+ var chunkLen int64
+ if bw.cs.Advance() {
+ chunk := bw.cs.Value()
+ // Record the hash and size, since chunk's underlying buffer
+ // may be reused by the next call to Advance().
+ chunkHash = md5.Sum(chunk)
+ chunkLen = int64(len(chunk))
+ for bw.cs.Advance() {
+ offset, err = bw.insertChunk(blobID, chunkHash[:], offset, chunkLen)
+ chunk = bw.cs.Value()
+ chunkHash = md5.Sum(chunk)
+ chunkLen = int64(len(chunk))
+ }
+ }
+ if err == nil {
+ err = bw.cs.Err()
+ }
+ bw.fscabs.mu.Lock()
+ if err == nil && chunkLen != 0 && bw.desc.finalized {
+ offset, err = bw.insertChunk(blobID, chunkHash[:], offset, chunkLen)
+ }
+ bw.fscabs.mu.Unlock()
+ bw.csErr <- err // wake joinWriteBlobMap()
+}
+
+// joinWriteBlobMap waits for the completion of the thread forked by forkWriteBlobMap().
+// It returns when the chunks in the blob have been written to the blob store's BlobMap.
+func (bw *BlobWriter) joinWriteBlobMap(err error) error {
+ err2 := <-bw.csErr // read error from end of writeBlobMap()
+ if err == nil {
+ err = err2
+ }
+ bw.csBr.Close()
+ return err
+}
+
+// Close() finalizes *bw, and indicates that the client will perform no further
+// append operations on *bw. Any internal open file handles are closed.
+func (bw *BlobWriter) Close() (err error) {
+ if bw.f == nil {
+ err = verror.New(errAlreadyClosed, bw.ctx, bw.desc.name)
+ } else if bw.desc.finalized {
+ err = verror.New(errBlobAlreadyFinalized, bw.ctx, bw.desc.name)
+ } else {
+ h := bw.hasher.Sum(nil)
+ _, err = fmt.Fprintf(bw.f.writer, "f %s\n", hashToString(h)) // finalize
+ _, err = bw.f.close(bw.ctx, err)
+ bw.f = nil
+ bw.fscabs.mu.Lock()
+ bw.desc.finalized = true
+ bw.desc.openWriter = false
+ bw.desc.cv.Broadcast() // Tell blobmap BlobReader that writing has ceased.
+ bw.fscabs.mu.Unlock()
+ err = bw.joinWriteBlobMap(err)
+ bw.fscabs.descUnref(bw.desc)
+ }
+ return err
+}
+
+// CloseWithoutFinalize() indicates that the client will perform no further
+// append operations on *bw, but does not finalize the blob. Any internal open
+// file handles are closed. Clients are expected to need this operation
+// infrequently.
+func (bw *BlobWriter) CloseWithoutFinalize() (err error) {
+ if bw.f == nil {
+ err = verror.New(errAlreadyClosed, bw.ctx, bw.desc.name)
+ } else {
+ bw.fscabs.mu.Lock()
+ bw.desc.openWriter = false
+ bw.desc.cv.Broadcast() // Tell blobmap BlobReader that writing has ceased.
+ bw.fscabs.mu.Unlock()
+ _, err = bw.f.close(bw.ctx, err)
+ bw.f = nil
+ err = bw.joinWriteBlobMap(err)
+ bw.fscabs.descUnref(bw.desc)
+ }
+ return err
+}
+
+// AppendFragment() appends a fragment to the blob being written by *bw, where
+// the fragment is composed of the byte vectors described by the elements of
+// item[]. The fragment is copied into the blob store.
+func (bw *BlobWriter) AppendFragment(item ...localblobstore.BlockOrFile) (err error) {
+ if bw.f == nil {
+ panic("fs_cablobstore.BlobWriter programming error: AppendFragment() after Close()")
+ }
+ var fragmentName string
+ var size int64
+ fragmentName, size, err = bw.fscabs.addFragment(bw.ctx, bw.hasher, bw.desc, item...)
+ if err == nil {
+ _, err = fmt.Fprintf(bw.f.writer, "d %d %d %s\n", size, 0 /*offset*/, fragmentName)
+ }
+ if err == nil {
+ err = bw.f.writer.Flush()
+ }
+ return err
+}
+
+// AppendBlob() adds a (substring of a) pre-existing blob to the blob being
+// written by *bw. The fragments of the pre-existing blob are not physically
+// copied; they are referenced by both blobs.
+func (bw *BlobWriter) AppendBlob(blobName string, size int64, offset int64) (err error) {
+ if bw.f == nil {
+ panic("fs_cablobstore.BlobWriter programming error: AppendBlob() after Close()")
+ }
+ var desc *blobDesc
+ desc, err = bw.fscabs.getBlob(bw.ctx, blobName)
+ origSize := bw.desc.size
+ if err == nil {
+ if size == -1 {
+ size = desc.size - offset
+ }
+ if offset < 0 || desc.size < offset+size {
+ err = verror.New(errBadSizeOrOffset, bw.ctx, size, offset, blobName, desc.size)
+ }
+ for i := 0; i != len(desc.fragment) && err == nil && size > 0; i++ {
+ if desc.fragment[i].size <= offset {
+ offset -= desc.fragment[i].size
+ } else {
+ consume := desc.fragment[i].size - offset
+ if size < consume {
+ consume = size
+ }
+ _, err = fmt.Fprintf(bw.f.writer, "d %d %d %s\n",
+ consume, offset+desc.fragment[i].offset, desc.fragment[i].fileName)
+ if err == nil {
+ // Add fragment so garbage collector can see it.
+ // The garbage collector cannot be
+ // about to delete the fragment, because
+ // getBlob() already checked for that
+ // above, and kept a reference.
+ bw.fscabs.mu.Lock()
+ bw.desc.fragment = append(bw.desc.fragment, blobFragment{
+ pos: bw.desc.size,
+ size: consume,
+ offset: offset + desc.fragment[i].offset,
+ fileName: desc.fragment[i].fileName})
+ bw.desc.size += consume
+ bw.desc.cv.Broadcast() // Tell blobmap BlobReader there's more to read.
+ bw.fscabs.mu.Unlock()
+ }
+ offset = 0
+ size -= consume
+ }
+ }
+ bw.fscabs.descUnref(desc)
+ // Add the new fragments to the running hash.
+ if !bw.fscabs.descRef(bw.desc) {
+ // Can't happen; descriptor's ref count was already
+ // non-zero.
+ panic(verror.New(errBlobDeleted, bw.ctx, blobName))
+ }
+ br := bw.fscabs.blobReaderFromDesc(bw.ctx, bw.desc, dontWaitForWriter)
+ if err == nil {
+ _, err = br.Seek(origSize, 0)
+ }
+ buf := make([]byte, 8192, 8192)
+ for err == nil {
+ var n int
+ n, err = br.Read(buf)
+ bw.hasher.Write(buf[0:n]) // Cannot fail; see Hash interface.
+ }
+ br.Close()
+ if err == io.EOF { // EOF is expected.
+ err = nil
+ }
+ if err == nil {
+ err = bw.f.writer.Flush()
+ }
+ }
+ return err
+}
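+// Sketch of use (existingBlobName is a placeholder for a blob already in this
+// store; a size of -1 means "the rest of the blob"):
+// bw, err := fscabs.NewBlobWriter(ctx, "")
+// if err == nil {
+// // Reference all of the existing blob's bytes without copying its fragments.
+// err = bw.AppendBlob(existingBlobName, -1, 0)
+// }
+// if err == nil {
+// err = bw.Close()
+// }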
+
+// IsFinalized() returns whether *bw has been finalized.
+func (bw *BlobWriter) IsFinalized() bool {
+ return bw.desc.finalized
+}
+
+// Size() returns *bw's size.
+func (bw *BlobWriter) Size() int64 {
+ return bw.desc.size
+}
+
+// Name() returns *bw's name.
+func (bw *BlobWriter) Name() string {
+ return bw.desc.name
+}
+
+// Hash() returns *bw's hash, reflecting the bytes written so far.
+func (bw *BlobWriter) Hash() []byte {
+ return bw.hasher.Sum(nil)
+}
+
+// -----------------------------------------------------------
+
+// A BlobReader allows a blob to be read using the standard ReadAt(), Read(),
+// and Seek() calls. A BlobReader can be created with NewBlobReader(), and
+// should be closed with the Close() method to avoid leaking file handles.
+type BlobReader struct {
+ // The BlobReader exists within a particular FsCaBlobStore and context.T.
+ fscabs *FsCaBlobStore
+ ctx *context.T
+
+ desc *blobDesc // A description of the blob being read.
+ waitForWriter bool // whether this reader should wait for a concurrent BlobWriter
+
+ pos int64 // The next position we will read from (used by Read/Seek, not ReadAt).
+
+ // The fields below represent a cached open fragment desc.fragment[fragmentIndex].
+ fragmentIndex int // -1 or 0 <= fragmentIndex < len(desc.fragment).
+ fh *os.File // non-nil iff fragmentIndex != -1.
+}
+
+// constants to make calls to blobReaderFromDesc() more readable
+const (
+ dontWaitForWriter = false
+ waitForWriter = true
+)
+
+// blobReaderFromDesc() returns a pointer to a newly allocated BlobReader given
+// a pre-existing blobDesc. If waitForWriter is true, the reader will wait for
+// any BlobWriter to finish writing the part of the blob the reader is trying
+// to read.
+func (fscabs *FsCaBlobStore) blobReaderFromDesc(ctx *context.T, desc *blobDesc, waitForWriter bool) *BlobReader {
+ br := new(BlobReader)
+ br.fscabs = fscabs
+ br.ctx = ctx
+ br.fragmentIndex = -1
+ br.desc = desc
+ br.waitForWriter = waitForWriter
+ return br
+}
+
+// NewBlobReader() returns a pointer to a newly allocated BlobReader on the
+// specified blobName. BlobReaders should not be used concurrently by multiple
+// threads. Returned handles should be closed with Close().
+func (fscabs *FsCaBlobStore) NewBlobReader(ctx *context.T, blobName string) (br localblobstore.BlobReader, err error) {
+ var desc *blobDesc
+ desc, err = fscabs.getBlob(ctx, blobName)
+ if err == nil {
+ br = fscabs.blobReaderFromDesc(ctx, desc, dontWaitForWriter)
+ }
+ return br, err
+}
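+// Expected use (a sketch; error handling abbreviated):
+// br, err := fscabs.NewBlobReader(ctx, blobName)
+// if err == nil {
+// buf := make([]byte, 8192)
+// for err == nil {
+// var n int
+// n, err = br.Read(buf)
+// // Process buf[0:n] here.
+// }
+// if err == io.EOF {
+// err = nil // EOF is the expected outcome.
+// }
+// br.Close()
+// }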
+
+// closeInternal() closes any open file handles within *br.
+func (br *BlobReader) closeInternal() {
+ if br.fh != nil {
+ br.fh.Close()
+ br.fh = nil
+ }
+ br.fragmentIndex = -1
+}
+
+// Close() indicates that the client will perform no further operations on *br.
+// It closes any open file handles within a BlobReader.
+func (br *BlobReader) Close() error {
+ br.closeInternal()
+ br.fscabs.descUnref(br.desc)
+ return nil
+}
+
+// findFragment() returns the index of the first element of fragment[] that may
+// contain "offset", based on the "pos" fields of each element.
+// Requires that fragment[] be sorted on the "pos" fields of the elements.
+func findFragment(fragment []blobFragment, offset int64) int {
+ lo := 0
+ hi := len(fragment)
+ for lo < hi {
+ mid := (lo + hi) >> 1
+ if offset < fragment[mid].pos {
+ hi = mid
+ } else {
+ lo = mid + 1
+ }
+ }
+ if lo > 0 {
+ lo--
+ }
+ return lo
+}
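+// For example (illustrative values): with fragments at pos 0, 10, and 25,
+// findFragment(fragment, 12) returns 1, because fragment[1] (pos 10) is the
+// last fragment that starts at or before offset 12.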
+
+// waitUntilAvailable() waits until position pos within *br is available for
+// reading, if this reader is waiting for writers. The position is available if:
+// - *br is on an already written blob, or
+// - *br is on a blob being written whose writer has been closed, or whose
+// writes have passed position pos.
+// The value pos==math.MaxInt64 can be used to mean "until the writer is closed".
+// Requires br.fscabs.mu held.
+func (br *BlobReader) waitUntilAvailable(pos int64) {
+ for br.waitForWriter && br.desc.openWriter && br.desc.size < pos {
+ br.desc.cv.Wait()
+ }
+}
+
+// ReadAt() fills b[] with up to len(b) bytes of data starting at position "at"
+// within the blob that *br indicates, and returns the number of bytes read.
+func (br *BlobReader) ReadAt(b []byte, at int64) (n int, err error) {
+ br.fscabs.mu.Lock()
+ br.waitUntilAvailable(at + int64(len(b)))
+ i := findFragment(br.desc.fragment, at)
+ if i < len(br.desc.fragment) && at <= br.desc.size {
+ fragmenti := br.desc.fragment[i] // copy fragment data to allow releasing lock
+ br.fscabs.mu.Unlock()
+ if i != br.fragmentIndex {
+ br.closeInternal()
+ }
+ if br.fragmentIndex == -1 {
+ br.fh, err = os.Open(filepath.Join(br.fscabs.rootName, fragmenti.fileName))
+ if err == nil {
+ br.fragmentIndex = i
+ } else {
+ br.closeInternal()
+ }
+ }
+ var offset int64 = at - fragmenti.pos + fragmenti.offset
+ consume := fragmenti.size - (at - fragmenti.pos)
+ if int64(len(b)) < consume {
+ consume = int64(len(b))
+ }
+ if br.fh != nil {
+ n, err = br.fh.ReadAt(b[0:consume], offset)
+ } else if err == nil {
+ panic("failed to open blob fragment")
+ }
+ br.fscabs.mu.Lock()
+ // Return io.EOF if the Read reached the end of the last
+ // fragment, but not if it's merely the end of some interior
+ // fragment or the blob is still being extended.
+ if int64(n)+at >= br.desc.size && !(br.waitForWriter && br.desc.openWriter) {
+ if err == nil {
+ err = io.EOF
+ }
+ } else if err == io.EOF {
+ err = nil
+ }
+ } else if at == br.desc.size { // Reading at the end of the file, past the last fragment.
+ err = io.EOF
+ } else {
+ err = verror.New(errIllegalPositionForRead, br.ctx, br.pos, br.desc.size)
+ }
+ br.fscabs.mu.Unlock()
+ return n, err
+}
+
+// Read() fills b[] with up to len(b) bytes of data starting at the current
+// seek position of *br within the blob that *br indicates, and then both
+// returns the number of bytes read and advances *br's seek position by that
+// amount.
+func (br *BlobReader) Read(b []byte) (n int, err error) {
+ n, err = br.ReadAt(b, br.pos)
+ if err == nil {
+ br.pos += int64(n)
+ }
+ return n, err
+}
+
+// Seek() sets the seek position of *br to offset if whence==0,
+// offset+current_seek_position if whence==1, and offset+end_of_blob if
+// whence==2, and then returns the current seek position.
+func (br *BlobReader) Seek(offset int64, whence int) (result int64, err error) {
+ br.fscabs.mu.Lock()
+ if whence == 0 {
+ result = offset
+ } else if whence == 1 {
+ result = offset + br.pos
+ } else if whence == 2 {
+ br.waitUntilAvailable(math.MaxInt64)
+ result = offset + br.desc.size
+ } else {
+ err = verror.New(errBadSeekWhence, br.ctx, whence)
+ result = br.pos
+ }
+ if result < 0 {
+ err = verror.New(errNegativeSeekPosition, br.ctx, offset, whence)
+ result = br.pos
+ } else if result > br.desc.size {
+ err = verror.New(errIllegalPositionForRead, br.ctx, result, br.desc.size)
+ result = br.pos
+ } else if err == nil {
+ br.pos = result
+ }
+ br.fscabs.mu.Unlock()
+ return result, err
+}
+
+// IsFinalized() returns whether *br has been finalized.
+func (br *BlobReader) IsFinalized() bool {
+ br.fscabs.mu.Lock()
+ br.waitUntilAvailable(math.MaxInt64)
+ finalized := br.desc.finalized
+ br.fscabs.mu.Unlock()
+ return finalized
+}
+
+// Size() returns *br's size.
+func (br *BlobReader) Size() int64 {
+ br.fscabs.mu.Lock()
+ br.waitUntilAvailable(math.MaxInt64)
+ size := br.desc.size
+ br.fscabs.mu.Unlock()
+ return size
+}
+
+// Name() returns *br's name.
+func (br *BlobReader) Name() string {
+ return br.desc.name
+}
+
+// Hash() returns *br's hash. It may be nil if the blob is not finalized.
+func (br *BlobReader) Hash() []byte {
+ br.fscabs.mu.Lock()
+ br.waitUntilAvailable(math.MaxInt64)
+ hash := br.desc.hash
+ br.fscabs.mu.Unlock()
+ return hash
+}
+
+// -----------------------------------------------------------
+
+// A dirListing is a list of names in a directory, plus a position, which
+// indexes the last item in nameList that has been processed.
+type dirListing struct {
+ pos int // Current position in nameList; may be -1 at the start of iteration.
+ nameList []string // List of directory entries.
+}
+
+// An FsCasIter represents an iterator that allows the client to enumerate all
+// the blobs or fragments in a FsCaBlobStore.
+type FsCasIter struct {
+ fscabs *FsCaBlobStore // The parent FsCaBlobStore.
+ err error // If non-nil, the error that terminated iteration.
+ stack []dirListing // The stack of dirListings leading to the current entry.
+ ctx *context.T // context passed to ListBlobIds() or ListCAIds()
+
+ mu sync.Mutex // Protects cancelled.
+ cancelled bool // Whether Cancel() has been called.
+}
+
+// ListBlobIds() returns an iterator that can be used to enumerate the blobs in
+// an FsCaBlobStore. Expected use is:
+// fscabsi := fscabs.ListBlobIds(ctx)
+// for fscabsi.Advance() {
+// // Process fscabsi.Value() here.
+// }
+// if fscabsi.Err() != nil {
+// // The loop terminated early due to an error.
+// }
+func (fscabs *FsCaBlobStore) ListBlobIds(ctx *context.T) localblobstore.Stream {
+ stack := make([]dirListing, 1)
+ stack[0] = dirListing{pos: -1, nameList: []string{blobDir}}
+ return &FsCasIter{fscabs: fscabs, stack: stack, ctx: ctx}
+}
+
+// ListCAIds() returns an iterator that can be used to enumerate the
+// content-addressable fragments in an FsCaBlobStore.
+// Expected use is:
+// fscabsi := fscabs.ListCAIds(ctx)
+// for fscabsi.Advance() {
+// // Process fscabsi.Value() here.
+// }
+// if fscabsi.Err() != nil {
+// // The loop terminated early due to an error.
+// }
+func (fscabs *FsCaBlobStore) ListCAIds(ctx *context.T) localblobstore.Stream {
+ stack := make([]dirListing, 1)
+ stack[0] = dirListing{pos: -1, nameList: []string{casDir}}
+ return &FsCasIter{fscabs: fscabs, stack: stack, ctx: ctx}
+}
+
+// isCancelled() returns whether Cancel() has been called.
+func (fscabsi *FsCasIter) isCancelled() bool {
+ fscabsi.mu.Lock()
+ cancelled := fscabsi.cancelled
+ fscabsi.mu.Unlock()
+ return cancelled
+}
+
+// Advance() stages an item so that it may be retrieved via Value. Returns
+// true iff there is an item to retrieve. Advance must be called before Value
+// is called.
+func (fscabsi *FsCasIter) Advance() (advanced bool) {
+ stack := fscabsi.stack
+ err := fscabsi.err
+
+ for err == nil && !advanced && len(stack) != 0 && !fscabsi.isCancelled() {
+ last := len(stack) - 1
+ stack[last].pos++
+ if stack[last].pos == len(stack[last].nameList) {
+ stack = stack[0:last]
+ fscabsi.stack = stack
+ } else {
+ fullName := filepath.Join(fscabsi.fscabs.rootName, fscabsi.Value())
+ var fi os.FileInfo
+ fi, err = os.Lstat(fullName)
+ if err != nil {
+ // error: nothing to do
+ } else if fi.IsDir() {
+ var dirHandle *os.File
+ dirHandle, err = os.Open(fullName)
+ if err == nil {
+ var nameList []string
+ nameList, err = dirHandle.Readdirnames(0)
+ dirHandle.Close()
+ stack = append(stack, dirListing{pos: -1, nameList: nameList})
+ fscabsi.stack = stack
+ last = len(stack) - 1
+ }
+ } else {
+ advanced = true
+ }
+ }
+ }
+
+ if fscabsi.isCancelled() {
+ if err == nil {
+ fscabsi.err = verror.New(errStreamCancelled, fscabsi.ctx)
+ }
+ advanced = false
+ }
+
+ fscabsi.err = err
+ return advanced
+}
+
+// Value() returns the item that was staged by Advance. May panic if Advance
+// returned false or was not called. Never blocks.
+func (fscabsi *FsCasIter) Value() (name string) {
+ stack := fscabsi.stack
+ if fscabsi.err == nil && len(stack) != 0 && stack[0].pos >= 0 {
+ name = stack[0].nameList[stack[0].pos]
+ for i := 1; i != len(stack); i++ {
+ name = filepath.Join(name, stack[i].nameList[stack[i].pos])
+ }
+ }
+ return name
+}
+
+// Err() returns any error encountered by Advance. Never blocks.
+func (fscabsi *FsCasIter) Err() error {
+ return fscabsi.err
+}
+
+// Cancel() indicates that the iteration stream should terminate early.
+// Never blocks. May be called concurrently with other methods on fscabsi.
+func (fscabsi *FsCasIter) Cancel() {
+ fscabsi.mu.Lock()
+ fscabsi.cancelled = true
+ fscabsi.mu.Unlock()
+}
+
+// -----------------------------------------------------------
+
+// An errorChunkStream is a localblobstore.ChunkStream that yields an error.
+type errorChunkStream struct {
+ err error
+}
+
+func (*errorChunkStream) Advance() bool { return false }
+func (*errorChunkStream) Value([]byte) []byte { return nil }
+func (ecs *errorChunkStream) Err() error { return ecs.err }
+func (*errorChunkStream) Cancel() {}
+
+// BlobChunkStream() returns a ChunkStream that can be used to read the ordered
+// list of content hashes of chunks in blob blobName. It is expected that this
+// list will be presented to RecipeStreamFromChunkStream() on another device, to create a
+// recipe for transmitting the blob efficiently to that other device.
+func (fscabs *FsCaBlobStore) BlobChunkStream(ctx *context.T, blobName string) (cs localblobstore.ChunkStream) {
+ blobID := fileNameToHash(blobDir, blobName)
+ if blobID == nil {
+ cs = &errorChunkStream{err: verror.New(errInvalidBlobName, ctx, blobName)}
+ } else {
+ cs = fscabs.bm.NewChunkStream(ctx, blobID)
+ }
+ return cs
+}
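+// Expected use (a sketch):
+// var buf [md5.Size]byte
+// cs := fscabs.BlobChunkStream(ctx, blobName)
+// for cs.Advance() {
+// chunkHash := cs.Value(buf[:])
+// // Send chunkHash to the other device here.
+// }
+// if cs.Err() != nil {
+// // The stream terminated early due to an error.
+// }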
+
+// -----------------------------------------------------------
+
+// LookupChunk returns the location of a chunk with the specified chunk hash
+// within the store.
+func (fscabs *FsCaBlobStore) LookupChunk(ctx *context.T, chunkHash []byte) (loc localblobstore.Location, err error) {
+ var chunkMapLoc blobmap.Location
+ chunkMapLoc, err = fscabs.bm.LookupChunk(ctx, chunkHash)
+ if err == nil {
+ loc.BlobName = hashToFileName(blobDir, chunkMapLoc.BlobID)
+ loc.Size = chunkMapLoc.Size
+ loc.Offset = chunkMapLoc.Offset
+ }
+ return loc, err
+}
+
+// -----------------------------------------------------------
+
+// A RecipeStream implements localblobstore.RecipeStream. It allows the client
+// to iterate over the recipe steps to recreate a blob identified by a stream
+// of chunk hashes (from chunkStream), but using parts of blobs in the current
+// blob store where possible.
+type RecipeStream struct {
+ fscabs *FsCaBlobStore
+ ctx *context.T
+
+ chunkStream localblobstore.ChunkStream // the stream of chunks in the blob
+ pendingChunkBuf [16]byte // a buffer for pendingChunk
+ pendingChunk []byte // the last unprocessed chunk hash read from chunkStream, or nil if none
+ step localblobstore.RecipeStep // the recipe step to be returned by Value()
+ mu sync.Mutex // protects cancelled
+ cancelled bool // whether Cancel() has been called
+}
+
+// RecipeStreamFromChunkStream() returns a pointer to a RecipeStream that allows
+// the client to iterate over each RecipeStep needed to create the blob formed
+// by the chunks in chunkStream.
+func (fscabs *FsCaBlobStore) RecipeStreamFromChunkStream(ctx *context.T, chunkStream localblobstore.ChunkStream) localblobstore.RecipeStream {
+ rs := new(RecipeStream)
+ rs.fscabs = fscabs
+ rs.ctx = ctx
+ rs.chunkStream = chunkStream
+ return rs
+}
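+// Expected use (a sketch; chunkStream would typically carry the chunk hashes
+// produced by BlobChunkStream() on another device):
+// rs := fscabs.RecipeStreamFromChunkStream(ctx, chunkStream)
+// for rs.Advance() {
+// step := rs.Value()
+// if step.Chunk == nil {
+// // The bytes are already present locally in blob step.Blob at
+// // [step.Offset, step.Offset+step.Size).
+// } else {
+// // The chunk with hash step.Chunk must be fetched from the sender.
+// }
+// }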
+
+// isCancelled() returns whether rs.Cancel() has been called.
+func (rs *RecipeStream) isCancelled() bool {
+ rs.mu.Lock()
+ cancelled := rs.cancelled
+ rs.mu.Unlock()
+ return cancelled
+}
+
+// Advance() stages an item so that it may be retrieved via Value().
+// Returns true iff there is an item to retrieve. Advance() must be
+// called before Value() is called. The caller is expected to read
+// until Advance() returns false, or to call Cancel().
+func (rs *RecipeStream) Advance() (ok bool) {
+ if rs.pendingChunk == nil && rs.chunkStream.Advance() {
+ rs.pendingChunk = rs.chunkStream.Value(rs.pendingChunkBuf[:])
+ }
+ for !ok && rs.pendingChunk != nil && !rs.isCancelled() {
+ var err error
+ var loc0 blobmap.Location
+ loc0, err = rs.fscabs.bm.LookupChunk(rs.ctx, rs.pendingChunk)
+ if err == nil {
+ blobName := hashToFileName(blobDir, loc0.BlobID)
+ var blobDesc *blobDesc
+ if blobDesc, err = rs.fscabs.getBlob(rs.ctx, blobName); err != nil {
+ // The BlobMap contained a reference to a
+ // deleted blob. Delete the reference in the
+ // BlobMap; the next loop iteration will
+ // consider the chunk again.
+ rs.fscabs.bm.DeleteBlob(rs.ctx, loc0.BlobID)
+ } else {
+ rs.fscabs.descUnref(blobDesc)
+ // The chunk is in a known blob. Combine
+ // contiguous chunks into a single recipe
+ // entry.
+ rs.pendingChunk = nil // consumed
+ for rs.pendingChunk == nil && rs.chunkStream.Advance() {
+ rs.pendingChunk = rs.chunkStream.Value(rs.pendingChunkBuf[:])
+ var loc blobmap.Location
+ loc, err = rs.fscabs.bm.LookupChunk(rs.ctx, rs.pendingChunk)
+ if err == nil && bytes.Compare(loc0.BlobID, loc.BlobID) == 0 && loc.Offset == loc0.Offset+loc0.Size {
+ loc0.Size += loc.Size
+ rs.pendingChunk = nil // consumed
+ }
+ }
+ rs.step = localblobstore.RecipeStep{Blob: blobName, Offset: loc0.Offset, Size: loc0.Size}
+ ok = true
+ }
+ } else { // The chunk is not in the BlobMap; yield a single chunk hash.
+ rs.step = localblobstore.RecipeStep{Chunk: rs.pendingChunk}
+ rs.pendingChunk = nil // consumed
+ ok = true
+ }
+ }
+ return ok && !rs.isCancelled()
+}
+
+// Value() returns the item that was staged by Advance(). May panic if
+// Advance() returned false or was not called. Never blocks.
+func (rs *RecipeStream) Value() localblobstore.RecipeStep {
+ return rs.step
+}
+
+// Err() returns any error encountered by Advance. Never blocks.
+func (rs *RecipeStream) Err() error {
+ // There are no errors to return here. The errors encountered in
+ // Advance() are expected and recoverable.
+ return nil
+}
+
+// Cancel() indicates that the client wishes to cease reading from the stream.
+// It causes the next call to Advance() to return false. Never blocks.
+// It may be called concurrently with other calls on the stream.
+func (rs *RecipeStream) Cancel() {
+ rs.mu.Lock()
+ rs.cancelled = true
+ rs.mu.Unlock()
+ rs.chunkStream.Cancel()
+}
+
+// -----------------------------------------------------------
+
+// gcTemp() attempts to delete files in dirName older than threshold.
+// Errors are ignored.
+func gcTemp(dirName string, threshold time.Time) {
+ fh, err := os.Open(dirName)
+ if err == nil {
+ fi, _ := fh.Readdir(0)
+ fh.Close()
+ for i := 0; i < len(fi); i++ {
+ if fi[i].ModTime().Before(threshold) {
+ os.Remove(filepath.Join(dirName, fi[i].Name()))
+ }
+ }
+ }
+}
+
+// GC() removes old temp files and content-addressed blocks that are no longer
+// referenced by any blob. It may be called concurrently with other calls to
+// GC(), and with uses of BlobReaders and BlobWriters.
+func (fscabs *FsCaBlobStore) GC(ctx *context.T) (err error) {
+ // Remove old temporary files.
+ gcTemp(filepath.Join(fscabs.rootName, tmpDir), time.Now().Add(-10*time.Hour))
+
+ // Add a key to caSet for each content-addressed fragment in *fscabs.
+ caSet := make(map[string]bool)
+ caIter := fscabs.ListCAIds(ctx)
+ for caIter.Advance() {
+ caSet[caIter.Value()] = true
+ }
+ err = caIter.Err()
+
+ // cmBlobs maps the names of blobs found in the BlobMap to their IDs.
+ // (The IDs can be derived from the names; the map is really being used
+ // to record which blobs exist, and the value merely avoids repeated
+ // conversions.)
+ cmBlobs := make(map[string][]byte)
+ if err == nil {
+ // Record all the blobs known to the BlobMap;
+ bs := fscabs.bm.NewBlobStream(ctx)
+ for bs.Advance() {
+ blobID := bs.Value(nil)
+ cmBlobs[hashToFileName(blobDir, blobID)] = blobID
+ }
+ }
+
+ if err == nil {
+ // Remove from cmBlobs all extant blobs, and remove from
+ // caSet all their fragments.
+ blobIter := fscabs.ListBlobIds(ctx)
+ for blobIter.Advance() {
+ var blobDesc *blobDesc
+ if blobDesc, err = fscabs.getBlob(ctx, blobIter.Value()); err == nil {
+ delete(cmBlobs, blobDesc.name)
+ for i := range blobDesc.fragment {
+ delete(caSet, blobDesc.fragment[i].fileName)
+ }
+ fscabs.descUnref(blobDesc)
+ }
+ }
+ }
+
+ if err == nil {
+ // Remove all blobs still mentioned in cmBlobs from the BlobMap;
+ // these are the ones that no longer exist in the blobs directory.
+ for _, blobID := range cmBlobs {
+ err = fscabs.bm.DeleteBlob(ctx, blobID)
+ if err != nil {
+ break
+ }
+ }
+ }
+
+ if err == nil {
+ // Remove from caSet all fragments referenced by open BlobReaders and
+ // BlobWriters. Advertise to new readers and writers which blobs are
+ // about to be deleted.
+ fscabs.mu.Lock()
+ for _, desc := range fscabs.activeDesc {
+ for i := range desc.fragment {
+ delete(caSet, desc.fragment[i].fileName)
+ }
+ }
+ fscabs.toDelete = append(fscabs.toDelete, &caSet)
+ fscabs.mu.Unlock()
+
+ // Delete the things that still remain in caSet; they are no longer
+ // referenced.
+ for caName := range caSet {
+ os.Remove(filepath.Join(fscabs.rootName, caName))
+ }
+
+ // Stop advertising what's been deleted.
+ fscabs.mu.Lock()
+ n := len(fscabs.toDelete)
+ var i int
+ // We require that &caSet still be in the list.
+ for i = 0; fscabs.toDelete[i] != &caSet; i++ {
+ }
+ fscabs.toDelete[i] = fscabs.toDelete[n-1]
+ fscabs.toDelete = fscabs.toDelete[0 : n-1]
+ fscabs.mu.Unlock()
+ }
+ return err
+}
diff --git a/services/syncbase/localblobstore/fs_cablobstore/fs_cablobstore_test.go b/services/syncbase/localblobstore/fs_cablobstore/fs_cablobstore_test.go
new file mode 100644
index 0000000..0d964c3
--- /dev/null
+++ b/services/syncbase/localblobstore/fs_cablobstore/fs_cablobstore_test.go
@@ -0,0 +1,97 @@
+// Copyright 2015 The Vanadium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// A test for fs_cablobstore
+package fs_cablobstore_test
+
+import "io/ioutil"
+import "os"
+import "testing"
+
+import "v.io/syncbase/x/ref/services/syncbase/localblobstore"
+import "v.io/syncbase/x/ref/services/syncbase/localblobstore/fs_cablobstore"
+import "v.io/syncbase/x/ref/services/syncbase/localblobstore/localblobstore_testlib"
+import "v.io/x/ref/test"
+import _ "v.io/x/ref/runtime/factories/generic"
+
+// This test case tests adding files, retrieving them and deleting them. One
+// can't retrieve or delete something that hasn't been created, so it's all one
+// test case.
+func TestAddRetrieveAndDelete(t *testing.T) {
+ ctx, shutdown := test.V23Init()
+ defer shutdown()
+
+ // Make a temporary directory.
+ var err error
+ var testDirName string
+ testDirName, err = ioutil.TempDir("", "localblobstore_test")
+ if err != nil {
+ t.Fatalf("localblobstore_test: can't make tmp directory: %v\n", err)
+ }
+ defer os.RemoveAll(testDirName)
+
+ // Create an fs_cablobstore.
+ var bs localblobstore.BlobStore
+ bs, err = fs_cablobstore.Create(ctx, testDirName)
+ if err != nil {
+ t.Fatalf("fs_cablobstore.Create failed: %v", err)
+ }
+
+ // Test it.
+ localblobstore_testlib.AddRetrieveAndDelete(t, ctx, bs, testDirName)
+}
+
+// This test case tests the incremental transfer of blobs via chunks.
+func TestWritingViaChunks(t *testing.T) {
+ ctx, shutdown := test.V23Init()
+ defer shutdown()
+
+ var err error
+
+ // Make a pair of blobstores, each in its own temporary directory.
+ const nBlobStores = 2
+ var testDirName [nBlobStores]string
+ var bs [nBlobStores]localblobstore.BlobStore
+ for i := 0; i != nBlobStores; i++ {
+ testDirName[i], err = ioutil.TempDir("", "localblobstore_test")
+ if err != nil {
+ t.Fatalf("localblobstore_test: can't make tmp directory: %v\n", err)
+ }
+ defer os.RemoveAll(testDirName[i])
+
+ bs[i], err = fs_cablobstore.Create(ctx, testDirName[i])
+ if err != nil {
+ t.Fatalf("fs_cablobstore.Create failed: %v", err)
+ }
+ }
+
+ // Test it.
+ localblobstore_testlib.WriteViaChunks(t, ctx, bs)
+}
+
+// This test case checks that empty blobs can be created, then extended via
+// ResumeBlobWriter.
+func TestCreateAndResume(t *testing.T) {
+ ctx, shutdown := test.V23Init()
+ defer shutdown()
+
+ // Make a temporary directory.
+ var err error
+ var testDirName string
+ testDirName, err = ioutil.TempDir("", "localblobstore_test")
+ if err != nil {
+ t.Fatalf("localblobstore_test: can't make tmp directory: %v\n", err)
+ }
+ defer os.RemoveAll(testDirName)
+
+ // Create an fs_cablobstore.
+ var bs localblobstore.BlobStore
+ bs, err = fs_cablobstore.Create(ctx, testDirName)
+ if err != nil {
+ t.Fatalf("fs_cablobstore.Create failed: %v", err)
+ }
+
+ // Test it.
+ localblobstore_testlib.CreateAndResume(t, ctx, bs)
+}
diff --git a/services/syncbase/localblobstore/localblobstore_test.go b/services/syncbase/localblobstore/localblobstore_test.go
new file mode 100644
index 0000000..a258c0f
--- /dev/null
+++ b/services/syncbase/localblobstore/localblobstore_test.go
@@ -0,0 +1,97 @@
+// Copyright 2015 The Vanadium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// A test for localblobstore
+package localblobstore_test
+
+import "io/ioutil"
+import "os"
+import "testing"
+
+import "v.io/syncbase/x/ref/services/syncbase/localblobstore"
+import "v.io/syncbase/x/ref/services/syncbase/localblobstore/fs_cablobstore"
+import "v.io/syncbase/x/ref/services/syncbase/localblobstore/localblobstore_testlib"
+import "v.io/x/ref/test"
+import _ "v.io/x/ref/runtime/factories/generic"
+
+// This test case tests adding files, retrieving them and deleting them. One
+// can't retrieve or delete something that hasn't been created, so it's all one
+// test case.
+func TestAddRetrieveAndDelete(t *testing.T) {
+ ctx, shutdown := test.V23Init()
+ defer shutdown()
+
+ // Make a temporary directory.
+ var err error
+ var testDirName string
+ testDirName, err = ioutil.TempDir("", "localblobstore_test")
+ if err != nil {
+ t.Fatalf("localblobstore_test: can't make tmp directory: %v\n", err)
+ }
+ defer os.RemoveAll(testDirName)
+
+ // Create an fs_cablobstore.
+ var bs localblobstore.BlobStore
+ bs, err = fs_cablobstore.Create(ctx, testDirName)
+ if err != nil {
+ t.Fatalf("fs_cablobstore.Create failed: %v", err)
+ }
+
+ // Test it.
+ localblobstore_testlib.AddRetrieveAndDelete(t, ctx, bs, testDirName)
+}
+
+// This test case tests the incremental transfer of blobs via chunks.
+func TestWritingViaChunks(t *testing.T) {
+ ctx, shutdown := test.V23Init()
+ defer shutdown()
+
+ var err error
+
+ // Make a pair of blobstores, each in its own temporary directory.
+ const nBlobStores = 2
+ var testDirName [nBlobStores]string
+ var bs [nBlobStores]localblobstore.BlobStore
+ for i := 0; i != nBlobStores; i++ {
+ testDirName[i], err = ioutil.TempDir("", "localblobstore_test")
+ if err != nil {
+ t.Fatalf("localblobstore_test: can't make tmp directory: %v\n", err)
+ }
+ defer os.RemoveAll(testDirName[i])
+
+ bs[i], err = fs_cablobstore.Create(ctx, testDirName[i])
+ if err != nil {
+ t.Fatalf("fs_cablobstore.Create failed: %v", err)
+ }
+ }
+
+ // Test it.
+ localblobstore_testlib.WriteViaChunks(t, ctx, bs)
+}
+
+// This test case checks that empty blobs can be created, then extended via
+// ResumeBlobWriter.
+func TestCreateAndResume(t *testing.T) {
+ ctx, shutdown := test.V23Init()
+ defer shutdown()
+
+ // Make a temporary directory.
+ var err error
+ var testDirName string
+ testDirName, err = ioutil.TempDir("", "localblobstore_test")
+ if err != nil {
+ t.Fatalf("localblobstore_test: can't make tmp directory: %v\n", err)
+ }
+ defer os.RemoveAll(testDirName)
+
+ // Create an fs_cablobstore.
+ var bs localblobstore.BlobStore
+ bs, err = fs_cablobstore.Create(ctx, testDirName)
+ if err != nil {
+ t.Fatalf("fs_cablobstore.Create failed: %v", err)
+ }
+
+ // Test it.
+ localblobstore_testlib.CreateAndResume(t, ctx, bs)
+}
diff --git a/services/syncbase/localblobstore/localblobstore_testlib/localblobstore_testlib.go b/services/syncbase/localblobstore/localblobstore_testlib/localblobstore_testlib.go
new file mode 100644
index 0000000..d6c1d9d
--- /dev/null
+++ b/services/syncbase/localblobstore/localblobstore_testlib/localblobstore_testlib.go
@@ -0,0 +1,889 @@
+// Copyright 2015 The Vanadium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// A test library for localblobstores.
+package localblobstore_testlib
+
+import "bytes"
+import "crypto/md5"
+import "fmt"
+import "io"
+import "io/ioutil"
+import "path/filepath"
+import "testing"
+
+import "v.io/syncbase/x/ref/services/syncbase/localblobstore"
+import "v.io/syncbase/x/ref/services/syncbase/localblobstore/chunker"
+import "v.io/v23/context"
+import "v.io/v23/verror"
+
+// A blobOrBlockOrFile represents some bytes that may be contained in a named
+// blob, a named file, or in an explicit slice of bytes.
+type blobOrBlockOrFile struct {
+ blob string // If non-empty, the name of the blob containing the bytes.
+ file string // If non-empty and blob is empty, the name of the file containing the bytes.
+ size int64 // Size of part of file or blob, or -1 for "everything until EOF".
+ offset int64 // Offset within file or blob.
+ block []byte // If both blob and file are empty, a slice containing the bytes.
+}
+
+// A testBlob records that some specified content has been stored with a given
+// blob name in the blob store.
+type testBlob struct {
+ content []byte // content that has been stored.
+ blobName string // the name of the blob.
+}
+
+// removeBlobFromBlobVector() removes the entry named blobName from
+// blobVector[], returning the new vector.
+func removeBlobFromBlobVector(blobVector []testBlob, blobName string) []testBlob {
+ n := len(blobVector)
+ i := 0
+ for i = 0; i != n && blobName != blobVector[i].blobName; i++ {
+ }
+ if i != n {
+ blobVector[i] = blobVector[n-1]
+ blobVector = blobVector[0 : n-1]
+ }
+ return blobVector
+}
+
+// writeBlob() writes a new blob to bs and appends a description of it to
+// blobVector, returning the updated vector. The new blob's content is
+// described by the elements of data[]. Any error messages
+// generated include the index of the blob in blobVector and its content; the
+// latter is assumed to be printable. The expected content of the blob is
+// "content", so that this routine can check it. If useResume is true and data[]
+// has more than one element, the function deliberately uses ResumeBlobWriter()
+// to exercise it.
+func writeBlob(t *testing.T, ctx *context.T, bs localblobstore.BlobStore, blobVector []testBlob,
+ content []byte, useResume bool, data ...blobOrBlockOrFile) []testBlob {
+ var bw localblobstore.BlobWriter
+ var err error
+ bw, err = bs.NewBlobWriter(ctx, "")
+ if err != nil {
+ t.Errorf("localblobstore.NewBlobWriter blob %d:%s failed: %v", len(blobVector), string(content), err)
+ }
+ blobName := bw.Name()
+
+ // Construct the blob from the pieces.
+ // There is a loop within the loop to exercise the possibility of
+ // passing multiple fragments to AppendFragment().
+ for i := 0; i != len(data) && err == nil; {
+ if len(data[i].blob) != 0 {
+ err = bw.AppendBlob(data[i].blob, data[i].size, data[i].offset)
+ if err != nil {
+ t.Errorf("localblobstore.AppendBlob %d:%s blob %s failed: %v", len(blobVector), string(content), data[i].blob, err)
+ }
+ i++
+ } else {
+ var pieces []localblobstore.BlockOrFile
+ for ; i != len(data) && len(data[i].blob) == 0; i++ {
+ if len(data[i].file) != 0 {
+ pieces = append(pieces, localblobstore.BlockOrFile{
+ FileName: data[i].file,
+ Size: data[i].size,
+ Offset: data[i].offset})
+ } else {
+ pieces = append(pieces, localblobstore.BlockOrFile{Block: data[i].block})
+ }
+ }
+ err = bw.AppendFragment(pieces...)
+ if err != nil {
+ t.Errorf("localblobstore.AppendFragment %d:%s failed on %v: %v", len(blobVector), string(content), pieces, err)
+ }
+ }
+ if useResume && i < len(data)-1 && err == nil {
+ err = bw.CloseWithoutFinalize()
+ if err == nil {
+ bw, err = bs.ResumeBlobWriter(ctx, blobName)
+ }
+ }
+ }
+
+ if bw != nil {
+ if bw.Size() != int64(len(content)) {
+ t.Errorf("localblobstore.Size before finalization %d:%s got %d, expected %d", len(blobVector), string(content), bw.Size(), len(content))
+ }
+ if bw.IsFinalized() {
+ t.Errorf("localblobstore.IsFinalized %d:%s got true, expected false", len(blobVector), string(content))
+ }
+ err = bw.Close()
+ if err != nil {
+ t.Errorf("localblobstore.Close %d:%s failed: %v", len(blobVector), string(content), err)
+ }
+ if !bw.IsFinalized() {
+ t.Errorf("localblobstore.IsFinalized %d:%s got true, expected false", len(blobVector), string(content))
+ }
+ if bw.Size() != int64(len(content)) {
+ t.Errorf("localblobstore.Size %d:%s after finalization got %d, expected %d", len(blobVector), string(content), bw.Size(), len(content))
+ }
+ if bw.Name() != blobName {
+ t.Errorf("localblobstore %d:%s name changed when finalized was %s now %s", len(blobVector), string(content), blobName, bw.Name())
+ }
+ hasher := md5.New()
+ hasher.Write(content)
+ if bytes.Compare(bw.Hash(), hasher.Sum(nil)) != 0 {
+ t.Errorf("localblobstore %d:%s BlobWriter.Hash got %v, expected %v", len(blobVector), string(content), bw.Hash(), hasher.Sum(nil))
+ }
+ }
+
+ return append(blobVector,
+ testBlob{
+ content: content,
+ blobName: blobName,
+ })
+}
+
+// readBlob() returns a substring of the content of the blob named blobName in bs.
+// The return values are:
+// - the "size" bytes from the content, starting at the given "offset",
+// measured from "whence" (as defined by io.Seeker.Seek).
+// - the position to which BlobBeader seeks to,
+// - the md5 hash of the bytes read, and
+// - the md5 hash of the bytes of the blob, as returned by BlobReader.Hash(),
+// - and error.
+func readBlob(ctx *context.T, bs localblobstore.BlobStore, blobName string,
+ size int64, offset int64, whence int) (content []byte, pos int64, hash []byte, fullHash []byte, err error) {
+
+ var br localblobstore.BlobReader
+ hasher := md5.New()
+ br, err = bs.NewBlobReader(ctx, blobName)
+ if err == nil {
+ buf := make([]byte, 8192, 8192)
+ fullHash = br.Hash()
+ pos, err = br.Seek(offset, whence)
+ if err == nil {
+ var n int
+ first := true // Read at least once, to test reading zero bytes.
+ for err == nil && (size == -1 || int64(len(content)) < size || first) {
+ // Read just what was asked for.
+ var toRead []byte = buf
+ if size >= 0 && int(size)-len(content) < len(buf) {
+ toRead = buf[0 : int(size)-len(content)]
+ }
+ n, err = br.Read(toRead)
+ hasher.Write(toRead[0:n])
+ if size >= 0 && int64(len(content)+n) > size {
+ n = int(size) - len(content)
+ }
+ content = append(content, toRead[0:n]...)
+ first = false
+ }
+ }
+ br.Close()
+ }
+ return content, pos, hasher.Sum(nil), fullHash, err
+}
+
+// checkWrittenBlobsAreReadable() checks that the blobs in blobVector[] can be
+// read, and that they contain the appropriate data.
+func checkWrittenBlobsAreReadable(t *testing.T, ctx *context.T, bs localblobstore.BlobStore, blobVector []testBlob) {
+ for i := range blobVector {
+ var size int64
+ data := blobVector[i].content
+ dataLen := int64(len(data))
+ blobName := blobVector[i].blobName
+ for size = -1; size != dataLen+1; size++ {
+ var offset int64
+ for offset = -dataLen - 1; offset != dataLen+1; offset++ {
+ for whence := -1; whence != 4; whence++ {
+ content, pos, hash, fullHash, err := readBlob(ctx, bs, blobName, size, offset, whence)
+
+ // Compute expected seek position.
+ expectedPos := offset
+ if whence == 2 {
+ expectedPos += dataLen
+ }
+
+ // Compute the expected size.
+ expectedSize := size
+ if expectedSize == -1 || expectedPos+expectedSize > dataLen {
+ expectedSize = dataLen - expectedPos
+ }
+
+ // Check that reads behave as expected.
+ if (whence == -1 || whence == 3) &&
+ verror.ErrorID(err) == "v.io/syncbase/x/ref/services/syncbase/localblobstore/fs_cablobstore.errBadSeekWhence" {
+ // Expected error from bad "whence" value.
+ } else if expectedPos < 0 &&
+ verror.ErrorID(err) == "v.io/syncbase/x/ref/services/syncbase/localblobstore/fs_cablobstore.errNegativeSeekPosition" {
+ // Expected error from negative Seek position.
+ } else if expectedPos > dataLen &&
+ verror.ErrorID(err) == "v.io/syncbase/x/ref/services/syncbase/localblobstore/fs_cablobstore.errIllegalPositionForRead" {
+ // Expected error from too high a Seek position.
+ } else if 0 <= expectedPos && expectedPos+expectedSize <= int64(len(data)) &&
+ bytes.Compare(data[expectedPos:expectedPos+expectedSize], content) == 0 && err == io.EOF &&
+ pos == expectedPos && expectedPos+expectedSize == dataLen {
+ // Expected success with EOF.
+ } else if 0 <= expectedPos && expectedPos+expectedSize <= int64(len(data)) &&
+ bytes.Compare(data[expectedPos:expectedPos+expectedSize], content) == 0 && err == nil &&
+ pos == expectedPos && expectedPos+expectedSize != dataLen {
+ if pos == 0 && size == -1 && bytes.Compare(hash, fullHash) != 0 {
+ t.Errorf("localblobstore read test on %q size %d offset %d whence %d; got hash %v, expected %v (blob is %q)",
+ string(data), size, offset, whence,
+ hash, fullHash, blobName)
+ } // Else expected success without EOF.
+ } else {
+ t.Errorf("localblobstore read test on %q size %d offset %d whence %d yields %q pos %d %v (blob is %q)",
+ string(data), size, offset, whence,
+ content, pos, err, blobName)
+ }
+ }
+ }
+ }
+ }
+}
+
+// checkAllBlobs() checks all the blobs in bs to ensure they correspond to
+// those in blobVector[].
+func checkAllBlobs(t *testing.T, ctx *context.T, bs localblobstore.BlobStore, blobVector []testBlob, testDirName string) {
+ blobCount := 0
+ iterator := bs.ListBlobIds(ctx)
+ for iterator.Advance() {
+ fileName := iterator.Value()
+ i := 0
+ for ; i != len(blobVector) && fileName != blobVector[i].blobName; i++ {
+ }
+ if i == len(blobVector) {
+ t.Errorf("localblobstore.ListBlobIds found unexpected file %s", fileName)
+ } else {
+ content, pos, hash, fullHash, err := readBlob(ctx, bs, fileName, -1, 0, 0)
+ if err != nil && err != io.EOF {
+ t.Errorf("localblobstore.ListCAIds can't read %q: %v", filepath.Join(testDirName, fileName), err)
+ } else if bytes.Compare(blobVector[i].content, content) != 0 {
+ t.Errorf("localblobstore.ListCAIds found unexpected blob content: %q, contains %q, expected %q",
+ filepath.Join(testDirName, fileName), content, string(blobVector[i].content))
+ } else if pos != 0 {
+ t.Errorf("localblobstore.ListCAIds Seek on %q returned %d instead of 0",
+ filepath.Join(testDirName, fileName), pos)
+ }
+ if bytes.Compare(hash, fullHash) != 0 {
+ t.Errorf("localblobstore.ListCAIds read on %q; got hash %v, expected %v",
+ fileName, hash, fullHash)
+ }
+ }
+ blobCount++
+ }
+ if iterator.Err() != nil {
+ t.Errorf("localblobstore.ListBlobIds iteration failed: %v", iterator.Err())
+ }
+ if blobCount != len(blobVector) {
+ t.Errorf("localblobstore.ListBlobIds iteration expected 4 files, got %d", blobCount)
+ }
+}
+
+// checkFragments() checks all the fragments in bs to ensure they
+// correspond to those in fragmentMap[], iff testDirName is non-empty.
+func checkFragments(t *testing.T, ctx *context.T, bs localblobstore.BlobStore, fragmentMap map[string]bool, testDirName string) {
+ if testDirName != "" {
+ caCount := 0
+ iterator := bs.ListCAIds(ctx)
+ for iterator.Advance() {
+ fileName := iterator.Value()
+ content, err := ioutil.ReadFile(filepath.Join(testDirName, fileName))
+ if err != nil && err != io.EOF {
+ t.Errorf("localblobstore.ListCAIds can't read %q: %v", filepath.Join(testDirName, fileName), err)
+ } else if !fragmentMap[string(content)] {
+ t.Errorf("localblobstore.ListCAIds found unexpected fragment entry: %q, contains %q", filepath.Join(testDirName, fileName), content)
+ } else {
+ hasher := md5.New()
+ hasher.Write(content)
+ hash := hasher.Sum(nil)
+ nameFromContent := filepath.Join("cas",
+ fmt.Sprintf("%02x", hash[0]),
+ fmt.Sprintf("%02x", hash[1]),
+ fmt.Sprintf("%02x", hash[2]),
+ fmt.Sprintf("%02x%02x%02x%02x%02x%02x%02x%02x%02x%02x%02x%02x%02x",
+ hash[3],
+ hash[4], hash[5], hash[6], hash[7],
+ hash[8], hash[9], hash[10], hash[11],
+ hash[12], hash[13], hash[14], hash[15]))
+ if nameFromContent != fileName {
+ t.Errorf("localblobstore.ListCAIds hash of fragment: got %q, expected %q (content=%s)", nameFromContent, fileName, string(content))
+ }
+ }
+ caCount++
+ }
+ if iterator.Err() != nil {
+ t.Errorf("localblobstore.ListCAIds iteration failed: %v", iterator.Err())
+ }
+ if caCount != len(fragmentMap) {
+ t.Errorf("localblobstore.ListCAIds iteration expected %d files, got %d", len(fragmentMap), caCount)
+ }
+ }
+}
+
+// AddRetrieveAndDelete() tests adding, retrieving, and deleting blobs from a
+// blobstore bs. One can't retrieve or delete something that hasn't been
+// created, so it's all done in one routine. If testDirName is non-empty,
+// the blobstore is assumed to be accessible in the file system, and its
+// files are checked.
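+//
+// A hypothetical caller, for a store backed by fs_cablobstore (a sketch only,
+// with error handling omitted; the test name and temp-dir prefix are
+// illustrative, not part of this package):
+//   func TestAddRetrieveAndDelete(t *testing.T) {
+//     ctx, shutdown := test.V23Init()
+//     defer shutdown()
+//     testDirName, _ := ioutil.TempDir("", "localblobstore_test")
+//     defer os.RemoveAll(testDirName)
+//     bs, _ := fs_cablobstore.Create(ctx, testDirName)
+//     defer bs.Close()
+//     localblobstore_testlib.AddRetrieveAndDelete(t, ctx, bs, testDirName)
+//   }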
+func AddRetrieveAndDelete(t *testing.T, ctx *context.T, bs localblobstore.BlobStore, testDirName string) {
+ var err error
+
+ // Check that there are no files in the blobstore we were given.
+ iterator := bs.ListBlobIds(ctx)
+ for iterator.Advance() {
+ fileName := iterator.Value()
+ t.Errorf("unexpected file %q\n", fileName)
+ }
+ if iterator.Err() != nil {
+ t.Errorf("localblobstore.ListBlobIds iteration failed: %v", iterator.Err())
+ }
+
+ // Create the strings: "wom", "bat", "wombat", "batwom", "atwo", "atwoatwoombatatwo".
+ womData := []byte("wom")
+ batData := []byte("bat")
+ wombatData := []byte("wombat")
+ batwomData := []byte("batwom")
+ atwoData := []byte("atwo")
+ atwoatwoombatatwoData := []byte("atwoatwoombatatwo")
+
+ // fragmentMap will have an entry per content-addressed fragment.
+ fragmentMap := make(map[string]bool)
+
+ // Create the blobs, by various means.
+
+ var blobVector []testBlob // Accumulate the blobs we create here.
+
+ blobVector = writeBlob(t, ctx, bs, blobVector,
+ womData, false,
+ blobOrBlockOrFile{block: womData})
+ womName := blobVector[len(blobVector)-1].blobName
+ fragmentMap[string(womData)] = true
+
+ blobVector = writeBlob(t, ctx, bs, blobVector,
+ batData, false,
+ blobOrBlockOrFile{block: batData})
+ batName := blobVector[len(blobVector)-1].blobName
+ fragmentMap[string(batData)] = true
+
+ blobVector = writeBlob(t, ctx, bs, blobVector,
+ wombatData, false,
+ blobOrBlockOrFile{block: wombatData})
+ firstWombatName := blobVector[len(blobVector)-1].blobName
+ fragmentMap[string(wombatData)] = true
+
+ blobVector = writeBlob(t, ctx, bs, blobVector,
+ wombatData, true,
+ blobOrBlockOrFile{block: womData},
+ blobOrBlockOrFile{block: batData})
+
+ blobVector = writeBlob(t, ctx, bs, blobVector,
+ wombatData, false,
+ blobOrBlockOrFile{
+ blob: firstWombatName,
+ size: -1,
+ offset: 0})
+
+ blobVector = writeBlob(t, ctx, bs, blobVector,
+ wombatData, false,
+ blobOrBlockOrFile{
+ blob: firstWombatName,
+ size: 6,
+ offset: 0})
+
+ blobVector = writeBlob(t, ctx, bs, blobVector,
+ batwomData, false,
+ blobOrBlockOrFile{
+ blob: firstWombatName,
+ size: 3,
+ offset: 3},
+ blobOrBlockOrFile{
+ blob: firstWombatName,
+ size: 3,
+ offset: 0})
+ batwomName := blobVector[len(blobVector)-1].blobName
+
+ blobVector = writeBlob(t, ctx, bs, blobVector,
+ atwoData, false,
+ blobOrBlockOrFile{
+ blob: batwomName,
+ size: 4,
+ offset: 1})
+ atwoName := blobVector[len(blobVector)-1].blobName
+
+ blobVector = writeBlob(t, ctx, bs, blobVector,
+ atwoatwoombatatwoData, true,
+ blobOrBlockOrFile{
+ blob: atwoName,
+ size: -1,
+ offset: 0},
+ blobOrBlockOrFile{
+ blob: atwoName,
+ size: 4,
+ offset: 0},
+ blobOrBlockOrFile{
+ blob: firstWombatName,
+ size: -1,
+ offset: 1},
+ blobOrBlockOrFile{
+ blob: batName,
+ size: -1,
+ offset: 1},
+ blobOrBlockOrFile{
+ blob: womName,
+ size: 2,
+ offset: 0})
+ atwoatwoombatatwoName := blobVector[len(blobVector)-1].blobName
+
+ // -------------------------------------------------
+ // Check that the state is as we expect.
+ checkWrittenBlobsAreReadable(t, ctx, bs, blobVector)
+ checkAllBlobs(t, ctx, bs, blobVector, testDirName)
+ checkFragments(t, ctx, bs, fragmentMap, testDirName)
+
+ // -------------------------------------------------
+ // Nothing should change if we garbage collect.
+ bs.GC(ctx)
+ checkWrittenBlobsAreReadable(t, ctx, bs, blobVector)
+ checkAllBlobs(t, ctx, bs, blobVector, testDirName)
+ checkFragments(t, ctx, bs, fragmentMap, testDirName)
+
+ // -------------------------------------------------
+ // Ensure that deleting non-existent blobs fails.
+ err = bs.DeleteBlob(ctx, "../../../../etc/passwd")
+ if verror.ErrorID(err) != "v.io/syncbase/x/ref/services/syncbase/localblobstore/fs_cablobstore.errInvalidBlobName" {
+		t.Errorf("DeleteBlob did not reject a bogus blob name")
+ }
+ err = bs.DeleteBlob(ctx, "foo/00/00/00/00000000000000000000000000")
+ if verror.ErrorID(err) != "v.io/syncbase/x/ref/services/syncbase/localblobstore/fs_cablobstore.errInvalidBlobName" {
+		t.Errorf("DeleteBlob did not reject a bogus blob name")
+ }
+
+ // -------------------------------------------------
+ // Delete a blob.
+ err = bs.DeleteBlob(ctx, batName)
+ if err != nil {
+ t.Errorf("DeleteBlob failed to delete blob %q: %v", batName, err)
+ }
+ blobVector = removeBlobFromBlobVector(blobVector, batName)
+
+ // -------------------------------------------------
+ // Check that the state is as we expect.
+ checkWrittenBlobsAreReadable(t, ctx, bs, blobVector)
+ checkAllBlobs(t, ctx, bs, blobVector, testDirName)
+ checkFragments(t, ctx, bs, fragmentMap, testDirName)
+
+ // -------------------------------------------------
+ // Nothing should change if we garbage collect.
+ bs.GC(ctx)
+ checkWrittenBlobsAreReadable(t, ctx, bs, blobVector)
+ checkAllBlobs(t, ctx, bs, blobVector, testDirName)
+ checkFragments(t, ctx, bs, fragmentMap, testDirName)
+
+ // -------------------------------------------------
+ // Open a BlobReader on a blob we're about to delete,
+ // so its fragments won't be garbage collected.
+
+ var br localblobstore.BlobReader
+ br, err = bs.NewBlobReader(ctx, atwoatwoombatatwoName)
+ if err != nil {
+ t.Errorf("NewBlobReader failed in blob %q: %v", atwoatwoombatatwoName, err)
+ }
+
+ // -------------------------------------------------
+ // Delete a blob. This should be the last on-disc reference to the
+ // content-addressed fragment "bat", but the fragment won't be deleted
+	// until we close the reader and garbage collect.
+ err = bs.DeleteBlob(ctx, atwoatwoombatatwoName)
+ if err != nil {
+ t.Errorf("DeleteBlob failed to delete blob %q: %v", atwoatwoombatatwoName, err)
+ }
+ blobVector = removeBlobFromBlobVector(blobVector, atwoatwoombatatwoName)
+
+ // -------------------------------------------------
+ // Check that the state is as we expect.
+ checkWrittenBlobsAreReadable(t, ctx, bs, blobVector)
+ checkAllBlobs(t, ctx, bs, blobVector, testDirName)
+ checkFragments(t, ctx, bs, fragmentMap, testDirName)
+
+ // -------------------------------------------------
+ // Garbage collection should change nothing; the fragment involved
+ // is still referenced from the open reader *br.
+ bs.GC(ctx)
+ checkWrittenBlobsAreReadable(t, ctx, bs, blobVector)
+ checkAllBlobs(t, ctx, bs, blobVector, testDirName)
+ checkFragments(t, ctx, bs, fragmentMap, testDirName)
+
+ // -------------------------------------------------
+
+ // Close the open BlobReader and garbage collect.
+ err = br.Close()
+ if err != nil {
+ t.Errorf("BlobReader.Close failed on blob %q: %v", atwoatwoombatatwoName, err)
+ }
+ delete(fragmentMap, string(batData))
+
+ bs.GC(ctx)
+ checkWrittenBlobsAreReadable(t, ctx, bs, blobVector)
+ checkAllBlobs(t, ctx, bs, blobVector, testDirName)
+ checkFragments(t, ctx, bs, fragmentMap, testDirName)
+
+ // -------------------------------------------------
+ // Delete all blobs.
+ for len(blobVector) != 0 {
+ err = bs.DeleteBlob(ctx, blobVector[0].blobName)
+ if err != nil {
+ t.Errorf("DeleteBlob failed to delete blob %q: %v", blobVector[0].blobName, err)
+ }
+ blobVector = removeBlobFromBlobVector(blobVector, blobVector[0].blobName)
+ }
+
+ // -------------------------------------------------
+ // Check that the state is as we expect.
+ checkWrittenBlobsAreReadable(t, ctx, bs, blobVector)
+ checkAllBlobs(t, ctx, bs, blobVector, testDirName)
+ checkFragments(t, ctx, bs, fragmentMap, testDirName)
+
+ // -------------------------------------------------
+ // The remaining fragments should be removed when we garbage collect.
+ for frag := range fragmentMap {
+ delete(fragmentMap, frag)
+ }
+ bs.GC(ctx)
+ checkWrittenBlobsAreReadable(t, ctx, bs, blobVector)
+ checkAllBlobs(t, ctx, bs, blobVector, testDirName)
+ checkFragments(t, ctx, bs, fragmentMap, testDirName)
+}
+
+// writeBlobFromReader() writes the contents of rd to blobstore bs, as blob
+// "name", or picks a name name if "name" is empty. It returns the name of the
+// blob. Errors cause the test to terminate. Error messages contain the
+// "callSite" value to allow the test to tell which call site is which.
+func writeBlobFromReader(t *testing.T, ctx *context.T, bs localblobstore.BlobStore, name string, rd io.Reader, callSite int) string {
+ var err error
+ var bw localblobstore.BlobWriter
+ if bw, err = bs.NewBlobWriter(ctx, name); err != nil {
+ t.Fatalf("callSite %d: NewBlobWriter failed: %v", callSite, err)
+ }
+ blobName := bw.Name()
+ buf := make([]byte, 8192) // buffer for data read from rd.
+ for i := 0; err == nil; i++ {
+ var n int
+ if n, err = rd.Read(buf); err != nil && err != io.EOF {
+ t.Fatalf("callSite %d: unexpected error from reader: %v", callSite, err)
+ }
+ if n > 0 {
+ if err = bw.AppendFragment(localblobstore.BlockOrFile{Block: buf[:n]}); err != nil {
+ t.Fatalf("callSite %d: BlobWriter.AppendFragment failed: %v", callSite, err)
+ }
+ // Every so often, close without finalizing, and reopen.
+ if (i % 7) == 0 {
+ if err = bw.CloseWithoutFinalize(); err != nil {
+ t.Fatalf("callSite %d: BlobWriter.CloseWithoutFinalize failed: %v", callSite, err)
+ }
+ if bw, err = bs.ResumeBlobWriter(ctx, blobName); err != nil {
+ t.Fatalf("callSite %d: ResumeBlobWriter %q failed: %v", callSite, blobName, err)
+ }
+ }
+ }
+ }
+ if err = bw.Close(); err != nil {
+ t.Fatalf("callSite %d: BlobWriter.Close failed: %v", callSite, err)
+ }
+ return blobName
+}
+
+// checkBlobAgainstReader() verifies that the blob blobName has the same bytes as the reader rd.
+// Errors cause the test to terminate. Error messages contain the
+// "callSite" value to allow the test to tell which call site is which.
+func checkBlobAgainstReader(t *testing.T, ctx *context.T, bs localblobstore.BlobStore, blobName string, rd io.Reader, callSite int) {
+ // Open a reader on the blob.
+ var blob_rd io.Reader
+ var blob_err error
+ if blob_rd, blob_err = bs.NewBlobReader(ctx, blobName); blob_err != nil {
+ t.Fatalf("callSite %d: NewBlobReader on %q failed: %v", callSite, blobName, blob_err)
+ }
+
+ // Variables for reading the two streams, indexed by "reader" and "blob".
+ type stream struct {
+ name string
+ rd io.Reader // Reader for this stream
+ buf []byte // buffer for data
+ i int // bytes processed within current buffer
+ n int // valid bytes in current buffer
+ err error // error, or nil
+ }
+
+ s := [2]stream{
+ {name: "reader", rd: rd, buf: make([]byte, 8192)},
+ {name: blobName, rd: blob_rd, buf: make([]byte, 8192)},
+ }
+
+ // Descriptive names for the two elements of s, when we aren't treating them the same.
+ reader := &s[0]
+ blob := &s[1]
+
+ var pos int // position within file, for error reporting.
+
+ for x := 0; x != 2; x++ {
+ s[x].n, s[x].err = s[x].rd.Read(s[x].buf)
+ s[x].i = 0
+ }
+ for blob.n != 0 && reader.n != 0 {
+ for reader.i != reader.n && blob.i != blob.n && reader.buf[reader.i] == blob.buf[blob.i] {
+ pos++
+ blob.i++
+ reader.i++
+ }
+ if reader.i != reader.n && blob.i != blob.n {
+ t.Fatalf("callSite %d: BlobStore %q: BlobReader on blob %q and rd reader generated different bytes at position %d: 0x%x vs 0x%x",
+ callSite, bs.Root(), blobName, pos, reader.buf[reader.i], blob.buf[blob.i])
+ }
+ for x := 0; x != 2; x++ { // read more data from each reader, if needed
+ if s[x].i == s[x].n {
+ s[x].i = 0
+ s[x].n = 0
+ if s[x].err == nil {
+ s[x].n, s[x].err = s[x].rd.Read(s[x].buf)
+ }
+ }
+ }
+ }
+ for x := 0; x != 2; x++ {
+ if s[x].err != io.EOF {
+ t.Fatalf("callSite %d: %s got error %v", callSite, s[x].name, s[x].err)
+ }
+ if s[x].n != 0 {
+ t.Fatalf("callSite %d: %s is longer than %s", callSite, s[x].name, s[1-x].name)
+ }
+ }
+}
+
+// checkBlobChunksAgainstReader() verifies that the blob blobName has the same chunks
+// (according to BlobChunkStream) as a chunker applied to the reader rd.
+// Errors cause the test to terminate. Error messages contain the
+// "callSite" value to allow the test to tell which call site is which.
+func checkBlobChunksAgainstReader(t *testing.T, ctx *context.T, bs localblobstore.BlobStore, blobName string, rd io.Reader, callSite int) {
+	buf := make([]byte, 8192) // buffer passed to cs.Value() to hold chunk hashes from the blob's chunk stream.
+ rawChunks := chunker.NewStream(ctx, &chunker.DefaultParam, rd)
+ cs := bs.BlobChunkStream(ctx, blobName)
+	pos := 0 // byte position within the blob, to be reported in error messages
+ i := 0 // chunk index, to be reported in error messages
+ rawMore, more := rawChunks.Advance(), cs.Advance()
+ for rawMore && more {
+ c := rawChunks.Value()
+ rawChunk := md5.Sum(rawChunks.Value())
+ chunk := cs.Value(buf)
+ if bytes.Compare(rawChunk[:], chunk) != 0 {
+ t.Errorf("raw random stream and chunk record for blob %q have different chunk %d:\n\t%v\nvs\n\t%v\n\tpos %d\n\tlen %d\n\tc %v",
+ blobName, i, rawChunk, chunk, pos, len(c), c)
+ }
+ pos += len(c)
+ i++
+ rawMore, more = rawChunks.Advance(), cs.Advance()
+ }
+ if rawMore {
+ t.Fatalf("callSite %d: blob %q has fewer chunks than raw stream", callSite, blobName)
+ }
+ if more {
+ t.Fatalf("callSite %d: blob %q has more chunks than raw stream", callSite, blobName)
+ }
+ if rawChunks.Err() != nil {
+ t.Fatalf("callSite %d: error reading raw chunk stream: %v", callSite, rawChunks.Err())
+ }
+ if cs.Err() != nil {
+ t.Fatalf("callSite %d: error reading chunk stream for blob %q; %v", callSite, blobName, cs.Err())
+ }
+}
+
+// WriteViaChunks() tests that a large blob in one blob store can be transmitted
+// to another incrementally, without transferring chunks already in the other blob store.
+func WriteViaChunks(t *testing.T, ctx *context.T, bs [2]localblobstore.BlobStore) {
+ // The original blob will be a megabyte.
+ totalLength := 1024 * 1024
+
+ // Write a random blob to bs[0], using seed 1, then check that the
+ // bytes and chunk we get from the blob just written are the same as
+ // those obtained from an identical byte stream.
+ blob0 := writeBlobFromReader(t, ctx, bs[0], "", NewRandReader(1, totalLength, 0, io.EOF), 0)
+ checkBlobAgainstReader(t, ctx, bs[0], blob0, NewRandReader(1, totalLength, 0, io.EOF), 1)
+ checkBlobChunksAgainstReader(t, ctx, bs[0], blob0, NewRandReader(1, totalLength, 0, io.EOF), 2)
+
+ // ---------------------------------------------------------------------
+ // Write into bs[1] a blob that is similar to blob0, but not identical, and check it as above.
+ insertionInterval := 20 * 1024
+ blob1 := writeBlobFromReader(t, ctx, bs[1], "", NewRandReader(1, totalLength, insertionInterval, io.EOF), 3)
+ checkBlobAgainstReader(t, ctx, bs[1], blob1, NewRandReader(1, totalLength, insertionInterval, io.EOF), 4)
+ checkBlobChunksAgainstReader(t, ctx, bs[1], blob1, NewRandReader(1, totalLength, insertionInterval, io.EOF), 5)
+
+ // ---------------------------------------------------------------------
+ // Count the number of chunks, and the number of steps in the recipe
+	// for copying blob0 from bs[0] to bs[1]. We expect the former to be
+	// significantly larger than the latter, because the
+ // insertionInterval is significantly larger than the expected chunk
+ // size.
+ cs := bs[0].BlobChunkStream(ctx, blob0) // Stream of chunks in blob0
+ rs := bs[1].RecipeStreamFromChunkStream(ctx, cs) // Recipe from bs[1]
+
+ recipeLen := 0
+ chunkCount := 0
+ for rs.Advance() {
+ step := rs.Value()
+ if step.Chunk != nil {
+ chunkCount++
+ }
+ recipeLen++
+ }
+ if rs.Err() != nil {
+ t.Fatalf("RecipeStream got error: %v", rs.Err())
+ }
+
+ cs = bs[0].BlobChunkStream(ctx, blob0) // Get the original chunk count.
+ origChunkCount := 0
+ for cs.Advance() {
+ origChunkCount++
+ }
+ if cs.Err() != nil {
+ t.Fatalf("ChunkStream got error: %v", cs.Err())
+ }
+ if origChunkCount < chunkCount*5 {
+		t.Errorf("expected fewer chunks in recipe: recipeLen %d chunkCount %d origChunkCount %d\n",
+ recipeLen, chunkCount, origChunkCount)
+ }
+
+ // Copy blob0 from bs[0] to bs[1], using chunks from blob1 (already in bs[1]) where possible.
+ cs = bs[0].BlobChunkStream(ctx, blob0) // Stream of chunks in blob0
+ // In a real application, at this point the stream cs would be sent to the device with bs[1].
+ rs = bs[1].RecipeStreamFromChunkStream(ctx, cs) // Recipe from bs[1]
+ // Write blob with known blob name.
+ var bw localblobstore.BlobWriter
+ var err error
+ if bw, err = bs[1].NewBlobWriter(ctx, blob0); err != nil {
+ t.Fatalf("bs[1].NewBlobWriter yields error: %v", err)
+ }
+ var br localblobstore.BlobReader
+ const maxFragment = 1024 * 1024
+ blocks := make([]localblobstore.BlockOrFile, maxFragment/chunker.DefaultParam.MinChunk)
+ for gotStep := rs.Advance(); gotStep; {
+ step := rs.Value()
+ if step.Chunk == nil {
+ // This part of the blob can be read from an existing blob locally (at bs[1]).
+ if err = bw.AppendBlob(step.Blob, step.Size, step.Offset); err != nil {
+ t.Fatalf("AppendBlob(%v) yields error: %v", step, err)
+ }
+ gotStep = rs.Advance()
+ } else {
+ var fragmentSize int64
+ // In a real application, the sequence of chunk hashes
+ // in recipe steps would be communicated back to bs[0],
+ // which then finds the associated chunks.
+ var b int
+ for b = 0; gotStep && step.Chunk != nil && fragmentSize+chunker.DefaultParam.MaxChunk < maxFragment; b++ {
+ var loc localblobstore.Location
+ if loc, err = bs[0].LookupChunk(ctx, step.Chunk); err != nil {
+ t.Fatalf("bs[0] unexpectedly does not have chunk %v", step.Chunk)
+ }
+ if br != nil && br.Name() != loc.BlobName { // Close blob if we need a different one.
+ if err = br.Close(); err != nil {
+ t.Fatalf("unexpected error in BlobReader.Close(): %v", err)
+ }
+ br = nil
+ }
+ if br == nil { // Open blob if needed.
+ if br, err = bs[0].NewBlobReader(ctx, loc.BlobName); err != nil {
+ t.Fatalf("unexpected failure to create BlobReader on %q: %v", loc.BlobName, err)
+ }
+ }
+ if loc.Size > chunker.DefaultParam.MaxChunk {
+ t.Fatalf("chunk exceeds max chunk size: %d vs %d", loc.Size, chunker.DefaultParam.MaxChunk)
+ }
+ fragmentSize += loc.Size
+ if blocks[b].Block == nil {
+ blocks[b].Block = make([]byte, chunker.DefaultParam.MaxChunk)
+ }
+ blocks[b].Block = blocks[b].Block[:loc.Size]
+ var i int
+ var n int64
+ for n = int64(0); n != loc.Size; n += int64(i) {
+ if i, err = br.ReadAt(blocks[b].Block[n:loc.Size], n+loc.Offset); err != nil && err != io.EOF {
+ t.Fatalf("ReadAt on %q failed: %v", br.Name(), err)
+ }
+ }
+ if gotStep = rs.Advance(); gotStep {
+ step = rs.Value()
+ }
+ }
+ if err = bw.AppendFragment(blocks[:b]...); err != nil {
+ t.Fatalf("AppendFragment on %q failed: %v", bw.Name(), err)
+ }
+ }
+ }
+ if err = bw.Close(); err != nil {
+ t.Fatalf("BlobWriter.Close on %q failed: %v", bw.Name(), err)
+ }
+
+ // Check that the transferred blob in bs[1] is the same as the original
+ // stream used to make the blob in bs[0].
+ checkBlobAgainstReader(t, ctx, bs[1], blob0, NewRandReader(1, totalLength, 0, io.EOF), 6)
+ checkBlobChunksAgainstReader(t, ctx, bs[1], blob0, NewRandReader(1, totalLength, 0, io.EOF), 7)
+}
+
+// checkBlobContent() checks that the named blob has the specified content.
+func checkBlobContent(t *testing.T, ctx *context.T, bs localblobstore.BlobStore, blobName string, content []byte) {
+ var err error
+ var br localblobstore.BlobReader
+ var data []byte
+ if br, err = bs.NewBlobReader(ctx, blobName); err != nil {
+ t.Fatalf("localblobstore.NewBlobReader failed: %v\n", err)
+ }
+ if data, err = ioutil.ReadAll(br); err != nil && err != io.EOF {
+ t.Fatalf("Read on br failed: %v\n", err)
+ }
+ if !bytes.Equal(data, content) {
+ t.Fatalf("Read on %q got %q, wanted %v\n", blobName, data, content)
+ }
+ if err = br.Close(); err != nil {
+ t.Fatalf("br.Close failed: %v\n", err)
+ }
+}
+
+// CreateAndResume() tests that it's possible to create a blob with
+// NewBlobWriter(), immediately close it, and then resume writing with
+// ResumeBlobWriter. This test is called out because syncbase does this, and
+// it exposed a bug in the reader code, which could not cope with a request to
+// read starting at the very end of a file, thus returning no bytes.
+func CreateAndResume(t *testing.T, ctx *context.T, bs localblobstore.BlobStore) {
+ var err error
+
+ // Create an empty, unfinalized blob.
+ var bw localblobstore.BlobWriter
+ if bw, err = bs.NewBlobWriter(ctx, ""); err != nil {
+ t.Fatalf("localblobstore.NewBlobWriter failed: %v\n", err)
+ }
+ blobName := bw.Name()
+ if err = bw.CloseWithoutFinalize(); err != nil {
+ t.Fatalf("bw.CloseWithoutFinalize failed: %v\n", verror.DebugString(err))
+ }
+
+ checkBlobContent(t, ctx, bs, blobName, nil)
+
+ // Reopen the blob, but append no bytes (an empty byte vector).
+ if bw, err = bs.ResumeBlobWriter(ctx, blobName); err != nil {
+ t.Fatalf("localblobstore.ResumeBlobWriter failed: %v\n", err)
+ }
+ if err = bw.AppendFragment(localblobstore.BlockOrFile{Block: []byte("")}); err != nil {
+ t.Fatalf("bw.AppendFragment failed: %v", err)
+ }
+ if err = bw.CloseWithoutFinalize(); err != nil {
+		t.Fatalf("bw.CloseWithoutFinalize failed: %v\n", err)
+ }
+
+ checkBlobContent(t, ctx, bs, blobName, nil)
+
+ // Reopen the blob, and append a non-empty sequence of bytes.
+ content := []byte("some content")
+ if bw, err = bs.ResumeBlobWriter(ctx, blobName); err != nil {
+		t.Fatalf("localblobstore.ResumeBlobWriter failed: %v\n", err)
+ }
+ if err = bw.AppendFragment(localblobstore.BlockOrFile{Block: content}); err != nil {
+ t.Fatalf("bw.AppendFragment failed: %v", err)
+ }
+ if err = bw.Close(); err != nil {
+ t.Fatalf("bw.Close failed: %v\n", err)
+ }
+
+ checkBlobContent(t, ctx, bs, blobName, content)
+}
diff --git a/services/syncbase/localblobstore/localblobstore_testlib/randreader.go b/services/syncbase/localblobstore/localblobstore_testlib/randreader.go
new file mode 100644
index 0000000..85e32d3
--- /dev/null
+++ b/services/syncbase/localblobstore/localblobstore_testlib/randreader.go
@@ -0,0 +1,53 @@
+// Copyright 2015 The Vanadium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package localblobstore_testlib
+
+import "math/rand"
+
+// A RandReader contains a pointer to a rand.Rand, and a size limit. A
+// *RandReader implements the Read() method from io.Reader, which yields bytes
+// obtained from the random number generator.
+type RandReader struct {
+ rand *rand.Rand // Source of random bytes.
+ pos int // Number of bytes read.
+ limit int // Max number of bytes that may be read.
+ insertInterval int // If non-zero, number of bytes between insertions of zero bytes.
+ eofErr error // error to be returned at the end of the stream
+}
+
+// NewRandReader() returns a new RandReader with the specified seed and size limit.
+// It yields eofErr when the end of the stream is reached.
+// If insertInterval is non-zero, a zero byte is inserted into the stream every
+// insertInterval bytes, before the stream resumes drawing bytes from the
+// random number generator.
+func NewRandReader(seed int64, limit int, insertInterval int, eofErr error) *RandReader {
+ r := new(RandReader)
+ r.rand = rand.New(rand.NewSource(seed))
+ r.limit = limit
+ r.insertInterval = insertInterval
+ r.eofErr = eofErr
+ return r
+}
+
+// Read() implements the io.Reader Read() method for *RandReader.
+func (r *RandReader) Read(buf []byte) (n int, err error) {
+ // Generate bytes up to the end of the stream, or the end of the buffer.
+ max := r.limit - r.pos
+ if len(buf) < max {
+ max = len(buf)
+ }
+ for ; n != max; n++ {
+ if r.insertInterval == 0 || (r.pos%r.insertInterval) != 0 {
+ buf[n] = byte(r.rand.Int31n(256))
+ } else {
+ buf[n] = 0
+ }
+ r.pos++
+ }
+ if r.pos == r.limit {
+ err = r.eofErr
+ }
+ return n, err
+}
diff --git a/services/syncbase/localblobstore/localblobstore_transfer_test.go b/services/syncbase/localblobstore/localblobstore_transfer_test.go
new file mode 100644
index 0000000..b5378ec
--- /dev/null
+++ b/services/syncbase/localblobstore/localblobstore_transfer_test.go
@@ -0,0 +1,368 @@
+// Copyright 2015 The Vanadium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// Example code for transferring a blob from one device to another.
+// See the simulateResumption constant to choose whether to simulate a full
+// transfer or a resumed one.
+package localblobstore_test
+
+import "bytes"
+import "fmt"
+import "io"
+import "io/ioutil"
+import "math/rand"
+import "os"
+
+import "v.io/syncbase/x/ref/services/syncbase/localblobstore"
+import "v.io/syncbase/x/ref/services/syncbase/localblobstore/fs_cablobstore"
+import "v.io/v23/context"
+import "v.io/x/ref/test"
+import _ "v.io/x/ref/runtime/factories/generic"
+
+// simulateResumption tells the receiver whether to simulate having a partial
+// blob before blob transfer.
+const simulateResumption = true
+
+// createBlobStore() returns a new BlobStore, and the name of the directory
+// used to implement it.
+func createBlobStore(ctx *context.T) (bs localblobstore.BlobStore, dirName string) {
+ var err error
+ if dirName, err = ioutil.TempDir("", "localblobstore_transfer_test"); err != nil {
+ panic(err)
+ }
+ if bs, err = fs_cablobstore.Create(ctx, dirName); err != nil {
+ panic(err)
+ }
+ return bs, dirName
+}
+
+// createBlob writes a blob to bs consisting of count 32kByte blocks drawn from a
+// deterministic but arbitrary random stream, starting at block offset within that stream.
+// It returns the blob's name, which is "blob" if that argument is non-empty, and chosen arbitrarily otherwise.
+// The blob is finalized iff "complete" is true.
+func createBlob(ctx *context.T, bs localblobstore.BlobStore, blob string, complete bool, offset int, count int) string {
+ var bw localblobstore.BlobWriter
+ var err error
+ if bw, err = bs.NewBlobWriter(ctx, blob); err != nil {
+ panic(err)
+ }
+ blob = bw.Name()
+ var buffer [32 * 1024]byte
+ block := localblobstore.BlockOrFile{Block: buffer[:]}
+ r := rand.New(rand.NewSource(1)) // Always seed with 1 for repeatability.
+ for i := 0; i != offset+count; i++ {
+ for b := 0; b != len(buffer); b++ {
+ buffer[b] = byte(r.Int31n(256))
+ }
+ if i >= offset {
+ if err = bw.AppendFragment(block); err != nil {
+ panic(err)
+ }
+ }
+ }
+ if complete {
+ err = bw.Close()
+ } else {
+ err = bw.CloseWithoutFinalize()
+ }
+ if err != nil {
+ panic(err)
+ }
+ return blob
+}
+
+// A channelChunkStream turns a channel of chunk hashes into a ChunkStream.
+type channelChunkStream struct {
+ channel <-chan []byte
+ ok bool
+ value []byte
+}
+
+// newChannelChunkStream returns a ChunkStream, given a channel containing the
+// relevant chunk hashes.
+func newChannelChunkStream(ch <-chan []byte) localblobstore.ChunkStream {
+ return &channelChunkStream{channel: ch, ok: true}
+}
+
+// The following are the standard ChunkStream methods.
+func (cs *channelChunkStream) Advance() bool {
+ if cs.ok {
+ cs.value, cs.ok = <-cs.channel
+ }
+ return cs.ok
+}
+func (cs *channelChunkStream) Value(buf []byte) []byte { return cs.value }
+func (cs *channelChunkStream) Err() error { return nil }
+func (cs *channelChunkStream) Cancel() {}
+
+// Example_blobTransfer() demonstrates how to transfer a blob incrementally
+// from one device's blob store to another. In this code, the communication
+// between sender and receiver is modelled with Go channels.
+func Example_blobTransfer() {
+ ctx, shutdown := test.V23Init()
+ defer shutdown()
+
+ // ----------------------------------------------
+	// Channel names end in ToSender or ToReceiver to indicate the
+	// direction in which data flows.
+ type blobData struct {
+ name string
+ size int64
+ checksum []byte
+ }
+ blobDataToReceiver := make(chan blobData) // indicate basic data for blob
+ needChunksToSender := make(chan bool) // indicate receiver does not have entire blob
+	chunkHashesToReceiver := make(chan []byte) // for initial transfer of chunk hashes
+ chunkHashesToSender := make(chan []byte) // to report which chunks receiver needs
+	chunksToReceiver := make(chan []byte) // to send the needed chunk data to receiver
+
+ sDone := make(chan bool) // closed when sender done
+ rDone := make(chan bool) // closed when receiver done
+
+ // ----------------------------------------------
+ // The sender.
+ go func(ctx *context.T,
+ blobDataToReceiver chan<- blobData,
+ needChunksToSender <-chan bool,
+ chunkHashesToReceiver chan<- []byte,
+ chunkHashesToSender <-chan []byte,
+ chunksToReceiver chan<- []byte,
+ done chan<- bool) {
+
+ defer close(done)
+ var err error
+
+ bsS, bsSDir := createBlobStore(ctx)
+ defer os.RemoveAll(bsSDir)
+
+ blob := createBlob(ctx, bsS, "", true, 0, 32) // Create a 1M blob at the sender.
+
+ // 1. Send basic blob data to receiver.
+ var br localblobstore.BlobReader
+ if br, err = bsS.NewBlobReader(ctx, blob); err != nil {
+ panic(err)
+ }
+ blobDataToReceiver <- blobData{name: blob, size: br.Size(), checksum: br.Hash()}
+ br.Close()
+ close(blobDataToReceiver)
+
+ // 3. Get indication from receiver of whether it needs blob.
+ needChunks := <-needChunksToSender
+
+ if !needChunks { // Receiver has blob; done.
+ return
+ }
+
+ // 4. Send the chunk hashes to the receiver. This proceeds concurrently
+ // with the step below.
+ go func(ctx *context.T, blob string, chunkHashesToReceiver chan<- []byte) {
+ cs := bsS.BlobChunkStream(ctx, blob)
+ for cs.Advance() {
+ chunkHashesToReceiver <- cs.Value(nil)
+ }
+ if cs.Err() != nil {
+ panic(cs.Err())
+ }
+ close(chunkHashesToReceiver)
+ }(ctx, blob, chunkHashesToReceiver)
+
+ // 7. Get needed chunk hashes from receiver, find the relevant
+ // data, and send it back to the receiver.
+ var cbr localblobstore.BlobReader // Cached read handle on most-recent-read blob, or nil
+ // Given chunk hash h from chunkHashesToSender, send chunk to chunksToReceiver.
+ for h := range chunkHashesToSender {
+ loc, err := bsS.LookupChunk(ctx, h)
+ for err == nil && (cbr == nil || cbr.Name() != loc.BlobName) {
+ if cbr != nil && cbr.Name() != loc.BlobName {
+ cbr.Close()
+ cbr = nil
+ }
+ if cbr == nil {
+ if cbr, err = bsS.NewBlobReader(ctx, loc.BlobName); err != nil {
+ bsS.GC(ctx) // A partially-deleted blob may be confusing things.
+ loc, err = bsS.LookupChunk(ctx, h)
+ }
+ }
+ }
+ var i int = 1
+ var n int64
+ buffer := make([]byte, loc.Size) // buffer for current chunk
+ for n = int64(0); n != loc.Size && i != 0 && err == nil; n += int64(i) {
+ if i, err = cbr.ReadAt(buffer[n:loc.Size], n+loc.Offset); err == io.EOF {
+ err = nil // EOF is expected
+ }
+ }
+ if n == loc.Size { // Got chunk.
+ chunksToReceiver <- buffer[:loc.Size]
+ }
+ if err != nil {
+ break
+ }
+ }
+ close(chunksToReceiver)
+ if cbr != nil {
+ cbr.Close()
+ }
+
+ }(ctx, blobDataToReceiver, needChunksToSender, chunkHashesToReceiver, chunkHashesToSender, chunksToReceiver, sDone)
+
+ // ----------------------------------------------
+ // The receiver.
+ go func(ctx *context.T,
+ blobDataToReceiver <-chan blobData,
+ needChunksToSender chan<- bool,
+ chunkHashesToReceiver <-chan []byte,
+ chunkHashesToSender chan<- []byte,
+ chunksToReceiver <-chan []byte,
+ done chan<- bool) {
+
+ defer close(done)
+ var err error
+
+ bsR, bsRDir := createBlobStore(ctx)
+ defer os.RemoveAll(bsRDir)
+
+ // 2. Receive basic blob data from sender.
+ blobInfo := <-blobDataToReceiver
+
+ if simulateResumption {
+ // Write a fraction of the (unfinalized) blob on the receiving side
+ // to check that the transfer process can resume a partial blob.
+ createBlob(ctx, bsR, blobInfo.name, false, 0, 10)
+ }
+
+		// 3. Tell sender whether the receiver already has the complete
+		// blob.
+ needChunks := true
+ var br localblobstore.BlobReader
+ if br, err = bsR.NewBlobReader(ctx, blobInfo.name); err == nil {
+ if br.IsFinalized() {
+ if len(br.Hash()) == len(blobInfo.checksum) && bytes.Compare(br.Hash(), blobInfo.checksum) != 0 {
+ panic("receiver has a finalized blob with same name but different hash")
+ }
+ needChunks = false // The receiver already has the blob.
+ }
+ br.Close()
+ }
+ needChunksToSender <- needChunks
+ close(needChunksToSender)
+
+ if !needChunks { // Receiver has blob; done.
+ return
+ }
+
+ // 5. Receive the chunk hashes from the sender, and turn them
+ // into a recipe.
+ cs := newChannelChunkStream(chunkHashesToReceiver)
+ rs := bsR.RecipeStreamFromChunkStream(ctx, cs)
+
+		// 6. The following thread sends to the sender the chunk hashes
+		// that the receiver does not have. It also makes
+ // a duplicate of the stream on the channel rsCopy. The
+ // buffering in rsCopy allows the receiver to put several
+ // chunks into a fragment.
+ rsCopy := make(chan localblobstore.RecipeStep, 100) // A buffered copy of the rs stream.
+ go func(ctx *context.T, rs localblobstore.RecipeStream, rsCopy chan<- localblobstore.RecipeStep, chunkHashesToSender chan<- []byte) {
+ for rs.Advance() {
+
+ step := rs.Value()
+ if step.Chunk != nil { // Data must be fetched from sender.
+ chunkHashesToSender <- step.Chunk
+ }
+ rsCopy <- step
+ }
+ close(chunkHashesToSender)
+ close(rsCopy)
+ }(ctx, rs, rsCopy, chunkHashesToSender)
+
+ // 8. The following thread splices the chunks from the sender
+ // (on chunksToReceiver) into the recipe stream copy
+ // (rsCopy) to generate a full recipe stream (rsFull) in
+ // which chunks are actual data, rather than just hashes.
+ rsFull := make(chan localblobstore.RecipeStep) // A recipe stream containing chunk data, not just hashes.
+ go func(ctx *context.T, rsCopy <-chan localblobstore.RecipeStep, chunksToReceiver <-chan []byte, rsFull chan<- localblobstore.RecipeStep) {
+ var ok bool
+ for step := range rsCopy {
+ if step.Chunk != nil { // Data must be fetched from sender.
+ if step.Chunk, ok = <-chunksToReceiver; !ok {
+ break
+ }
+ }
+ rsFull <- step
+ }
+ close(rsFull)
+ }(ctx, rsCopy, chunksToReceiver, rsFull)
+
+ // 9. Write the blob using the recipe.
+ var chunksTransferred int
+		const fragmentThreshold = 1024 * 1024 // Try to write on-disc fragments at least this big.
+ var ignoreBytes int64
+ var bw localblobstore.BlobWriter
+ if bw, err = bsR.ResumeBlobWriter(ctx, blobInfo.name); err != nil {
+ bw, err = bsR.NewBlobWriter(ctx, blobInfo.name)
+ } else {
+ ignoreBytes = bw.Size()
+ }
+ if err == nil {
+ var fragment []localblobstore.BlockOrFile
+ var fragmentSize int64
+ for step := range rsFull {
+ if step.Chunk == nil { // Data can be obtained from local blob.
+ if ignoreBytes >= step.Size { // Ignore chunks we already have.
+ ignoreBytes -= step.Size
+ } else {
+ err = bw.AppendBlob(step.Blob, step.Size-ignoreBytes, step.Offset+ignoreBytes)
+ ignoreBytes = 0
+ }
+				} else if ignoreBytes >= int64(len(step.Chunk)) { // Ignore chunks we already have.
+ ignoreBytes -= int64(len(step.Chunk))
+				} else { // Data is from a chunk sent by the sender.
+ chunksTransferred++
+ fragment = append(fragment, localblobstore.BlockOrFile{Block: step.Chunk[ignoreBytes:]})
+ fragmentSize += int64(len(step.Chunk)) - ignoreBytes
+ ignoreBytes = 0
+ if fragmentSize > fragmentThreshold {
+ err = bw.AppendFragment(fragment...)
+ fragment = fragment[:0]
+ fragmentSize = 0
+ }
+ }
+ if err != nil {
+ break
+ }
+ }
+ if err == nil && len(fragment) != 0 {
+ err = bw.AppendFragment(fragment...)
+ }
+ if err2 := bw.Close(); err == nil {
+ err = err2
+ }
+ if err != nil {
+ panic(err)
+ }
+ }
+
+ // 10. Verify that the blob was written correctly.
+ if br, err = bsR.NewBlobReader(ctx, blobInfo.name); err != nil {
+ panic(err)
+ }
+ if br.Size() != blobInfo.size {
+ panic("transferred blob has wrong size")
+ }
+ if len(br.Hash()) != len(blobInfo.checksum) || bytes.Compare(br.Hash(), blobInfo.checksum) != 0 {
+ panic("transferred blob has wrong checksum")
+ }
+ if err = br.Close(); err != nil {
+ panic(err)
+ }
+ fmt.Printf("%d chunks transferred\n", chunksTransferred)
+ }(ctx, blobDataToReceiver, needChunksToSender, chunkHashesToReceiver, chunkHashesToSender, chunksToReceiver, rDone)
+
+ // ----------------------------------------------
+ // Wait for sender and receiver to finish.
+ _ = <-sDone
+ _ = <-rDone
+
+ // Output: 635 chunks transferred
+}
diff --git a/services/syncbase/localblobstore/model.go b/services/syncbase/localblobstore/model.go
new file mode 100644
index 0000000..f51f455
--- /dev/null
+++ b/services/syncbase/localblobstore/model.go
@@ -0,0 +1,303 @@
+// Copyright 2015 The Vanadium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// Package localblobstore is the interface to a local blob store.
+// Implementations include fs_cablobstore.
+//
+// Expected use
+// ============
+// These examples assume that bs, bsS (sender) and bsR (receiver) are blobstores.
+//
+// Writing blobs
+// bw, err := bs.NewBlobWriter(ctx, "") // For a new blob, implementation picks blob name.
+// if err == nil {
+// blobName := bw.Name() // Get name the implementation picked.
+// ... use bw.AppendFragment() to append data to the blob...
+// ... and/or bw.AppendBlob() to append data that's in another existing blob...
+// err = bw.Close()
+// }
+//
+// Resume writing a blob that was partially written due to a crash (not yet finalized).
+// bw, err := bs.ResumeBlobWriter(ctx, name)
+// if err == nil {
+// size := bw.Size() // The store has this many bytes from the blob.
+// ... write the remaining data using bw.AppendFragment() and/or bw.AppendBlob()...
+// err = bw.Close()
+// }
+//
+// Reading blobs
+// br, err := bs.NewBlobReader(ctx, name)
+// if err == nil {
+// ... read bytes with br.ReadAt() or br.Read(), perhaps with br.Seek()...
+// err = br.Close()
+// }
+//
+// Transferring blobs from one store to another:
+// See example in localblobstore_transfer_test.go
+// Summary:
+// - The sender sends the checksum of the blob from BlobReader's Hash().
+// - The receiver checks whether it already has the blob, with the same
+// checksum.
+// - If the receiver does not have the blob, the sender sends the list of chunk
+// hashes in the blob using BlobChunkStream().
+// - The receiver uses RecipeStreamFromChunkStream() with the chunk hash stream
+// from the sender, and tells the sender the chunk hashes of the chunks it
+// needs.
+// - The sender uses LookupChunk() to find the data for each chunk the receiver
+// needs, and sends it to the receiver.
+// - The receiver applies the recipe steps, with the actual chunk data from
+// the sender and its own local data.
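+//
+// A minimal receiver-side sketch of the above exchange (error handling omitted;
+// cs is a ChunkStream of hashes received from the sender, and fetchChunk() is a
+// hypothetical call that asks the sender for the data of one chunk):
+//   bw, _ := bsR.NewBlobWriter(ctx, blobName)
+//   rs := bsR.RecipeStreamFromChunkStream(ctx, cs)
+//   for rs.Advance() {
+//     step := rs.Value()
+//     if step.Chunk == nil { // Bytes the receiver already has locally.
+//       bw.AppendBlob(step.Blob, step.Size, step.Offset)
+//     } else { // Bytes that must be fetched from the sender.
+//       bw.AppendFragment(BlockOrFile{Block: fetchChunk(step.Chunk)}) // fetchChunk() is hypothetical.
+//     }
+//   }
+//   err := bw.Close()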
+package localblobstore
+
+import "v.io/v23/context"
+
+// A BlobStore represents a simple, content-addressable store.
+type BlobStore interface {
+ // NewBlobReader() returns a pointer to a newly allocated BlobReader on
+ // the specified blobName. BlobReaders should not be used concurrently
+ // by multiple threads. Returned handles should be closed with
+ // Close().
+ NewBlobReader(ctx *context.T, blobName string) (br BlobReader, err error)
+
+ // NewBlobWriter() returns a pointer to a newly allocated BlobWriter on
+	// a newly created blob. If "name" is non-empty, it is used to name
+ // the blob, and it must be in the format of a name returned by this
+ // interface (probably by another instance on another device).
+	// Otherwise, a new name is created, which can be found using
+ // the Name() method. It is an error to attempt to overwrite a blob
+ // that already exists in this blob store. BlobWriters should not be
+ // used concurrently by multiple threads. The returned handle should
+ // be closed with either the Close() or CloseWithoutFinalize() method
+ // to avoid leaking file handles.
+ NewBlobWriter(ctx *context.T, name string) (bw BlobWriter, err error)
+
+ // ResumeBlobWriter() returns a pointer to a newly allocated BlobWriter on
+ // an old, but unfinalized blob name.
+ ResumeBlobWriter(ctx *context.T, blobName string) (bw BlobWriter, err error)
+
+ // DeleteBlob() deletes the named blob from the BlobStore.
+ DeleteBlob(ctx *context.T, blobName string) (err error)
+
+ // GC() removes old temp files and content-addressed blocks that are no
+ // longer referenced by any blob. It may be called concurrently with
+ // other calls to GC(), and with uses of BlobReaders and BlobWriters.
+ GC(ctx *context.T) error
+
+ // BlobChunkStream() returns a ChunkStream that can be used to read the
+ // ordered list of content hashes of chunks in blob blobName. It is
+ // expected that this list will be presented to
+ // RecipeStreamFromChunkStream() on another device, to create a recipe
+ // for transmitting the blob efficiently to that other device.
+ BlobChunkStream(ctx *context.T, blobName string) ChunkStream
+
+ // RecipeStreamFromChunkStream() returns a pointer to a RecipeStream
+ // that allows the client to iterate over each RecipeStep needed to
+ // create the blob formed by the chunks in chunkStream. It is expected
+ // that this will be called on a receiving device, and be given a
+ // ChunkStream from a sending device, to yield a recipe for efficient
+ // chunk transfer. RecipeStep values with non-nil Chunk fields need
+ // the chunk from the sender; once the data is returned it can be
+ // written with BlobWriter.AppendFragment(). Those with blob
+ // references can be written locally with BlobWriter.AppendBlob().
+ RecipeStreamFromChunkStream(ctx *context.T, chunkStream ChunkStream) RecipeStream
+
+ // LookupChunk() returns the location of a chunk with the specified chunk
+ // hash within the store. It is expected that chunk hashes from
+ // RecipeStep entries from RecipeStreamFromChunkStream() will be mapped
+ // to blob Location values on the sender for transmission to the
+ // receiver.
+ LookupChunk(ctx *context.T, chunkHash []byte) (loc Location, err error)
+
+ // ListBlobIds() returns an iterator that can be used to enumerate the
+ // blobs in a BlobStore. Expected use is:
+ //
+ // iter := bs.ListBlobIds(ctx)
+ // for iter.Advance() {
+ // // Process iter.Value() here.
+ // }
+ // if iter.Err() != nil {
+ // // The loop terminated early due to an error.
+ // }
+ ListBlobIds(ctx *context.T) (iter Stream)
+
+ // ListCAIds() returns an iterator that can be used to enumerate the
+ // content-addressable fragments in a BlobStore. Expected use is:
+ //
+ // iter := bs.ListCAIds(ctx)
+ // for iter.Advance() {
+ // // Process iter.Value() here.
+ // }
+ // if iter.Err() != nil {
+ // // The loop terminated early due to an error.
+ // }
+ ListCAIds(ctx *context.T) (iter Stream)
+
+ // Root() returns the name of the root directory where the BlobStore is stored.
+ Root() string
+
+ // Close() closes the BlobStore.
+ Close() error
+}
+
+// A Location describes a chunk's location within a blob. It is returned by
+// BlobStore.LookupChunk().
+type Location struct {
+ BlobName string // name of blob
+ Offset int64 // byte offset of chunk within blob
+ Size int64 // size of chunk
+}
+
+// A BlobReader allows a blob to be read using the standard ReadAt(), Read(),
+// and Seek() calls. A BlobReader can be created with NewBlobReader(), and
+// should be closed with the Close() method to avoid leaking file handles.
+type BlobReader interface {
+ // ReadAt() fills b[] with up to len(b) bytes of data starting at
+ // position "at" within the blob that the BlobReader indicates, and
+ // returns the number of bytes read.
+ ReadAt(b []byte, at int64) (n int, err error)
+
+ // Read() fills b[] with up to len(b) bytes of data starting at the
+ // current seek position of the BlobReader within the blob that the
+ // BlobReader indicates, and then both returns the number of bytes read
+ // and advances the BlobReader's seek position by that amount.
+ Read(b []byte) (n int, err error)
+
+ // Seek() sets the seek position of the BlobReader to offset if
+ // whence==0, offset+current_seek_position if whence==1, and
+ // offset+end_of_blob if whence==2, and then returns the current seek
+ // position.
+ Seek(offset int64, whence int) (result int64, err error)
+
+ // Close() indicates that the client will perform no further operations
+ // on the BlobReader. It releases any resources held by the
+ // BlobReader.
+ Close() error
+
+ // Name() returns the BlobReader's name.
+ Name() string
+
+ // Size() returns the BlobReader's size.
+ Size() int64
+
+ // IsFinalized() returns whether the BlobReader has been finalized.
+ IsFinalized() bool
+
+ // Hash() returns the BlobReader's hash. It may be nil if the blob is
+ // not finalized.
+ Hash() []byte
+}
+
+// A BlockOrFile represents a vector of bytes, and contains either a data
+// block (as a []byte), or a (file name, size, offset) triple.
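+// For example (illustrative values only), each of the following describes a
+// vector of bytes that AppendFragment() will copy into the store:
+//   BlockOrFile{Block: []byte("some bytes")}                 // bytes supplied directly
+//   BlockOrFile{FileName: "/tmp/data", Size: -1, Offset: 0}  // all bytes of an existing file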
+type BlockOrFile struct {
+ Block []byte // If FileName is empty, the bytes represented.
+ FileName string // If non-empty, the name of the file containing the bytes.
+ Size int64 // If FileName is non-empty, the number of bytes (or -1 for "all")
+ Offset int64 // If FileName is non-empty, the offset of the relevant bytes within the file.
+}
+
+// A BlobWriter allows a blob to be written. If a blob has not yet been
+// finalized, it also allows that blob to be extended. A BlobWriter may be
+// created with NewBlobWriter(), and should be closed with Close() or
+// CloseWithoutFinalize().
+type BlobWriter interface {
+ // AppendBlob() adds a (substring of a) pre-existing blob to the blob
+ // being written by the BlobWriter. The fragments of the pre-existing
+ // blob are not physically copied; they are referenced by both blobs.
+ AppendBlob(blobName string, size int64, offset int64) (err error)
+
+ // AppendFragment() appends a fragment to the blob being written by the
+ // BlobWriter, where the fragment is composed of the byte vectors
+ // described by the elements of item[]. The fragment is copied into
+ // the blob store.
+ AppendFragment(item ...BlockOrFile) (err error)
+
+ // Close() finalizes the BlobWriter, and indicates that the client will
+ // perform no further append operations on the BlobWriter. Any
+ // internal open file handles are closed.
+ Close() (err error)
+
+ // CloseWithoutFinalize() indicates that the client will perform no
+ // further append operations on the BlobWriter, but does not finalize
+ // the blob. Any internal open file handles are closed. Clients are
+ // expected to need this operation infrequently.
+ CloseWithoutFinalize() (err error)
+
+ // Name() returns the BlobWriter's name.
+ Name() string
+
+ // Size() returns the BlobWriter's size.
+ Size() int64
+
+ // IsFinalized() returns whether the BlobWriter has been finalized.
+ IsFinalized() bool
+
+ // Hash() returns the BlobWriter's hash, reflecting the bytes written so far.
+ Hash() []byte
+}
+
+// A Stream represents an iterator that allows the client to enumerate
+// all the blobs or fragments in a BlobStore.
+//
+// The interfaces Stream, ChunkStream, RecipeStream all have four calls,
+// and differ only in the Value() call.
+type Stream interface {
+ // Advance() stages an item so that it may be retrieved via Value().
+ // Returns true iff there is an item to retrieve. Advance() must be
+ // called before Value() is called. The caller is expected to read
+ // until Advance() returns false, or to call Cancel().
+ Advance() bool
+
+ // Value() returns the item that was staged by Advance(). May panic if
+ // Advance() returned false or was not called. Never blocks.
+ Value() (name string)
+
+ // Err() returns any error encountered by Advance. Never blocks.
+ Err() error
+
+ // Cancel() indicates that the client wishes to cease reading from the stream.
+ // It causes the next call to Advance() to return false. Never blocks.
+ // It may be called concurrently with other calls on the stream.
+ Cancel()
+}
+
+// A ChunkStream represents an iterator that allows the client to enumerate
+// the chunks in a blob. See the comments for Stream for usage.
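+// For example (a sketch that mirrors the test library's usage; error handling
+// elided):
+//   cs := bs.BlobChunkStream(ctx, blobName)
+//   buf := make([]byte, 8192)
+//   for cs.Advance() {
+//     chunkHash := cs.Value(buf)
+//     // ... use chunkHash ...
+//   }
+//   if cs.Err() != nil {
+//     // The stream terminated early due to an error.
+//   }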
+type ChunkStream interface {
+ Advance() bool
+
+ // Value() returns the chunkHash that was staged by Advance(). May
+ // panic if Advance() returned false or was not called. Never blocks.
+ // The result may share storage with buf[] if it is large enough;
+ // otherwise, a new buffer is allocated. It is legal to call with
+ // buf==nil.
+ Value(buf []byte) (chunkHash []byte)
+
+ Err() error
+ Cancel()
+}
+
+// A RecipeStep describes one piece of a recipe for making a blob.
+// The step consists either of appending the chunk with content hash Chunk and size Size,
+// or (if Chunk==nil) the Size bytes from Blob, starting at Offset.
+type RecipeStep struct {
+ Chunk []byte
+ Blob string
+ Size int64
+ Offset int64
+}
+
+// A RecipeStream represents an iterator that allows the client to obtain the
+// steps needed to construct a blob with a given ChunkStream, attempting to
+// reuse data in existing blobs. See the comments for Stream for usage.
+type RecipeStream interface {
+ Advance() bool
+
+ // Value() returns the RecipeStep that was staged by Advance(). May panic if
+ // Advance() returned false or was not called. Never blocks.
+ Value() RecipeStep
+
+ Err() error
+ Cancel()
+}
diff --git a/services/syncbase/server/app.go b/services/syncbase/server/app.go
new file mode 100644
index 0000000..c404f77
--- /dev/null
+++ b/services/syncbase/server/app.go
@@ -0,0 +1,292 @@
+// Copyright 2015 The Vanadium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package server
+
+import (
+ "path"
+ "sync"
+
+ wire "v.io/syncbase/v23/services/syncbase"
+ nosqlwire "v.io/syncbase/v23/services/syncbase/nosql"
+ "v.io/syncbase/x/ref/services/syncbase/server/interfaces"
+ "v.io/syncbase/x/ref/services/syncbase/server/nosql"
+ "v.io/syncbase/x/ref/services/syncbase/server/util"
+ "v.io/syncbase/x/ref/services/syncbase/store"
+ "v.io/v23/context"
+ "v.io/v23/glob"
+ "v.io/v23/rpc"
+ "v.io/v23/security/access"
+ "v.io/v23/verror"
+ "v.io/x/lib/vlog"
+)
+
+// app is a per-app singleton (i.e. not per-request) that handles App RPCs.
+type app struct {
+ name string
+ s *service
+ // The fields below are initialized iff this app exists.
+ exists bool
+ // Guards the fields below. Held during database Create, Delete, and
+ // SetPermissions.
+ mu sync.Mutex
+ dbs map[string]interfaces.Database
+}
+
+var (
+ _ wire.AppServerMethods = (*app)(nil)
+ _ interfaces.App = (*app)(nil)
+)
+
+////////////////////////////////////////
+// RPC methods
+
+// TODO(sadovsky): Require the app name to match the client's blessing name.
+// I.e. reserve names at the app level of the hierarchy.
+func (a *app) Create(ctx *context.T, call rpc.ServerCall, perms access.Permissions) error {
+ if a.exists {
+ return verror.New(verror.ErrExist, ctx, a.name)
+ }
+ // This app does not yet exist; a is just an ephemeral handle that holds
+ // {name string, s *service}. a.s.createApp will create a new app handle and
+ // store it in a.s.apps[a.name].
+ return a.s.createApp(ctx, call, a.name, perms)
+}
+
+func (a *app) Delete(ctx *context.T, call rpc.ServerCall) error {
+ return a.s.deleteApp(ctx, call, a.name)
+}
+
+func (a *app) Exists(ctx *context.T, call rpc.ServerCall) (bool, error) {
+ if !a.exists {
+ return false, nil
+ }
+ return util.ErrorToExists(util.GetWithAuth(ctx, call, a.s.st, a.stKey(), &appData{}))
+}
+
+func (a *app) SetPermissions(ctx *context.T, call rpc.ServerCall, perms access.Permissions, version string) error {
+ if !a.exists {
+ return verror.New(verror.ErrNoExist, ctx, a.name)
+ }
+ return a.s.setAppPerms(ctx, call, a.name, perms, version)
+}
+
+func (a *app) GetPermissions(ctx *context.T, call rpc.ServerCall) (perms access.Permissions, version string, err error) {
+ if !a.exists {
+ return nil, "", verror.New(verror.ErrNoExist, ctx, a.name)
+ }
+ data := &appData{}
+ if err := util.GetWithAuth(ctx, call, a.s.st, a.stKey(), data); err != nil {
+ return nil, "", err
+ }
+ return data.Perms, util.FormatVersion(data.Version), nil
+}
+
+func (a *app) GlobChildren__(ctx *context.T, call rpc.GlobChildrenServerCall, matcher *glob.Element) error {
+ if !a.exists {
+ return verror.New(verror.ErrNoExist, ctx, a.name)
+ }
+ // Check perms.
+ sn := a.s.st.NewSnapshot()
+ if err := util.GetWithAuth(ctx, call, sn, a.stKey(), &appData{}); err != nil {
+ sn.Abort()
+ return err
+ }
+ return util.Glob(ctx, call, matcher, sn, sn.Abort, util.JoinKeyParts(util.DbInfoPrefix, a.name))
+}
+
+////////////////////////////////////////
+// interfaces.App methods
+
+func (a *app) Service() interfaces.Service {
+ return a.s
+}
+
+func (a *app) NoSQLDatabase(ctx *context.T, call rpc.ServerCall, dbName string) (interfaces.Database, error) {
+ if !a.exists {
+ vlog.Fatalf("app %q does not exist", a.name)
+ }
+ a.mu.Lock()
+ defer a.mu.Unlock()
+ d, ok := a.dbs[dbName]
+ if !ok {
+ return nil, verror.New(verror.ErrNoExist, ctx, dbName)
+ }
+ return d, nil
+}
+
+func (a *app) NoSQLDatabaseNames(ctx *context.T, call rpc.ServerCall) ([]string, error) {
+ if !a.exists {
+ vlog.Fatalf("app %q does not exist", a.name)
+ }
+ // In the future this API will likely be replaced by one that streams the
+ // database names.
+ a.mu.Lock()
+ defer a.mu.Unlock()
+ dbNames := make([]string, 0, len(a.dbs))
+ for n := range a.dbs {
+ dbNames = append(dbNames, n)
+ }
+ return dbNames, nil
+}
+
+func (a *app) CreateNoSQLDatabase(ctx *context.T, call rpc.ServerCall, dbName string, perms access.Permissions, metadata *nosqlwire.SchemaMetadata) error {
+ if !a.exists {
+ vlog.Fatalf("app %q does not exist", a.name)
+ }
+ // TODO(sadovsky): Crash if any step fails, and use WAL to ensure that if we
+ // crash, upon restart we execute any remaining steps before we start handling
+ // client requests.
+ //
+ // Steps:
+ // 1. Check appData perms, create dbInfo record.
+ // 2. Initialize database.
+ // 3. Flip dbInfo.Initialized to true. <===== CHANGE BECOMES VISIBLE
+ a.mu.Lock()
+ defer a.mu.Unlock()
+ if _, ok := a.dbs[dbName]; ok {
+ // TODO(sadovsky): Should this be ErrExistOrNoAccess, for privacy?
+ return verror.New(verror.ErrExist, ctx, dbName)
+ }
+
+ // 1. Check appData perms, create dbInfo record.
+ rootDir, engine := a.rootDirForDb(dbName), a.s.opts.Engine
+ aData := &appData{}
+ if err := store.RunInTransaction(a.s.st, func(tx store.Transaction) error {
+ // Check appData perms.
+ if err := util.GetWithAuth(ctx, call, tx, a.stKey(), aData); err != nil {
+ return err
+ }
+ // Check for "database already exists".
+ if _, err := a.getDbInfo(ctx, tx, dbName); verror.ErrorID(err) != verror.ErrNoExist.ID {
+ if err != nil {
+ return err
+ }
+ // TODO(sadovsky): Should this be ErrExistOrNoAccess, for privacy?
+ return verror.New(verror.ErrExist, ctx, dbName)
+ }
+ // Write new dbInfo.
+ info := &dbInfo{
+ Name: dbName,
+ RootDir: rootDir,
+ Engine: engine,
+ }
+ return a.putDbInfo(ctx, tx, dbName, info)
+ }); err != nil {
+ return err
+ }
+
+ // 2. Initialize database.
+ if perms == nil {
+ perms = aData.Perms
+ }
+ d, err := nosql.NewDatabase(ctx, a, dbName, metadata, nosql.DatabaseOptions{
+ Perms: perms,
+ RootDir: rootDir,
+ Engine: engine,
+ })
+ if err != nil {
+ return err
+ }
+
+ // 3. Flip dbInfo.Initialized to true.
+ if err := store.RunInTransaction(a.s.st, func(tx store.Transaction) error {
+ return a.updateDbInfo(ctx, tx, dbName, func(info *dbInfo) error {
+ info.Initialized = true
+ return nil
+ })
+ }); err != nil {
+ return err
+ }
+
+ a.dbs[dbName] = d
+ return nil
+}
+
+func (a *app) DeleteNoSQLDatabase(ctx *context.T, call rpc.ServerCall, dbName string) error {
+ if !a.exists {
+ vlog.Fatalf("app %q does not exist", a.name)
+ }
+ // TODO(sadovsky): Crash if any step fails, and use WAL to ensure that if we
+ // crash, upon restart we execute any remaining steps before we start handling
+ // client requests.
+ //
+ // Steps:
+ // 1. Check databaseData perms.
+ // 2. Flip dbInfo.Deleted to true. <===== CHANGE BECOMES VISIBLE
+ // 3. Delete database.
+ // 4. Delete dbInfo record.
+ a.mu.Lock()
+ defer a.mu.Unlock()
+ d, ok := a.dbs[dbName]
+ if !ok {
+ return nil // delete is idempotent
+ }
+
+ // 1. Check databaseData perms.
+ if err := d.CheckPermsInternal(ctx, call, d.St()); err != nil {
+ if verror.ErrorID(err) == verror.ErrNoExist.ID {
+ return nil // delete is idempotent
+ }
+ return err
+ }
+
+ // 2. Flip dbInfo.Deleted to true.
+ if err := store.RunInTransaction(a.s.st, func(tx store.Transaction) error {
+ return a.updateDbInfo(ctx, tx, dbName, func(info *dbInfo) error {
+ info.Deleted = true
+ return nil
+ })
+ }); err != nil {
+ return err
+ }
+
+ // 3. Delete database.
+ if err := d.St().Close(); err != nil {
+ return err
+ }
+ if err := util.DestroyStore(a.s.opts.Engine, a.rootDirForDb(dbName)); err != nil {
+ return err
+ }
+
+ // 4. Delete dbInfo record.
+ if err := a.delDbInfo(ctx, a.s.st, dbName); err != nil {
+ return err
+ }
+
+ delete(a.dbs, dbName)
+ return nil
+}
+
+func (a *app) SetDatabasePerms(ctx *context.T, call rpc.ServerCall, dbName string, perms access.Permissions, version string) error {
+ if !a.exists {
+ vlog.Fatalf("app %q does not exist", a.name)
+ }
+ a.mu.Lock()
+ defer a.mu.Unlock()
+ d, ok := a.dbs[dbName]
+ if !ok {
+ return verror.New(verror.ErrNoExist, ctx, dbName)
+ }
+ return d.SetPermsInternal(ctx, call, perms, version)
+}
+
+func (a *app) Name() string {
+ return a.name
+}
+
+////////////////////////////////////////
+// Internal helpers
+
+func (a *app) stKey() string {
+ return util.JoinKeyParts(util.AppPrefix, a.stKeyPart())
+}
+
+func (a *app) stKeyPart() string {
+ return a.name
+}
+
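+// rootDirForDb returns the root directory for the database's storage engine,
+// i.e. <RootDir>/apps/<appName>/<dbName>.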
+func (a *app) rootDirForDb(dbName string) string {
+ return path.Join(a.s.opts.RootDir, "apps", a.name, dbName)
+}
diff --git a/services/syncbase/server/db_info.go b/services/syncbase/server/db_info.go
new file mode 100644
index 0000000..f750d57
--- /dev/null
+++ b/services/syncbase/server/db_info.go
@@ -0,0 +1,55 @@
+// Copyright 2015 The Vanadium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package server
+
+// This file defines internal app methods for manipulating dbInfo.
+// None of these methods perform authorization checks.
+//
+// These methods are needed because information about a database is spread
+// across two storage engines: the source of truth for the database's
+// existence (and for things like the database type) is the service-level
+// storage engine, while database permissions are tracked in the database's
+// own storage engine.
+
+import (
+ "v.io/syncbase/x/ref/services/syncbase/server/util"
+ "v.io/syncbase/x/ref/services/syncbase/store"
+ "v.io/v23/context"
+)
+
+func dbInfoStKey(a *app, dbName string) string {
+ return util.JoinKeyParts(util.DbInfoPrefix, a.stKeyPart(), dbName)
+}
+
+// getDbInfo reads data from the storage engine.
+func (a *app) getDbInfo(ctx *context.T, sntx store.SnapshotOrTransaction, dbName string) (*dbInfo, error) {
+ info := &dbInfo{}
+ if err := util.Get(ctx, sntx, dbInfoStKey(a, dbName), info); err != nil {
+ return nil, err
+ }
+ return info, nil
+}
+
+// putDbInfo writes data to the storage engine.
+func (a *app) putDbInfo(ctx *context.T, tx store.Transaction, dbName string, info *dbInfo) error {
+ return util.Put(ctx, tx, dbInfoStKey(a, dbName), info)
+}
+
+// delDbInfo deletes data from the storage engine.
+func (a *app) delDbInfo(ctx *context.T, stw store.StoreWriter, dbName string) error {
+ return util.Delete(ctx, stw, dbInfoStKey(a, dbName))
+}
+
+// updateDbInfo performs a read-modify-write. fn should modify the given info in place.
+func (a *app) updateDbInfo(ctx *context.T, tx store.Transaction, dbName string, fn func(info *dbInfo) error) error {
+ info, err := a.getDbInfo(ctx, tx, dbName)
+ if err != nil {
+ return err
+ }
+ if err := fn(info); err != nil {
+ return err
+ }
+ return a.putDbInfo(ctx, tx, dbName, info)
+}
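+
+// Hedged usage sketch (not referenced elsewhere in this package): flipping a
+// dbInfo flag inside a service-level transaction, mirroring the calls made
+// from app.go. The function name is illustrative.
+func exampleMarkInitialized(ctx *context.T, a *app, dbName string) error {
+ return store.RunInTransaction(a.s.st, func(tx store.Transaction) error {
+ return a.updateDbInfo(ctx, tx, dbName, func(info *dbInfo) error {
+ info.Initialized = true
+ return nil
+ })
+ })
+}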
diff --git a/services/syncbase/server/db_info_test.go b/services/syncbase/server/db_info_test.go
new file mode 100644
index 0000000..7bc0870
--- /dev/null
+++ b/services/syncbase/server/db_info_test.go
@@ -0,0 +1,25 @@
+// Copyright 2015 The Vanadium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package server
+
+import (
+ "testing"
+)
+
+func TestStKey(t *testing.T) {
+ tests := []struct {
+ appName string
+ dbName string
+ stKey string
+ }{
+ {"app1", "db1", "$dbInfo:app1:db1"},
+ }
+ for _, test := range tests {
+ got, want := dbInfoStKey(&app{name: test.appName}, test.dbName), test.stKey
+ if got != want {
+ t.Errorf("wrong stKey: got %q, want %q", got, want)
+ }
+ }
+}
diff --git a/services/syncbase/server/dispatcher.go b/services/syncbase/server/dispatcher.go
new file mode 100644
index 0000000..4b51a00
--- /dev/null
+++ b/services/syncbase/server/dispatcher.go
@@ -0,0 +1,85 @@
+// Copyright 2015 The Vanadium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package server
+
+import (
+ "strings"
+
+ wire "v.io/syncbase/v23/services/syncbase"
+ pubutil "v.io/syncbase/v23/syncbase/util"
+ "v.io/syncbase/x/ref/services/syncbase/server/interfaces"
+ "v.io/syncbase/x/ref/services/syncbase/server/nosql"
+ "v.io/syncbase/x/ref/services/syncbase/server/util"
+ "v.io/v23/context"
+ "v.io/v23/rpc"
+ "v.io/v23/security"
+ "v.io/v23/verror"
+)
+
+type dispatcher struct {
+ s *service
+}
+
+var _ rpc.Dispatcher = (*dispatcher)(nil)
+
+func NewDispatcher(s *service) *dispatcher {
+ return &dispatcher{s: s}
+}
+
+// We always return an AllowEveryone authorizer from Lookup(), and rely on our
+// RPC method implementations to perform proper authorization.
+var auth security.Authorizer = security.AllowEveryone()
+
+func (disp *dispatcher) Lookup(ctx *context.T, suffix string) (interface{}, security.Authorizer, error) {
+ suffix = strings.TrimPrefix(suffix, "/")
+ parts := strings.SplitN(suffix, "/", 2)
+
+ if len(suffix) == 0 {
+ return wire.ServiceServer(disp.s), auth, nil
+ }
+
+ if parts[0] == util.SyncbaseSuffix {
+ return interfaces.SyncServer(disp.s.sync), auth, nil
+ }
+
+ // Validate all key atoms up front, so that we can avoid doing so in all our
+ // method implementations.
+ appName := parts[0]
+ if !pubutil.ValidName(appName) {
+ return nil, nil, wire.NewErrInvalidName(nil, suffix)
+ }
+
+ aExists := false
+ var a *app
+ if aInt, err := disp.s.App(nil, nil, appName); err == nil {
+ a = aInt.(*app) // panics on failure, as desired
+ aExists = true
+ } else {
+ if verror.ErrorID(err) != verror.ErrNoExist.ID {
+ return nil, nil, err
+ } else {
+ a = &app{
+ name: appName,
+ s: disp.s,
+ }
+ }
+ }
+
+ if len(parts) == 1 {
+ return wire.AppServer(a), auth, nil
+ }
+
+ // All database, table, and row methods require the app to exist. If it
+ // doesn't, abort early.
+ if !aExists {
+ return nil, nil, verror.New(verror.ErrNoExist, nil, a.name)
+ }
+
+ // Note that the app may be deleted concurrently with downstream handling of
+ // this request. Depending on the order in which things execute, the client
+ // may not get an error, but in any case the store will ultimately end up in
+ // a consistent state.
+ return nosql.NewDispatcher(a).Lookup(ctx, parts[1])
+}
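+
+// Routing summary for Lookup. The suffixes "myapp" and "myapp/mydb/..." below
+// are hypothetical examples, not names defined in this package:
+//
+//   ""                  -> wire.ServiceServer(disp.s)
+//   util.SyncbaseSuffix -> interfaces.SyncServer(disp.s.sync)
+//   "myapp"             -> wire.AppServer(a), even if the app does not yet exist
+//   "myapp/mydb/..."    -> nosql.NewDispatcher(a).Lookup; the app must exist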
diff --git a/services/syncbase/server/interfaces/app.go b/services/syncbase/server/interfaces/app.go
new file mode 100644
index 0000000..e990b29
--- /dev/null
+++ b/services/syncbase/server/interfaces/app.go
@@ -0,0 +1,36 @@
+// Copyright 2015 The Vanadium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package interfaces
+
+import (
+ wire "v.io/syncbase/v23/services/syncbase/nosql"
+ "v.io/v23/context"
+ "v.io/v23/rpc"
+ "v.io/v23/security/access"
+)
+
+// App is an internal interface to the app layer.
+type App interface {
+ // Service returns the service handle for this app.
+ Service() Service
+
+ // NoSQLDatabase returns the Database for the specified NoSQL database.
+ NoSQLDatabase(ctx *context.T, call rpc.ServerCall, dbName string) (Database, error)
+
+ // NoSQLDatabaseNames returns the names of the NoSQL databases within the App.
+ NoSQLDatabaseNames(ctx *context.T, call rpc.ServerCall) ([]string, error)
+
+ // CreateNoSQLDatabase creates the specified NoSQL database.
+ CreateNoSQLDatabase(ctx *context.T, call rpc.ServerCall, dbName string, perms access.Permissions, metadata *wire.SchemaMetadata) error
+
+ // DeleteNoSQLDatabase deletes the specified NoSQL database.
+ DeleteNoSQLDatabase(ctx *context.T, call rpc.ServerCall, dbName string) error
+
+ // SetDatabasePerms sets the perms for the specified database.
+ SetDatabasePerms(ctx *context.T, call rpc.ServerCall, dbName string, perms access.Permissions, version string) error
+
+ // Name returns the name of this app.
+ Name() string
+}
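+
+// A minimal usage sketch of the App interface. The function name and the use
+// of nil schema metadata are illustrative only; nothing in this package
+// prescribes this exact sequence.
+func exampleDatabaseLifecycle(ctx *context.T, call rpc.ServerCall, a App, perms access.Permissions) error {
+ // Create a database, passing nil schema metadata for brevity.
+ if err := a.CreateNoSQLDatabase(ctx, call, "db1", perms, nil); err != nil {
+ return err
+ }
+ // List database names; "db1" should now be included.
+ if _, err := a.NoSQLDatabaseNames(ctx, call); err != nil {
+ return err
+ }
+ // Delete the database; the server implementation treats delete as idempotent.
+ return a.DeleteNoSQLDatabase(ctx, call, "db1")
+}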
diff --git a/services/syncbase/server/interfaces/database.go b/services/syncbase/server/interfaces/database.go
new file mode 100644
index 0000000..8be30d8
--- /dev/null
+++ b/services/syncbase/server/interfaces/database.go
@@ -0,0 +1,33 @@
+// Copyright 2015 The Vanadium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package interfaces
+
+import (
+ "v.io/syncbase/x/ref/services/syncbase/store"
+ "v.io/v23/context"
+ "v.io/v23/rpc"
+ "v.io/v23/security/access"
+)
+
+// Database is an internal interface to the database layer.
+type Database interface {
+ // St returns the storage engine instance for this database.
+ St() store.Store
+
+ // App returns the app handle for this database.
+ App() App
+
+ // CheckPermsInternal checks whether the given RPC (ctx, call) is allowed per
+ // the database perms.
+ // Designed for use from within App.DeleteNoSQLDatabase.
+ CheckPermsInternal(ctx *context.T, call rpc.ServerCall, st store.StoreReader) error
+
+ // SetPermsInternal updates the database perms.
+ // Designed for use from within App.SetDatabasePerms.
+ SetPermsInternal(ctx *context.T, call rpc.ServerCall, perms access.Permissions, version string) error
+
+ // Name returns the name of this database.
+ Name() string
+}
diff --git a/services/syncbase/server/interfaces/doc.go b/services/syncbase/server/interfaces/doc.go
new file mode 100644
index 0000000..384f2f7
--- /dev/null
+++ b/services/syncbase/server/interfaces/doc.go
@@ -0,0 +1,10 @@
+// Copyright 2015 The Vanadium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// Package interfaces defines internal interfaces for various objects in the
+// Syncbase server implementation. Defining these interfaces in a separate
+// package helps prevent import cycles: all other packages can import the
+// interfaces package, and individual modules can pass each other interfaces to
+// enable bidirectional cross-package communication.
+package interfaces
diff --git a/services/syncbase/server/interfaces/service.go b/services/syncbase/server/interfaces/service.go
new file mode 100644
index 0000000..ce665e2
--- /dev/null
+++ b/services/syncbase/server/interfaces/service.go
@@ -0,0 +1,26 @@
+// Copyright 2015 The Vanadium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package interfaces
+
+import (
+ "v.io/syncbase/x/ref/services/syncbase/store"
+ "v.io/v23/context"
+ "v.io/v23/rpc"
+)
+
+// Service is an internal interface to the service layer.
+type Service interface {
+ // St returns the storage engine instance for this service.
+ St() store.Store
+
+ // Sync returns the sync instance for this service.
+ Sync() SyncServerMethods
+
+ // App returns the App with the specified name.
+ App(ctx *context.T, call rpc.ServerCall, appName string) (App, error)
+
+ // AppNames returns the names of the Apps within the service.
+ AppNames(ctx *context.T, call rpc.ServerCall) ([]string, error)
+}
diff --git a/services/syncbase/server/interfaces/sync.vdl b/services/syncbase/server/interfaces/sync.vdl
new file mode 100644
index 0000000..b97e845
--- /dev/null
+++ b/services/syncbase/server/interfaces/sync.vdl
@@ -0,0 +1,59 @@
+// Copyright 2015 The Vanadium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package interfaces
+
+import (
+ wire "v.io/syncbase/v23/services/syncbase/nosql"
+ "v.io/v23/security/access"
+)
+
+// Sync defines methods for data exchange between Syncbases.
+// TODO(hpucha): Flesh this out further.
+type Sync interface {
+ // GetDeltas returns the responder's current generation vector and all
+ // the missing log records when compared to the initiator's generation
+ // vector. This process happens one Database at a time encompassing all
+ // the SyncGroups common to the initiator and the responder. For each
+ // Database, the initiator sends a DeltaReq. In response, the
+ // responder sends a "Start" DeltaResp record, all the missing log
+ // records, the responder's genvector, and a "Finish" DeltaResp
+ // record. The initiator parses the stream between a Start and a Finish
+ // record as the response to its DeltaReq, and then moves on to the
+ // next Database in common with this responder.
+ GetDeltas(initiator string) stream<DeltaReq, DeltaResp> error {access.Read}
+
+ // SyncGroup-related methods.
+
+ // PublishSyncGroup is typically invoked on a "central" peer to publish
+ // the SyncGroup.
+ PublishSyncGroup(sg SyncGroup) error {access.Write}
+
+ // JoinSyncGroupAtAdmin is invoked by a prospective SyncGroup member's
+ // Syncbase on a SyncGroup admin. It checks whether the requestor is
+ // allowed to join the named SyncGroup, and if so, adds the requestor to
+ // the SyncGroup.
+ JoinSyncGroupAtAdmin(sgName, joinerName string, myInfo wire.SyncGroupMemberInfo) (SyncGroup | error) {access.Read}
+
+ // BlobSync methods.
+
+ // HaveBlob verifies that the peer has the requested blob, and if
+ // present, returns its size.
+ HaveBlob(br wire.BlobRef) (int64 | error)
+
+ // FetchBlob fetches the requested blob.
+ FetchBlob(br wire.BlobRef) stream<_, []byte> error
+
+ // Methods for incremental blob transfer. The transfer starts with the
+ // receiver making a FetchBlobRecipe call to the sender for a given
+ // BlobRef. The sender, in turn, sends the chunk hashes of all the
+ // chunks that make up the requested blob (blob recipe). The receiver
+ // looks up the chunk hashes in its local blob store, and identifies the
+ // missing ones. The receiver then fetches the missing chunks using a
+ // FetchChunks call from the sender. Finally, the receiver finishes the
+ // blob fetch by combining the chunks obtained over the network with the
+ // already available local chunks as per the blob recipe.
+ FetchBlobRecipe(br wire.BlobRef) stream<_, ChunkHash> error
+ FetchChunks() stream<ChunkHash, ChunkData> error
+}
diff --git a/services/syncbase/server/interfaces/sync.vdl.go b/services/syncbase/server/interfaces/sync.vdl.go
new file mode 100644
index 0000000..9cd1383
--- /dev/null
+++ b/services/syncbase/server/interfaces/sync.vdl.go
@@ -0,0 +1,946 @@
+// Copyright 2015 The Vanadium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// This file was auto-generated by the vanadium vdl tool.
+// Source: sync.vdl
+
+package interfaces
+
+import (
+ // VDL system imports
+ "io"
+ "v.io/v23"
+ "v.io/v23/context"
+ "v.io/v23/rpc"
+ "v.io/v23/vdl"
+
+ // VDL user imports
+ "v.io/syncbase/v23/services/syncbase/nosql"
+ "v.io/v23/security/access"
+)
+
+// SyncClientMethods is the client interface
+// containing Sync methods.
+//
+// Sync defines methods for data exchange between Syncbases.
+// TODO(hpucha): Flesh this out further.
+type SyncClientMethods interface {
+ // GetDeltas returns the responder's current generation vector and all
+ // the missing log records when compared to the initiator's generation
+ // vector. This process happens one Database at a time encompassing all
+ // the SyncGroups common to the initiator and the responder. For each
+ // Database, the initiator sends a DeltaReq. In response, the
+ // responder sends a "Start" DeltaResp record, all the missing log
+ // records, the responder's genvector, and a "Finish" DeltaResp
+ // record. The initiator parses the stream between a Start and a Finish
+ // record as the response to its DeltaReq, and then moves on to the
+ // next Database in common with this responder.
+ GetDeltas(ctx *context.T, initiator string, opts ...rpc.CallOpt) (SyncGetDeltasClientCall, error)
+ // PublishSyncGroup is typically invoked on a "central" peer to publish
+ // the SyncGroup.
+ PublishSyncGroup(ctx *context.T, sg SyncGroup, opts ...rpc.CallOpt) error
+ // JoinSyncGroupAtAdmin is invoked by a prospective SyncGroup member's
+ // Syncbase on a SyncGroup admin. It checks whether the requestor is
+ // allowed to join the named SyncGroup, and if so, adds the requestor to
+ // the SyncGroup.
+ JoinSyncGroupAtAdmin(ctx *context.T, sgName string, joinerName string, myInfo nosql.SyncGroupMemberInfo, opts ...rpc.CallOpt) (SyncGroup, error)
+ // HaveBlob verifies that the peer has the requested blob, and if
+ // present, returns its size.
+ HaveBlob(ctx *context.T, br nosql.BlobRef, opts ...rpc.CallOpt) (int64, error)
+ // FetchBlob fetches the requested blob.
+ FetchBlob(ctx *context.T, br nosql.BlobRef, opts ...rpc.CallOpt) (SyncFetchBlobClientCall, error)
+ // Methods for incremental blob transfer. The transfer starts with the
+ // receiver making a FetchBlobRecipe call to the sender for a given
+ // BlobRef. The sender, in turn, sends the chunk hashes of all the
+ // chunks that make up the requested blob (blob recipe). The receiver
+ // looks up the chunk hashes in its local blob store, and identifies the
+ // missing ones. The receiver then fetches the missing chunks using a
+ // FetchChunks call from the sender. Finally, the receiver finishes the
+ // blob fetch by combining the chunks obtained over the network with the
+ // already available local chunks as per the blob recipe.
+ FetchBlobRecipe(ctx *context.T, br nosql.BlobRef, opts ...rpc.CallOpt) (SyncFetchBlobRecipeClientCall, error)
+ FetchChunks(*context.T, ...rpc.CallOpt) (SyncFetchChunksClientCall, error)
+}
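+
+// Hedged client-side sketch of the GetDeltas exchange described above. The
+// function name, the way DeltaReq values are obtained, and the handling of
+// DeltaResp values are all illustrative.
+func exampleGetDeltas(ctx *context.T, name, initiator string, reqs []DeltaReq) error {
+ call, err := SyncClient(name).GetDeltas(ctx, initiator)
+ if err != nil {
+ return err
+ }
+ // Send one DeltaReq per Database of interest, then close the send side.
+ for _, req := range reqs {
+ if err := call.SendStream().Send(req); err != nil {
+ return err
+ }
+ }
+ if err := call.SendStream().Close(); err != nil {
+ return err
+ }
+ // Drain the response stream. A real initiator would interpret the "Start"
+ // and "Finish" DeltaResp records per Database; that handling is elided.
+ for call.RecvStream().Advance() {
+ _ = call.RecvStream().Value()
+ }
+ if err := call.RecvStream().Err(); err != nil {
+ return err
+ }
+ return call.Finish()
+}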
+
+// SyncClientStub adds universal methods to SyncClientMethods.
+type SyncClientStub interface {
+ SyncClientMethods
+ rpc.UniversalServiceMethods
+}
+
+// SyncClient returns a client stub for Sync.
+func SyncClient(name string) SyncClientStub {
+ return implSyncClientStub{name}
+}
+
+type implSyncClientStub struct {
+ name string
+}
+
+func (c implSyncClientStub) GetDeltas(ctx *context.T, i0 string, opts ...rpc.CallOpt) (ocall SyncGetDeltasClientCall, err error) {
+ var call rpc.ClientCall
+ if call, err = v23.GetClient(ctx).StartCall(ctx, c.name, "GetDeltas", []interface{}{i0}, opts...); err != nil {
+ return
+ }
+ ocall = &implSyncGetDeltasClientCall{ClientCall: call}
+ return
+}
+
+func (c implSyncClientStub) PublishSyncGroup(ctx *context.T, i0 SyncGroup, opts ...rpc.CallOpt) (err error) {
+ err = v23.GetClient(ctx).Call(ctx, c.name, "PublishSyncGroup", []interface{}{i0}, nil, opts...)
+ return
+}
+
+func (c implSyncClientStub) JoinSyncGroupAtAdmin(ctx *context.T, i0 string, i1 string, i2 nosql.SyncGroupMemberInfo, opts ...rpc.CallOpt) (o0 SyncGroup, err error) {
+ err = v23.GetClient(ctx).Call(ctx, c.name, "JoinSyncGroupAtAdmin", []interface{}{i0, i1, i2}, []interface{}{&o0}, opts...)
+ return
+}
+
+func (c implSyncClientStub) HaveBlob(ctx *context.T, i0 nosql.BlobRef, opts ...rpc.CallOpt) (o0 int64, err error) {
+ err = v23.GetClient(ctx).Call(ctx, c.name, "HaveBlob", []interface{}{i0}, []interface{}{&o0}, opts...)
+ return
+}
+
+func (c implSyncClientStub) FetchBlob(ctx *context.T, i0 nosql.BlobRef, opts ...rpc.CallOpt) (ocall SyncFetchBlobClientCall, err error) {
+ var call rpc.ClientCall
+ if call, err = v23.GetClient(ctx).StartCall(ctx, c.name, "FetchBlob", []interface{}{i0}, opts...); err != nil {
+ return
+ }
+ ocall = &implSyncFetchBlobClientCall{ClientCall: call}
+ return
+}
+
+func (c implSyncClientStub) FetchBlobRecipe(ctx *context.T, i0 nosql.BlobRef, opts ...rpc.CallOpt) (ocall SyncFetchBlobRecipeClientCall, err error) {
+ var call rpc.ClientCall
+ if call, err = v23.GetClient(ctx).StartCall(ctx, c.name, "FetchBlobRecipe", []interface{}{i0}, opts...); err != nil {
+ return
+ }
+ ocall = &implSyncFetchBlobRecipeClientCall{ClientCall: call}
+ return
+}
+
+func (c implSyncClientStub) FetchChunks(ctx *context.T, opts ...rpc.CallOpt) (ocall SyncFetchChunksClientCall, err error) {
+ var call rpc.ClientCall
+ if call, err = v23.GetClient(ctx).StartCall(ctx, c.name, "FetchChunks", nil, opts...); err != nil {
+ return
+ }
+ ocall = &implSyncFetchChunksClientCall{ClientCall: call}
+ return
+}
+
+// SyncGetDeltasClientStream is the client stream for Sync.GetDeltas.
+type SyncGetDeltasClientStream interface {
+ // RecvStream returns the receiver side of the Sync.GetDeltas client stream.
+ RecvStream() interface {
+ // Advance stages an item so that it may be retrieved via Value. Returns
+ // true iff there is an item to retrieve. Advance must be called before
+ // Value is called. May block if an item is not available.
+ Advance() bool
+ // Value returns the item that was staged by Advance. May panic if Advance
+ // returned false or was not called. Never blocks.
+ Value() DeltaResp
+ // Err returns any error encountered by Advance. Never blocks.
+ Err() error
+ }
+ // SendStream returns the send side of the Sync.GetDeltas client stream.
+ SendStream() interface {
+ // Send places the item onto the output stream. Returns errors
+ // encountered while sending, or if Send is called after Close or
+ // the stream has been canceled. Blocks if there is no buffer
+ // space; will unblock when buffer space is available or after
+ // the stream has been canceled.
+ Send(item DeltaReq) error
+ // Close indicates to the server that no more items will be sent;
+ // server Recv calls will receive io.EOF after all sent items.
+ // This is an optional call - e.g. a client might call Close if it
+ // needs to continue receiving items from the server after it's
+ // done sending. Returns errors encountered while closing, or if
+ // Close is called after the stream has been canceled. Like Send,
+ // blocks if there is no buffer space available.
+ Close() error
+ }
+}
+
+// SyncGetDeltasClientCall represents the call returned from Sync.GetDeltas.
+type SyncGetDeltasClientCall interface {
+ SyncGetDeltasClientStream
+ // Finish performs the equivalent of SendStream().Close, then blocks until
+ // the server is done, and returns the positional return values for the call.
+ //
+ // Finish returns immediately if the call has been canceled; depending on the
+ // timing the output could either be an error signaling cancelation, or the
+ // valid positional return values from the server.
+ //
+ // Calling Finish is mandatory for releasing stream resources, unless the call
+ // has been canceled or any of the other methods return an error. Finish should
+ // be called at most once.
+ Finish() error
+}
+
+type implSyncGetDeltasClientCall struct {
+ rpc.ClientCall
+ valRecv DeltaResp
+ errRecv error
+}
+
+func (c *implSyncGetDeltasClientCall) RecvStream() interface {
+ Advance() bool
+ Value() DeltaResp
+ Err() error
+} {
+ return implSyncGetDeltasClientCallRecv{c}
+}
+
+type implSyncGetDeltasClientCallRecv struct {
+ c *implSyncGetDeltasClientCall
+}
+
+func (c implSyncGetDeltasClientCallRecv) Advance() bool {
+ c.c.errRecv = c.c.Recv(&c.c.valRecv)
+ return c.c.errRecv == nil
+}
+func (c implSyncGetDeltasClientCallRecv) Value() DeltaResp {
+ return c.c.valRecv
+}
+func (c implSyncGetDeltasClientCallRecv) Err() error {
+ if c.c.errRecv == io.EOF {
+ return nil
+ }
+ return c.c.errRecv
+}
+func (c *implSyncGetDeltasClientCall) SendStream() interface {
+ Send(item DeltaReq) error
+ Close() error
+} {
+ return implSyncGetDeltasClientCallSend{c}
+}
+
+type implSyncGetDeltasClientCallSend struct {
+ c *implSyncGetDeltasClientCall
+}
+
+func (c implSyncGetDeltasClientCallSend) Send(item DeltaReq) error {
+ return c.c.Send(item)
+}
+func (c implSyncGetDeltasClientCallSend) Close() error {
+ return c.c.CloseSend()
+}
+func (c *implSyncGetDeltasClientCall) Finish() (err error) {
+ err = c.ClientCall.Finish()
+ return
+}
+
+// SyncFetchBlobClientStream is the client stream for Sync.FetchBlob.
+type SyncFetchBlobClientStream interface {
+ // RecvStream returns the receiver side of the Sync.FetchBlob client stream.
+ RecvStream() interface {
+ // Advance stages an item so that it may be retrieved via Value. Returns
+ // true iff there is an item to retrieve. Advance must be called before
+ // Value is called. May block if an item is not available.
+ Advance() bool
+ // Value returns the item that was staged by Advance. May panic if Advance
+ // returned false or was not called. Never blocks.
+ Value() []byte
+ // Err returns any error encountered by Advance. Never blocks.
+ Err() error
+ }
+}
+
+// SyncFetchBlobClientCall represents the call returned from Sync.FetchBlob.
+type SyncFetchBlobClientCall interface {
+ SyncFetchBlobClientStream
+ // Finish blocks until the server is done, and returns the positional return
+ // values for call.
+ //
+ // Finish returns immediately if the call has been canceled; depending on the
+ // timing the output could either be an error signaling cancelation, or the
+ // valid positional return values from the server.
+ //
+ // Calling Finish is mandatory for releasing stream resources, unless the call
+ // has been canceled or any of the other methods return an error. Finish should
+ // be called at most once.
+ Finish() error
+}
+
+type implSyncFetchBlobClientCall struct {
+ rpc.ClientCall
+ valRecv []byte
+ errRecv error
+}
+
+func (c *implSyncFetchBlobClientCall) RecvStream() interface {
+ Advance() bool
+ Value() []byte
+ Err() error
+} {
+ return implSyncFetchBlobClientCallRecv{c}
+}
+
+type implSyncFetchBlobClientCallRecv struct {
+ c *implSyncFetchBlobClientCall
+}
+
+func (c implSyncFetchBlobClientCallRecv) Advance() bool {
+ c.c.errRecv = c.c.Recv(&c.c.valRecv)
+ return c.c.errRecv == nil
+}
+func (c implSyncFetchBlobClientCallRecv) Value() []byte {
+ return c.c.valRecv
+}
+func (c implSyncFetchBlobClientCallRecv) Err() error {
+ if c.c.errRecv == io.EOF {
+ return nil
+ }
+ return c.c.errRecv
+}
+func (c *implSyncFetchBlobClientCall) Finish() (err error) {
+ err = c.ClientCall.Finish()
+ return
+}
+
+// SyncFetchBlobRecipeClientStream is the client stream for Sync.FetchBlobRecipe.
+type SyncFetchBlobRecipeClientStream interface {
+ // RecvStream returns the receiver side of the Sync.FetchBlobRecipe client stream.
+ RecvStream() interface {
+ // Advance stages an item so that it may be retrieved via Value. Returns
+ // true iff there is an item to retrieve. Advance must be called before
+ // Value is called. May block if an item is not available.
+ Advance() bool
+ // Value returns the item that was staged by Advance. May panic if Advance
+ // returned false or was not called. Never blocks.
+ Value() ChunkHash
+ // Err returns any error encountered by Advance. Never blocks.
+ Err() error
+ }
+}
+
+// SyncFetchBlobRecipeClientCall represents the call returned from Sync.FetchBlobRecipe.
+type SyncFetchBlobRecipeClientCall interface {
+ SyncFetchBlobRecipeClientStream
+ // Finish blocks until the server is done, and returns the positional return
+ // values for call.
+ //
+ // Finish returns immediately if the call has been canceled; depending on the
+ // timing the output could either be an error signaling cancelation, or the
+ // valid positional return values from the server.
+ //
+ // Calling Finish is mandatory for releasing stream resources, unless the call
+ // has been canceled or any of the other methods return an error. Finish should
+ // be called at most once.
+ Finish() error
+}
+
+type implSyncFetchBlobRecipeClientCall struct {
+ rpc.ClientCall
+ valRecv ChunkHash
+ errRecv error
+}
+
+func (c *implSyncFetchBlobRecipeClientCall) RecvStream() interface {
+ Advance() bool
+ Value() ChunkHash
+ Err() error
+} {
+ return implSyncFetchBlobRecipeClientCallRecv{c}
+}
+
+type implSyncFetchBlobRecipeClientCallRecv struct {
+ c *implSyncFetchBlobRecipeClientCall
+}
+
+func (c implSyncFetchBlobRecipeClientCallRecv) Advance() bool {
+ c.c.valRecv = ChunkHash{}
+ c.c.errRecv = c.c.Recv(&c.c.valRecv)
+ return c.c.errRecv == nil
+}
+func (c implSyncFetchBlobRecipeClientCallRecv) Value() ChunkHash {
+ return c.c.valRecv
+}
+func (c implSyncFetchBlobRecipeClientCallRecv) Err() error {
+ if c.c.errRecv == io.EOF {
+ return nil
+ }
+ return c.c.errRecv
+}
+func (c *implSyncFetchBlobRecipeClientCall) Finish() (err error) {
+ err = c.ClientCall.Finish()
+ return
+}
+
+// SyncFetchChunksClientStream is the client stream for Sync.FetchChunks.
+type SyncFetchChunksClientStream interface {
+ // RecvStream returns the receiver side of the Sync.FetchChunks client stream.
+ RecvStream() interface {
+ // Advance stages an item so that it may be retrieved via Value. Returns
+ // true iff there is an item to retrieve. Advance must be called before
+ // Value is called. May block if an item is not available.
+ Advance() bool
+ // Value returns the item that was staged by Advance. May panic if Advance
+ // returned false or was not called. Never blocks.
+ Value() ChunkData
+ // Err returns any error encountered by Advance. Never blocks.
+ Err() error
+ }
+ // SendStream returns the send side of the Sync.FetchChunks client stream.
+ SendStream() interface {
+ // Send places the item onto the output stream. Returns errors
+ // encountered while sending, or if Send is called after Close or
+ // the stream has been canceled. Blocks if there is no buffer
+ // space; will unblock when buffer space is available or after
+ // the stream has been canceled.
+ Send(item ChunkHash) error
+ // Close indicates to the server that no more items will be sent;
+ // server Recv calls will receive io.EOF after all sent items.
+ // This is an optional call - e.g. a client might call Close if it
+ // needs to continue receiving items from the server after it's
+ // done sending. Returns errors encountered while closing, or if
+ // Close is called after the stream has been canceled. Like Send,
+ // blocks if there is no buffer space available.
+ Close() error
+ }
+}
+
+// SyncFetchChunksClientCall represents the call returned from Sync.FetchChunks.
+type SyncFetchChunksClientCall interface {
+ SyncFetchChunksClientStream
+ // Finish performs the equivalent of SendStream().Close, then blocks until
+ // the server is done, and returns the positional return values for the call.
+ //
+ // Finish returns immediately if the call has been canceled; depending on the
+ // timing the output could either be an error signaling cancelation, or the
+ // valid positional return values from the server.
+ //
+ // Calling Finish is mandatory for releasing stream resources, unless the call
+ // has been canceled or any of the other methods return an error. Finish should
+ // be called at most once.
+ Finish() error
+}
+
+type implSyncFetchChunksClientCall struct {
+ rpc.ClientCall
+ valRecv ChunkData
+ errRecv error
+}
+
+func (c *implSyncFetchChunksClientCall) RecvStream() interface {
+ Advance() bool
+ Value() ChunkData
+ Err() error
+} {
+ return implSyncFetchChunksClientCallRecv{c}
+}
+
+type implSyncFetchChunksClientCallRecv struct {
+ c *implSyncFetchChunksClientCall
+}
+
+func (c implSyncFetchChunksClientCallRecv) Advance() bool {
+ c.c.valRecv = ChunkData{}
+ c.c.errRecv = c.c.Recv(&c.c.valRecv)
+ return c.c.errRecv == nil
+}
+func (c implSyncFetchChunksClientCallRecv) Value() ChunkData {
+ return c.c.valRecv
+}
+func (c implSyncFetchChunksClientCallRecv) Err() error {
+ if c.c.errRecv == io.EOF {
+ return nil
+ }
+ return c.c.errRecv
+}
+func (c *implSyncFetchChunksClientCall) SendStream() interface {
+ Send(item ChunkHash) error
+ Close() error
+} {
+ return implSyncFetchChunksClientCallSend{c}
+}
+
+type implSyncFetchChunksClientCallSend struct {
+ c *implSyncFetchChunksClientCall
+}
+
+func (c implSyncFetchChunksClientCallSend) Send(item ChunkHash) error {
+ return c.c.Send(item)
+}
+func (c implSyncFetchChunksClientCallSend) Close() error {
+ return c.c.CloseSend()
+}
+func (c *implSyncFetchChunksClientCall) Finish() (err error) {
+ err = c.ClientCall.Finish()
+ return
+}
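+
+// Hedged receiver-side sketch of the incremental blob transfer described in
+// the FetchBlobRecipe and FetchChunks comments. The function name and the
+// local chunk lookup are illustrative; error handling is abbreviated.
+func exampleBlobTransfer(ctx *context.T, name string, br nosql.BlobRef) error {
+ // 1. Ask the sender for the blob recipe (the chunk hashes making up the blob).
+ recipeCall, err := SyncClient(name).FetchBlobRecipe(ctx, br)
+ if err != nil {
+ return err
+ }
+ var missing []ChunkHash
+ for recipeCall.RecvStream().Advance() {
+ // A real implementation would consult the local blob store here and
+ // only record the chunks that are not already available.
+ missing = append(missing, recipeCall.RecvStream().Value())
+ }
+ if err := recipeCall.RecvStream().Err(); err != nil {
+ return err
+ }
+ if err := recipeCall.Finish(); err != nil {
+ return err
+ }
+ // 2. Fetch the missing chunks and combine them with local chunks per the recipe.
+ chunksCall, err := SyncClient(name).FetchChunks(ctx)
+ if err != nil {
+ return err
+ }
+ for _, h := range missing {
+ if err := chunksCall.SendStream().Send(h); err != nil {
+ return err
+ }
+ }
+ if err := chunksCall.SendStream().Close(); err != nil {
+ return err
+ }
+ for chunksCall.RecvStream().Advance() {
+ _ = chunksCall.RecvStream().Value() // storing ChunkData locally is elided
+ }
+ if err := chunksCall.RecvStream().Err(); err != nil {
+ return err
+ }
+ return chunksCall.Finish()
+}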
+
+// SyncServerMethods is the interface a server writer
+// implements for Sync.
+//
+// Sync defines methods for data exchange between Syncbases.
+// TODO(hpucha): Flesh this out further.
+type SyncServerMethods interface {
+ // GetDeltas returns the responder's current generation vector and all
+ // the missing log records when compared to the initiator's generation
+ // vector. This process happens one Database at a time encompassing all
+ // the SyncGroups common to the initiator and the responder. For each
+ // Database, the initiator sends a DeltaReq. In response, the
+ // responder sends a "Start" DeltaResp record, all the missing log
+ // records, the responder's genvector, and a "Finish" DeltaResp
+ // record. The initiator parses the stream between a Start and a Finish
+ // record as the response to its DeltaReq, and then moves on to the
+ // next Database in common with this responder.
+ GetDeltas(ctx *context.T, call SyncGetDeltasServerCall, initiator string) error
+ // PublishSyncGroup is typically invoked on a "central" peer to publish
+ // the SyncGroup.
+ PublishSyncGroup(ctx *context.T, call rpc.ServerCall, sg SyncGroup) error
+ // JoinSyncGroupAtAdmin is invoked by a prospective SyncGroup member's
+ // Syncbase on a SyncGroup admin. It checks whether the requestor is
+ // allowed to join the named SyncGroup, and if so, adds the requestor to
+ // the SyncGroup.
+ JoinSyncGroupAtAdmin(ctx *context.T, call rpc.ServerCall, sgName string, joinerName string, myInfo nosql.SyncGroupMemberInfo) (SyncGroup, error)
+ // HaveBlob verifies that the peer has the requested blob, and if
+ // present, returns its size.
+ HaveBlob(ctx *context.T, call rpc.ServerCall, br nosql.BlobRef) (int64, error)
+ // FetchBlob fetches the requested blob.
+ FetchBlob(ctx *context.T, call SyncFetchBlobServerCall, br nosql.BlobRef) error
+ // Methods for incremental blob transfer. The transfer starts with the
+ // receiver making a FetchBlobRecipe call to the sender for a given
+ // BlobRef. The sender, in turn, sends the chunk hashes of all the
+ // chunks that make up the requested blob (blob recipe). The receiver
+ // looks up the chunk hashes in its local blob store, and identifies the
+ // missing ones. The receiver then fetches the missing chunks using a
+ // FetchChunks call from the sender. Finally, the receiver finishes the
+ // blob fetch by combining the chunks obtained over the network with the
+ // already available local chunks as per the blob recipe.
+ FetchBlobRecipe(ctx *context.T, call SyncFetchBlobRecipeServerCall, br nosql.BlobRef) error
+ FetchChunks(*context.T, SyncFetchChunksServerCall) error
+}
+
+// SyncServerStubMethods is the server interface containing
+// Sync methods, as expected by rpc.Server.
+// The only difference between this interface and SyncServerMethods
+// is the streaming methods.
+type SyncServerStubMethods interface {
+ // GetDeltas returns the responder's current generation vector and all
+ // the missing log records when compared to the initiator's generation
+ // vector. This process happens one Database at a time encompassing all
+ // the SyncGroups common to the initiator and the responder. For each
+ // Database, the initiator sends a DeltaReq. In response, the
+ // responder sends a "Start" DeltaResp record, all the missing log
+ // records, the responder's genvector, and a "Finish" DeltaResp
+ // record. The initiator parses the stream between a Start and a Finish
+ // record as the response to its DeltaReq, and then moves on to the
+ // next Database in common with this responder.
+ GetDeltas(ctx *context.T, call *SyncGetDeltasServerCallStub, initiator string) error
+ // PublishSyncGroup is typically invoked on a "central" peer to publish
+ // the SyncGroup.
+ PublishSyncGroup(ctx *context.T, call rpc.ServerCall, sg SyncGroup) error
+ // JoinSyncGroupAtAdmin is invoked by a prospective SyncGroup member's
+ // Syncbase on a SyncGroup admin. It checks whether the requestor is
+ // allowed to join the named SyncGroup, and if so, adds the requestor to
+ // the SyncGroup.
+ JoinSyncGroupAtAdmin(ctx *context.T, call rpc.ServerCall, sgName string, joinerName string, myInfo nosql.SyncGroupMemberInfo) (SyncGroup, error)
+ // HaveBlob verifies that the peer has the requested blob, and if
+ // present, returns its size.
+ HaveBlob(ctx *context.T, call rpc.ServerCall, br nosql.BlobRef) (int64, error)
+ // FetchBlob fetches the requested blob.
+ FetchBlob(ctx *context.T, call *SyncFetchBlobServerCallStub, br nosql.BlobRef) error
+ // Methods for incremental blob transfer. The transfer starts with the
+ // receiver making a FetchBlobRecipe call to the sender for a given
+ // BlobRef. The sender, in turn, sends the chunk hashes of all the
+ // chunks that make up the requested blob (blob recipe). The receiver
+ // looks up the chunk hashes in its local blob store, and identifies the
+ // missing ones. The receiver then fetches the missing chunks using a
+ // FetchChunks call from the sender. Finally, the receiver finishes the
+ // blob fetch by combining the chunks obtained over the network with the
+ // already available local chunks as per the blob recipe.
+ FetchBlobRecipe(ctx *context.T, call *SyncFetchBlobRecipeServerCallStub, br nosql.BlobRef) error
+ FetchChunks(*context.T, *SyncFetchChunksServerCallStub) error
+}
+
+// SyncServerStub adds universal methods to SyncServerStubMethods.
+type SyncServerStub interface {
+ SyncServerStubMethods
+ // Describe the Sync interfaces.
+ Describe__() []rpc.InterfaceDesc
+}
+
+// SyncServer returns a server stub for Sync.
+// It converts an implementation of SyncServerMethods into
+// an object that may be used by rpc.Server.
+func SyncServer(impl SyncServerMethods) SyncServerStub {
+ stub := implSyncServerStub{
+ impl: impl,
+ }
+ // Initialize GlobState; always check the stub itself first, to handle the
+ // case where the user has the Glob method defined in their VDL source.
+ if gs := rpc.NewGlobState(stub); gs != nil {
+ stub.gs = gs
+ } else if gs := rpc.NewGlobState(impl); gs != nil {
+ stub.gs = gs
+ }
+ return stub
+}
+
+type implSyncServerStub struct {
+ impl SyncServerMethods
+ gs *rpc.GlobState
+}
+
+func (s implSyncServerStub) GetDeltas(ctx *context.T, call *SyncGetDeltasServerCallStub, i0 string) error {
+ return s.impl.GetDeltas(ctx, call, i0)
+}
+
+func (s implSyncServerStub) PublishSyncGroup(ctx *context.T, call rpc.ServerCall, i0 SyncGroup) error {
+ return s.impl.PublishSyncGroup(ctx, call, i0)
+}
+
+func (s implSyncServerStub) JoinSyncGroupAtAdmin(ctx *context.T, call rpc.ServerCall, i0 string, i1 string, i2 nosql.SyncGroupMemberInfo) (SyncGroup, error) {
+ return s.impl.JoinSyncGroupAtAdmin(ctx, call, i0, i1, i2)
+}
+
+func (s implSyncServerStub) HaveBlob(ctx *context.T, call rpc.ServerCall, i0 nosql.BlobRef) (int64, error) {
+ return s.impl.HaveBlob(ctx, call, i0)
+}
+
+func (s implSyncServerStub) FetchBlob(ctx *context.T, call *SyncFetchBlobServerCallStub, i0 nosql.BlobRef) error {
+ return s.impl.FetchBlob(ctx, call, i0)
+}
+
+func (s implSyncServerStub) FetchBlobRecipe(ctx *context.T, call *SyncFetchBlobRecipeServerCallStub, i0 nosql.BlobRef) error {
+ return s.impl.FetchBlobRecipe(ctx, call, i0)
+}
+
+func (s implSyncServerStub) FetchChunks(ctx *context.T, call *SyncFetchChunksServerCallStub) error {
+ return s.impl.FetchChunks(ctx, call)
+}
+
+func (s implSyncServerStub) Globber() *rpc.GlobState {
+ return s.gs
+}
+
+func (s implSyncServerStub) Describe__() []rpc.InterfaceDesc {
+ return []rpc.InterfaceDesc{SyncDesc}
+}
+
+// SyncDesc describes the Sync interface.
+var SyncDesc rpc.InterfaceDesc = descSync
+
+// descSync hides the desc to keep godoc clean.
+var descSync = rpc.InterfaceDesc{
+ Name: "Sync",
+ PkgPath: "v.io/syncbase/x/ref/services/syncbase/server/interfaces",
+ Doc: "// Sync defines methods for data exchange between Syncbases.\n// TODO(hpucha): Flesh this out further.",
+ Methods: []rpc.MethodDesc{
+ {
+ Name: "GetDeltas",
+ Doc: "// GetDeltas returns the responder's current generation vector and all\n// the missing log records when compared to the initiator's generation\n// vector. This process happens one Database at a time encompassing all\n// the SyncGroups common to the initiator and the responder. For each\n// Database, the initiator sends a DeltaReq. In response, the\n// responder sends a \"Start\" DeltaResp record, all the missing log\n// records, the responder's genvector, and a \"Finish\" DeltaResp\n// record. The initiator parses the stream between a Start and a Finish\n// record as the response to its DeltaReq, and then moves on to the\n// next Database in common with this responder.",
+ InArgs: []rpc.ArgDesc{
+ {"initiator", ``}, // string
+ },
+ Tags: []*vdl.Value{vdl.ValueOf(access.Tag("Read"))},
+ },
+ {
+ Name: "PublishSyncGroup",
+ Doc: "// PublishSyncGroup is typically invoked on a \"central\" peer to publish\n// the SyncGroup.",
+ InArgs: []rpc.ArgDesc{
+ {"sg", ``}, // SyncGroup
+ },
+ Tags: []*vdl.Value{vdl.ValueOf(access.Tag("Write"))},
+ },
+ {
+ Name: "JoinSyncGroupAtAdmin",
+ Doc: "// JoinSyncGroupAtAdmin is invoked by a prospective SyncGroup member's\n// Syncbase on a SyncGroup admin. It checks whether the requestor is\n// allowed to join the named SyncGroup, and if so, adds the requestor to\n// the SyncGroup.",
+ InArgs: []rpc.ArgDesc{
+ {"sgName", ``}, // string
+ {"joinerName", ``}, // string
+ {"myInfo", ``}, // nosql.SyncGroupMemberInfo
+ },
+ OutArgs: []rpc.ArgDesc{
+ {"", ``}, // SyncGroup
+ },
+ Tags: []*vdl.Value{vdl.ValueOf(access.Tag("Read"))},
+ },
+ {
+ Name: "HaveBlob",
+ Doc: "// HaveBlob verifies that the peer has the requested blob, and if\n// present, returns its size.",
+ InArgs: []rpc.ArgDesc{
+ {"br", ``}, // nosql.BlobRef
+ },
+ OutArgs: []rpc.ArgDesc{
+ {"", ``}, // int64
+ },
+ },
+ {
+ Name: "FetchBlob",
+ Doc: "// FetchBlob fetches the requested blob.",
+ InArgs: []rpc.ArgDesc{
+ {"br", ``}, // nosql.BlobRef
+ },
+ },
+ {
+ Name: "FetchBlobRecipe",
+ Doc: "// Methods for incremental blob transfer. The transfer starts with the\n// receiver making a FetchBlobRecipe call to the sender for a given\n// BlobRef. The sender, in turn, sends the chunk hashes of all the\n// chunks that make up the requested blob (blob recipe). The receiver\n// looks up the chunk hashes in its local blob store, and identifies the\n// missing ones. The receiver then fetches the missing chunks using a\n// FetchChunks call from the sender. Finally, the receiver finishes the\n// blob fetch by combining the chunks obtained over the network with the\n// already available local chunks as per the blob recipe.",
+ InArgs: []rpc.ArgDesc{
+ {"br", ``}, // nosql.BlobRef
+ },
+ },
+ {
+ Name: "FetchChunks",
+ },
+ },
+}
+
+// SyncGetDeltasServerStream is the server stream for Sync.GetDeltas.
+type SyncGetDeltasServerStream interface {
+ // RecvStream returns the receiver side of the Sync.GetDeltas server stream.
+ RecvStream() interface {
+ // Advance stages an item so that it may be retrieved via Value. Returns
+ // true iff there is an item to retrieve. Advance must be called before
+ // Value is called. May block if an item is not available.
+ Advance() bool
+ // Value returns the item that was staged by Advance. May panic if Advance
+ // returned false or was not called. Never blocks.
+ Value() DeltaReq
+ // Err returns any error encountered by Advance. Never blocks.
+ Err() error
+ }
+ // SendStream returns the send side of the Sync.GetDeltas server stream.
+ SendStream() interface {
+ // Send places the item onto the output stream. Returns errors encountered
+ // while sending. Blocks if there is no buffer space; will unblock when
+ // buffer space is available.
+ Send(item DeltaResp) error
+ }
+}
+
+// SyncGetDeltasServerCall represents the context passed to Sync.GetDeltas.
+type SyncGetDeltasServerCall interface {
+ rpc.ServerCall
+ SyncGetDeltasServerStream
+}
+
+// SyncGetDeltasServerCallStub is a wrapper that converts rpc.StreamServerCall into
+// a typesafe stub that implements SyncGetDeltasServerCall.
+type SyncGetDeltasServerCallStub struct {
+ rpc.StreamServerCall
+ valRecv DeltaReq
+ errRecv error
+}
+
+// Init initializes SyncGetDeltasServerCallStub from rpc.StreamServerCall.
+func (s *SyncGetDeltasServerCallStub) Init(call rpc.StreamServerCall) {
+ s.StreamServerCall = call
+}
+
+// RecvStream returns the receiver side of the Sync.GetDeltas server stream.
+func (s *SyncGetDeltasServerCallStub) RecvStream() interface {
+ Advance() bool
+ Value() DeltaReq
+ Err() error
+} {
+ return implSyncGetDeltasServerCallRecv{s}
+}
+
+type implSyncGetDeltasServerCallRecv struct {
+ s *SyncGetDeltasServerCallStub
+}
+
+func (s implSyncGetDeltasServerCallRecv) Advance() bool {
+ s.s.valRecv = DeltaReq{}
+ s.s.errRecv = s.s.Recv(&s.s.valRecv)
+ return s.s.errRecv == nil
+}
+func (s implSyncGetDeltasServerCallRecv) Value() DeltaReq {
+ return s.s.valRecv
+}
+func (s implSyncGetDeltasServerCallRecv) Err() error {
+ if s.s.errRecv == io.EOF {
+ return nil
+ }
+ return s.s.errRecv
+}
+
+// SendStream returns the send side of the Sync.GetDeltas server stream.
+func (s *SyncGetDeltasServerCallStub) SendStream() interface {
+ Send(item DeltaResp) error
+} {
+ return implSyncGetDeltasServerCallSend{s}
+}
+
+type implSyncGetDeltasServerCallSend struct {
+ s *SyncGetDeltasServerCallStub
+}
+
+func (s implSyncGetDeltasServerCallSend) Send(item DeltaResp) error {
+ return s.s.Send(item)
+}
+
+// SyncFetchBlobServerStream is the server stream for Sync.FetchBlob.
+type SyncFetchBlobServerStream interface {
+ // SendStream returns the send side of the Sync.FetchBlob server stream.
+ SendStream() interface {
+ // Send places the item onto the output stream. Returns errors encountered
+ // while sending. Blocks if there is no buffer space; will unblock when
+ // buffer space is available.
+ Send(item []byte) error
+ }
+}
+
+// SyncFetchBlobServerCall represents the context passed to Sync.FetchBlob.
+type SyncFetchBlobServerCall interface {
+ rpc.ServerCall
+ SyncFetchBlobServerStream
+}
+
+// SyncFetchBlobServerCallStub is a wrapper that converts rpc.StreamServerCall into
+// a typesafe stub that implements SyncFetchBlobServerCall.
+type SyncFetchBlobServerCallStub struct {
+ rpc.StreamServerCall
+}
+
+// Init initializes SyncFetchBlobServerCallStub from rpc.StreamServerCall.
+func (s *SyncFetchBlobServerCallStub) Init(call rpc.StreamServerCall) {
+ s.StreamServerCall = call
+}
+
+// SendStream returns the send side of the Sync.FetchBlob server stream.
+func (s *SyncFetchBlobServerCallStub) SendStream() interface {
+ Send(item []byte) error
+} {
+ return implSyncFetchBlobServerCallSend{s}
+}
+
+type implSyncFetchBlobServerCallSend struct {
+ s *SyncFetchBlobServerCallStub
+}
+
+func (s implSyncFetchBlobServerCallSend) Send(item []byte) error {
+ return s.s.Send(item)
+}
+
+// SyncFetchBlobRecipeServerStream is the server stream for Sync.FetchBlobRecipe.
+type SyncFetchBlobRecipeServerStream interface {
+ // SendStream returns the send side of the Sync.FetchBlobRecipe server stream.
+ SendStream() interface {
+ // Send places the item onto the output stream. Returns errors encountered
+ // while sending. Blocks if there is no buffer space; will unblock when
+ // buffer space is available.
+ Send(item ChunkHash) error
+ }
+}
+
+// SyncFetchBlobRecipeServerCall represents the context passed to Sync.FetchBlobRecipe.
+type SyncFetchBlobRecipeServerCall interface {
+ rpc.ServerCall
+ SyncFetchBlobRecipeServerStream
+}
+
+// SyncFetchBlobRecipeServerCallStub is a wrapper that converts rpc.StreamServerCall into
+// a typesafe stub that implements SyncFetchBlobRecipeServerCall.
+type SyncFetchBlobRecipeServerCallStub struct {
+ rpc.StreamServerCall
+}
+
+// Init initializes SyncFetchBlobRecipeServerCallStub from rpc.StreamServerCall.
+func (s *SyncFetchBlobRecipeServerCallStub) Init(call rpc.StreamServerCall) {
+ s.StreamServerCall = call
+}
+
+// SendStream returns the send side of the Sync.FetchBlobRecipe server stream.
+func (s *SyncFetchBlobRecipeServerCallStub) SendStream() interface {
+ Send(item ChunkHash) error
+} {
+ return implSyncFetchBlobRecipeServerCallSend{s}
+}
+
+type implSyncFetchBlobRecipeServerCallSend struct {
+ s *SyncFetchBlobRecipeServerCallStub
+}
+
+func (s implSyncFetchBlobRecipeServerCallSend) Send(item ChunkHash) error {
+ return s.s.Send(item)
+}
+
+// SyncFetchChunksServerStream is the server stream for Sync.FetchChunks.
+type SyncFetchChunksServerStream interface {
+ // RecvStream returns the receiver side of the Sync.FetchChunks server stream.
+ RecvStream() interface {
+ // Advance stages an item so that it may be retrieved via Value. Returns
+ // true iff there is an item to retrieve. Advance must be called before
+ // Value is called. May block if an item is not available.
+ Advance() bool
+ // Value returns the item that was staged by Advance. May panic if Advance
+ // returned false or was not called. Never blocks.
+ Value() ChunkHash
+ // Err returns any error encountered by Advance. Never blocks.
+ Err() error
+ }
+ // SendStream returns the send side of the Sync.FetchChunks server stream.
+ SendStream() interface {
+ // Send places the item onto the output stream. Returns errors encountered
+ // while sending. Blocks if there is no buffer space; will unblock when
+ // buffer space is available.
+ Send(item ChunkData) error
+ }
+}
+
+// SyncFetchChunksServerCall represents the context passed to Sync.FetchChunks.
+type SyncFetchChunksServerCall interface {
+ rpc.ServerCall
+ SyncFetchChunksServerStream
+}
+
+// SyncFetchChunksServerCallStub is a wrapper that converts rpc.StreamServerCall into
+// a typesafe stub that implements SyncFetchChunksServerCall.
+type SyncFetchChunksServerCallStub struct {
+ rpc.StreamServerCall
+ valRecv ChunkHash
+ errRecv error
+}
+
+// Init initializes SyncFetchChunksServerCallStub from rpc.StreamServerCall.
+func (s *SyncFetchChunksServerCallStub) Init(call rpc.StreamServerCall) {
+ s.StreamServerCall = call
+}
+
+// RecvStream returns the receiver side of the Sync.FetchChunks server stream.
+func (s *SyncFetchChunksServerCallStub) RecvStream() interface {
+ Advance() bool
+ Value() ChunkHash
+ Err() error
+} {
+ return implSyncFetchChunksServerCallRecv{s}
+}
+
+type implSyncFetchChunksServerCallRecv struct {
+ s *SyncFetchChunksServerCallStub
+}
+
+func (s implSyncFetchChunksServerCallRecv) Advance() bool {
+ s.s.valRecv = ChunkHash{}
+ s.s.errRecv = s.s.Recv(&s.s.valRecv)
+ return s.s.errRecv == nil
+}
+func (s implSyncFetchChunksServerCallRecv) Value() ChunkHash {
+ return s.s.valRecv
+}
+func (s implSyncFetchChunksServerCallRecv) Err() error {
+ if s.s.errRecv == io.EOF {
+ return nil
+ }
+ return s.s.errRecv
+}
+
+// SendStream returns the send side of the Sync.FetchChunks server stream.
+func (s *SyncFetchChunksServerCallStub) SendStream() interface {
+ Send(item ChunkData) error
+} {
+ return implSyncFetchChunksServerCallSend{s}
+}
+
+type implSyncFetchChunksServerCallSend struct {
+ s *SyncFetchChunksServerCallStub
+}
+
+func (s implSyncFetchChunksServerCallSend) Send(item ChunkData) error {
+ return s.s.Send(item)
+}
diff --git a/services/syncbase/server/interfaces/sync_types.vdl b/services/syncbase/server/interfaces/sync_types.vdl
new file mode 100644
index 0000000..324928b
--- /dev/null
+++ b/services/syncbase/server/interfaces/sync_types.vdl
@@ -0,0 +1,129 @@
+// Copyright 2015 The Vanadium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package interfaces
+
+import (
+ "time"
+
+ wire "v.io/syncbase/v23/services/syncbase/nosql"
+)
+
+const (
+ NoGroupId = GroupId(0)
+)
+
+// TODO(hpucha): These are not final yet. This is an intermediate step.
+
+const (
+ // NodeRec type log record adds a new node in the dag.
+ NodeRec = byte(0)
+
+ // LinkRec type log record adds a new link in the dag. Link records are
+ // added when a conflict is resolved by picking the local or the remote
+ // version as the resolution, instead of creating a new version.
+ LinkRec = byte(1)
+)
+
+// PrefixGenVector is the generation vector for a data prefix, which maps each
+// device id to its last locally known generation in the scope of that prefix.
+type PrefixGenVector map[uint64]uint64
+
+// GenVector is the generation vector for a Database, and maps prefixes to their
+// generation vectors. Note that the prefixes in a GenVector are relative to the
+// Application and Database names.
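+//
+// For example (illustrative values only), a GenVector of
+//   {"foo": {10: 38, 11: 5}}
+// says that, within prefix "foo", device 10 is known up to generation 38 and
+// device 11 up to generation 5.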
+type GenVector map[string]PrefixGenVector
+
+// LogRecMetadata represents the metadata of a single log record that is
+// exchanged between two peers. Each log record represents a change made to an
+// object in the store.
+//
+// TODO(hpucha): Add readset/scanset. Look into sending tx metadata only once
+// per transaction.
+type LogRecMetadata struct {
+ // Log related information.
+ Id uint64 // device id that created the log record.
+ Gen uint64 // generation number for the log record.
+ RecType byte // type of log record.
+
+ // Object related information.
+
+ // Id of the object that was updated. This id is relative to Application
+ // and Database names and is the store key for a particular row in a
+ // table.
+ ObjId string
+ CurVers string // current version number of the object.
+ Parents []string // 0, 1 or 2 parent versions that the current version is derived from.
+ UpdTime time.Time // timestamp when the update is generated.
+ Delete bool // indicates whether the update resulted in the object being deleted from the store.
+ BatchId uint64 // unique id of the Batch this update belongs to.
+ BatchCount uint64 // number of objects in the Batch.
+}
+
+// LogRec represents the on-wire representation of an entire log record: its
+// metadata and data. Value is the actual value of a store object.
+type LogRec struct {
+ Metadata LogRecMetadata
+ Value []byte
+}
+
+// GroupId is a globally unique SyncGroup ID.
+type GroupId uint64
+
+// Possible states for a SyncGroup.
+type SyncGroupStatus enum {
+ // Indicates that a SyncGroup is operational, but publishing to the
+ // remote server is pending.
+ PublishPending
+
+ // Indicates that the SyncGroup is operational, but the publishing
+ // failed.
+ PublishRejected
+
+ // Indicates that the SyncGroup is operational and published.
+ Running
+}
+
+// SyncGroup contains the state of a SyncGroup object.
+type SyncGroup struct {
+ Id GroupId // globally unique identifier generated by Syncbase
+ Name string // globally unique Vanadium name chosen by app
+ SpecVersion string // version of the SyncGroup spec, used for concurrency control
+ Spec wire.SyncGroupSpec // app-given specification
+ Creator string // Creator's Vanadium name
+ AppName string // Globally unique App name
+ DbName string // Database name within the App
+ Status SyncGroupStatus // Status of the SyncGroup
+ Joiners map[string]wire.SyncGroupMemberInfo // map of joiners to their metadata
+}
+
+// DeltaReq contains the initiator's genvector and the set of SyncGroups it is
+// interested in within a Database (specified by the AppName/DbName) when
+// requesting deltas for that Database.
+type DeltaReq struct {
+ AppName string
+ DbName string
+ SgIds set[GroupId]
+ InitVec GenVector
+}
+
+// DeltaResp contains the responder's genvector or the missing log records
+// returned in response to an initiator's request for deltas for a Database.
+type DeltaResp union {
+ Start bool
+ Finish bool
+ Rec LogRec
+ RespVec GenVector
+}
+
+// ChunkHash contains the hash of a chunk that is part of a blob's recipe.
+type ChunkHash struct {
+ Hash []byte
+}
+
+// ChunkData contains the data of a chunk.
+type ChunkData struct {
+ Data []byte
+}
diff --git a/services/syncbase/server/interfaces/sync_types.vdl.go b/services/syncbase/server/interfaces/sync_types.vdl.go
new file mode 100644
index 0000000..8ef80a8
--- /dev/null
+++ b/services/syncbase/server/interfaces/sync_types.vdl.go
@@ -0,0 +1,274 @@
+// Copyright 2015 The Vanadium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// This file was auto-generated by the vanadium vdl tool.
+// Source: sync_types.vdl
+
+package interfaces
+
+import (
+ // VDL system imports
+ "fmt"
+ "v.io/v23/vdl"
+
+ // VDL user imports
+ "time"
+ "v.io/syncbase/v23/services/syncbase/nosql"
+ _ "v.io/v23/vdlroot/time"
+)
+
+// PrefixGenVector is the generation vector for a data prefix, which maps each
+// device id to its last locally known generation in the scope of that prefix.
+type PrefixGenVector map[uint64]uint64
+
+func (PrefixGenVector) __VDLReflect(struct {
+ Name string `vdl:"v.io/syncbase/x/ref/services/syncbase/server/interfaces.PrefixGenVector"`
+}) {
+}
+
+// GenVector is the generation vector for a Database, and maps prefixes to their
+// generation vectors. Note that the prefixes in a GenVector are relative to
+// the Application and Database name.
+type GenVector map[string]PrefixGenVector
+
+func (GenVector) __VDLReflect(struct {
+ Name string `vdl:"v.io/syncbase/x/ref/services/syncbase/server/interfaces.GenVector"`
+}) {
+}
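+
+// Example (hypothetical values): a GenVector for a Database might record, for
+// the prefix "foo", that device 10 is known locally up to generation 38 and
+// device 26 up to generation 5:
+//
+//   gv := GenVector{"foo": PrefixGenVector{10: 38, 26: 5}}
+//   _ = gv["foo"][10] // 38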
+
+// LogRecMetadata represents the metadata of a single log record that is
+// exchanged between two peers. Each log record represents a change made to an
+// object in the store.
+//
+// TODO(hpucha): Add readset/scanset. Look into sending tx metadata only once
+// per transaction.
+type LogRecMetadata struct {
+ // Log related information.
+ Id uint64 // device id that created the log record.
+ Gen uint64 // generation number for the log record.
+ RecType byte // type of log record.
+ // Id of the object that was updated. This id is relative to Application
+ // and Database names and is the store key for a particular row in a
+ // table.
+ ObjId string
+ CurVers string // current version number of the object.
+ Parents []string // 0, 1 or 2 parent versions that the current version is derived from.
+ UpdTime time.Time // timestamp when the update is generated.
+ Delete bool // indicates whether the update resulted in object being deleted from the store.
+ BatchId uint64 // unique id of the Batch this update belongs to.
+ BatchCount uint64 // number of objects in the Batch.
+}
+
+func (LogRecMetadata) __VDLReflect(struct {
+ Name string `vdl:"v.io/syncbase/x/ref/services/syncbase/server/interfaces.LogRecMetadata"`
+}) {
+}
+
+// LogRec represents the on-wire representation of an entire log record: its
+// metadata and data. Value is the actual value of a store object.
+type LogRec struct {
+ Metadata LogRecMetadata
+ Value []byte
+}
+
+func (LogRec) __VDLReflect(struct {
+ Name string `vdl:"v.io/syncbase/x/ref/services/syncbase/server/interfaces.LogRec"`
+}) {
+}
+
+// GroupId is a globally unique SyncGroup ID.
+type GroupId uint64
+
+func (GroupId) __VDLReflect(struct {
+ Name string `vdl:"v.io/syncbase/x/ref/services/syncbase/server/interfaces.GroupId"`
+}) {
+}
+
+// Possible states for a SyncGroup.
+type SyncGroupStatus int
+
+const (
+ SyncGroupStatusPublishPending SyncGroupStatus = iota
+ SyncGroupStatusPublishRejected
+ SyncGroupStatusRunning
+)
+
+// SyncGroupStatusAll holds all labels for SyncGroupStatus.
+var SyncGroupStatusAll = [...]SyncGroupStatus{SyncGroupStatusPublishPending, SyncGroupStatusPublishRejected, SyncGroupStatusRunning}
+
+// SyncGroupStatusFromString creates a SyncGroupStatus from a string label.
+func SyncGroupStatusFromString(label string) (x SyncGroupStatus, err error) {
+ err = x.Set(label)
+ return
+}
+
+// Set assigns label to x.
+func (x *SyncGroupStatus) Set(label string) error {
+ switch label {
+ case "PublishPending", "publishpending":
+ *x = SyncGroupStatusPublishPending
+ return nil
+ case "PublishRejected", "publishrejected":
+ *x = SyncGroupStatusPublishRejected
+ return nil
+ case "Running", "running":
+ *x = SyncGroupStatusRunning
+ return nil
+ }
+ *x = -1
+ return fmt.Errorf("unknown label %q in interfaces.SyncGroupStatus", label)
+}
+
+// String returns the string label of x.
+func (x SyncGroupStatus) String() string {
+ switch x {
+ case SyncGroupStatusPublishPending:
+ return "PublishPending"
+ case SyncGroupStatusPublishRejected:
+ return "PublishRejected"
+ case SyncGroupStatusRunning:
+ return "Running"
+ }
+ return ""
+}
+
+func (SyncGroupStatus) __VDLReflect(struct {
+ Name string `vdl:"v.io/syncbase/x/ref/services/syncbase/server/interfaces.SyncGroupStatus"`
+ Enum struct{ PublishPending, PublishRejected, Running string }
+}) {
+}
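+
+// Example (illustrative usage): labels round-trip through
+// SyncGroupStatusFromString and String; unknown labels yield an error.
+//
+//   st, err := SyncGroupStatusFromString("Running") // st == SyncGroupStatusRunning, err == nil
+//   _ = st.String()                                 // "Running"
+//   _, err = SyncGroupStatusFromString("Paused")    // err != nil: unknown label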
+
+// SyncGroup contains the state of a SyncGroup object.
+type SyncGroup struct {
+ Id GroupId // globally unique identifier generated by Syncbase
+ Name string // globally unique Vanadium name chosen by app
+ SpecVersion string // version of SyncGroup spec for concurrency control
+ Spec nosql.SyncGroupSpec // app-given specification
+ Creator string // Creator's Vanadium name
+ AppName string // Globally unique App name
+ DbName string // Database name within the App
+ Status SyncGroupStatus // Status of the SyncGroup
+ Joiners map[string]nosql.SyncGroupMemberInfo // map of joiners to their metadata
+}
+
+func (SyncGroup) __VDLReflect(struct {
+ Name string `vdl:"v.io/syncbase/x/ref/services/syncbase/server/interfaces.SyncGroup"`
+}) {
+}
+
+// DeltaReq contains the initiator's genvector and the set of SyncGroups it is
+// interested in within a Database (specified by the AppName/DbName) when
+// requesting deltas for that Database.
+type DeltaReq struct {
+ AppName string
+ DbName string
+ SgIds map[GroupId]struct{}
+ InitVec GenVector
+}
+
+func (DeltaReq) __VDLReflect(struct {
+ Name string `vdl:"v.io/syncbase/x/ref/services/syncbase/server/interfaces.DeltaReq"`
+}) {
+}
+
+type (
+ // DeltaResp represents any single field of the DeltaResp union type.
+ //
+ // DeltaResp contains the responder's genvector or the missing log records
+ // returned in response to an initiator's request for deltas for a Database.
+ DeltaResp interface {
+ // Index returns the field index.
+ Index() int
+ // Interface returns the field value as an interface.
+ Interface() interface{}
+ // Name returns the field name.
+ Name() string
+ // __VDLReflect describes the DeltaResp union type.
+ __VDLReflect(__DeltaRespReflect)
+ }
+ // DeltaRespStart represents field Start of the DeltaResp union type.
+ DeltaRespStart struct{ Value bool }
+ // DeltaRespFinish represents field Finish of the DeltaResp union type.
+ DeltaRespFinish struct{ Value bool }
+ // DeltaRespRec represents field Rec of the DeltaResp union type.
+ DeltaRespRec struct{ Value LogRec }
+ // DeltaRespRespVec represents field RespVec of the DeltaResp union type.
+ DeltaRespRespVec struct{ Value GenVector }
+ // __DeltaRespReflect describes the DeltaResp union type.
+ __DeltaRespReflect struct {
+ Name string `vdl:"v.io/syncbase/x/ref/services/syncbase/server/interfaces.DeltaResp"`
+ Type DeltaResp
+ Union struct {
+ Start DeltaRespStart
+ Finish DeltaRespFinish
+ Rec DeltaRespRec
+ RespVec DeltaRespRespVec
+ }
+ }
+)
+
+func (x DeltaRespStart) Index() int { return 0 }
+func (x DeltaRespStart) Interface() interface{} { return x.Value }
+func (x DeltaRespStart) Name() string { return "Start" }
+func (x DeltaRespStart) __VDLReflect(__DeltaRespReflect) {}
+
+func (x DeltaRespFinish) Index() int { return 1 }
+func (x DeltaRespFinish) Interface() interface{} { return x.Value }
+func (x DeltaRespFinish) Name() string { return "Finish" }
+func (x DeltaRespFinish) __VDLReflect(__DeltaRespReflect) {}
+
+func (x DeltaRespRec) Index() int { return 2 }
+func (x DeltaRespRec) Interface() interface{} { return x.Value }
+func (x DeltaRespRec) Name() string { return "Rec" }
+func (x DeltaRespRec) __VDLReflect(__DeltaRespReflect) {}
+
+func (x DeltaRespRespVec) Index() int { return 3 }
+func (x DeltaRespRespVec) Interface() interface{} { return x.Value }
+func (x DeltaRespRespVec) Name() string { return "RespVec" }
+func (x DeltaRespRespVec) __VDLReflect(__DeltaRespReflect) {}
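+
+// Example (illustrative sketch, assuming resp was received from a responder's
+// delta stream): callers typically dispatch on the concrete field type of the
+// DeltaResp union.
+//
+//   switch r := resp.(type) {
+//   case DeltaRespRec:
+//       _ = r.Value // a LogRec to process locally
+//   case DeltaRespRespVec:
+//       _ = r.Value // the responder's GenVector
+//   case DeltaRespStart, DeltaRespFinish:
+//       // stream delimiters
+//   }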
+
+// ChunkHash contains the hash of a chunk that is part of a blob's recipe.
+type ChunkHash struct {
+ Hash []byte
+}
+
+func (ChunkHash) __VDLReflect(struct {
+ Name string `vdl:"v.io/syncbase/x/ref/services/syncbase/server/interfaces.ChunkHash"`
+}) {
+}
+
+// ChunkData contains the data of a chunk.
+type ChunkData struct {
+ Data []byte
+}
+
+func (ChunkData) __VDLReflect(struct {
+ Name string `vdl:"v.io/syncbase/x/ref/services/syncbase/server/interfaces.ChunkData"`
+}) {
+}
+
+func init() {
+ vdl.Register((*PrefixGenVector)(nil))
+ vdl.Register((*GenVector)(nil))
+ vdl.Register((*LogRecMetadata)(nil))
+ vdl.Register((*LogRec)(nil))
+ vdl.Register((*GroupId)(nil))
+ vdl.Register((*SyncGroupStatus)(nil))
+ vdl.Register((*SyncGroup)(nil))
+ vdl.Register((*DeltaReq)(nil))
+ vdl.Register((*DeltaResp)(nil))
+ vdl.Register((*ChunkHash)(nil))
+ vdl.Register((*ChunkData)(nil))
+}
+
+const NoGroupId = GroupId(0)
+
+// NodeRec type log record adds a new node in the dag.
+const NodeRec = byte(0)
+
+// LinkRec type log record adds a new link in the dag. Link records are
+// added when a conflict is resolved by picking the local or the remote
+// version as the resolution of a conflict, instead of creating a new
+// version.
+const LinkRec = byte(1)
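+
+// Example (hypothetical snippet, assuming rec is a LogRec): consumers can
+// branch on the record kind using these constants.
+//
+//   switch rec.Metadata.RecType {
+//   case NodeRec:
+//       // a new version (node) was added to the dag
+//   case LinkRec:
+//       // an existing version was linked during conflict resolution
+//   }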
diff --git a/services/syncbase/server/mojo_call.go b/services/syncbase/server/mojo_call.go
new file mode 100644
index 0000000..d0b9dac
--- /dev/null
+++ b/services/syncbase/server/mojo_call.go
@@ -0,0 +1,68 @@
+// Copyright 2015 The Vanadium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// +build mojo
+
+package server
+
+import (
+ "v.io/v23"
+ "v.io/v23/context"
+ "v.io/v23/naming"
+ "v.io/v23/rpc"
+ "v.io/v23/security"
+)
+
+type mojoServerCall struct {
+ sec security.Call
+ srv rpc.Server
+ suffix string
+}
+
+// TODO(sadovsky): Synthesize endpoints and discharges as needed.
+func newMojoServerCall(ctx *context.T, srv rpc.Server, suffix string, method rpc.MethodDesc) rpc.ServerCall {
+ p := v23.GetPrincipal(ctx)
+ // HACK: For now, we set the remote (client, i.e. Mojo app) blessing to be the
+ // same as the local (server, i.e. Syncbase Mojo service) blessing.
+ // TODO(sadovsky): Eliminate this hack.
+ blessings := p.BlessingStore().Default()
+ return &mojoServerCall{
+ sec: security.NewCall(&security.CallParams{
+ Method: method.Name,
+ MethodTags: method.Tags,
+ Suffix: suffix,
+ LocalPrincipal: p,
+ LocalBlessings: blessings,
+ RemoteBlessings: blessings,
+ }),
+ srv: srv,
+ suffix: suffix,
+ }
+}
+
+var _ rpc.ServerCall = (*mojoServerCall)(nil)
+
+func (call *mojoServerCall) Security() security.Call {
+ return call.sec
+}
+
+func (call *mojoServerCall) Suffix() string {
+ return call.suffix
+}
+
+func (call *mojoServerCall) LocalEndpoint() naming.Endpoint {
+ return call.sec.LocalEndpoint()
+}
+
+func (call *mojoServerCall) RemoteEndpoint() naming.Endpoint {
+ return call.sec.RemoteEndpoint()
+}
+
+func (call *mojoServerCall) GrantedBlessings() security.Blessings {
+ return security.Blessings{}
+}
+
+func (call *mojoServerCall) Server() rpc.Server {
+ return call.srv
+}
diff --git a/services/syncbase/server/mojo_impl.go b/services/syncbase/server/mojo_impl.go
new file mode 100644
index 0000000..973bf7d
--- /dev/null
+++ b/services/syncbase/server/mojo_impl.go
@@ -0,0 +1,535 @@
+// Copyright 2015 The Vanadium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// +build mojo
+
+// Implementation of Syncbase Mojo stubs. Our strategy is to translate Mojo stub
+// requests into Vanadium stub requests, and Vanadium stub responses into Mojo
+// stub responses. As part of this procedure, we synthesize "fake" ctx and call
+// objects to pass to the Vanadium stubs.
+
+package server
+
+import (
+ "bytes"
+ "fmt"
+ "strings"
+
+ "mojo/public/go/bindings"
+
+ mojom "mojom/syncbase"
+ wire "v.io/syncbase/v23/services/syncbase"
+ nosqlwire "v.io/syncbase/v23/services/syncbase/nosql"
+ "v.io/v23/context"
+ "v.io/v23/rpc"
+ "v.io/v23/security/access"
+ "v.io/v23/services/permissions"
+ "v.io/v23/verror"
+ "v.io/v23/vtrace"
+)
+
+const NoSchema int32 = -1
+
+type mojoImpl struct {
+ ctx *context.T
+ srv rpc.Server
+ disp rpc.Dispatcher
+}
+
+func NewMojoImpl(ctx *context.T, srv rpc.Server, disp rpc.Dispatcher) *mojoImpl {
+ return &mojoImpl{ctx: ctx, srv: srv, disp: disp}
+}
+
+func methodDesc(desc rpc.InterfaceDesc, name string) rpc.MethodDesc {
+ for _, method := range desc.Methods {
+ if method.Name == name {
+ return method
+ }
+ }
+ panic(fmt.Sprintf("unknown method: %s.%s", desc.Name, name))
+}
+
+func (m *mojoImpl) newCtxCall(suffix string, method rpc.MethodDesc) (*context.T, rpc.ServerCall) {
+ ctx, _ := vtrace.WithNewTrace(m.ctx)
+ return ctx, newMojoServerCall(ctx, m.srv, suffix, method)
+}
+
+////////////////////////////////////////
+// Struct converters
+
+func toMojoError(err error) mojom.Error {
+ if err == nil {
+ return mojom.Error{}
+ }
+ return mojom.Error{
+ Id: string(verror.ErrorID(err)),
+ ActionCode: uint32(verror.Action(err)),
+ Msg: err.Error(),
+ }
+}
+
+func toV23Perms(mPerms mojom.Perms) (access.Permissions, error) {
+ return access.ReadPermissions(strings.NewReader(mPerms.Json))
+}
+
+func toMojoPerms(vPerms access.Permissions) (mojom.Perms, error) {
+ b := new(bytes.Buffer)
+ if err := access.WritePermissions(b, vPerms); err != nil {
+ return mojom.Perms{}, err
+ }
+ return mojom.Perms{Json: b.String()}, nil
+}
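+
+// Illustrative round trip (the exact JSON shape is defined by the access
+// package; the blessing pattern below is made up):
+//
+//   mPerms := mojom.Perms{Json: `{"Read": {"In": ["root:alice"]}}`}
+//   vPerms, _ := toV23Perms(mPerms)   // access.Permissions
+//   mPerms2, _ := toMojoPerms(vPerms) // back to mojom.Perms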
+
+////////////////////////////////////////
+// Stub getters
+
+func (m *mojoImpl) lookupAndAuthorize(ctx *context.T, call rpc.ServerCall, suffix string) (interface{}, error) {
+ resInt, auth, err := m.disp.Lookup(ctx, suffix)
+ if err != nil {
+ return nil, err
+ }
+ if err := auth.Authorize(ctx, call.Security()); err != nil {
+ return nil, verror.New(verror.ErrNoAccess, ctx, err)
+ }
+ return resInt, nil
+}
+
+func (m *mojoImpl) getService(ctx *context.T, call rpc.ServerCall) (wire.ServiceServerStubMethods, error) {
+ resInt, err := m.lookupAndAuthorize(ctx, call, "")
+ if err != nil {
+ return nil, err
+ }
+ if res, ok := resInt.(wire.ServiceServerStubMethods); !ok {
+ return nil, verror.NewErrInternal(ctx)
+ } else {
+ return res, nil
+ }
+}
+
+func (m *mojoImpl) getApp(ctx *context.T, call rpc.ServerCall, name string) (wire.AppServerStubMethods, error) {
+ resInt, err := m.lookupAndAuthorize(ctx, call, name)
+ if err != nil {
+ return nil, err
+ }
+ if res, ok := resInt.(wire.AppServerStubMethods); !ok {
+ return nil, verror.NewErrInternal(ctx)
+ } else {
+ return res, nil
+ }
+}
+
+func (m *mojoImpl) getDb(ctx *context.T, call rpc.ServerCall, name string) (nosqlwire.DatabaseServerStubMethods, error) {
+ resInt, err := m.lookupAndAuthorize(ctx, call, name)
+ if err != nil {
+ return nil, err
+ }
+ if res, ok := resInt.(nosqlwire.DatabaseServerStubMethods); !ok {
+ return nil, verror.NewErrInternal(ctx)
+ } else {
+ return res, nil
+ }
+}
+
+func (m *mojoImpl) getTable(ctx *context.T, call rpc.ServerCall, name string) (nosqlwire.TableServerStubMethods, error) {
+ resInt, err := m.lookupAndAuthorize(ctx, call, name)
+ if err != nil {
+ return nil, err
+ }
+ if res, ok := resInt.(nosqlwire.TableServerStubMethods); !ok {
+ return nil, verror.NewErrInternal(ctx)
+ } else {
+ return res, nil
+ }
+}
+
+func (m *mojoImpl) getRow(ctx *context.T, call rpc.ServerCall, name string) (nosqlwire.RowServerStubMethods, error) {
+ resInt, err := m.lookupAndAuthorize(ctx, call, name)
+ if err != nil {
+ return nil, err
+ }
+ if res, ok := resInt.(nosqlwire.RowServerStubMethods); !ok {
+ return nil, verror.NewErrInternal(ctx)
+ } else {
+ return res, nil
+ }
+}
+
+////////////////////////////////////////
+// Service
+
+// TODO(sadovsky): All stub implementations return a nil error (the last return
+// value), since that error doesn't make it back to the IPC client. Chat with
+// rogulenko@ about whether we should change the Go Mojo stub generator to drop
+// these errors.
+func (m *mojoImpl) ServiceGetPermissions() (mojom.Error, mojom.Perms, string, error) {
+ ctx, call := m.newCtxCall("", methodDesc(permissions.ObjectDesc, "GetPermissions"))
+ stub, err := m.getService(ctx, call)
+ if err != nil {
+ return toMojoError(err), mojom.Perms{}, "", nil
+ }
+ vPerms, version, err := stub.GetPermissions(ctx, call)
+ if err != nil {
+ return toMojoError(err), mojom.Perms{}, "", nil
+ }
+ mPerms, err := toMojoPerms(vPerms)
+ if err != nil {
+ return toMojoError(err), mojom.Perms{}, "", nil
+ }
+ return toMojoError(err), mPerms, version, nil
+}
+
+func (m *mojoImpl) ServiceSetPermissions(mPerms mojom.Perms, version string) (mojom.Error, error) {
+ ctx, call := m.newCtxCall("", methodDesc(permissions.ObjectDesc, "SetPermissions"))
+ stub, err := m.getService(ctx, call)
+ if err != nil {
+ return toMojoError(err), nil
+ }
+ vPerms, err := toV23Perms(mPerms)
+ if err != nil {
+ return toMojoError(err), nil
+ }
+ err = stub.SetPermissions(ctx, call, vPerms, version)
+ return toMojoError(err), nil
+}
+
+////////////////////////////////////////
+// App
+
+func (m *mojoImpl) AppCreate(name string, mPerms mojom.Perms) (mojom.Error, error) {
+ ctx, call := m.newCtxCall(name, methodDesc(wire.AppDesc, "Create"))
+ stub, err := m.getApp(ctx, call, name)
+ if err != nil {
+ return toMojoError(err), nil
+ }
+ vPerms, err := toV23Perms(mPerms)
+ if err != nil {
+ return toMojoError(err), nil
+ }
+ err = stub.Create(ctx, call, vPerms)
+ return toMojoError(err), nil
+}
+
+func (m *mojoImpl) AppDelete(name string) (mojom.Error, error) {
+ ctx, call := m.newCtxCall(name, methodDesc(wire.AppDesc, "Delete"))
+ stub, err := m.getApp(ctx, call, name)
+ if err != nil {
+ return toMojoError(err), nil
+ }
+ err = stub.Delete(ctx, call)
+ return toMojoError(err), nil
+}
+
+func (m *mojoImpl) AppExists(name string) (mojom.Error, bool, error) {
+ ctx, call := m.newCtxCall(name, methodDesc(wire.AppDesc, "Exists"))
+ stub, err := m.getApp(ctx, call, name)
+ if err != nil {
+ return toMojoError(err), false, nil
+ }
+ exists, err := stub.Exists(ctx, call)
+ return toMojoError(err), exists, nil
+}
+
+func (m *mojoImpl) AppGetPermissions(name string) (mojom.Error, mojom.Perms, string, error) {
+ ctx, call := m.newCtxCall(name, methodDesc(permissions.ObjectDesc, "GetPermissions"))
+ stub, err := m.getApp(ctx, call, name)
+ if err != nil {
+ return toMojoError(err), mojom.Perms{}, "", nil
+ }
+ vPerms, version, err := stub.GetPermissions(ctx, call)
+ if err != nil {
+ return toMojoError(err), mojom.Perms{}, "", nil
+ }
+ mPerms, err := toMojoPerms(vPerms)
+ if err != nil {
+ return toMojoError(err), mojom.Perms{}, "", nil
+ }
+ return toMojoError(err), mPerms, version, nil
+}
+
+func (m *mojoImpl) AppSetPermissions(name string, mPerms mojom.Perms, version string) (mojom.Error, error) {
+ ctx, call := m.newCtxCall(name, methodDesc(permissions.ObjectDesc, "SetPermissions"))
+ stub, err := m.getApp(ctx, call, name)
+ if err != nil {
+ return toMojoError(err), nil
+ }
+ vPerms, err := toV23Perms(mPerms)
+ if err != nil {
+ return toMojoError(err), nil
+ }
+ err = stub.SetPermissions(ctx, call, vPerms, version)
+ return toMojoError(err), nil
+}
+
+////////////////////////////////////////
+// nosql.Database
+
+func (m *mojoImpl) DbCreate(name string, mPerms mojom.Perms) (mojom.Error, error) {
+ ctx, call := m.newCtxCall(name, methodDesc(nosqlwire.DatabaseDesc, "Create"))
+ stub, err := m.getDb(ctx, call, name)
+ if err != nil {
+ return toMojoError(err), nil
+ }
+ vPerms, err := toV23Perms(mPerms)
+ if err != nil {
+ return toMojoError(err), nil
+ }
+ err = stub.Create(ctx, call, nil, vPerms)
+ return toMojoError(err), nil
+}
+
+func (m *mojoImpl) DbDelete(name string) (mojom.Error, error) {
+ ctx, call := m.newCtxCall(name, methodDesc(nosqlwire.DatabaseDesc, "Delete"))
+ stub, err := m.getDb(ctx, call, name)
+ if err != nil {
+ return toMojoError(err), nil
+ }
+ err = stub.Delete(ctx, call, NoSchema)
+ return toMojoError(err), nil
+}
+
+func (m *mojoImpl) DbExists(name string) (mojom.Error, bool, error) {
+ ctx, call := m.newCtxCall(name, methodDesc(nosqlwire.DatabaseDesc, "Exists"))
+ stub, err := m.getDb(ctx, call, name)
+ if err != nil {
+ return toMojoError(err), false, nil
+ }
+ exists, err := stub.Exists(ctx, call, NoSchema)
+ return toMojoError(err), exists, nil
+}
+
+func (m *mojoImpl) DbExec(name string, query string, stream mojom.ExecStream_Pointer) (mojom.Error, error) {
+ return mojom.Error{}, nil
+}
+
+func (m *mojoImpl) DbBeginBatch(name string, bo *mojom.BatchOptions) (mojom.Error, string, error) {
+ return mojom.Error{}, "", nil
+}
+
+func (m *mojoImpl) DbCommit(name string) (mojom.Error, error) {
+ return mojom.Error{}, nil
+}
+
+func (m *mojoImpl) DbAbort(name string) (mojom.Error, error) {
+ return mojom.Error{}, nil
+}
+
+func (m *mojoImpl) DbGetPermissions(name string) (mojom.Error, mojom.Perms, string, error) {
+ ctx, call := m.newCtxCall(name, methodDesc(permissions.ObjectDesc, "GetPermissions"))
+ stub, err := m.getDb(ctx, call, name)
+ if err != nil {
+ return toMojoError(err), mojom.Perms{}, "", nil
+ }
+ vPerms, version, err := stub.GetPermissions(ctx, call)
+ if err != nil {
+ return toMojoError(err), mojom.Perms{}, "", nil
+ }
+ mPerms, err := toMojoPerms(vPerms)
+ if err != nil {
+ return toMojoError(err), mojom.Perms{}, "", nil
+ }
+ return toMojoError(err), mPerms, version, nil
+}
+
+func (m *mojoImpl) DbSetPermissions(name string, mPerms mojom.Perms, version string) (mojom.Error, error) {
+ ctx, call := m.newCtxCall(name, methodDesc(permissions.ObjectDesc, "SetPermissions"))
+ stub, err := m.getDb(ctx, call, name)
+ if err != nil {
+ return toMojoError(err), nil
+ }
+ vPerms, err := toV23Perms(mPerms)
+ if err != nil {
+ return toMojoError(err), nil
+ }
+ err = stub.SetPermissions(ctx, call, vPerms, version)
+ return toMojoError(err), nil
+}
+
+////////////////////////////////////////
+// nosql.Database:SyncGroupManager
+
+func (m *mojoImpl) DbGetSyncGroupNames(name string) (mojom.Error, []string, error) {
+ return mojom.Error{}, nil, nil
+}
+
+func (m *mojoImpl) DbCreateSyncGroup(name, sgName string, spec mojom.SyncGroupSpec, myInfo mojom.SyncGroupMemberInfo) (mojom.Error, error) {
+ return mojom.Error{}, nil
+}
+
+func (m *mojoImpl) DbJoinSyncGroup(name, sgName string, myInfo mojom.SyncGroupMemberInfo) (mojom.Error, error) {
+ return mojom.Error{}, nil
+}
+
+func (m *mojoImpl) DbLeaveSyncGroup(name, sgName string) (mojom.Error, error) {
+ return mojom.Error{}, nil
+}
+
+func (m *mojoImpl) DbDestroySyncGroup(name, sgName string) (mojom.Error, error) {
+ return mojom.Error{}, nil
+}
+
+func (m *mojoImpl) DbEjectFromSyncGroup(name, sgName string, member string) (mojom.Error, error) {
+ return mojom.Error{}, nil
+}
+
+func (m *mojoImpl) DbGetSyncGroupSpec(name, sgName string) (mojom.Error, mojom.SyncGroupSpec, string, error) {
+ return mojom.Error{}, mojom.SyncGroupSpec{}, "", nil
+}
+
+func (m *mojoImpl) DbSetSyncGroupSpec(name, sgName string, spec mojom.SyncGroupSpec, version string) (mojom.Error, error) {
+ return mojom.Error{}, nil
+}
+
+func (m *mojoImpl) DbGetSyncGroupMembers(name, sgName string) (mojom.Error, map[string]mojom.SyncGroupMemberInfo, error) {
+ return mojom.Error{}, nil, nil
+}
+
+////////////////////////////////////////
+// nosql.Table
+
+func (m *mojoImpl) TableCreate(name string, mPerms mojom.Perms) (mojom.Error, error) {
+ ctx, call := m.newCtxCall(name, methodDesc(nosqlwire.TableDesc, "Create"))
+ stub, err := m.getTable(ctx, call, name)
+ if err != nil {
+ return toMojoError(err), nil
+ }
+ vPerms, err := toV23Perms(mPerms)
+ if err != nil {
+ return toMojoError(err), nil
+ }
+ err = stub.Create(ctx, call, NoSchema, vPerms)
+ return toMojoError(err), nil
+}
+
+func (m *mojoImpl) TableDelete(name string) (mojom.Error, error) {
+ ctx, call := m.newCtxCall(name, methodDesc(nosqlwire.TableDesc, "Delete"))
+ stub, err := m.getTable(ctx, call, name)
+ if err != nil {
+ return toMojoError(err), nil
+ }
+ err = stub.Delete(ctx, call, NoSchema)
+ return toMojoError(err), nil
+}
+
+func (m *mojoImpl) TableExists(name string) (mojom.Error, bool, error) {
+ ctx, call := m.newCtxCall(name, methodDesc(nosqlwire.TableDesc, "Exists"))
+ stub, err := m.getTable(ctx, call, name)
+ if err != nil {
+ return toMojoError(err), false, nil
+ }
+ exists, err := stub.Exists(ctx, call, NoSchema)
+ return toMojoError(err), exists, nil
+}
+
+func (m *mojoImpl) TableDeleteRowRange(name string, start, limit []byte) (mojom.Error, error) {
+ return mojom.Error{}, nil
+}
+
+type scanStreamImpl struct {
+ ctx *context.T
+ proxy *mojom.ScanStream_Proxy
+}
+
+func (s *scanStreamImpl) Send(item interface{}) error {
+ kv, ok := item.(nosqlwire.KeyValue)
+ if !ok {
+ return verror.NewErrInternal(s.ctx)
+ }
+
+ return s.proxy.OnKeyValue(mojom.KeyValue{
+ Key: kv.Key,
+ Value: kv.Value,
+ })
+}
+
+func (s *scanStreamImpl) Recv(_ interface{}) error {
+ // This should never be called.
+ return verror.NewErrInternal(s.ctx)
+}
+
+var _ rpc.Stream = (*scanStreamImpl)(nil)
+
+// TODO(nlacasse): Provide some way for the client to cancel the stream.
+func (m *mojoImpl) TableScan(name string, start, limit []byte, ptr mojom.ScanStream_Pointer) (mojom.Error, error) {
+ ctx, call := m.newCtxCall(name, methodDesc(nosqlwire.TableDesc, "Scan"))
+ stub, err := m.getTable(ctx, call, name)
+ if err != nil {
+ return toMojoError(err), nil
+ }
+
+ proxy := mojom.NewScanStreamProxy(ptr, bindings.GetAsyncWaiter())
+
+ tableScanServerCallStub := &nosqlwire.TableScanServerCallStub{struct {
+ rpc.Stream
+ rpc.ServerCall
+ }{
+ &scanStreamImpl{
+ ctx: ctx,
+ proxy: proxy,
+ },
+ call,
+ }}
+
+ err = stub.Scan(ctx, tableScanServerCallStub, NoSchema, start, limit)
+
+ // NOTE(nlacasse): Since we are already streaming, we send any error back
+ // to the client on the stream. The TableScan function itself should not
+ // return an error at this point.
+ proxy.OnDone(toMojoError(err))
+ return mojom.Error{}, nil
+}
+
+func (m *mojoImpl) TableGetPermissions(name, key string) (mojom.Error, []mojom.PrefixPerms, error) {
+ return mojom.Error{}, nil, nil
+}
+
+func (m *mojoImpl) TableSetPermissions(name, prefix string, mPerms mojom.Perms) (mojom.Error, error) {
+ return mojom.Error{}, nil
+}
+
+func (m *mojoImpl) TableDeletePermissions(name, prefix string) (mojom.Error, error) {
+ return mojom.Error{}, nil
+}
+
+////////////////////////////////////////
+// nosql.Row
+
+func (m *mojoImpl) RowExists(name string) (mojom.Error, bool, error) {
+ ctx, call := m.newCtxCall(name, methodDesc(nosqlwire.RowDesc, "Exists"))
+ stub, err := m.getRow(ctx, call, name)
+ if err != nil {
+ return toMojoError(err), false, nil
+ }
+ exists, err := stub.Exists(ctx, call, NoSchema)
+ return toMojoError(err), exists, nil
+}
+
+func (m *mojoImpl) RowGet(name string) (mojom.Error, []byte, error) {
+ ctx, call := m.newCtxCall(name, methodDesc(nosqlwire.RowDesc, "Get"))
+ stub, err := m.getRow(ctx, call, name)
+ if err != nil {
+ return toMojoError(err), nil, nil
+ }
+ value, err := stub.Get(ctx, call, NoSchema)
+ return toMojoError(err), value, nil
+}
+
+func (m *mojoImpl) RowPut(name string, value []byte) (mojom.Error, error) {
+ ctx, call := m.newCtxCall(name, methodDesc(nosqlwire.RowDesc, "Put"))
+ stub, err := m.getRow(ctx, call, name)
+ if err != nil {
+ return toMojoError(err), nil
+ }
+ err = stub.Put(ctx, call, NoSchema, value)
+ return toMojoError(err), nil
+}
+
+func (m *mojoImpl) RowDelete(name string) (mojom.Error, error) {
+ ctx, call := m.newCtxCall(name, methodDesc(nosqlwire.RowDesc, "Delete"))
+ stub, err := m.getRow(ctx, call, name)
+ if err != nil {
+ return toMojoError(err), nil
+ }
+ err = stub.Delete(ctx, call, NoSchema)
+ return toMojoError(err), nil
+}
diff --git a/services/syncbase/server/nosql/database.go b/services/syncbase/server/nosql/database.go
new file mode 100644
index 0000000..a9f503f
--- /dev/null
+++ b/services/syncbase/server/nosql/database.go
@@ -0,0 +1,577 @@
+// Copyright 2015 The Vanadium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package nosql
+
+import (
+ "math/rand"
+ "path"
+ "strconv"
+ "strings"
+ "sync"
+ "time"
+
+ wire "v.io/syncbase/v23/services/syncbase/nosql"
+ "v.io/syncbase/v23/syncbase/nosql/query_db"
+ "v.io/syncbase/v23/syncbase/nosql/query_exec"
+ "v.io/syncbase/x/ref/services/syncbase/clock"
+ "v.io/syncbase/x/ref/services/syncbase/server/interfaces"
+ "v.io/syncbase/x/ref/services/syncbase/server/util"
+ "v.io/syncbase/x/ref/services/syncbase/server/watchable"
+ "v.io/syncbase/x/ref/services/syncbase/store"
+ "v.io/v23/context"
+ "v.io/v23/glob"
+ "v.io/v23/rpc"
+ "v.io/v23/security/access"
+ "v.io/v23/vdl"
+ "v.io/v23/verror"
+ "v.io/v23/vom"
+ "v.io/x/lib/vlog"
+)
+
+// database is a per-database singleton (i.e. not per-request). It does not
+// directly handle RPCs.
+// Note: If a database does not exist at the time of a database RPC, the
+// dispatcher creates a short-lived database object to service that particular
+// request.
+type database struct {
+ name string
+ a interfaces.App
+ // The fields below are initialized iff this database exists.
+ exists bool
+ // TODO(sadovsky): Make st point to a store.Store wrapper that handles paging,
+ // and do not actually open the store in NewDatabase.
+ st store.Store // stores all data for a single database
+
+ // Active snapshots and transactions corresponding to client batches.
+ // TODO(sadovsky): Add timeouts and GC.
+ mu sync.Mutex // protects the fields below
+ sns map[uint64]store.Snapshot
+ txs map[uint64]store.Transaction
+
+ // Active ConflictResolver connection from the app to this database.
+ // NOTE: For now, we assume there's only one open conflict resolution stream
+ // per database (typically, from the app that owns the database).
+ resolver wire.ConflictManagerStartConflictResolverServerCall
+}
+
+// databaseReq is a per-request object that handles Database RPCs.
+// It embeds database and tracks request-specific batch state.
+type databaseReq struct {
+ *database
+ // If batchId is non-nil, exactly one of sn or tx will be non-nil.
+ batchId *uint64
+ sn store.Snapshot
+ tx store.Transaction
+}
+
+var (
+ _ wire.DatabaseServerMethods = (*databaseReq)(nil)
+ _ interfaces.Database = (*database)(nil)
+)
+
+// DatabaseOptions configures a database.
+type DatabaseOptions struct {
+ // Database-level permissions.
+ Perms access.Permissions
+ // Root dir for data storage.
+ RootDir string
+ // Storage engine to use.
+ Engine string
+}
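+
+// Example (hypothetical values; supported engine names are determined by the
+// util.OpenStore helper):
+//
+//   opts := DatabaseOptions{
+//       Perms:   perms, // an access.Permissions value
+//       RootDir: "/tmp/syncbase",
+//       Engine:  "leveldb",
+//   }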
+
+// OpenDatabase opens a database and returns a *database for it. Designed for
+// use from within NewDatabase and server.NewService.
+func OpenDatabase(ctx *context.T, a interfaces.App, name string, opts DatabaseOptions, openOpts util.OpenOptions) (*database, error) {
+ st, err := util.OpenStore(opts.Engine, path.Join(opts.RootDir, opts.Engine), openOpts)
+ if err != nil {
+ return nil, err
+ }
+ vclock := clock.NewVClock(a.Service().St())
+ st, err = watchable.Wrap(st, vclock, &watchable.Options{
+ ManagedPrefixes: []string{util.RowPrefix, util.PermsPrefix},
+ })
+ if err != nil {
+ return nil, err
+ }
+ return &database{
+ name: name,
+ a: a,
+ exists: true,
+ st: st,
+ sns: make(map[uint64]store.Snapshot),
+ txs: make(map[uint64]store.Transaction),
+ }, nil
+}
+
+// NewDatabase creates a new database instance and returns it.
+// Designed for use from within App.CreateNoSQLDatabase.
+func NewDatabase(ctx *context.T, a interfaces.App, name string, metadata *wire.SchemaMetadata, opts DatabaseOptions) (*database, error) {
+ if opts.Perms == nil {
+ return nil, verror.New(verror.ErrInternal, ctx, "perms must be specified")
+ }
+ d, err := OpenDatabase(ctx, a, name, opts, util.OpenOptions{CreateIfMissing: true, ErrorIfExists: true})
+ if err != nil {
+ return nil, err
+ }
+ data := &databaseData{
+ Name: d.name,
+ Perms: opts.Perms,
+ SchemaMetadata: metadata,
+ }
+ if err := util.Put(ctx, d.st, d.stKey(), data); err != nil {
+ return nil, err
+ }
+ return d, nil
+}
+
+////////////////////////////////////////
+// RPC methods
+
+func (d *databaseReq) Create(ctx *context.T, call rpc.ServerCall, metadata *wire.SchemaMetadata, perms access.Permissions) error {
+ if d.exists {
+ return verror.New(verror.ErrExist, ctx, d.name)
+ }
+ if d.batchId != nil {
+ return wire.NewErrBoundToBatch(ctx)
+ }
+ // This database does not yet exist; d is just an ephemeral handle that holds
+ // {name string, a *app}. d.a.CreateNoSQLDatabase will create a new database
+ // handle and store it in d.a.dbs[d.name].
+ return d.a.CreateNoSQLDatabase(ctx, call, d.name, perms, metadata)
+}
+
+func (d *databaseReq) Delete(ctx *context.T, call rpc.ServerCall, schemaVersion int32) error {
+ if d.batchId != nil {
+ return wire.NewErrBoundToBatch(ctx)
+ }
+ if err := d.checkSchemaVersion(ctx, schemaVersion); err != nil {
+ return err
+ }
+ return d.a.DeleteNoSQLDatabase(ctx, call, d.name)
+}
+
+func (d *databaseReq) Exists(ctx *context.T, call rpc.ServerCall, schemaVersion int32) (bool, error) {
+ if !d.exists {
+ return false, nil
+ }
+ if err := d.checkSchemaVersion(ctx, schemaVersion); err != nil {
+ return false, err
+ }
+ return util.ErrorToExists(util.GetWithAuth(ctx, call, d.st, d.stKey(), &databaseData{}))
+}
+
+var rng *rand.Rand = rand.New(rand.NewSource(time.Now().UTC().UnixNano()))
+
+func (d *databaseReq) BeginBatch(ctx *context.T, call rpc.ServerCall, schemaVersion int32, bo wire.BatchOptions) (string, error) {
+ if !d.exists {
+ return "", verror.New(verror.ErrNoExist, ctx, d.name)
+ }
+ if d.batchId != nil {
+ return "", wire.NewErrBoundToBatch(ctx)
+ }
+ if err := d.checkSchemaVersion(ctx, schemaVersion); err != nil {
+ return "", err
+ }
+
+ d.mu.Lock()
+ defer d.mu.Unlock()
+ var id uint64
+ var batchType string
+ for {
+ id = uint64(rng.Int63())
+ if bo.ReadOnly {
+ if _, ok := d.sns[id]; !ok {
+ d.sns[id] = d.st.NewSnapshot()
+ batchType = "sn"
+ break
+ }
+ } else {
+ if _, ok := d.txs[id]; !ok {
+ d.txs[id] = d.st.NewTransaction()
+ batchType = "tx"
+ break
+ }
+ }
+ }
+ return strings.Join([]string{d.name, batchType, strconv.FormatUint(id, 10)}, util.BatchSep), nil
+}
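+
+// Illustrative note: the handle returned above is the database name, the batch
+// type ("sn" or "tx"), and the random id joined with util.BatchSep, roughly:
+//
+//   handle := strings.Join([]string{"mydb", "tx", "4203974523"}, util.BatchSep)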
+
+func (d *databaseReq) Commit(ctx *context.T, call rpc.ServerCall, schemaVersion int32) error {
+ if !d.exists {
+ return verror.New(verror.ErrNoExist, ctx, d.name)
+ }
+ if d.batchId == nil {
+ return wire.NewErrNotBoundToBatch(ctx)
+ }
+ if d.tx == nil {
+ return wire.NewErrReadOnlyBatch(ctx)
+ }
+ if err := d.checkSchemaVersion(ctx, schemaVersion); err != nil {
+ return err
+ }
+ var err error
+ if err = d.tx.Commit(); err == nil {
+ d.mu.Lock()
+ delete(d.txs, *d.batchId)
+ d.mu.Unlock()
+ }
+ if verror.ErrorID(err) == store.ErrConcurrentTransaction.ID {
+ return verror.New(wire.ErrConcurrentBatch, ctx, err)
+ }
+ return err
+}
+
+func (d *databaseReq) Abort(ctx *context.T, call rpc.ServerCall, schemaVersion int32) error {
+ if !d.exists {
+ return verror.New(verror.ErrNoExist, ctx, d.name)
+ }
+ if d.batchId == nil {
+ return wire.NewErrNotBoundToBatch(ctx)
+ }
+ if err := d.checkSchemaVersion(ctx, schemaVersion); err != nil {
+ return err
+ }
+ var err error
+ if d.tx != nil {
+ if err = d.tx.Abort(); err == nil {
+ d.mu.Lock()
+ delete(d.txs, *d.batchId)
+ d.mu.Unlock()
+ }
+ } else {
+ if err = d.sn.Abort(); err == nil {
+ d.mu.Lock()
+ delete(d.sns, *d.batchId)
+ d.mu.Unlock()
+ }
+ }
+ return err
+}
+
+func (d *databaseReq) Exec(ctx *context.T, call wire.DatabaseExecServerCall, schemaVersion int32, q string) error {
+ if err := d.checkSchemaVersion(ctx, schemaVersion); err != nil {
+ return err
+ }
+ impl := func(headers []string, rs ResultStream, err error) error {
+ if err != nil {
+ return err
+ }
+ sender := call.SendStream()
+ // Push the headers first -- the client will retrieve them and return
+ // them separately from the results.
+ var resultHeaders []*vdl.Value
+ for _, header := range headers {
+ resultHeaders = append(resultHeaders, vdl.ValueOf(header))
+ }
+ sender.Send(resultHeaders)
+ for rs.Advance() {
+ result := rs.Result()
+ if err := sender.Send(result); err != nil {
+ rs.Cancel()
+ return err
+ }
+ }
+ return rs.Err()
+ }
+ var sntx store.SnapshotOrTransaction
+ if d.batchId != nil {
+ sntx = d.batchReader()
+ } else {
+ sntx = d.st.NewSnapshot()
+ defer sntx.Abort()
+ }
+ // queryDb implements query_db.Database, which is needed by the query
+ // package's Exec function.
+ db := &queryDb{
+ ctx: ctx,
+ call: call,
+ req: d,
+ sntx: sntx,
+ }
+
+ return impl(query_exec.Exec(db, q))
+}
+
+func (d *databaseReq) SetPermissions(ctx *context.T, call rpc.ServerCall, perms access.Permissions, version string) error {
+ if !d.exists {
+ return verror.New(verror.ErrNoExist, ctx, d.name)
+ }
+ if d.batchId != nil {
+ return wire.NewErrBoundToBatch(ctx)
+ }
+ return d.a.SetDatabasePerms(ctx, call, d.name, perms, version)
+}
+
+func (d *databaseReq) GetPermissions(ctx *context.T, call rpc.ServerCall) (perms access.Permissions, version string, err error) {
+ if !d.exists {
+ return nil, "", verror.New(verror.ErrNoExist, ctx, d.name)
+ }
+ if d.batchId != nil {
+ return nil, "", wire.NewErrBoundToBatch(ctx)
+ }
+ data := &databaseData{}
+ if err := util.GetWithAuth(ctx, call, d.st, d.stKey(), data); err != nil {
+ return nil, "", err
+ }
+ return data.Perms, util.FormatVersion(data.Version), nil
+}
+
+func (d *databaseReq) GlobChildren__(ctx *context.T, call rpc.GlobChildrenServerCall, matcher *glob.Element) error {
+ if !d.exists {
+ return verror.New(verror.ErrNoExist, ctx, d.name)
+ }
+ if d.batchId != nil {
+ return wire.NewErrBoundToBatch(ctx)
+ }
+ // Check perms.
+ sn := d.st.NewSnapshot()
+ if err := util.GetWithAuth(ctx, call, sn, d.stKey(), &databaseData{}); err != nil {
+ sn.Abort()
+ return err
+ }
+ return util.Glob(ctx, call, matcher, sn, sn.Abort, util.TablePrefix)
+}
+
+////////////////////////////////////////
+// ResultStream interface
+
+// ResultStream is an interface for iterating through the results (i.e., rows)
+// returned from a query. Each result row is an array of vdl objects.
+type ResultStream interface {
+ // Advance stages an element so the client can retrieve it with Result.
+ // Advance returns true iff there is a result to retrieve. The client must
+ // call Advance before calling Result. The client must call Cancel if it
+ // does not iterate through all elements (i.e. until Advance returns false).
+ // Advance may block if an element is not immediately available.
+ Advance() bool
+
+ // Result returns the row (i.e., array of vdl Values) that was staged by Advance.
+ // Result may panic if Advance returned false or was not called at all.
+ // Result does not block.
+ Result() []*vdl.Value
+
+ // Err returns a non-nil error iff the stream encountered any errors. Err does
+ // not block.
+ Err() error
+
+ // Cancel notifies the ResultStream provider that it can stop producing results.
+ // The client must call Cancel if it does not iterate through all results
+ // (i.e. until Advance returns false). Cancel is idempotent and can be called
+ // concurrently with a goroutine that is iterating via Advance/Result.
+ // Cancel causes Advance to subsequently return false. Cancel does not block.
+ Cancel()
+}
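+
+// Example (illustrative sketch, assuming rs is a ResultStream produced by
+// query execution): the canonical iteration pattern is
+//
+//   for rs.Advance() {
+//       row := rs.Result() // []*vdl.Value
+//       _ = row
+//   }
+//   if err := rs.Err(); err != nil {
+//       // handle the error; Cancel is only needed when stopping early
+//   }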
+
+////////////////////////////////////////
+// interfaces.Database methods
+
+func (d *database) St() store.Store {
+ if !d.exists {
+ vlog.Fatalf("database %q does not exist", d.name)
+ }
+ return d.st
+}
+
+func (d *database) App() interfaces.App {
+ return d.a
+}
+
+func (d *database) CheckPermsInternal(ctx *context.T, call rpc.ServerCall, st store.StoreReader) error {
+ if !d.exists {
+ vlog.Fatalf("database %q does not exist", d.name)
+ }
+ return util.GetWithAuth(ctx, call, st, d.stKey(), &databaseData{})
+}
+
+func (d *database) SetPermsInternal(ctx *context.T, call rpc.ServerCall, perms access.Permissions, version string) error {
+ if !d.exists {
+ vlog.Fatalf("database %q does not exist", d.name)
+ }
+ return store.RunInTransaction(d.st, func(tx store.Transaction) error {
+ data := &databaseData{}
+ return util.UpdateWithAuth(ctx, call, tx, d.stKey(), data, func() error {
+ if err := util.CheckVersion(ctx, version, data.Version); err != nil {
+ return err
+ }
+ data.Perms = perms
+ data.Version++
+ return nil
+ })
+ })
+}
+
+func (d *database) Name() string {
+ return d.name
+}
+
+////////////////////////////////////////
+// query_db implementation
+
+// Implement query_db's Database, Table and KeyValueStream interfaces.
+type queryDb struct {
+ ctx *context.T
+ call wire.DatabaseExecServerCall
+ req *databaseReq
+ sntx store.SnapshotOrTransaction
+}
+
+func (db *queryDb) GetContext() *context.T {
+ return db.ctx
+}
+
+func (db *queryDb) GetTable(name string) (query_db.Table, error) {
+ tDb := &tableDb{
+ qdb: db,
+ req: &tableReq{
+ name: name,
+ d: db.req,
+ },
+ }
+ // Now that we have a table, we need to check permissions.
+ if err := util.GetWithAuth(db.ctx, db.call, db.sntx, tDb.req.stKey(), &tableData{}); err != nil {
+ return nil, err
+ }
+ return tDb, nil
+}
+
+type tableDb struct {
+ qdb *queryDb
+ req *tableReq
+}
+
+func (t *tableDb) Scan(keyRanges query_db.KeyRanges) (query_db.KeyValueStream, error) {
+ streams := []store.Stream{}
+ for _, keyRange := range keyRanges {
+ // TODO(jkline): For now, acquire all of the streams at once to minimize the race condition.
+ // Need a way to Scan multiple ranges at the same state of uncommitted changes.
+ streams = append(streams, t.qdb.sntx.Scan(util.ScanRangeArgs(util.JoinKeyParts(util.RowPrefix, t.req.name), keyRange.Start, keyRange.Limit)))
+ }
+ return &kvs{
+ t: t,
+ curr: 0,
+ validRow: false,
+ it: streams,
+ err: nil,
+ }, nil
+}
+
+type kvs struct {
+ t *tableDb
+ curr int
+ validRow bool
+ currKey string
+ currValue *vdl.Value
+ it []store.Stream // array of store.Streams
+ err error
+}
+
+func (s *kvs) Advance() bool {
+ if s.err != nil {
+ return false
+ }
+ for s.curr < len(s.it) {
+ if s.it[s.curr].Advance() {
+ // key
+ keyBytes := s.it[s.curr].Key(nil)
+ parts := util.SplitKeyParts(string(keyBytes))
+ // TODO(rogulenko): Check access for the key.
+ s.currKey = parts[len(parts)-1]
+ // value
+ valueBytes := s.it[s.curr].Value(nil)
+ var currValue *vdl.Value
+ if err := vom.Decode(valueBytes, &currValue); err != nil {
+ s.validRow = false
+ s.err = err
+ return false
+ }
+ s.currValue = currValue
+ s.validRow = true
+ return true
+ }
+ // Advance returned false. It could be an error, or we could
+ // have reached the end.
+ if err := s.it[s.curr].Err(); err != nil {
+ s.validRow = false
+ s.err = err
+ return false
+ }
+ // We've reached the end of the iterator for this keyRange.
+ // Jump to the next one.
+ s.it[s.curr] = nil
+ s.curr++
+ s.validRow = false
+ }
+ // There are no more prefixes to scan.
+ return false
+}
+
+func (s *kvs) KeyValue() (string, *vdl.Value) {
+ if !s.validRow {
+ return "", nil
+ }
+ return s.currKey, s.currValue
+}
+
+func (s *kvs) Err() error {
+ return s.err
+}
+
+func (s *kvs) Cancel() {
+ if s.it != nil {
+ for i := s.curr; i < len(s.it); i++ {
+ s.it[i].Cancel()
+ }
+ // Advance curr past the end of the streams and drop them so that
+ // subsequent calls to Advance return false.
+ s.curr = len(s.it)
+ s.it = nil
+ }
+}
+
+////////////////////////////////////////
+// Internal helpers
+
+func (d *database) stKey() string {
+ return util.DatabasePrefix
+}
+
+func (d *databaseReq) batchReader() store.SnapshotOrTransaction {
+ if d.batchId == nil {
+ return nil
+ } else if d.sn != nil {
+ return d.sn
+ } else {
+ return d.tx
+ }
+}
+
+func (d *databaseReq) batchTransaction() (store.Transaction, error) {
+ if d.batchId == nil {
+ return nil, nil
+ } else if d.tx != nil {
+ return d.tx, nil
+ } else {
+ return nil, wire.NewErrReadOnlyBatch(nil)
+ }
+}
+
+// TODO(jlodhia): Schema check should happen within a transaction for each
+// operation in database, table and row. Do schema check along with permissions
+// check when fully-specified permission model is implemented.
+func (d *databaseReq) checkSchemaVersion(ctx *context.T, schemaVersion int32) error {
+ if !d.exists {
+ // database does not exist yet and hence there is no schema to check.
+ // This can happen if delete is called twice on the same database.
+ return nil
+ }
+ schemaMetadata, err := d.getSchemaMetadataWithoutAuth(ctx)
+ if err != nil {
+ return err
+ }
+ if (schemaMetadata == nil) || (schemaMetadata.Version == schemaVersion) {
+ return nil
+ }
+ return wire.NewErrSchemaVersionMismatch(ctx)
+}
diff --git a/services/syncbase/server/nosql/database_bm.go b/services/syncbase/server/nosql/database_bm.go
new file mode 100644
index 0000000..e646c42
--- /dev/null
+++ b/services/syncbase/server/nosql/database_bm.go
@@ -0,0 +1,95 @@
+// Copyright 2015 The Vanadium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package nosql
+
+import (
+ wire "v.io/syncbase/v23/services/syncbase/nosql"
+ "v.io/syncbase/x/ref/services/syncbase/vsync"
+ "v.io/v23/context"
+ "v.io/v23/rpc"
+)
+
+////////////////////////////////////////////////////////////////////////////////
+// RPCs for managing blobs between Syncbase and its clients.
+
+func (d *databaseReq) CreateBlob(ctx *context.T, call rpc.ServerCall) (wire.BlobRef, error) {
+ if d.batchId != nil {
+ return wire.NullBlobRef, wire.NewErrBoundToBatch(ctx)
+ }
+ sd := vsync.NewSyncDatabase(d)
+ return sd.CreateBlob(ctx, call)
+}
+
+func (d *databaseReq) PutBlob(ctx *context.T, call wire.BlobManagerPutBlobServerCall, br wire.BlobRef) error {
+ if d.batchId != nil {
+ return wire.NewErrBoundToBatch(ctx)
+ }
+ sd := vsync.NewSyncDatabase(d)
+ return sd.PutBlob(ctx, call, br)
+}
+
+func (d *databaseReq) CommitBlob(ctx *context.T, call rpc.ServerCall, br wire.BlobRef) error {
+ if d.batchId != nil {
+ return wire.NewErrBoundToBatch(ctx)
+ }
+ sd := vsync.NewSyncDatabase(d)
+ return sd.CommitBlob(ctx, call, br)
+}
+
+func (d *databaseReq) GetBlobSize(ctx *context.T, call rpc.ServerCall, br wire.BlobRef) (int64, error) {
+ if d.batchId != nil {
+ return 0, wire.NewErrBoundToBatch(ctx)
+ }
+ sd := vsync.NewSyncDatabase(d)
+ return sd.GetBlobSize(ctx, call, br)
+}
+
+func (d *databaseReq) DeleteBlob(ctx *context.T, call rpc.ServerCall, br wire.BlobRef) error {
+ if d.batchId != nil {
+ return wire.NewErrBoundToBatch(ctx)
+ }
+ sd := vsync.NewSyncDatabase(d)
+ return sd.DeleteBlob(ctx, call, br)
+}
+
+func (d *databaseReq) GetBlob(ctx *context.T, call wire.BlobManagerGetBlobServerCall, br wire.BlobRef, offset int64) error {
+ if d.batchId != nil {
+ return wire.NewErrBoundToBatch(ctx)
+ }
+ sd := vsync.NewSyncDatabase(d)
+ return sd.GetBlob(ctx, call, br, offset)
+}
+
+func (d *databaseReq) FetchBlob(ctx *context.T, call wire.BlobManagerFetchBlobServerCall, br wire.BlobRef, priority uint64) error {
+ if d.batchId != nil {
+ return wire.NewErrBoundToBatch(ctx)
+ }
+ sd := vsync.NewSyncDatabase(d)
+ return sd.FetchBlob(ctx, call, br, priority)
+}
+
+func (d *databaseReq) PinBlob(ctx *context.T, call rpc.ServerCall, br wire.BlobRef) error {
+ if d.batchId != nil {
+ return wire.NewErrBoundToBatch(ctx)
+ }
+ sd := vsync.NewSyncDatabase(d)
+ return sd.PinBlob(ctx, call, br)
+}
+
+func (d *databaseReq) UnpinBlob(ctx *context.T, call rpc.ServerCall, br wire.BlobRef) error {
+ if d.batchId != nil {
+ return wire.NewErrBoundToBatch(ctx)
+ }
+ sd := vsync.NewSyncDatabase(d)
+ return sd.UnpinBlob(ctx, call, br)
+}
+
+func (d *databaseReq) KeepBlob(ctx *context.T, call rpc.ServerCall, br wire.BlobRef, rank uint64) error {
+ if d.batchId != nil {
+ return wire.NewErrBoundToBatch(ctx)
+ }
+ sd := vsync.NewSyncDatabase(d)
+ return sd.KeepBlob(ctx, call, br, rank)
+}
diff --git a/services/syncbase/server/nosql/database_crm.go b/services/syncbase/server/nosql/database_crm.go
new file mode 100644
index 0000000..e1135a9
--- /dev/null
+++ b/services/syncbase/server/nosql/database_crm.go
@@ -0,0 +1,20 @@
+// Copyright 2015 The Vanadium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package nosql
+
+import (
+ wire "v.io/syncbase/v23/services/syncbase/nosql"
+ "v.io/v23/context"
+)
+
+////////////////////////////////////////
+// ConflictManager RPC methods
+
+func (d *databaseReq) StartConflictResolver(ctx *context.T, call wire.ConflictManagerStartConflictResolverServerCall) error {
+ // Store the conflict resolver connection in the per-app, per-database
+ // singleton so that sync can access it.
+ d.database.resolver = call
+ return nil
+}
diff --git a/services/syncbase/server/nosql/database_sgm.go b/services/syncbase/server/nosql/database_sgm.go
new file mode 100644
index 0000000..cc1a73d
--- /dev/null
+++ b/services/syncbase/server/nosql/database_sgm.go
@@ -0,0 +1,85 @@
+// Copyright 2015 The Vanadium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package nosql
+
+import (
+ wire "v.io/syncbase/v23/services/syncbase/nosql"
+ "v.io/syncbase/x/ref/services/syncbase/vsync"
+ "v.io/v23/context"
+ "v.io/v23/rpc"
+ "v.io/v23/verror"
+)
+
+////////////////////////////////////////
+// SyncGroup RPC methods
+
+func (d *databaseReq) GetSyncGroupNames(ctx *context.T, call rpc.ServerCall) ([]string, error) {
+ if d.batchId != nil {
+ return nil, wire.NewErrBoundToBatch(ctx)
+ }
+ sd := vsync.NewSyncDatabase(d)
+ return sd.GetSyncGroupNames(ctx, call)
+}
+
+func (d *databaseReq) CreateSyncGroup(ctx *context.T, call rpc.ServerCall, sgName string, spec wire.SyncGroupSpec, myInfo wire.SyncGroupMemberInfo) error {
+ if d.batchId != nil {
+ return wire.NewErrBoundToBatch(ctx)
+ }
+ sd := vsync.NewSyncDatabase(d)
+ return sd.CreateSyncGroup(ctx, call, sgName, spec, myInfo)
+}
+
+func (d *databaseReq) JoinSyncGroup(ctx *context.T, call rpc.ServerCall, sgName string, myInfo wire.SyncGroupMemberInfo) (wire.SyncGroupSpec, error) {
+ if d.batchId != nil {
+ return wire.SyncGroupSpec{}, wire.NewErrBoundToBatch(ctx)
+ }
+ sd := vsync.NewSyncDatabase(d)
+ return sd.JoinSyncGroup(ctx, call, sgName, myInfo)
+}
+
+func (d *databaseReq) LeaveSyncGroup(ctx *context.T, call rpc.ServerCall, sgName string) error {
+ if d.batchId != nil {
+ return wire.NewErrBoundToBatch(ctx)
+ }
+ return verror.NewErrNotImplemented(ctx)
+}
+
+func (d *databaseReq) DestroySyncGroup(ctx *context.T, call rpc.ServerCall, sgName string) error {
+ if d.batchId != nil {
+ return wire.NewErrBoundToBatch(ctx)
+ }
+ return verror.NewErrNotImplemented(ctx)
+}
+
+func (d *databaseReq) EjectFromSyncGroup(ctx *context.T, call rpc.ServerCall, sgName, member string) error {
+ if d.batchId != nil {
+ return wire.NewErrBoundToBatch(ctx)
+ }
+ return verror.NewErrNotImplemented(ctx)
+}
+
+func (d *databaseReq) GetSyncGroupSpec(ctx *context.T, call rpc.ServerCall, sgName string) (wire.SyncGroupSpec, string, error) {
+ if d.batchId != nil {
+ return wire.SyncGroupSpec{}, "", wire.NewErrBoundToBatch(ctx)
+ }
+ sd := vsync.NewSyncDatabase(d)
+ return sd.GetSyncGroupSpec(ctx, call, sgName)
+}
+
+func (d *databaseReq) SetSyncGroupSpec(ctx *context.T, call rpc.ServerCall, sgName string, spec wire.SyncGroupSpec, version string) error {
+ if d.batchId != nil {
+ return wire.NewErrBoundToBatch(ctx)
+ }
+ sd := vsync.NewSyncDatabase(d)
+ return sd.SetSyncGroupSpec(ctx, call, sgName, spec, version)
+}
+
+func (d *databaseReq) GetSyncGroupMembers(ctx *context.T, call rpc.ServerCall, sgName string) (map[string]wire.SyncGroupMemberInfo, error) {
+ if d.batchId != nil {
+ return nil, wire.NewErrBoundToBatch(ctx)
+ }
+ sd := vsync.NewSyncDatabase(d)
+ return sd.GetSyncGroupMembers(ctx, call, sgName)
+}
diff --git a/services/syncbase/server/nosql/database_sm.go b/services/syncbase/server/nosql/database_sm.go
new file mode 100644
index 0000000..3c87a6b
--- /dev/null
+++ b/services/syncbase/server/nosql/database_sm.go
@@ -0,0 +1,64 @@
+// Copyright 2015 The Vanadium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package nosql
+
+import (
+ wire "v.io/syncbase/v23/services/syncbase/nosql"
+ "v.io/syncbase/x/ref/services/syncbase/server/util"
+ "v.io/syncbase/x/ref/services/syncbase/store"
+ "v.io/v23/context"
+ "v.io/v23/rpc"
+ "v.io/v23/verror"
+)
+
+////////////////////////////////////////
+// SchemaManager RPC methods
+
+func (d *databaseReq) GetSchemaMetadata(ctx *context.T, call rpc.ServerCall) (wire.SchemaMetadata, error) {
+ metadata := wire.SchemaMetadata{}
+
+ if !d.exists {
+ return metadata, verror.New(verror.ErrNoExist, ctx, d.Name())
+ }
+
+ // Check permissions on Database and retrieve schema metadata.
+ dbData := databaseData{}
+ if err := util.GetWithAuth(ctx, call, d.st, d.stKey(), &dbData); err != nil {
+ return metadata, err
+ }
+ if dbData.SchemaMetadata == nil {
+ return metadata, verror.New(verror.ErrNoExist, ctx, "Schema does not exist for the db")
+ }
+ return *dbData.SchemaMetadata, nil
+}
+
+func (d *databaseReq) SetSchemaMetadata(ctx *context.T, call rpc.ServerCall, metadata wire.SchemaMetadata) error {
+ // Check if database exists
+ if !d.exists {
+ return verror.New(verror.ErrNoExist, ctx, d.Name())
+ }
+
+ // Check permissions on Database and store schema metadata.
+ return store.RunInTransaction(d.st, func(tx store.Transaction) error {
+ dbData := databaseData{}
+ return util.UpdateWithAuth(ctx, call, tx, d.stKey(), &dbData, func() error {
+ // NOTE: For now we expect the client to not issue multiple
+ // concurrent SetSchemaMetadata calls.
+ dbData.SchemaMetadata = &metadata
+ return nil
+ })
+ })
+}
+
+func (d *databaseReq) getSchemaMetadataWithoutAuth(ctx *context.T) (*wire.SchemaMetadata, error) {
+ if !d.exists {
+ return nil, verror.New(verror.ErrInternal, ctx, "field store in database cannot be nil")
+ }
+ dbData := databaseData{}
+ if err := util.Get(ctx, d.st, d.stKey(), &dbData); err != nil {
+ return nil, err
+ }
+ return dbData.SchemaMetadata, nil
+}
diff --git a/services/syncbase/server/nosql/database_watch.go b/services/syncbase/server/nosql/database_watch.go
new file mode 100644
index 0000000..35228e6
--- /dev/null
+++ b/services/syncbase/server/nosql/database_watch.go
@@ -0,0 +1,217 @@
+// Copyright 2015 The Vanadium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package nosql
+
+import (
+ "bytes"
+ "strings"
+
+ wire "v.io/syncbase/v23/services/syncbase/nosql"
+ pubutil "v.io/syncbase/v23/syncbase/util"
+ "v.io/syncbase/x/ref/services/syncbase/server/util"
+ "v.io/syncbase/x/ref/services/syncbase/server/watchable"
+ "v.io/v23/context"
+ "v.io/v23/naming"
+ "v.io/v23/rpc"
+ "v.io/v23/services/watch"
+ "v.io/v23/vdl"
+ "v.io/v23/verror"
+)
+
+// GetResumeMarker implements the wire.DatabaseWatcher interface.
+func (d *databaseReq) GetResumeMarker(ctx *context.T, call rpc.ServerCall) (watch.ResumeMarker, error) {
+ if !d.exists {
+ return nil, verror.New(verror.ErrNoExist, ctx, d.name)
+ }
+ if d.batchId != nil {
+ return watchable.GetResumeMarker(d.batchReader())
+ } else {
+ return watchable.GetResumeMarker(d.st)
+ }
+}
+
+// WatchGlob implements the wire.DatabaseWatcher interface.
+func (d *databaseReq) WatchGlob(ctx *context.T, call watch.GlobWatcherWatchGlobServerCall, req watch.GlobRequest) error {
+ // TODO(rogulenko): Check permissions here and in other methods.
+ if !d.exists {
+ return verror.New(verror.ErrNoExist, ctx, d.name)
+ }
+ if d.batchId != nil {
+ return wire.NewErrBoundToBatch(ctx)
+ }
+ // Parse the pattern.
+ if !strings.HasSuffix(req.Pattern, "*") {
+ return verror.New(verror.ErrBadArg, ctx, req.Pattern)
+ }
+ table, prefix, err := pubutil.ParseTableRowPair(ctx, strings.TrimSuffix(req.Pattern, "*"))
+ if err != nil {
+ return err
+ }
+ // Get the resume marker and fetch the initial state if necessary.
+ resumeMarker := req.ResumeMarker
+ if bytes.Equal(resumeMarker, []byte("now")) || len(resumeMarker) == 0 {
+ var err error
+ if resumeMarker, err = watchable.GetResumeMarker(d.st); err != nil {
+ return err
+ }
+ if len(req.ResumeMarker) == 0 {
+ // TODO(rogulenko): Fetch the initial state.
+ return verror.NewErrNotImplemented(ctx)
+ }
+ }
+ t := tableReq{
+ name: table,
+ d: d,
+ }
+ return t.watchUpdates(ctx, call, prefix, resumeMarker)
+}
+
+// watchUpdates waits for database updates and sends them to the client.
+// This function repeats two steps in a loop:
+// - scan through the watch log until the end, sending all updates to the client;
+// - wait for one of two signals: new updates are available, or the call is canceled.
+// The 'new updates' signal is sent by a worker goroutine that translates a
+// condition variable signal to a Go channel. The worker goroutine waits on the
+// condition variable for changes. Whenever the state changes, the worker sends
+// a signal through the Go channel.
+func (t *tableReq) watchUpdates(ctx *context.T, call watch.GlobWatcherWatchGlobServerCall, prefix string, resumeMarker watch.ResumeMarker) error {
+ // The Go channel to send notifications from the worker to the main
+ // goroutine.
+ hasUpdates := make(chan struct{})
+ // The Go channel to signal the worker to stop. The worker might block
+ // on the condition variable, but we don't want the main goroutine
+ // to wait for the worker to stop, so we create a buffered channel.
+ cancelWorker := make(chan struct{}, 1)
+ defer close(cancelWorker)
+ go func() {
+ waitForChange := watchable.WatchUpdates(t.d.st)
+ var state, newState uint64 = 1, 1
+ for {
+ // Wait until the state changes or the main function returns.
+ for newState == state {
+ select {
+ case <-cancelWorker:
+ return
+ default:
+ }
+ newState = waitForChange(state)
+ }
+ // Update the current state to the new value and send a signal to
+ // the main goroutine.
+ state = newState
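+ // If the state drops to zero, stop watching: closing hasUpdates
+ // causes the main loop below to abort the call.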
+ if state == 0 {
+ close(hasUpdates)
+ return
+ }
+ // cancelWorker is closed as soon as the main function returns.
+ select {
+ case hasUpdates <- struct{}{}:
+ case <-cancelWorker:
+ return
+ }
+ }
+ }()
+
+ sender := call.SendStream()
+ for {
+ // Drain the log queue.
+ for {
+ logs, nextResumeMarker, err := watchable.ReadBatchFromLog(t.d.st, resumeMarker)
+ if err != nil {
+ return err
+ }
+ if logs == nil {
+ // No new log records available now.
+ break
+ }
+ resumeMarker = nextResumeMarker
+ changes, err := t.processLogBatch(ctx, call, prefix, logs)
+ if err != nil {
+ return err
+ }
+ if changes == nil {
+ // All batch changes are filtered out.
+ continue
+ }
+ changes[len(changes)-1].ResumeMarker = resumeMarker
+ for _, change := range changes {
+ if err := sender.Send(change); err != nil {
+ return err
+ }
+ }
+ }
+ // Wait for new updates or cancel.
+ select {
+ case _, ok := <-hasUpdates:
+ if !ok {
+ return verror.NewErrAborted(ctx)
+ }
+ case <-ctx.Done():
+ return ctx.Err()
+ }
+ }
+}
+
+// processLogBatch converts []*watchable.LogEntry to []watch.Change, filtering
+// out unnecessary or inaccessible log records.
+func (t *tableReq) processLogBatch(ctx *context.T, call rpc.ServerCall, prefix string, logs []*watchable.LogEntry) ([]watch.Change, error) {
+ sn := t.d.st.NewSnapshot()
+ defer sn.Abort()
+ var changes []watch.Change
+ for _, logEntry := range logs {
+ var opKey string
+ switch op := logEntry.Op.(type) {
+ case watchable.OpPut:
+ opKey = string(op.Value.Key)
+ case watchable.OpDelete:
+ opKey = string(op.Value.Key)
+ default:
+ continue
+ }
+ parts := util.SplitKeyParts(opKey)
+ // TODO(rogulenko): Currently we process only rows, i.e. keys of the form
+ // $row:xxx:yyy. Consider processing other keys.
+ if len(parts) != 3 || parts[0] != util.RowPrefix {
+ continue
+ }
+ table, row := parts[1], parts[2]
+ // Filter out unnecessary rows and rows that we can't access.
+ if table != t.name || !strings.HasPrefix(row, prefix) {
+ continue
+ }
+ if err := t.checkAccess(ctx, call, sn, row); err != nil {
+ if verror.ErrorID(err) != verror.ErrNoAccess.ID {
+ return nil, err
+ }
+ continue
+ }
+ change := watch.Change{
+ Name: naming.Join(table, row),
+ Continued: true,
+ }
+ switch op := logEntry.Op.(type) {
+ case watchable.OpPut:
+ rowValue, err := watchable.GetAtVersion(ctx, sn, op.Value.Key, nil, op.Value.Version)
+ if err != nil {
+ return nil, err
+ }
+ change.State = watch.Exists
+ change.Value = vdl.ValueOf(wire.StoreChange{
+ Value: rowValue,
+ FromSync: logEntry.FromSync,
+ })
+ case watchable.OpDelete:
+ change.State = watch.DoesNotExist
+ change.Value = vdl.ValueOf(wire.StoreChange{
+ FromSync: logEntry.FromSync,
+ })
+ }
+ changes = append(changes, change)
+ }
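+ // Clearing Continued on the last change marks the end of this atomic batch
+ // for the client.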
+ if len(changes) > 0 {
+ changes[len(changes)-1].Continued = false
+ }
+ return changes, nil
+}
diff --git a/services/syncbase/server/nosql/dispatcher.go b/services/syncbase/server/nosql/dispatcher.go
new file mode 100644
index 0000000..284f939
--- /dev/null
+++ b/services/syncbase/server/nosql/dispatcher.go
@@ -0,0 +1,141 @@
+// Copyright 2015 The Vanadium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package nosql
+
+import (
+ "strconv"
+ "strings"
+
+ wire "v.io/syncbase/v23/services/syncbase"
+ nosqlWire "v.io/syncbase/v23/services/syncbase/nosql"
+ pubutil "v.io/syncbase/v23/syncbase/util"
+ "v.io/syncbase/x/ref/services/syncbase/server/interfaces"
+ "v.io/syncbase/x/ref/services/syncbase/server/util"
+ "v.io/v23/context"
+ "v.io/v23/rpc"
+ "v.io/v23/security"
+ "v.io/v23/verror"
+ "v.io/x/lib/vlog"
+)
+
+type dispatcher struct {
+ a interfaces.App
+}
+
+var _ rpc.Dispatcher = (*dispatcher)(nil)
+
+func NewDispatcher(a interfaces.App) *dispatcher {
+ return &dispatcher{a: a}
+}
+
+// We always return an AllowEveryone authorizer from Lookup(), and rely on our
+// RPC method implementations to perform proper authorization.
+var auth security.Authorizer = security.AllowEveryone()
+
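+// Lookup resolves an object name suffix of the form
+// <database>[:<sn|tx>:<batchId>][/<table>[/<row>]] into the corresponding
+// server stub, e.g. "db", "db/tb" or "db:tx:42/tb/row".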
+func (disp *dispatcher) Lookup(_ *context.T, suffix string) (interface{}, security.Authorizer, error) {
+ suffix = strings.TrimPrefix(suffix, "/")
+ parts := strings.Split(suffix, "/")
+
+ if len(parts) == 0 {
+ vlog.Fatal("invalid nosql.dispatcher Lookup")
+ }
+
+ dParts := strings.Split(parts[0], util.BatchSep)
+ dName := dParts[0]
+
+ // Validate all key atoms up front, so that we can avoid doing so in all our
+ // method implementations.
+ if !pubutil.ValidName(dName) {
+ return nil, nil, wire.NewErrInvalidName(nil, suffix)
+ }
+ for _, s := range parts[1:] {
+ if !pubutil.ValidName(s) {
+ return nil, nil, wire.NewErrInvalidName(nil, suffix)
+ }
+ }
+
+ dExists := false
+ var d *database
+ if dInt, err := disp.a.NoSQLDatabase(nil, nil, dName); err == nil {
+ d = dInt.(*database) // panics on failure, as desired
+ dExists = true
+ } else {
+ if verror.ErrorID(err) != verror.ErrNoExist.ID {
+ return nil, nil, err
+ } else {
+ // Database does not exist. Create a short-lived database object to
+ // service this request.
+ d = &database{
+ name: dName,
+ a: disp.a,
+ }
+ }
+ }
+
+ dReq := &databaseReq{database: d}
+ if !setBatchFields(dReq, dParts) {
+ return nil, nil, wire.NewErrInvalidName(nil, suffix)
+ }
+ if len(parts) == 1 {
+ return nosqlWire.DatabaseServer(dReq), auth, nil
+ }
+
+ // All table and row methods require the database to exist. If it doesn't,
+ // abort early.
+ if !dExists {
+ return nil, nil, verror.New(verror.ErrNoExist, nil, d.name)
+ }
+
+ // Note, it's possible for the database to be deleted concurrently with
+ // downstream handling of this request. Depending on the order in which things
+ // execute, the client may not get an error, but in any case ultimately the
+ // store will end up in a consistent state.
+ tReq := &tableReq{
+ name: parts[1],
+ d: dReq,
+ }
+ if len(parts) == 2 {
+ return nosqlWire.TableServer(tReq), auth, nil
+ }
+
+ rReq := &rowReq{
+ key: parts[2],
+ t: tReq,
+ }
+ if len(parts) == 3 {
+ return nosqlWire.RowServer(rReq), auth, nil
+ }
+
+ return nil, nil, verror.NewErrNoExist(nil)
+}
+
+// setBatchFields sets the batch-related fields in databaseReq based on the
+// value of dParts, the parts of the database name component. It returns false
+// if dParts is malformed.
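+// For example, "db" carries no batch fields, while "db:sn:1" and "db:tx:1"
+// refer to snapshot 1 and transaction 1 of database "db", respectively.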
+func setBatchFields(d *databaseReq, dParts []string) bool {
+ if len(dParts) == 1 {
+ return true
+ }
+ if len(dParts) != 3 {
+ return false
+ }
+ batchId, err := strconv.ParseUint(dParts[2], 0, 64)
+ if err != nil {
+ return false
+ }
+ d.batchId = &batchId
+ d.mu.Lock()
+ defer d.mu.Unlock()
+ var ok bool
+ switch dParts[1] {
+ case "sn":
+ d.sn, ok = d.sns[batchId]
+ case "tx":
+ d.tx, ok = d.txs[batchId]
+ default:
+ return false
+ }
+ return ok
+}
diff --git a/services/syncbase/server/nosql/row.go b/services/syncbase/server/nosql/row.go
new file mode 100644
index 0000000..a6fbf9d
--- /dev/null
+++ b/services/syncbase/server/nosql/row.go
@@ -0,0 +1,141 @@
+// Copyright 2015 The Vanadium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package nosql
+
+import (
+ wire "v.io/syncbase/v23/services/syncbase/nosql"
+ "v.io/syncbase/x/ref/services/syncbase/server/util"
+ "v.io/syncbase/x/ref/services/syncbase/store"
+ "v.io/v23/context"
+ "v.io/v23/rpc"
+ "v.io/v23/verror"
+)
+
+// rowReq is a per-request object that handles Row RPCs.
+type rowReq struct {
+ key string
+ t *tableReq
+}
+
+var (
+ _ wire.RowServerMethods = (*rowReq)(nil)
+)
+
+////////////////////////////////////////
+// RPC methods
+
+func (r *rowReq) Exists(ctx *context.T, call rpc.ServerCall, schemaVersion int32) (bool, error) {
+ _, err := r.Get(ctx, call, schemaVersion)
+ return util.ErrorToExists(err)
+}
+
+func (r *rowReq) Get(ctx *context.T, call rpc.ServerCall, schemaVersion int32) ([]byte, error) {
+ impl := func(sntx store.SnapshotOrTransaction) ([]byte, error) {
+ if err := r.t.d.checkSchemaVersion(ctx, schemaVersion); err != nil {
+ return []byte{}, err
+ }
+ return r.get(ctx, call, sntx)
+ }
+ if r.t.d.batchId != nil {
+ return impl(r.t.d.batchReader())
+ } else {
+ sn := r.t.d.st.NewSnapshot()
+ defer sn.Abort()
+ return impl(sn)
+ }
+}
+
+func (r *rowReq) Put(ctx *context.T, call rpc.ServerCall, schemaVersion int32, value []byte) error {
+ impl := func(tx store.Transaction) error {
+ if err := r.t.d.checkSchemaVersion(ctx, schemaVersion); err != nil {
+ return err
+ }
+ return r.put(ctx, call, tx, value)
+ }
+ if r.t.d.batchId != nil {
+ if tx, err := r.t.d.batchTransaction(); err != nil {
+ return err
+ } else {
+ return impl(tx)
+ }
+ } else {
+ return store.RunInTransaction(r.t.d.st, impl)
+ }
+}
+
+func (r *rowReq) Delete(ctx *context.T, call rpc.ServerCall, schemaVersion int32) error {
+ impl := func(tx store.Transaction) error {
+ if err := r.t.d.checkSchemaVersion(ctx, schemaVersion); err != nil {
+ return err
+ }
+ return r.delete(ctx, call, tx)
+ }
+ if r.t.d.batchId != nil {
+ if tx, err := r.t.d.batchTransaction(); err != nil {
+ return err
+ } else {
+ return impl(tx)
+ }
+ } else {
+ return store.RunInTransaction(r.t.d.st, impl)
+ }
+}
+
+////////////////////////////////////////
+// Internal helpers
+
+func (r *rowReq) stKey() string {
+ return util.JoinKeyParts(util.RowPrefix, r.stKeyPart())
+}
+
+func (r *rowReq) stKeyPart() string {
+ return util.JoinKeyParts(r.t.stKeyPart(), r.key)
+}
+
+// checkAccess checks that this row's table exists in the database, and performs
+// an authorization check.
+func (r *rowReq) checkAccess(ctx *context.T, call rpc.ServerCall, sntx store.SnapshotOrTransaction) error {
+ return r.t.checkAccess(ctx, call, sntx, r.key)
+}
+
+// get reads data from the storage engine.
+// Performs authorization check.
+func (r *rowReq) get(ctx *context.T, call rpc.ServerCall, sntx store.SnapshotOrTransaction) ([]byte, error) {
+ if err := r.checkAccess(ctx, call, sntx); err != nil {
+ return nil, err
+ }
+ value, err := sntx.Get([]byte(r.stKey()), nil)
+ if err != nil {
+ if verror.ErrorID(err) == store.ErrUnknownKey.ID {
+ return nil, verror.New(verror.ErrNoExist, ctx, r.stKey())
+ }
+ return nil, verror.New(verror.ErrInternal, ctx, err)
+ }
+ return value, nil
+}
+
+// put writes data to the storage engine.
+// Performs authorization check.
+func (r *rowReq) put(ctx *context.T, call rpc.ServerCall, tx store.Transaction, value []byte) error {
+ if err := r.checkAccess(ctx, call, tx); err != nil {
+ return err
+ }
+ if err := tx.Put([]byte(r.stKey()), value); err != nil {
+ return verror.New(verror.ErrInternal, ctx, err)
+ }
+ return nil
+}
+
+// delete deletes data from the storage engine.
+// Performs authorization check.
+func (r *rowReq) delete(ctx *context.T, call rpc.ServerCall, tx store.Transaction) error {
+ if err := r.checkAccess(ctx, call, tx); err != nil {
+ return err
+ }
+ if err := tx.Delete([]byte(r.stKey())); err != nil {
+ return verror.New(verror.ErrInternal, ctx, err)
+ }
+ return nil
+}
diff --git a/services/syncbase/server/nosql/table.go b/services/syncbase/server/nosql/table.go
new file mode 100644
index 0000000..8cf744e
--- /dev/null
+++ b/services/syncbase/server/nosql/table.go
@@ -0,0 +1,486 @@
+// Copyright 2015 The Vanadium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package nosql
+
+import (
+ "strings"
+
+ wire "v.io/syncbase/v23/services/syncbase/nosql"
+ "v.io/syncbase/x/ref/services/syncbase/server/util"
+ "v.io/syncbase/x/ref/services/syncbase/store"
+ "v.io/v23/context"
+ "v.io/v23/glob"
+ "v.io/v23/rpc"
+ "v.io/v23/security/access"
+ "v.io/v23/verror"
+ "v.io/v23/vom"
+)
+
+// tableReq is a per-request object that handles Table RPCs.
+type tableReq struct {
+ name string
+ d *databaseReq
+}
+
+var (
+ _ wire.TableServerMethods = (*tableReq)(nil)
+)
+
+////////////////////////////////////////
+// RPC methods
+
+func (t *tableReq) Create(ctx *context.T, call rpc.ServerCall, schemaVersion int32, perms access.Permissions) error {
+ if t.d.batchId != nil {
+ return wire.NewErrBoundToBatch(ctx)
+ }
+ if err := t.d.checkSchemaVersion(ctx, schemaVersion); err != nil {
+ return err
+ }
+ return store.RunInTransaction(t.d.st, func(tx store.Transaction) error {
+ // Check databaseData perms.
+ dData := &databaseData{}
+ if err := util.GetWithAuth(ctx, call, tx, t.d.stKey(), dData); err != nil {
+ return err
+ }
+ // Check for "table already exists".
+ if err := util.Get(ctx, tx, t.stKey(), &tableData{}); verror.ErrorID(err) != verror.ErrNoExist.ID {
+ if err != nil {
+ return err
+ }
+ // TODO(sadovsky): Should this be ErrExistOrNoAccess, for privacy?
+ return verror.New(verror.ErrExist, ctx, t.name)
+ }
+ // Write new tableData.
+ if perms == nil {
+ perms = dData.Perms
+ }
+ data := &tableData{
+ Name: t.name,
+ Perms: perms,
+ }
+ return util.Put(ctx, tx, t.stKey(), data)
+ })
+}
+
+func (t *tableReq) Delete(ctx *context.T, call rpc.ServerCall, schemaVersion int32) error {
+ if t.d.batchId != nil {
+ return wire.NewErrBoundToBatch(ctx)
+ }
+ if err := t.d.checkSchemaVersion(ctx, schemaVersion); err != nil {
+ return err
+ }
+ return store.RunInTransaction(t.d.st, func(tx store.Transaction) error {
+ // Read-check-delete tableData.
+ if err := util.GetWithAuth(ctx, call, tx, t.stKey(), &tableData{}); err != nil {
+ if verror.ErrorID(err) == verror.ErrNoExist.ID {
+ return nil // delete is idempotent
+ }
+ return err
+ }
+ // TODO(sadovsky): Delete all rows in this table.
+ return util.Delete(ctx, tx, t.stKey())
+ })
+}
+
+func (t *tableReq) Exists(ctx *context.T, call rpc.ServerCall, schemaVersion int32) (bool, error) {
+ if err := t.d.checkSchemaVersion(ctx, schemaVersion); err != nil {
+ return false, err
+ }
+ return util.ErrorToExists(util.GetWithAuth(ctx, call, t.d.st, t.stKey(), &tableData{}))
+}
+
+func (t *tableReq) DeleteRowRange(ctx *context.T, call rpc.ServerCall, schemaVersion int32, start, limit []byte) error {
+ impl := func(tx store.Transaction) error {
+ // Check for table-level access before doing a scan.
+ if err := t.checkAccess(ctx, call, tx, ""); err != nil {
+ return err
+ }
+ // Check that the db schema version and the version provided by the
+ // client match.
+ if err := t.d.checkSchemaVersion(ctx, schemaVersion); err != nil {
+ return err
+ }
+ it := tx.Scan(util.ScanRangeArgs(util.JoinKeyParts(util.RowPrefix, t.name), string(start), string(limit)))
+ key := []byte{}
+ for it.Advance() {
+ key = it.Key(key)
+ // Check perms.
+ parts := util.SplitKeyParts(string(key))
+ externalKey := parts[len(parts)-1]
+ if err := t.checkAccess(ctx, call, tx, externalKey); err != nil {
+ // TODO(rogulenko): Revisit this behavior. Probably we should
+ // delete all rows that we have access to.
+ it.Cancel()
+ return err
+ }
+ // Delete the key-value pair.
+ if err := tx.Delete(key); err != nil {
+ return verror.New(verror.ErrInternal, ctx, err)
+ }
+ }
+ if err := it.Err(); err != nil {
+ return verror.New(verror.ErrInternal, ctx, err)
+ }
+ return nil
+ }
+ if t.d.batchId != nil {
+ if tx, err := t.d.batchTransaction(); err != nil {
+ return err
+ } else {
+ return impl(tx)
+ }
+ } else {
+ return store.RunInTransaction(t.d.st, impl)
+ }
+}
+
+func (t *tableReq) Scan(ctx *context.T, call wire.TableScanServerCall, schemaVersion int32, start, limit []byte) error {
+ impl := func(sntx store.SnapshotOrTransaction) error {
+ // Check for table-level access before doing a scan.
+ if err := t.checkAccess(ctx, call, sntx, ""); err != nil {
+ return err
+ }
+ if err := t.d.checkSchemaVersion(ctx, schemaVersion); err != nil {
+ return err
+ }
+ it := sntx.Scan(util.ScanRangeArgs(util.JoinKeyParts(util.RowPrefix, t.name), string(start), string(limit)))
+ sender := call.SendStream()
+ key, value := []byte{}, []byte{}
+ for it.Advance() {
+ key, value = it.Key(key), it.Value(value)
+ // Check perms.
+ parts := util.SplitKeyParts(string(key))
+ externalKey := parts[len(parts)-1]
+ if err := t.checkAccess(ctx, call, sntx, externalKey); err != nil {
+ it.Cancel()
+ return err
+ }
+ if err := sender.Send(wire.KeyValue{Key: externalKey, Value: value}); err != nil {
+ it.Cancel()
+ return err
+ }
+ }
+ if err := it.Err(); err != nil {
+ return verror.New(verror.ErrInternal, ctx, err)
+ }
+ return nil
+ }
+ if t.d.batchId != nil {
+ return impl(t.d.batchReader())
+ } else {
+ sntx := t.d.st.NewSnapshot()
+ defer sntx.Abort()
+ return impl(sntx)
+ }
+}
+
+func (t *tableReq) GetPermissions(ctx *context.T, call rpc.ServerCall, schemaVersion int32, key string) ([]wire.PrefixPermissions, error) {
+ impl := func(sntx store.SnapshotOrTransaction) ([]wire.PrefixPermissions, error) {
+ // Check permissions only at table level.
+ if err := t.checkAccess(ctx, call, sntx, ""); err != nil {
+ return nil, err
+ }
+ if err := t.d.checkSchemaVersion(ctx, schemaVersion); err != nil {
+ return nil, err
+ }
+ // Get the most specific permissions object.
+ prefix, prefixPerms, err := t.permsForKey(ctx, sntx, key)
+ if err != nil {
+ return nil, err
+ }
+ result := []wire.PrefixPermissions{{Prefix: prefix, Perms: prefixPerms.Perms}}
+ // Collect all parent permissions objects all the way up to the table level.
+ for prefix != "" {
+ prefix = prefixPerms.Parent
+ if prefixPerms, err = t.permsForPrefix(ctx, sntx, prefixPerms.Parent); err != nil {
+ return nil, err
+ }
+ result = append(result, wire.PrefixPermissions{Prefix: prefix, Perms: prefixPerms.Perms})
+ }
+ return result, nil
+ }
+ if t.d.batchId != nil {
+ return impl(t.d.batchReader())
+ } else {
+ sntx := t.d.st.NewSnapshot()
+ defer sntx.Abort()
+ return impl(sntx)
+ }
+}
+
+func (t *tableReq) SetPermissions(ctx *context.T, call rpc.ServerCall, schemaVersion int32, prefix string, perms access.Permissions) error {
+ impl := func(tx store.Transaction) error {
+ if err := t.checkAccess(ctx, call, tx, prefix); err != nil {
+ return err
+ }
+ if err := t.d.checkSchemaVersion(ctx, schemaVersion); err != nil {
+ return err
+ }
+ // Concurrent transactions that touch this table should fail with
+ // ErrConcurrentTransaction when this transaction commits.
+ if err := t.lock(ctx, tx); err != nil {
+ return err
+ }
+ if prefix == "" {
+ data := &tableData{}
+ return util.UpdateWithAuth(ctx, call, tx, t.stKey(), data, func() error {
+ data.Perms = perms
+ return nil
+ })
+ }
+ // Get the most specific permissions object.
+ parent, prefixPerms, err := t.permsForKey(ctx, tx, prefix)
+ if err != nil {
+ return err
+ }
+ // If there is no permissions object for the given prefix, we need to
+ // add a new node to the prefix permissions tree. We do this by updating
+ // the parent of every child of the prefix to point to the node
+ // corresponding to the prefix.
+ if parent != prefix {
+ if err := t.updateParentRefs(ctx, tx, prefix, prefix); err != nil {
+ return err
+ }
+ } else {
+ parent = prefixPerms.Parent
+ }
+ stPrefix := t.prefixPermsKey(prefix)
+ stPrefixLimit := stPrefix + util.PrefixRangeLimitSuffix
+ prefixPerms = stPrefixPerms{Parent: parent, Perms: perms}
+ // Put the (prefix, perms) pair to the database.
+ if err := util.Put(ctx, tx, stPrefix, prefixPerms); err != nil {
+ return err
+ }
+ return util.Put(ctx, tx, stPrefixLimit, prefixPerms)
+ }
+ if t.d.batchId != nil {
+ if tx, err := t.d.batchTransaction(); err != nil {
+ return err
+ } else {
+ return impl(tx)
+ }
+ } else {
+ return store.RunInTransaction(t.d.st, impl)
+ }
+}
+
+func (t *tableReq) DeletePermissions(ctx *context.T, call rpc.ServerCall, schemaVersion int32, prefix string) error {
+ if prefix == "" {
+ return verror.New(verror.ErrBadArg, ctx, prefix)
+ }
+ impl := func(tx store.Transaction) error {
+ if err := t.checkAccess(ctx, call, tx, prefix); err != nil {
+ return err
+ }
+ if err := t.d.checkSchemaVersion(ctx, schemaVersion); err != nil {
+ return err
+ }
+ // Concurrent transactions that touch this table should fail with
+ // ErrConcurrentTransaction when this transaction commits.
+ if err := t.lock(ctx, tx); err != nil {
+ return err
+ }
+ // Get the most specific permissions object.
+ parent, prefixPerms, err := t.permsForKey(ctx, tx, prefix)
+ if err != nil {
+ return err
+ }
+ if parent != prefix {
+ // This can happen only if there is no permissions object for the
+ // given prefix. Since DeletePermissions is idempotent, return nil.
+ return nil
+ }
+ // We need to delete the node corresponding to the prefix from the
+ // prefix permissions tree. We do this by updating the parent of every
+ // child of the prefix to the parent of the node corresponding to the
+ // prefix.
+ if err := t.updateParentRefs(ctx, tx, prefix, prefixPerms.Parent); err != nil {
+ return err
+ }
+ stPrefix := []byte(t.prefixPermsKey(prefix))
+ stPrefixLimit := append(stPrefix, util.PrefixRangeLimitSuffix...)
+ if err := tx.Delete(stPrefix); err != nil {
+ return err
+ }
+ return tx.Delete(stPrefixLimit)
+ }
+ if t.d.batchId != nil {
+ if tx, err := t.d.batchTransaction(); err != nil {
+ return err
+ } else {
+ return impl(tx)
+ }
+ } else {
+ return store.RunInTransaction(t.d.st, impl)
+ }
+}
+
+func (t *tableReq) GlobChildren__(ctx *context.T, call rpc.GlobChildrenServerCall, matcher *glob.Element) error {
+ impl := func(sntx store.SnapshotOrTransaction, closeSntx func() error) error {
+ // Check perms.
+ if err := t.checkAccess(ctx, call, sntx, ""); err != nil {
+ closeSntx()
+ return err
+ }
+ // TODO(rogulenko): Check prefix permissions for children.
+ return util.Glob(ctx, call, matcher, sntx, closeSntx, util.JoinKeyParts(util.RowPrefix, t.name))
+ }
+ if t.d.batchId != nil {
+ return impl(t.d.batchReader(), func() error {
+ return nil
+ })
+ } else {
+ sn := t.d.st.NewSnapshot()
+ return impl(sn, sn.Abort)
+ }
+}
+
+////////////////////////////////////////
+// Internal helpers
+
+func (t *tableReq) stKey() string {
+ return util.JoinKeyParts(util.TablePrefix, t.stKeyPart())
+}
+
+func (t *tableReq) stKeyPart() string {
+ return t.name
+}
+
+// updateParentRefs sets the parent of every child of the given prefix to
+// newParent.
+func (t *tableReq) updateParentRefs(ctx *context.T, tx store.Transaction, prefix, newParent string) error {
+ stPrefix := []byte(t.prefixPermsKey(prefix))
+ stPrefixStart := append(stPrefix, 0)
+ stPrefixLimit := append(stPrefix, util.PrefixRangeLimitSuffix...)
+ it := tx.Scan(stPrefixStart, stPrefixLimit)
+ var key, value []byte
+ for it.Advance() {
+ key, value = it.Key(key), it.Value(value)
+ var prefixPerms stPrefixPerms
+ if err := vom.Decode(value, &prefixPerms); err != nil {
+ it.Cancel()
+ return verror.New(verror.ErrInternal, ctx, err)
+ }
+ prefixPerms.Parent = newParent
+ if err := util.Put(ctx, tx, string(key), prefixPerms); err != nil {
+ it.Cancel()
+ return err
+ }
+ }
+ if err := it.Err(); err != nil {
+ return verror.New(verror.ErrInternal, ctx, err)
+ }
+ return nil
+}
+
+// lock invalidates all in-flight transactions that have touched this table,
+// such that any subsequent tx.Commit() will return ErrConcurrentTransaction.
+//
+// It is necessary to call lock() every time prefix permissions are updated so
+// that snapshots inside all transactions reflect up-to-date permissions. Since
+// every public function that touches this table has to read the table-level
+// permissions object, it suffices to add the key of this object to the write
+// set of the current transaction.
+//
+// TODO(rogulenko): Revisit this behavior to provide more granularity.
+// One option is to add a prefix and its parent to the write set of the current
+// transaction when the permissions object for that prefix is updated.
+func (t *tableReq) lock(ctx *context.T, tx store.Transaction) error {
+ var data tableData
+ if err := util.Get(ctx, tx, t.stKey(), &data); err != nil {
+ return err
+ }
+ return util.Put(ctx, tx, t.stKey(), data)
+}
+
+// checkAccess checks that this table exists in the database, and performs
+// an authorization check. Access is checked at the table level and at the
+// level of the most specific prefix for the given key.
+// TODO(rogulenko): Revisit this behavior. Eventually we'll want the table-level
+// access check to be a check for "Resolve", i.e. also check access to
+// service, app and database.
+func (t *tableReq) checkAccess(ctx *context.T, call rpc.ServerCall, sntx store.SnapshotOrTransaction, key string) error {
+ prefix, prefixPerms, err := t.permsForKey(ctx, sntx, key)
+ if err != nil {
+ return err
+ }
+ if prefix != "" {
+ if err := util.GetWithAuth(ctx, call, sntx, t.stKey(), &tableData{}); err != nil {
+ return err
+ }
+ }
+ auth, _ := access.PermissionsAuthorizer(prefixPerms.Perms, access.TypicalTagType())
+ if err := auth.Authorize(ctx, call.Security()); err != nil {
+ return verror.New(verror.ErrNoAccess, ctx, prefix)
+ }
+ return nil
+}
+
+// permsForKey returns the longest prefix of the given key that has
+// associated permissions, along with its permissions object.
+// permsForKey doesn't perform an authorization check.
+//
+// Effectively, we represent all prefixes as a forest T, where each vertex maps
+// to a prefix. A parent for a string is the maximum proper prefix of it that
+// belongs to T. Each prefix P from T is represented as a pair of entries with
+// keys P and P~ with values of type stPrefixPerms (parent + perms). A
+// high-level explanation of how this function works:
+// 1. iter = db.Scan(K, "")
+//    Here the last character of iter.Key() is removed automatically if it
+//    is '~'.
+// 2. if hasPrefix(K, iter.Key()) return iter.Value()
+// 3. return parent(iter.Key())
+// Short proof:
+// The iter returned in step 1 points to one of the following:
+// - a string t that is equal to K;
+// - a string t~: if t is not a prefix of K, then K < t < t~, which
+//   contradicts the property of the iterator returned in step 1, so t is a
+//   prefix of K; moreover, t is the largest prefix of K, as all larger
+//   prefixes of K are less than t~; in this case step 2 returns the correct
+//   result;
+// - a string t that doesn't end with '~': it can't be a prefix of K, as all
+//   proper prefixes of K are less than K; parent(t) is a prefix of K,
+//   otherwise K < parent(t) < t; parent(t) is the largest prefix of K,
+//   otherwise t would be a prefix of K; in this case step 3 returns the
+//   correct result.
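+// An illustrative example: suppose permissions are set for prefixes "a" and
+// "ab". For key "abc" the scan starting at "abc" first hits the entry "ab~",
+// so the result is prefix "ab"; for key "ax" it first hits "a~", so the
+// result is "a"; for key "b" the scan finds no prefix entry, so the
+// table-level ("") permissions are returned.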
+func (t *tableReq) permsForKey(ctx *context.T, sntx store.SnapshotOrTransaction, key string) (string, stPrefixPerms, error) {
+ it := sntx.Scan(util.ScanRangeArgs(util.JoinKeyParts(util.PermsPrefix, t.name), key, ""))
+ if !it.Advance() {
+ prefixPerms, err := t.permsForPrefix(ctx, sntx, "")
+ return "", prefixPerms, err
+ }
+ defer it.Cancel()
+ parts := util.SplitKeyParts(string(it.Key(nil)))
+ prefix := strings.TrimSuffix(parts[len(parts)-1], util.PrefixRangeLimitSuffix)
+ value := it.Value(nil)
+ var prefixPerms stPrefixPerms
+ if err := vom.Decode(value, &prefixPerms); err != nil {
+ return "", stPrefixPerms{}, verror.New(verror.ErrInternal, ctx, err)
+ }
+ if strings.HasPrefix(key, prefix) {
+ return prefix, prefixPerms, nil
+ }
+ prefixPerms, err := t.permsForPrefix(ctx, sntx, prefixPerms.Parent)
+ return prefixPerms.Parent, prefixPerms, err
+}
+
+// permsForPrefix returns the permissions object associated with the
+// provided prefix.
+func (t *tableReq) permsForPrefix(ctx *context.T, sntx store.SnapshotOrTransaction, prefix string) (stPrefixPerms, error) {
+ if prefix == "" {
+ var data tableData
+ if err := util.Get(ctx, sntx, t.stKey(), &data); err != nil {
+ return stPrefixPerms{}, err
+ }
+ return stPrefixPerms{Perms: data.Perms}, nil
+ }
+ var prefixPerms stPrefixPerms
+ if err := util.Get(ctx, sntx, t.prefixPermsKey(prefix), &prefixPerms); err != nil {
+ return stPrefixPerms{}, verror.New(verror.ErrInternal, ctx, err)
+ }
+ return prefixPerms, nil
+}
+
+// prefixPermsKey returns the key used for storing permissions for the given
+// prefix in the table.
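+// For example, for table "tb" and prefix "foo", the key is "$perms:tb:foo".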
+func (t *tableReq) prefixPermsKey(prefix string) string {
+ return util.JoinKeyParts(util.PermsPrefix, t.name, prefix)
+}
diff --git a/services/syncbase/server/nosql/types.go b/services/syncbase/server/nosql/types.go
new file mode 100644
index 0000000..7a87916
--- /dev/null
+++ b/services/syncbase/server/nosql/types.go
@@ -0,0 +1,23 @@
+// Copyright 2015 The Vanadium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package nosql
+
+import (
+ "v.io/syncbase/x/ref/services/syncbase/server/util"
+ "v.io/v23/security/access"
+)
+
+var (
+ _ util.Permser = (*databaseData)(nil)
+ _ util.Permser = (*tableData)(nil)
+)
+
+func (data *databaseData) GetPerms() access.Permissions {
+ return data.Perms
+}
+
+func (data *tableData) GetPerms() access.Permissions {
+ return data.Perms
+}
diff --git a/services/syncbase/server/nosql/types.vdl b/services/syncbase/server/nosql/types.vdl
new file mode 100644
index 0000000..8ede239
--- /dev/null
+++ b/services/syncbase/server/nosql/types.vdl
@@ -0,0 +1,39 @@
+// Copyright 2015 The Vanadium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package nosql
+
+import (
+ "v.io/v23/security/access"
+ "v.io/syncbase/v23/services/syncbase/nosql"
+)
+
+// databaseData represents the persistent state of a Database.
+type databaseData struct {
+ Name string
+ Version uint64 // covers the Perms field below
+ Perms access.Permissions
+ SchemaMetadata ?nosql.SchemaMetadata
+}
+
+// tableData represents the persistent state of a Table.
+// TODO(sadovsky): Decide whether to track "empty-prefix" perms here.
+type tableData struct {
+ Name string
+ Perms access.Permissions
+}
+
+// stPrefixPerms describes the internal representation of prefix permissions
+// in the store.
+//
+// Each (key, perms) pair is stored as two key-value pairs:
+// "$perms:%table:key" - stPrefixPerms{parent, perms}
+// "$perms:%table:key~" - stPrefixPerms{parent, perms}
+// where "~" is a reserved char that's lexicographically greater than all
+// chars allowed by clients, %table is the name of the table, and parent is
+// the longest proper prefix of the key that has an associated permissions
+// object.
+type stPrefixPerms struct {
+ Parent string
+ Perms access.Permissions
+}
diff --git a/services/syncbase/server/nosql/types.vdl.go b/services/syncbase/server/nosql/types.vdl.go
new file mode 100644
index 0000000..bf5f346
--- /dev/null
+++ b/services/syncbase/server/nosql/types.vdl.go
@@ -0,0 +1,67 @@
+// Copyright 2015 The Vanadium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// This file was auto-generated by the vanadium vdl tool.
+// Source: types.vdl
+
+package nosql
+
+import (
+ // VDL system imports
+ "v.io/v23/vdl"
+
+ // VDL user imports
+ "v.io/syncbase/v23/services/syncbase/nosql"
+ "v.io/v23/security/access"
+)
+
+// databaseData represents the persistent state of a Database.
+type databaseData struct {
+ Name string
+ Version uint64 // covers the Perms field below
+ Perms access.Permissions
+ SchemaMetadata *nosql.SchemaMetadata
+}
+
+func (databaseData) __VDLReflect(struct {
+ Name string `vdl:"v.io/syncbase/x/ref/services/syncbase/server/nosql.databaseData"`
+}) {
+}
+
+// tableData represents the persistent state of a Table.
+// TODO(sadovsky): Decide whether to track "empty-prefix" perms here.
+type tableData struct {
+ Name string
+ Perms access.Permissions
+}
+
+func (tableData) __VDLReflect(struct {
+ Name string `vdl:"v.io/syncbase/x/ref/services/syncbase/server/nosql.tableData"`
+}) {
+}
+
+// stPrefixPerms describes the internal representation of prefix permissions
+// in the store.
+//
+// Each (key, perms) pair is stored as two key-value pairs:
+// "$perms:%table:key" - stPrefixPerms{parent, perms}
+// "$perms:%table:key~" - stPrefixPerms{parent, perms}
+// where "~" is a reserved char that's lexicographically greater than all
+// chars allowed by clients, %table is the name of the table, and parent is
+// the longest proper prefix of the key that has an associated permissions
+// object.
+type stPrefixPerms struct {
+ Parent string
+ Perms access.Permissions
+}
+
+func (stPrefixPerms) __VDLReflect(struct {
+ Name string `vdl:"v.io/syncbase/x/ref/services/syncbase/server/nosql.stPrefixPerms"`
+}) {
+}
+
+func init() {
+ vdl.Register((*databaseData)(nil))
+ vdl.Register((*tableData)(nil))
+ vdl.Register((*stPrefixPerms)(nil))
+}
diff --git a/services/syncbase/server/server_test.go b/services/syncbase/server/server_test.go
new file mode 100644
index 0000000..d6b51ad
--- /dev/null
+++ b/services/syncbase/server/server_test.go
@@ -0,0 +1,25 @@
+// Copyright 2015 The Vanadium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package server_test
+
+// Note: Most of our unit tests are client-side and cover end-to-end behavior.
+// Tests of the "server" package (and below) specifically target aspects of the
+// implementation that are difficult to test from the client side.
+
+import (
+ "testing"
+
+ tu "v.io/syncbase/v23/syncbase/testutil"
+ _ "v.io/x/ref/runtime/factories/generic"
+)
+
+////////////////////////////////////////
+// Test cases
+
+// TODO(sadovsky): Write some tests.
+func TestSomething(t *testing.T) {
+ _, _, cleanup := tu.SetupOrDie(nil)
+ defer cleanup()
+}
diff --git a/services/syncbase/server/service.go b/services/syncbase/server/service.go
new file mode 100644
index 0000000..180692f
--- /dev/null
+++ b/services/syncbase/server/service.go
@@ -0,0 +1,307 @@
+// Copyright 2015 The Vanadium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package server
+
+// TODO(sadovsky): Check Resolve access on parent where applicable. Relatedly,
+// convert ErrNoExist and ErrNoAccess to ErrNoExistOrNoAccess where needed to
+// preserve privacy.
+
+import (
+ "path"
+ "sync"
+
+ wire "v.io/syncbase/v23/services/syncbase"
+ "v.io/syncbase/x/ref/services/syncbase/server/interfaces"
+ "v.io/syncbase/x/ref/services/syncbase/server/nosql"
+ "v.io/syncbase/x/ref/services/syncbase/server/util"
+ "v.io/syncbase/x/ref/services/syncbase/store"
+ "v.io/syncbase/x/ref/services/syncbase/vsync"
+ "v.io/v23/context"
+ "v.io/v23/glob"
+ "v.io/v23/rpc"
+ "v.io/v23/security/access"
+ "v.io/v23/verror"
+ "v.io/v23/vom"
+)
+
+// service is a singleton (i.e. not per-request) that handles Service RPCs.
+type service struct {
+ st store.Store // keeps track of which apps and databases exist, etc.
+ sync interfaces.SyncServerMethods
+ opts ServiceOptions
+ // Guards the fields below. Held during app Create, Delete, and
+ // SetPermissions.
+ mu sync.Mutex
+ apps map[string]*app
+}
+
+var (
+ _ wire.ServiceServerMethods = (*service)(nil)
+ _ interfaces.Service = (*service)(nil)
+)
+
+// ServiceOptions configures a service.
+type ServiceOptions struct {
+ // Service-level permissions.
+ Perms access.Permissions
+ // Root dir for data storage.
+ RootDir string
+ // Storage engine to use (for service and per-database engines).
+ Engine string
+ // RPC server for this service. Needed to advertise this service in
+ // mount tables attached to SyncGroups.
+ Server rpc.Server
+}
+
+// NewService creates a new service instance and returns it.
+// TODO(sadovsky): If possible, close all stores when the server is stopped.
+func NewService(ctx *context.T, call rpc.ServerCall, opts ServiceOptions) (*service, error) {
+ if opts.Perms == nil {
+ return nil, verror.New(verror.ErrInternal, ctx, "perms must be specified")
+ }
+ st, err := util.OpenStore(opts.Engine, path.Join(opts.RootDir, opts.Engine), util.OpenOptions{CreateIfMissing: true, ErrorIfExists: false})
+ if err != nil {
+ return nil, err
+ }
+ s := &service{
+ st: st,
+ opts: opts,
+ apps: map[string]*app{},
+ }
+ data := &serviceData{
+ Perms: opts.Perms,
+ }
+ if err := util.Get(ctx, st, s.stKey(), &serviceData{}); verror.ErrorID(err) != verror.ErrNoExist.ID {
+ if err != nil {
+ return nil, err
+ }
+ // Service exists. Initialize in-memory data structures.
+ // Read all apps, populate apps map.
+ aIt := st.Scan(util.ScanPrefixArgs(util.AppPrefix, ""))
+ aBytes := []byte{}
+ for aIt.Advance() {
+ aBytes = aIt.Value(aBytes)
+ aData := &appData{}
+ if err := vom.Decode(aBytes, aData); err != nil {
+ return nil, verror.New(verror.ErrInternal, ctx, err)
+ }
+ a := &app{
+ name: aData.Name,
+ s: s,
+ exists: true,
+ dbs: make(map[string]interfaces.Database),
+ }
+ s.apps[a.name] = a
+ // Read all dbs for this app, populate dbs map.
+ dIt := st.Scan(util.ScanPrefixArgs(util.JoinKeyParts(util.DbInfoPrefix, aData.Name), ""))
+ dBytes := []byte{}
+ for dIt.Advance() {
+ dBytes = dIt.Value(dBytes)
+ info := &dbInfo{}
+ if err := vom.Decode(dBytes, info); err != nil {
+ return nil, verror.New(verror.ErrInternal, ctx, err)
+ }
+ d, err := nosql.OpenDatabase(ctx, a, info.Name, nosql.DatabaseOptions{
+ RootDir: info.RootDir,
+ Engine: info.Engine,
+ }, util.OpenOptions{
+ CreateIfMissing: false,
+ ErrorIfExists: false,
+ })
+ if err != nil {
+ return nil, verror.New(verror.ErrInternal, ctx, err)
+ }
+ a.dbs[info.Name] = d
+ }
+ if err := dIt.Err(); err != nil {
+ return nil, verror.New(verror.ErrInternal, ctx, err)
+ }
+ }
+ if err := aIt.Err(); err != nil {
+ return nil, verror.New(verror.ErrInternal, ctx, err)
+ }
+ } else {
+ // Service does not exist.
+ if err := util.Put(ctx, st, s.stKey(), data); err != nil {
+ return nil, err
+ }
+ }
+ // Note, vsync.New internally handles both first-time and subsequent
+ // invocations.
+ if s.sync, err = vsync.New(ctx, call, s, opts.Server, opts.RootDir); err != nil {
+ return nil, err
+ }
+ return s, nil
+}
+
+////////////////////////////////////////
+// RPC methods
+
+func (s *service) SetPermissions(ctx *context.T, call rpc.ServerCall, perms access.Permissions, version string) error {
+ return store.RunInTransaction(s.st, func(tx store.Transaction) error {
+ data := &serviceData{}
+ return util.UpdateWithAuth(ctx, call, tx, s.stKey(), data, func() error {
+ if err := util.CheckVersion(ctx, version, data.Version); err != nil {
+ return err
+ }
+ data.Perms = perms
+ data.Version++
+ return nil
+ })
+ })
+}
+
+func (s *service) GetPermissions(ctx *context.T, call rpc.ServerCall) (perms access.Permissions, version string, err error) {
+ data := &serviceData{}
+ if err := util.GetWithAuth(ctx, call, s.st, s.stKey(), data); err != nil {
+ return nil, "", err
+ }
+ return data.Perms, util.FormatVersion(data.Version), nil
+}
+
+func (s *service) GlobChildren__(ctx *context.T, call rpc.GlobChildrenServerCall, matcher *glob.Element) error {
+ // Check perms.
+ sn := s.st.NewSnapshot()
+ if err := util.GetWithAuth(ctx, call, sn, s.stKey(), &serviceData{}); err != nil {
+ sn.Abort()
+ return err
+ }
+ return util.Glob(ctx, call, matcher, sn, sn.Abort, util.AppPrefix)
+}
+
+////////////////////////////////////////
+// interfaces.Service methods
+
+func (s *service) St() store.Store {
+ return s.st
+}
+
+func (s *service) Sync() interfaces.SyncServerMethods {
+ return s.sync
+}
+
+func (s *service) App(ctx *context.T, call rpc.ServerCall, appName string) (interfaces.App, error) {
+ s.mu.Lock()
+ defer s.mu.Unlock()
+ // Note, currently the service's apps map as well as per-app dbs maps are
+ // populated at startup.
+ a, ok := s.apps[appName]
+ if !ok {
+ return nil, verror.New(verror.ErrNoExist, ctx, appName)
+ }
+ return a, nil
+}
+
+func (s *service) AppNames(ctx *context.T, call rpc.ServerCall) ([]string, error) {
+ // In the future this API will likely be replaced by one that streams the app
+ // names.
+ s.mu.Lock()
+ defer s.mu.Unlock()
+ appNames := make([]string, 0, len(s.apps))
+ for n := range s.apps {
+ appNames = append(appNames, n)
+ }
+ return appNames, nil
+}
+
+////////////////////////////////////////
+// App management methods
+
+func (s *service) createApp(ctx *context.T, call rpc.ServerCall, appName string, perms access.Permissions) error {
+ s.mu.Lock()
+ defer s.mu.Unlock()
+ if _, ok := s.apps[appName]; ok {
+ return verror.New(verror.ErrExist, ctx, appName)
+ }
+
+ a := &app{
+ name: appName,
+ s: s,
+ exists: true,
+ dbs: make(map[string]interfaces.Database),
+ }
+
+ if err := store.RunInTransaction(s.st, func(tx store.Transaction) error {
+ // Check serviceData perms.
+ sData := &serviceData{}
+ if err := util.GetWithAuth(ctx, call, tx, s.stKey(), sData); err != nil {
+ return err
+ }
+ // Check for "app already exists".
+ if err := util.Get(ctx, tx, a.stKey(), &appData{}); verror.ErrorID(err) != verror.ErrNoExist.ID {
+ if err != nil {
+ return err
+ }
+ return verror.New(verror.ErrExist, ctx, appName)
+ }
+ // Write new appData.
+ if perms == nil {
+ perms = sData.Perms
+ }
+ data := &appData{
+ Name: appName,
+ Perms: perms,
+ }
+ return util.Put(ctx, tx, a.stKey(), data)
+ }); err != nil {
+ return err
+ }
+
+ s.apps[appName] = a
+ return nil
+}
+
+func (s *service) deleteApp(ctx *context.T, call rpc.ServerCall, appName string) error {
+ s.mu.Lock()
+ defer s.mu.Unlock()
+ a, ok := s.apps[appName]
+ if !ok {
+ return nil // delete is idempotent
+ }
+
+ if err := store.RunInTransaction(s.st, func(tx store.Transaction) error {
+ // Read-check-delete appData.
+ if err := util.GetWithAuth(ctx, call, tx, a.stKey(), &appData{}); err != nil {
+ if verror.ErrorID(err) == verror.ErrNoExist.ID {
+ return nil // delete is idempotent
+ }
+ return err
+ }
+ // TODO(sadovsky): Delete all databases in this app.
+ return util.Delete(ctx, tx, a.stKey())
+ }); err != nil {
+ return err
+ }
+
+ delete(s.apps, appName)
+ return nil
+}
+
+func (s *service) setAppPerms(ctx *context.T, call rpc.ServerCall, appName string, perms access.Permissions, version string) error {
+ s.mu.Lock()
+ defer s.mu.Unlock()
+ a, ok := s.apps[appName]
+ if !ok {
+ return verror.New(verror.ErrNoExist, ctx, appName)
+ }
+ return store.RunInTransaction(s.st, func(tx store.Transaction) error {
+ data := &appData{}
+ return util.UpdateWithAuth(ctx, call, tx, a.stKey(), data, func() error {
+ if err := util.CheckVersion(ctx, version, data.Version); err != nil {
+ return err
+ }
+ data.Perms = perms
+ data.Version++
+ return nil
+ })
+ })
+}
+
+////////////////////////////////////////
+// Other internal helpers
+
+func (s *service) stKey() string {
+ return util.ServicePrefix
+}
diff --git a/services/syncbase/server/types.go b/services/syncbase/server/types.go
new file mode 100644
index 0000000..2879d56
--- /dev/null
+++ b/services/syncbase/server/types.go
@@ -0,0 +1,23 @@
+// Copyright 2015 The Vanadium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package server
+
+import (
+ "v.io/syncbase/x/ref/services/syncbase/server/util"
+ "v.io/v23/security/access"
+)
+
+var (
+ _ util.Permser = (*serviceData)(nil)
+ _ util.Permser = (*appData)(nil)
+)
+
+func (data *serviceData) GetPerms() access.Permissions {
+ return data.Perms
+}
+
+func (data *appData) GetPerms() access.Permissions {
+ return data.Perms
+}
diff --git a/services/syncbase/server/types.vdl b/services/syncbase/server/types.vdl
new file mode 100644
index 0000000..4999f77
--- /dev/null
+++ b/services/syncbase/server/types.vdl
@@ -0,0 +1,34 @@
+// Copyright 2015 The Vanadium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package server
+
+import (
+ "v.io/v23/security/access"
+)
+
+// serviceData represents the persistent state of a Service.
+type serviceData struct {
+ Version uint64 // covers the fields below
+ Perms access.Permissions
+}
+
+// appData represents the persistent state of an App.
+type appData struct {
+ Name string
+ Version uint64 // covers the fields below
+ Perms access.Permissions
+}
+
+// dbInfo contains information about one database for an App.
+// TODO(sadovsky): Track NoSQL vs. SQL.
+type dbInfo struct {
+ Name string
+ Initialized bool
+ Deleted bool
+ // Select fields from nosql.DatabaseOptions, needed in order to open storage
+ // engine on restart.
+ RootDir string // interpreted by storage engine
+ Engine string // name of storage engine, e.g. "leveldb"
+}
diff --git a/services/syncbase/server/types.vdl.go b/services/syncbase/server/types.vdl.go
new file mode 100644
index 0000000..aec38cd
--- /dev/null
+++ b/services/syncbase/server/types.vdl.go
@@ -0,0 +1,62 @@
+// Copyright 2015 The Vanadium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// This file was auto-generated by the vanadium vdl tool.
+// Source: types.vdl
+
+package server
+
+import (
+ // VDL system imports
+ "v.io/v23/vdl"
+
+ // VDL user imports
+ "v.io/v23/security/access"
+)
+
+// serviceData represents the persistent state of a Service.
+type serviceData struct {
+ Version uint64 // covers the fields below
+ Perms access.Permissions
+}
+
+func (serviceData) __VDLReflect(struct {
+ Name string `vdl:"v.io/syncbase/x/ref/services/syncbase/server.serviceData"`
+}) {
+}
+
+// appData represents the persistent state of an App.
+type appData struct {
+ Name string
+ Version uint64 // covers the fields below
+ Perms access.Permissions
+}
+
+func (appData) __VDLReflect(struct {
+ Name string `vdl:"v.io/syncbase/x/ref/services/syncbase/server.appData"`
+}) {
+}
+
+// dbInfo contains information about one database for an App.
+// TODO(sadovsky): Track NoSQL vs. SQL.
+type dbInfo struct {
+ Name string
+ Initialized bool
+ Deleted bool
+ // Select fields from nosql.DatabaseOptions, needed in order to open storage
+ // engine on restart.
+ RootDir string // interpreted by storage engine
+ Engine string // name of storage engine, e.g. "leveldb"
+}
+
+func (dbInfo) __VDLReflect(struct {
+ Name string `vdl:"v.io/syncbase/x/ref/services/syncbase/server.dbInfo"`
+}) {
+}
+
+func init() {
+ vdl.Register((*serviceData)(nil))
+ vdl.Register((*appData)(nil))
+ vdl.Register((*dbInfo)(nil))
+}
diff --git a/services/syncbase/server/util/constants.go b/services/syncbase/server/util/constants.go
new file mode 100644
index 0000000..ab2e401
--- /dev/null
+++ b/services/syncbase/server/util/constants.go
@@ -0,0 +1,51 @@
+// Copyright 2015 The Vanadium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package util
+
+import (
+ "time"
+)
+
+// TODO(sadovsky): Consider using shorter strings.
+
+// Constants related to storage engine keys.
+const (
+ AppPrefix = "$app"
+ ClockPrefix = "$clock"
+ DatabasePrefix = "$database"
+ DbInfoPrefix = "$dbInfo"
+ LogPrefix = "$log"
+ PermsPrefix = "$perms"
+ RowPrefix = "$row"
+ ServicePrefix = "$service"
+ SyncPrefix = "$sync"
+ TablePrefix = "$table"
+ VersionPrefix = "$version"
+)
+
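+// For example, a row with key "foo" in table "tb" is stored under the storage
+// engine key "$row:tb:foo", and the permissions for prefix "ba" of that table
+// under "$perms:tb:ba"; key parts are joined with KeyPartSep.
+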
+// Constants related to object names.
+const (
+ // Service object name suffix for Syncbase-to-Syncbase RPCs.
+ SyncbaseSuffix = "$sync"
+ // Separator for batch info in database names.
+ BatchSep = ":"
+ // Separator for parts of storage engine keys.
+ KeyPartSep = ":"
+ // PrefixRangeLimitSuffix is the suffix of a key that indicates the end of
+ // a prefix range. It must sort after any regular key in the store.
+ // TODO(rogulenko): Change this constant to something out of the UTF8 space.
+ PrefixRangeLimitSuffix = "~"
+)
+
+// Constants related to syncbase clock.
+const (
+ // The pool.ntp.org project is a big virtual cluster of timeservers
+ // providing a reliable, easy-to-use NTP service for millions of clients.
+ // See http://www.pool.ntp.org/en/ for more.
+ NtpServerPool = "pool.ntp.org"
+ NtpSampleCount = 15
+ LocalClockDriftThreshold = float64(time.Second)
+ NtpDiffThreshold = float64(2 * time.Second)
+)
diff --git a/services/syncbase/server/util/glob.go b/services/syncbase/server/util/glob.go
new file mode 100644
index 0000000..4a73870
--- /dev/null
+++ b/services/syncbase/server/util/glob.go
@@ -0,0 +1,40 @@
+// Copyright 2015 The Vanadium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package util
+
+import (
+ "v.io/syncbase/x/ref/services/syncbase/store"
+ "v.io/v23/context"
+ "v.io/v23/glob"
+ "v.io/v23/naming"
+ "v.io/v23/rpc"
+ "v.io/x/lib/vlog"
+)
+
+// NOTE(nlacasse): Syncbase handles Glob requests by implementing
+// GlobChildren__ at each level (service, app, database, table).
+
+// Glob performs a glob. It calls closeSntx to close sntx.
+func Glob(ctx *context.T, call rpc.GlobChildrenServerCall, matcher *glob.Element, sntx store.SnapshotOrTransaction, closeSntx func() error, stKeyPrefix string) error {
+ prefix, _ := matcher.FixedPrefix()
+ it := sntx.Scan(ScanPrefixArgs(stKeyPrefix, prefix))
+ defer closeSntx()
+ key := []byte{}
+ for it.Advance() {
+ key = it.Key(key)
+ parts := SplitKeyParts(string(key))
+ name := parts[len(parts)-1]
+ if matcher.Match(name) {
+ if err := call.SendStream().Send(naming.GlobChildrenReplyName{Value: name}); err != nil {
+ return err
+ }
+ }
+ }
+ if err := it.Err(); err != nil {
+ vlog.VI(1).Infof("Glob() failed: %v", err)
+ call.SendStream().Send(naming.GlobChildrenReplyError{Value: naming.GlobError{Error: err}})
+ }
+ return nil
+}
diff --git a/services/syncbase/server/util/key_util.go b/services/syncbase/server/util/key_util.go
new file mode 100644
index 0000000..80a8a6d
--- /dev/null
+++ b/services/syncbase/server/util/key_util.go
@@ -0,0 +1,37 @@
+// Copyright 2015 The Vanadium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package util
+
+import (
+ "strings"
+
+ "v.io/syncbase/v23/syncbase/util"
+)
+
+// JoinKeyParts builds keys for accessing data in the storage engine.
+func JoinKeyParts(parts ...string) string {
+ // TODO(sadovsky): Figure out which delimiter makes the most sense.
+ return strings.Join(parts, KeyPartSep)
+}
+
+// SplitKeyParts is the inverse of JoinKeyParts.
+func SplitKeyParts(key string) []string {
+ return strings.Split(key, KeyPartSep)
+}
+
+// ScanPrefixArgs returns args for sn.Scan() for the specified prefix.
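+// For example, ScanPrefixArgs("x", "a") returns ("x:a", "x:b").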
+func ScanPrefixArgs(stKeyPrefix, prefix string) ([]byte, []byte) {
+ return ScanRangeArgs(stKeyPrefix, util.PrefixRangeStart(prefix), util.PrefixRangeLimit(prefix))
+}
+
+// ScanRangeArgs returns args for sn.Scan() for the specified range.
+// If limit is "", all rows with keys >= start are included.
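+// For example, ScanRangeArgs("x", "a", "b") returns ("x:a", "x:b"), and
+// ScanRangeArgs("x", "a", "") returns ("x:a", "x;").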
+func ScanRangeArgs(stKeyPrefix, start, limit string) ([]byte, []byte) {
+ fullStart, fullLimit := JoinKeyParts(stKeyPrefix, start), JoinKeyParts(stKeyPrefix, limit)
+ if limit == "" {
+ fullLimit = util.PrefixRangeLimit(fullLimit)
+ }
+ return []byte(fullStart), []byte(fullLimit)
+}
diff --git a/services/syncbase/server/util/key_util_test.go b/services/syncbase/server/util/key_util_test.go
new file mode 100644
index 0000000..2240531
--- /dev/null
+++ b/services/syncbase/server/util/key_util_test.go
@@ -0,0 +1,83 @@
+// Copyright 2015 The Vanadium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package util_test
+
+import (
+ "reflect"
+ "testing"
+
+ "v.io/syncbase/x/ref/services/syncbase/server/util"
+)
+
+type kpt struct {
+ parts []string
+ key string
+}
+
+var keyPartTests []kpt = []kpt{
+ {[]string{"a", "b"}, "a:b"},
+ {[]string{"aa", "bb"}, "aa:bb"},
+ {[]string{"a", "b", "c"}, "a:b:c"},
+}
+
+func TestJoinKeyParts(t *testing.T) {
+ for _, test := range keyPartTests {
+ got, want := util.JoinKeyParts(test.parts...), test.key
+ if !reflect.DeepEqual(got, want) {
+ t.Errorf("%v: got %q, want %q", test.parts, got, want)
+ }
+ }
+}
+
+func TestSplitKeyParts(t *testing.T) {
+ for _, test := range keyPartTests {
+ got, want := util.SplitKeyParts(test.key), test.parts
+ if !reflect.DeepEqual(got, want) {
+ t.Errorf("%q: got %v, want %v", test.key, got, want)
+ }
+ }
+}
+
+func TestScanPrefixArgs(t *testing.T) {
+ tests := []struct {
+ stKeyPrefix, prefix, wantStart, wantLimit string
+ }{
+ {"x", "", "x:", "x;"},
+ {"x", "a", "x:a", "x:b"},
+ {"x", "a\xff", "x:a\xff", "x:b"},
+ }
+ for _, test := range tests {
+ start, limit := util.ScanPrefixArgs(test.stKeyPrefix, test.prefix)
+ gotStart, gotLimit := string(start), string(limit)
+ if gotStart != test.wantStart {
+ t.Errorf("{%q, %q} start: got %q, want %q", test.stKeyPrefix, test.prefix, gotStart, test.wantStart)
+ }
+ if gotLimit != test.wantLimit {
+ t.Errorf("{%q, %q} limit: got %q, want %q", test.stKeyPrefix, test.prefix, gotLimit, test.wantLimit)
+ }
+ }
+}
+
+func TestScanRangeArgs(t *testing.T) {
+ tests := []struct {
+ stKeyPrefix, start, limit, wantStart, wantLimit string
+ }{
+ {"x", "", "", "x:", "x;"}, // limit "" means "no limit"
+ {"x", "a", "", "x:a", "x;"}, // limit "" means "no limit"
+ {"x", "a", "b", "x:a", "x:b"},
+ {"x", "a", "a", "x:a", "x:a"}, // empty range
+ {"x", "b", "a", "x:b", "x:a"}, // empty range
+ }
+ for _, test := range tests {
+ start, limit := util.ScanRangeArgs(test.stKeyPrefix, test.start, test.limit)
+ gotStart, gotLimit := string(start), string(limit)
+ if gotStart != test.wantStart {
+ t.Errorf("{%q, %q, %q} start: got %q, want %q", test.stKeyPrefix, test.start, test.limit, gotStart, test.wantStart)
+ }
+ if gotLimit != test.wantLimit {
+ t.Errorf("{%q, %q, %q} limit: got %q, want %q", test.stKeyPrefix, test.start, test.limit, gotLimit, test.wantLimit)
+ }
+ }
+}
diff --git a/services/syncbase/server/util/store_util.go b/services/syncbase/server/util/store_util.go
new file mode 100644
index 0000000..b8f1905
--- /dev/null
+++ b/services/syncbase/server/util/store_util.go
@@ -0,0 +1,164 @@
+// Copyright 2015 The Vanadium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package util
+
+import (
+ "os"
+ "strconv"
+
+ "v.io/syncbase/x/ref/services/syncbase/store"
+ "v.io/syncbase/x/ref/services/syncbase/store/leveldb"
+ "v.io/syncbase/x/ref/services/syncbase/store/memstore"
+ "v.io/v23/context"
+ "v.io/v23/rpc"
+ "v.io/v23/security/access"
+ "v.io/v23/verror"
+ "v.io/v23/vom"
+)
+
+func FormatVersion(version uint64) string {
+ return strconv.FormatUint(version, 10)
+}
+
+func CheckVersion(ctx *context.T, presented string, actual uint64) error {
+ if presented != "" && presented != FormatVersion(actual) {
+ return verror.NewErrBadVersion(ctx)
+ }
+ return nil
+}
+
+// TODO(sadovsky): Perhaps these functions should strip key prefixes such as
+// "$table:" from the error messages they return.
+
+type Permser interface {
+ // GetPerms returns the Permissions for this Layer.
+ GetPerms() access.Permissions
+}
+
+// Get does st.Get(k, v) and wraps the returned error.
+func Get(ctx *context.T, st store.StoreReader, k string, v interface{}) error {
+ bytes, err := st.Get([]byte(k), nil)
+ if err != nil {
+ if verror.ErrorID(err) == store.ErrUnknownKey.ID {
+ return verror.New(verror.ErrNoExist, ctx, k)
+ }
+ return verror.New(verror.ErrInternal, ctx, err)
+ }
+ if err = vom.Decode(bytes, v); err != nil {
+ return verror.New(verror.ErrInternal, ctx, err)
+ }
+ return nil
+}
+
+// GetWithAuth does Get followed by an auth check.
+func GetWithAuth(ctx *context.T, call rpc.ServerCall, st store.StoreReader, k string, v Permser) error {
+ if err := Get(ctx, st, k, v); err != nil {
+ return err
+ }
+ auth, _ := access.PermissionsAuthorizer(v.GetPerms(), access.TypicalTagType())
+ if err := auth.Authorize(ctx, call.Security()); err != nil {
+ return verror.New(verror.ErrNoAccess, ctx, err)
+ }
+ return nil
+}
+
+// Put does stw.Put(k, v) and wraps the returned error.
+func Put(ctx *context.T, stw store.StoreWriter, k string, v interface{}) error {
+ bytes, err := vom.Encode(v)
+ if err != nil {
+ return verror.New(verror.ErrInternal, ctx, err)
+ }
+ if err = stw.Put([]byte(k), bytes); err != nil {
+ return verror.New(verror.ErrInternal, ctx, err)
+ }
+ return nil
+}
+
+// Delete does stw.Delete(k) and wraps the returned error.
+func Delete(ctx *context.T, stw store.StoreWriter, k string) error {
+ if err := stw.Delete([]byte(k)); err != nil {
+ return verror.New(verror.ErrInternal, ctx, err)
+ }
+ return nil
+}
+
+// UpdateWithAuth performs a read-modify-write.
+// Input v is populated by the "read" step. fn should "modify" v.
+// Performs an auth check as part of the "read" step.
+func UpdateWithAuth(ctx *context.T, call rpc.ServerCall, tx store.Transaction, k string, v Permser, fn func() error) error {
+ if err := GetWithAuth(ctx, call, tx, k, v); err != nil {
+ return err
+ }
+ if err := fn(); err != nil {
+ return err
+ }
+ return Put(ctx, tx, k, v)
+}
+
+// ErrorToExists wraps the error returned by a call to Get and reports whether
+// Get found the object, suppressing ErrNoExist. Access errors are suppressed
+// as well because they imply existence in some Get implementations.
+// TODO(ivanpi): Revisit once ACL specification is finalized.
+func ErrorToExists(err error) (bool, error) {
+ if err == nil {
+ return true, nil
+ }
+ switch verror.ErrorID(err) {
+ case verror.ErrNoExist.ID:
+ return false, nil
+ case verror.ErrNoAccess.ID, verror.ErrNoExistOrNoAccess.ID:
+ return false, nil
+ default:
+ return false, err
+ }
+}
+
+type OpenOptions struct {
+ CreateIfMissing bool
+ ErrorIfExists bool
+}
+
+// OpenStore opens the given store.Store. OpenOptions are respected to the
+// degree possible for the specified engine.
+func OpenStore(engine, path string, opts OpenOptions) (store.Store, error) {
+ switch engine {
+ case "memstore":
+ if !opts.CreateIfMissing {
+ return nil, verror.New(verror.ErrInternal, nil, "cannot open memstore")
+ }
+ // By definition, the memstore does not already exist.
+ return memstore.New(), nil
+ case "leveldb":
+ leveldbOpts := leveldb.OpenOptions{
+ CreateIfMissing: opts.CreateIfMissing,
+ ErrorIfExists: opts.ErrorIfExists,
+ }
+ if opts.CreateIfMissing {
+ // Note, os.MkdirAll is a noop if the path already exists. We rely on
+ // leveldb to enforce ErrorIfExists.
+ if err := os.MkdirAll(path, 0700); err != nil {
+ return nil, verror.New(verror.ErrInternal, nil, err)
+ }
+ }
+ return leveldb.Open(path, leveldbOpts)
+ default:
+ return nil, verror.New(verror.ErrBadArg, nil, engine)
+ }
+}
+
+func DestroyStore(engine, path string) error {
+ switch engine {
+ case "memstore":
+ // memstore does not persist any data to disk; there is nothing to destroy.
+ return nil
+ case "leveldb":
+ if err := os.RemoveAll(path); err != nil {
+ return verror.New(verror.ErrInternal, nil, err)
+ }
+ return nil
+ default:
+ return verror.New(verror.ErrBadArg, nil, engine)
+ }
+}
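
A minimal sketch (not part of this change) of how these helpers compose: open a store, write a VOM-encoded value, and read it back. The path, the key, the MyData type, and the example package are hypothetical.

package example

import (
	"v.io/syncbase/x/ref/services/syncbase/server/util"
	"v.io/v23/context"
)

type MyData struct {
	Name string
}

func roundTrip(ctx *context.T) error {
	st, err := util.OpenStore("leveldb", "/tmp/example-db", util.OpenOptions{CreateIfMissing: true})
	if err != nil {
		return err
	}
	defer st.Close()
	// Put VOM-encodes the value; Get decodes it back into the supplied pointer.
	if err := util.Put(ctx, st, "mykey", &MyData{Name: "Alice"}); err != nil {
		return err
	}
	var got MyData
	return util.Get(ctx, st, "mykey", &got)
}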
diff --git a/services/syncbase/server/watchable/snapshot.go b/services/syncbase/server/watchable/snapshot.go
new file mode 100644
index 0000000..37af4e1
--- /dev/null
+++ b/services/syncbase/server/watchable/snapshot.go
@@ -0,0 +1,45 @@
+// Copyright 2015 The Vanadium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package watchable
+
+import (
+ "v.io/syncbase/x/ref/services/syncbase/store"
+)
+
+type snapshot struct {
+ store.SnapshotSpecImpl
+ isn store.Snapshot
+ st *wstore
+}
+
+var _ store.Snapshot = (*snapshot)(nil)
+
+func newSnapshot(st *wstore) *snapshot {
+ return &snapshot{
+ isn: st.ist.NewSnapshot(),
+ st: st,
+ }
+}
+
+// Abort implements the store.Snapshot interface.
+func (s *snapshot) Abort() error {
+ return s.isn.Abort()
+}
+
+// Get implements the store.StoreReader interface.
+func (s *snapshot) Get(key, valbuf []byte) ([]byte, error) {
+ if !s.st.managesKey(key) {
+ return s.isn.Get(key, valbuf)
+ }
+ return getVersioned(s.isn, key, valbuf)
+}
+
+// Scan implements the store.StoreReader interface.
+func (s *snapshot) Scan(start, limit []byte) store.Stream {
+ if !s.st.managesRange(start, limit) {
+ return s.isn.Scan(start, limit)
+ }
+ return newStreamVersioned(s.isn, start, limit)
+}
diff --git a/services/syncbase/server/watchable/store.go b/services/syncbase/server/watchable/store.go
new file mode 100644
index 0000000..0b19678
--- /dev/null
+++ b/services/syncbase/server/watchable/store.go
@@ -0,0 +1,159 @@
+// Copyright 2015 The Vanadium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// Package watchable provides a Syncbase-specific store.Store wrapper that
+// provides versioned storage for specified prefixes and maintains a watchable
+// log of operations performed on versioned records. This log forms the basis
+// for the implementation of client-facing watch as well as the sync module's
+// internal watching of store updates.
+//
+// LogEntry records are stored chronologically, using keys of the form
+// "$log:<seq>". Sequence numbers are zero-padded to ensure that the
+// lexicographic order matches the numeric order.
+//
+// Version number records are stored using keys of the form "$version:<key>",
+// where <key> is the client-specified key.
+package watchable
+
+import (
+ "fmt"
+ "strings"
+ "sync"
+
+ pubutil "v.io/syncbase/v23/syncbase/util"
+ "v.io/syncbase/x/ref/services/syncbase/clock"
+ "v.io/syncbase/x/ref/services/syncbase/store"
+)
+
+// Store is a store.Store that provides versioned storage and a watchable oplog.
+// TODO(sadovsky): Extend interface.
+type Store interface {
+ store.Store
+}
+
+// Options configures a watchable.Store.
+type Options struct {
+ // Key prefixes to version and log. If nil, all keys are managed.
+ ManagedPrefixes []string
+}
+
+// Wrap returns a watchable.Store that wraps the given store.Store.
+func Wrap(st store.Store, vclock *clock.VClock, opts *Options) (Store, error) {
+ seq, err := getNextLogSeq(st)
+ if err != nil {
+ return nil, err
+ }
+ return &wstore{
+ ist: st,
+ watcher: newWatcher(),
+ opts: opts,
+ seq: seq,
+ clock: vclock,
+ }, nil
+}
+
+type wstore struct {
+ ist store.Store
+ watcher *watcher
+ opts *Options
+ mu sync.Mutex // held during transaction commits; protects seq
+ seq uint64 // the next sequence number to be used for a new commit
+ clock *clock.VClock // used to provide write timestamps
+}
+
+var _ Store = (*wstore)(nil)
+
+// Close implements the store.Store interface.
+func (st *wstore) Close() error {
+ st.watcher.close()
+ return st.ist.Close()
+}
+
+// Get implements the store.StoreReader interface.
+func (st *wstore) Get(key, valbuf []byte) ([]byte, error) {
+ if !st.managesKey(key) {
+ return st.ist.Get(key, valbuf)
+ }
+ sn := newSnapshot(st)
+ defer sn.Abort()
+ return sn.Get(key, valbuf)
+}
+
+// Scan implements the store.StoreReader interface.
+func (st *wstore) Scan(start, limit []byte) store.Stream {
+ if !st.managesRange(start, limit) {
+ return st.ist.Scan(start, limit)
+ }
+ // TODO(sadovsky): Close snapshot once stream is finished or canceled.
+ return newSnapshot(st).Scan(start, limit)
+}
+
+// Put implements the store.StoreWriter interface.
+func (st *wstore) Put(key, value []byte) error {
+ // Use watchable.Store transaction so this op gets logged.
+ return store.RunInTransaction(st, func(tx store.Transaction) error {
+ return tx.Put(key, value)
+ })
+}
+
+// Delete implements the store.StoreWriter interface.
+func (st *wstore) Delete(key []byte) error {
+ // Use watchable.Store transaction so this op gets logged.
+ return store.RunInTransaction(st, func(tx store.Transaction) error {
+ return tx.Delete(key)
+ })
+}
+
+// NewTransaction implements the store.Store interface.
+func (st *wstore) NewTransaction() store.Transaction {
+ return newTransaction(st)
+}
+
+// NewSnapshot implements the store.Store interface.
+func (st *wstore) NewSnapshot() store.Snapshot {
+ return newSnapshot(st)
+}
+
+// GetOptions returns the options configured on a watchable.Store.
+// TODO(rdaoud): expose watchable store through an interface and change this
+// function to be a method on the store.
+func GetOptions(st store.Store) (*Options, error) {
+ wst := st.(*wstore)
+ return wst.opts, nil
+}
+
+////////////////////////////////////////
+// Internal helpers
+
+func (st *wstore) managesKey(key []byte) bool {
+ if st.opts.ManagedPrefixes == nil {
+ return true
+ }
+ ikey := string(key)
+ // TODO(sadovsky): Optimize, e.g. use binary search (here and below).
+ for _, p := range st.opts.ManagedPrefixes {
+ if strings.HasPrefix(ikey, p) {
+ return true
+ }
+ }
+ return false
+}
+
+func (st *wstore) managesRange(start, limit []byte) bool {
+ if st.opts.ManagedPrefixes == nil {
+ return true
+ }
+ istart, ilimit := string(start), string(limit)
+ for _, p := range st.opts.ManagedPrefixes {
+ pstart, plimit := pubutil.PrefixRangeStart(p), pubutil.PrefixRangeLimit(p)
+ if pstart <= istart && ilimit <= plimit {
+ return true
+ }
+ if !(plimit <= istart || ilimit <= pstart) {
+ // If this happens, there's a bug in the Syncbase server implementation.
+ panic(fmt.Sprintf("partial overlap: %q %q %q", p, start, limit))
+ }
+ }
+ return false
+}
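
A minimal sketch (not part of this change) of wrapping an underlying store so that writes under selected prefixes are versioned and logged. The engine, path, and example package are hypothetical; the clock constructor shown is the mock helper used by the tests in this change.

package example

import (
	"v.io/syncbase/x/ref/services/syncbase/clock"
	"v.io/syncbase/x/ref/services/syncbase/server/util"
	"v.io/syncbase/x/ref/services/syncbase/server/watchable"
)

func openWatchable() (watchable.Store, error) {
	ist, err := util.OpenStore("leveldb", "/tmp/example-db", util.OpenOptions{CreateIfMissing: true})
	if err != nil {
		return nil, err
	}
	vclock := clock.NewVClockWithMockServices(clock.MockStorageAdapter(), nil, nil)
	// Only row and permission keys are versioned and logged; all other keys go
	// straight through to the underlying store.
	return watchable.Wrap(ist, vclock, &watchable.Options{
		ManagedPrefixes: []string{util.RowPrefix, util.PermsPrefix},
	})
}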
diff --git a/services/syncbase/server/watchable/store_test.go b/services/syncbase/server/watchable/store_test.go
new file mode 100644
index 0000000..8c1c370
--- /dev/null
+++ b/services/syncbase/server/watchable/store_test.go
@@ -0,0 +1,74 @@
+// Copyright 2015 The Vanadium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package watchable
+
+import (
+ "runtime"
+ "testing"
+
+ "v.io/syncbase/x/ref/services/syncbase/clock"
+ "v.io/syncbase/x/ref/services/syncbase/store"
+ "v.io/syncbase/x/ref/services/syncbase/store/test"
+)
+
+func init() {
+ runtime.GOMAXPROCS(10)
+}
+
+func TestStream(t *testing.T) {
+ runTest(t, []string{}, test.RunStreamTest)
+ runTest(t, nil, test.RunStreamTest)
+}
+
+func TestSnapshot(t *testing.T) {
+ runTest(t, []string{}, test.RunSnapshotTest)
+ runTest(t, nil, test.RunSnapshotTest)
+}
+
+func TestStoreState(t *testing.T) {
+ runTest(t, []string{}, test.RunStoreStateTest)
+ runTest(t, nil, test.RunStoreStateTest)
+}
+
+func TestClose(t *testing.T) {
+ runTest(t, []string{}, test.RunCloseTest)
+ runTest(t, nil, test.RunCloseTest)
+}
+
+func TestReadWriteBasic(t *testing.T) {
+ runTest(t, []string{}, test.RunReadWriteBasicTest)
+ runTest(t, nil, test.RunReadWriteBasicTest)
+}
+
+func TestReadWriteRandom(t *testing.T) {
+ runTest(t, []string{}, test.RunReadWriteRandomTest)
+ runTest(t, nil, test.RunReadWriteRandomTest)
+}
+
+func TestConcurrentTransactions(t *testing.T) {
+ runTest(t, []string{}, test.RunConcurrentTransactionsTest)
+ runTest(t, nil, test.RunConcurrentTransactionsTest)
+}
+
+func TestTransactionState(t *testing.T) {
+ runTest(t, []string{}, test.RunTransactionStateTest)
+ runTest(t, nil, test.RunTransactionStateTest)
+}
+
+func TestTransactionsWithGet(t *testing.T) {
+ runTest(t, []string{}, test.RunTransactionsWithGetTest)
+ runTest(t, nil, test.RunTransactionsWithGetTest)
+}
+
+func runTest(t *testing.T, mp []string, f func(t *testing.T, st store.Store)) {
+ st, destroy := createStore()
+ defer destroy()
+ vClock := clock.NewVClockWithMockServices(clock.MockStorageAdapter(), nil, nil)
+ st, err := Wrap(st, vClock, &Options{ManagedPrefixes: mp})
+ if err != nil {
+ t.Fatal(err)
+ }
+ f(t, st)
+}
diff --git a/services/syncbase/server/watchable/stream.go b/services/syncbase/server/watchable/stream.go
new file mode 100644
index 0000000..26502e1
--- /dev/null
+++ b/services/syncbase/server/watchable/stream.go
@@ -0,0 +1,94 @@
+// Copyright 2015 The Vanadium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package watchable
+
+import (
+ "sync"
+
+ "v.io/syncbase/x/ref/services/syncbase/store"
+)
+
+// stream streams keys and values for versioned records.
+type stream struct {
+ iit store.Stream
+ sntx store.SnapshotOrTransaction
+ mu sync.Mutex
+ err error
+ hasValue bool
+ key []byte
+ value []byte
+}
+
+var _ store.Stream = (*stream)(nil)
+
+// newStreamVersioned creates a new stream. It assumes all records in range
+// [start, limit) are managed, i.e. versioned.
+func newStreamVersioned(sntx store.SnapshotOrTransaction, start, limit []byte) *stream {
+ return &stream{
+ iit: sntx.Scan(makeVersionKey(start), makeVersionKey(limit)),
+ sntx: sntx,
+ }
+}
+
+// Advance implements the store.Stream interface.
+func (s *stream) Advance() bool {
+ s.mu.Lock()
+ defer s.mu.Unlock()
+ s.hasValue = false
+ if s.err != nil {
+ return false
+ }
+ if advanced := s.iit.Advance(); !advanced {
+ return false
+ }
+ versionKey, version := s.iit.Key(nil), s.iit.Value(nil)
+ s.key = []byte(join(split(string(versionKey))[1:]...)) // drop "$version" prefix
+ s.value, s.err = s.sntx.Get(makeAtVersionKey(s.key, version), nil)
+ if s.err != nil {
+ return false
+ }
+ s.hasValue = true
+ return true
+}
+
+// Key implements the store.Stream interface.
+func (s *stream) Key(keybuf []byte) []byte {
+ s.mu.Lock()
+ defer s.mu.Unlock()
+ if !s.hasValue {
+ panic("nothing staged")
+ }
+ return store.CopyBytes(keybuf, s.key)
+}
+
+// Value implements the store.Stream interface.
+func (s *stream) Value(valbuf []byte) []byte {
+ s.mu.Lock()
+ defer s.mu.Unlock()
+ if !s.hasValue {
+ panic("nothing staged")
+ }
+ return store.CopyBytes(valbuf, s.value)
+}
+
+// Err implements the store.Stream interface.
+func (s *stream) Err() error {
+ s.mu.Lock()
+ defer s.mu.Unlock()
+ if s.err != nil {
+ return convertError(s.err)
+ }
+ return s.iit.Err()
+}
+
+// Cancel implements the store.Stream interface.
+func (s *stream) Cancel() {
+ s.mu.Lock()
+ defer s.mu.Unlock()
+ if s.err != nil {
+ return
+ }
+ s.iit.Cancel()
+}
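
A minimal sketch (not part of this change) of consuming the stream returned by a watchable store's Scan; the key bounds and the example package are hypothetical, and error handling is reduced to the final Err check.

package example

import (
	"fmt"

	"v.io/syncbase/x/ref/services/syncbase/server/watchable"
)

func dumpRange(st watchable.Store) error {
	it := st.Scan([]byte("a"), []byte("z"))
	for it.Advance() {
		// Key and Value copy into the supplied buffer, or allocate one when nil.
		fmt.Printf("%s -> %s\n", it.Key(nil), it.Value(nil))
	}
	// Err must be checked once Advance returns false; Cancel would end the
	// stream early.
	return it.Err()
}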
diff --git a/services/syncbase/server/watchable/test_util.go b/services/syncbase/server/watchable/test_util.go
new file mode 100644
index 0000000..e14854a
--- /dev/null
+++ b/services/syncbase/server/watchable/test_util.go
@@ -0,0 +1,144 @@
+// Copyright 2015 The Vanadium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package watchable
+
+import (
+ "fmt"
+ "io/ioutil"
+ "math"
+ "time"
+
+ "v.io/syncbase/x/ref/services/syncbase/clock"
+ "v.io/syncbase/x/ref/services/syncbase/store"
+ "v.io/syncbase/x/ref/services/syncbase/store/leveldb"
+ "v.io/syncbase/x/ref/services/syncbase/store/memstore"
+ "v.io/v23/vom"
+)
+
+// This file provides utility methods for tests related to watchable store.
+
+////////////////////////////////////////////////////////////
+// Functions for store creation/cleanup
+
+// createStore returns a store along with a function to destroy the store
+// once it is no longer needed.
+func createStore() (store.Store, func()) {
+ var st store.Store
+ // With Memstore, TestReadWriteRandom is slow with ManagedPrefixes=nil since
+ // every watchable.Store.Get() takes a snapshot, and memstore snapshots are
+ // relatively expensive since the entire data map is copied. LevelDB snapshots
+ // are cheap, so with LevelDB ManagedPrefixes=nil is still reasonably fast.
+ if false {
+ st = memstore.New()
+ return st, func() {
+ st.Close()
+ }
+ } else {
+ path := getPath()
+ st = createLevelDB(path)
+ return st, func() {
+ destroyLevelDB(st, path)
+ }
+ }
+}
+
+func getPath() string {
+ path, err := ioutil.TempDir("", "syncbase_leveldb")
+ if err != nil {
+ panic(fmt.Sprintf("can't create temp dir: %v", err))
+ }
+ return path
+}
+
+func createLevelDB(path string) store.Store {
+ st, err := leveldb.Open(path, leveldb.OpenOptions{CreateIfMissing: true, ErrorIfExists: true})
+ if err != nil {
+ panic(fmt.Sprintf("can't open db at %v: %v", path, err))
+ }
+ return st
+}
+
+func destroyLevelDB(st store.Store, path string) {
+ st.Close()
+ if err := leveldb.Destroy(path); err != nil {
+ panic(fmt.Sprintf("can't destroy db at %v: %v", path, err))
+ }
+}
+
+////////////////////////////////////////////////////////////
+// Functions related to watchable store
+
+func getSeq(st Store) uint64 {
+ wst := st.(*wstore)
+ return wst.seq
+}
+
+// logEntryReader provides a stream-like interface to scan over the log entries
+// of a single batch, starting from a given sequence number. It opens a stream
+// that scans the log from the sequence number given. It stops after reading
+// the last entry in that batch (indicated by a false Continued flag).
+type logEntryReader struct {
+ stream store.Stream // scan stream on the store Database
+ done bool // true after reading the last batch entry
+ key string // key of most recent log entry read
+ entry LogEntry // most recent log entry read
+}
+
+func newLogEntryReader(st store.Store, seq uint64) *logEntryReader {
+ stream := st.Scan([]byte(logEntryKey(seq)), []byte(logEntryKey(math.MaxUint64)))
+ return &logEntryReader{stream: stream}
+}
+
+func (ler *logEntryReader) Advance() bool {
+ if ler.done {
+ return false
+ }
+
+ if ler.stream.Advance() {
+ ler.key = string(ler.stream.Key(nil))
+ if err := vom.Decode(ler.stream.Value(nil), &ler.entry); err != nil {
+ panic(fmt.Errorf("failed to decode LogEntry for key %q: %v", ler.key, err))
+ }
+ if !ler.entry.Continued {
+ ler.done = true
+ }
+ return true
+ }
+
+ ler.key = ""
+ ler.entry = LogEntry{}
+ return false
+}
+
+func (ler *logEntryReader) GetEntry() (string, LogEntry) {
+ return ler.key, ler.entry
+}
+
+////////////////////////////////////////////////////////////
+// Clock related utility code
+
+type mockSystemClock struct {
+ time time.Time // current time returned by call to Now()
+ increment time.Duration // how much to increment the clock by for subsequent calls to Now()
+}
+
+func newMockSystemClock(firstTimestamp time.Time, increment time.Duration) *mockSystemClock {
+ return &mockSystemClock{
+ time: firstTimestamp,
+ increment: increment,
+ }
+}
+
+func (sc *mockSystemClock) Now() time.Time {
+ now := sc.time
+ sc.time = sc.time.Add(sc.increment)
+ return now
+}
+
+func (sc *mockSystemClock) ElapsedTime() (time.Duration, error) {
+ return sc.increment, nil
+}
+
+var _ clock.SystemClock = (*mockSystemClock)(nil)
diff --git a/services/syncbase/server/watchable/transaction.go b/services/syncbase/server/watchable/transaction.go
new file mode 100644
index 0000000..8a67f8f
--- /dev/null
+++ b/services/syncbase/server/watchable/transaction.go
@@ -0,0 +1,304 @@
+// Copyright 2015 The Vanadium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package watchable
+
+import (
+ "fmt"
+ "math"
+ "sync"
+
+ "v.io/syncbase/x/ref/services/syncbase/server/util"
+ "v.io/syncbase/x/ref/services/syncbase/store"
+ "v.io/v23/context"
+ "v.io/v23/verror"
+)
+
+type transaction struct {
+ itx store.Transaction
+ st *wstore
+ mu sync.Mutex // protects the fields below
+ err error
+ ops []Op
+ // fromSync is true when a transaction is created by sync. This causes
+ // the log entries written at commit time to have their "FromSync" field
+ // set to true. That in turn causes the sync watcher to filter out such
+ // updates since sync already knows about them (echo suppression).
+ fromSync bool
+}
+
+var _ store.Transaction = (*transaction)(nil)
+
+// cp returns a defensive copy of a byte slice.
+func cp(src []byte) []byte {
+ dst := make([]byte, len(src))
+ copy(dst, src)
+ return dst
+}
+
+// cpStrings returns a defensive copy of a string slice.
+func cpStrings(src []string) []string {
+ dst := make([]string, len(src))
+ copy(dst, src)
+ return dst
+}
+
+func newTransaction(st *wstore) *transaction {
+ return &transaction{
+ itx: st.ist.NewTransaction(),
+ st: st,
+ }
+}
+
+// Get implements the store.StoreReader interface.
+func (tx *transaction) Get(key, valbuf []byte) ([]byte, error) {
+ tx.mu.Lock()
+ defer tx.mu.Unlock()
+ if tx.err != nil {
+ return valbuf, convertError(tx.err)
+ }
+ var err error
+ if !tx.st.managesKey(key) {
+ valbuf, err = tx.itx.Get(key, valbuf)
+ } else {
+ valbuf, err = getVersioned(tx.itx, key, valbuf)
+ tx.ops = append(tx.ops, &OpGet{GetOp{Key: cp(key)}})
+ }
+ return valbuf, err
+}
+
+// Scan implements the store.StoreReader interface.
+func (tx *transaction) Scan(start, limit []byte) store.Stream {
+ tx.mu.Lock()
+ defer tx.mu.Unlock()
+ if tx.err != nil {
+ return &store.InvalidStream{Error: tx.err}
+ }
+ var it store.Stream
+ if !tx.st.managesRange(start, limit) {
+ it = tx.itx.Scan(start, limit)
+ } else {
+ it = newStreamVersioned(tx.itx, start, limit)
+ tx.ops = append(tx.ops, &OpScan{ScanOp{Start: cp(start), Limit: cp(limit)}})
+ }
+ return it
+}
+
+// Put implements the store.StoreWriter interface.
+func (tx *transaction) Put(key, value []byte) error {
+ tx.mu.Lock()
+ defer tx.mu.Unlock()
+ if tx.err != nil {
+ return convertError(tx.err)
+ }
+ if !tx.st.managesKey(key) {
+ return tx.itx.Put(key, value)
+ }
+ version, err := putVersioned(tx.itx, key, value)
+ if err != nil {
+ return err
+ }
+ tx.ops = append(tx.ops, &OpPut{PutOp{Key: cp(key), Version: version}})
+ return nil
+}
+
+// Delete implements the store.StoreWriter interface.
+func (tx *transaction) Delete(key []byte) error {
+ tx.mu.Lock()
+ defer tx.mu.Unlock()
+ if tx.err != nil {
+ return convertError(tx.err)
+ }
+ if !tx.st.managesKey(key) {
+ return tx.itx.Delete(key)
+ }
+ if err := deleteVersioned(tx.itx, key); err != nil {
+ return err
+ }
+ tx.ops = append(tx.ops, &OpDelete{DeleteOp{Key: cp(key)}})
+ return nil
+}
+
+// Commit implements the store.Transaction interface.
+func (tx *transaction) Commit() error {
+ tx.mu.Lock()
+ defer tx.mu.Unlock()
+ if tx.err != nil {
+ return convertError(tx.err)
+ }
+ tx.err = verror.New(verror.ErrBadState, nil, store.ErrMsgCommittedTxn)
+ tx.st.mu.Lock()
+ defer tx.st.mu.Unlock()
+ // Check if there is enough space left in the sequence number.
+ if (math.MaxUint64 - tx.st.seq) < uint64(len(tx.ops)) {
+ return verror.New(verror.ErrInternal, nil, "seq maxed out")
+ }
+ // Write LogEntry records.
+ timestamp := tx.st.clock.Now(nil).UnixNano()
+ seq := tx.st.seq
+ for i, op := range tx.ops {
+ key := logEntryKey(seq)
+ value := &LogEntry{
+ Op: op,
+ CommitTimestamp: timestamp,
+ FromSync: tx.fromSync,
+ Continued: i < len(tx.ops)-1,
+ }
+ if err := util.Put(nil, tx.itx, key, value); err != nil {
+ return err
+ }
+ seq++
+ }
+ if err := tx.itx.Commit(); err != nil {
+ return err
+ }
+ tx.st.seq = seq
+ tx.st.watcher.broadcastUpdates()
+ return nil
+}
+
+// Abort implements the store.Transaction interface.
+func (tx *transaction) Abort() error {
+ tx.mu.Lock()
+ defer tx.mu.Unlock()
+ if tx.err != nil {
+ return convertError(tx.err)
+ }
+ tx.err = verror.New(verror.ErrCanceled, nil, store.ErrMsgAbortedTxn)
+ return tx.itx.Abort()
+}
+
+// AddSyncGroupOp injects a SyncGroup operation notification in the log entries
+// that the transaction writes when it is committed. It allows the SyncGroup
+// operations (create, join, leave, destroy) to notify the sync watcher of the
+// change at its proper position in the timeline (the transaction commit).
+// Note: this is an internal function used by sync, not part of the interface.
+func AddSyncGroupOp(ctx *context.T, tx store.Transaction, prefixes []string, remove bool) error {
+ wtx := tx.(*transaction)
+ wtx.mu.Lock()
+ defer wtx.mu.Unlock()
+ if wtx.err != nil {
+ return convertError(wtx.err)
+ }
+ // Make a defensive copy of prefixes slice.
+ wtx.ops = append(wtx.ops, &OpSyncGroup{SyncGroupOp{Prefixes: cpStrings(prefixes), Remove: remove}})
+ return nil
+}
+
+// AddSyncSnapshotOp injects a sync snapshot operation notification in the log
+// entries that the transaction writes when it is committed. It allows the
+// SyncGroup create or join operations to notify the sync watcher of the
+// current keys and their versions to use when initializing the sync metadata
+// at the point in the timeline when these keys become syncable (at commit).
+// Note: this is an internal function used by sync, not part of the interface.
+func AddSyncSnapshotOp(ctx *context.T, tx store.Transaction, key, version []byte) error {
+ wtx := tx.(*transaction)
+ wtx.mu.Lock()
+ defer wtx.mu.Unlock()
+ if wtx.err != nil {
+ return convertError(wtx.err)
+ }
+ if !wtx.st.managesKey(key) {
+ return verror.New(verror.ErrInternal, ctx, fmt.Sprintf("cannot create SyncSnapshotOp on unmanaged key: %s", string(key)))
+ }
+ wtx.ops = append(wtx.ops, &OpSyncSnapshot{SyncSnapshotOp{Key: cp(key), Version: cp(version)}})
+ return nil
+}
+
+// SetTransactionFromSync marks this transaction as created by sync as opposed
+// to one created by an application. The net effect is that, at commit time,
+// the log entries written are marked as made by sync. This allows the sync
+// Watcher to ignore them (echo suppression) because it made these updates.
+// Note: this is an internal function used by sync, not part of the interface.
+// TODO(rdaoud): support a generic echo-suppression mechanism for apps as well,
+// perhaps by having a creator ID in the transaction and log entries.
+// TODO(rdaoud): fold this flag (or creator ID) into Tx options when available.
+func SetTransactionFromSync(tx store.Transaction) {
+ wtx := tx.(*transaction)
+ wtx.mu.Lock()
+ defer wtx.mu.Unlock()
+ wtx.fromSync = true
+}
+
+// GetVersion returns the current version of a managed key. This method is used
+// by the Sync module when the initiator is attempting to add new versions of
+// objects. Reading the version key is used for optimistic concurrency
+// control. At minimum, an object implementing the Transaction interface is
+// required since this is a Get operation.
+func GetVersion(ctx *context.T, tx store.Transaction, key []byte) ([]byte, error) {
+ switch w := tx.(type) {
+ case *transaction:
+ w.mu.Lock()
+ defer w.mu.Unlock()
+ if w.err != nil {
+ return nil, convertError(w.err)
+ }
+ return getVersion(w.itx, key)
+ }
+ return nil, verror.New(verror.ErrInternal, ctx, "unsupported store type")
+}
+
+// GetAtVersion returns the value of a managed key at the requested
+// version. This method is used by the Sync module when the responder needs to
+// send objects over the wire. At minimum, an object implementing the
+// StoreReader interface is required since this is a Get operation.
+func GetAtVersion(ctx *context.T, st store.StoreReader, key, valbuf, version []byte) ([]byte, error) {
+ switch w := st.(type) {
+ case *snapshot:
+ return getAtVersion(w.isn, key, valbuf, version)
+ case *transaction:
+ w.mu.Lock()
+ defer w.mu.Unlock()
+ if w.err != nil {
+ return valbuf, convertError(w.err)
+ }
+ return getAtVersion(w.itx, key, valbuf, version)
+ case *wstore:
+ return getAtVersion(w.ist, key, valbuf, version)
+ }
+ return nil, verror.New(verror.ErrInternal, ctx, "unsupported store type")
+}
+
+// PutAtVersion puts a value for the managed key at the requested version. This
+// method is used by the Sync module exclusively when the initiator adds objects
+// with versions created on other Syncbases. At minimum, an object implementing
+// the Transaction interface is required since this is a Put operation.
+func PutAtVersion(ctx *context.T, tx store.Transaction, key, valbuf, version []byte) error {
+ wtx := tx.(*transaction)
+
+ wtx.mu.Lock()
+ defer wtx.mu.Unlock()
+ if wtx.err != nil {
+ return convertError(wtx.err)
+ }
+
+ // Note that we do not enqueue a PutOp in the log since this Put is not
+ // updating the current version of a key.
+ return wtx.itx.Put(makeAtVersionKey(key, version), valbuf)
+}
+
+// PutVersion updates the version of a managed key to the requested
+// version. This method is used by the Sync module exclusively when the
+// initiator selects which of the already stored versions (via PutAtVersion
+// calls) becomes the current version. At minimum, an object implementing
+// the Transaction interface is required since this is a Put operation.
+func PutVersion(ctx *context.T, tx store.Transaction, key, version []byte) error {
+ wtx := tx.(*transaction)
+
+ wtx.mu.Lock()
+ defer wtx.mu.Unlock()
+ if wtx.err != nil {
+ return convertError(wtx.err)
+ }
+
+ if err := wtx.itx.Put(makeVersionKey(key), version); err != nil {
+ return err
+ }
+ wtx.ops = append(wtx.ops, &OpPut{PutOp{Key: cp(key), Version: cp(version)}})
+ return nil
+}
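
A minimal sketch (not part of this change) of a client transaction against the wrapped store. Committing it appends one LogEntry per op under "$log:<seq>", with Continued=false on the last entry of the batch; the keys, values, and example package are hypothetical.

package example

import (
	"v.io/syncbase/x/ref/services/syncbase/server/watchable"
	"v.io/syncbase/x/ref/services/syncbase/store"
)

func writeRow(wst watchable.Store) error {
	return store.RunInTransaction(wst, func(tx store.Transaction) error {
		if err := tx.Put([]byte("foo"), []byte("bar")); err != nil {
			return err
		}
		return tx.Delete([]byte("obsolete"))
	})
}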
diff --git a/services/syncbase/server/watchable/transaction_test.go b/services/syncbase/server/watchable/transaction_test.go
new file mode 100644
index 0000000..5fcdf94
--- /dev/null
+++ b/services/syncbase/server/watchable/transaction_test.go
@@ -0,0 +1,225 @@
+// Copyright 2015 The Vanadium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package watchable
+
+import (
+ "bytes"
+ "fmt"
+ "reflect"
+ "runtime/debug"
+ "testing"
+ "time"
+
+ "v.io/syncbase/x/ref/services/syncbase/clock"
+ "v.io/syncbase/x/ref/services/syncbase/store"
+)
+
+type testData struct {
+ key string
+ createVal string
+ updateVal string
+}
+
+var data1 testData = testData{
+ key: "key-a",
+ createVal: "val-a1",
+ updateVal: "val-a2",
+}
+
+var data2 testData = testData{
+ key: "key-b",
+ createVal: "val-b1",
+ updateVal: "val-b2",
+}
+
+func checkAndUpdate(tx store.Transaction, data testData) error {
+ // Check the current value for data.key, then update it.
+ keyBytes := []byte(data.key)
+ val, err := tx.Get(keyBytes, nil)
+ if err != nil {
+ return fmt.Errorf("can't get key %q: %v", data.key, err)
+ }
+ if !bytes.Equal(val, []byte(data.createVal)) {
+ return fmt.Errorf("Unexpected value for key %q: %q", data.key, string(val))
+ }
+ if err := tx.Put(keyBytes, []byte(data.updateVal)); err != nil {
+ return fmt.Errorf("can't put {%q: %v}: %v", data.key, data.updateVal, err)
+ }
+ return nil
+}
+
+func verifyCommitLog(t *testing.T, st store.Store, seq uint64, wantNumEntries int, wantTimestamp time.Time) {
+ ler := newLogEntryReader(st, seq)
+ numEntries := 0
+ for ler.Advance() {
+ _, entry := ler.GetEntry()
+ numEntries++
+ if entry.CommitTimestamp != wantTimestamp.UnixNano() {
+ t.Errorf("Unexpected timestamp found for entry: got %v, want %v", entry.CommitTimestamp, wantTimestamp.UnixNano())
+ }
+ }
+ if numEntries != wantNumEntries {
+ t.Errorf("Unexpected number of log entries: got %v, want %v", numEntries, wantNumEntries)
+ }
+}
+
+func TestLogEntryTimestamps(t *testing.T) {
+ ist, destroy := createStore()
+ defer destroy()
+ t1 := time.Now()
+ inc := time.Duration(1) * time.Second
+ mockClock := newMockSystemClock(t1, inc)
+ var mockAdapter clock.StorageAdapter = clock.MockStorageAdapter()
+
+ vclock := clock.NewVClockWithMockServices(mockAdapter, mockClock, nil)
+ wst1, err := Wrap(ist, vclock, &Options{ManagedPrefixes: nil})
+ if err != nil {
+ t.Fatalf("Wrap failed: %v", err)
+ }
+ seqForCreate := getSeq(wst1)
+
+ // Create data in store
+ if err := store.RunInTransaction(wst1, func(tx store.Transaction) error {
+ // add data1
+ if err := tx.Put([]byte(data1.key), []byte(data1.createVal)); err != nil {
+ return fmt.Errorf("can't put {%q: %v}: %v", data1.key, data1.createVal, err)
+ }
+ // add data2
+ if err := tx.Put([]byte(data2.key), []byte(data2.createVal)); err != nil {
+ return fmt.Errorf("can't put {%q: %v}: %v", data2.key, data2.createVal, err)
+ }
+ return nil
+ }); err != nil {
+ panic(fmt.Errorf("can't commit transaction: %v", err))
+ }
+
+ // Read and verify the LogEntries written as part of the above transaction.
+ // We expect 2 entries in the log for the two puts; the commit timestamp
+ // from the mock clock should be t1.
+ verifyCommitLog(t, ist, seqForCreate, 2, t1)
+
+ // Update data already present in store with a new watchable store
+ wst2, err := Wrap(ist, vclock, &Options{ManagedPrefixes: nil})
+ if err != nil {
+ t.Fatalf("Wrap failed: %v", err)
+ }
+ seqForUpdate := getSeq(wst2)
+ // We expect the sequence number to have moved by +2 for the two puts.
+ if seqForUpdate != (seqForCreate + 2) {
+ t.Errorf("unexpected sequence number for update. seq for create: %d, seq for update: %d", seqForCreate, seqForUpdate)
+ }
+
+ if err := store.RunInTransaction(wst2, func(tx store.Transaction) error {
+ if err := checkAndUpdate(tx, data1); err != nil {
+ return err
+ }
+ if err := checkAndUpdate(tx, data2); err != nil {
+ return err
+ }
+ return nil
+ }); err != nil {
+ panic(fmt.Errorf("can't commit transaction: %v", err))
+ }
+
+ // Read and verify the LogEntries written as part of the above transaction.
+ // We expect 4 entries in the log for the two gets and two puts; the commit
+ // timestamp from the mock clock should be t1 + 1 sec.
+ t2 := t1.Add(inc)
+ verifyCommitLog(t, ist, seqForUpdate, 4, t2)
+}
+
+func eq(t *testing.T, got, want interface{}) {
+ if !reflect.DeepEqual(got, want) {
+ debug.PrintStack()
+ t.Fatalf("got %v, want %v", got, want)
+ }
+}
+
+func TestOpLogConsistency(t *testing.T) {
+ ist, destroy := createStore()
+ defer destroy()
+ t1 := time.Now()
+ inc := time.Duration(1) * time.Second
+ mockClock := newMockSystemClock(t1, inc)
+ var mockAdapter clock.StorageAdapter = clock.MockStorageAdapter()
+
+ vclock := clock.NewVClockWithMockServices(mockAdapter, mockClock, nil)
+ wst, err := Wrap(ist, vclock, &Options{ManagedPrefixes: nil})
+ if err != nil {
+ t.Fatalf("Wrap failed: %v", err)
+ }
+
+ if err := store.RunInTransaction(wst, func(tx store.Transaction) error {
+ putKey, putVal := []byte("foo"), []byte("bar")
+ if err := tx.Put(putKey, putVal); err != nil {
+ return err
+ }
+ getKey := []byte("foo")
+ if getVal, err := tx.Get(getKey, nil); err != nil {
+ return err
+ } else {
+ eq(t, getVal, putVal)
+ }
+ start, limit := []byte("aaa"), []byte("bbb")
+ tx.Scan(start, limit)
+ delKey := []byte("foo")
+ if err := tx.Delete(delKey); err != nil {
+ return err
+ }
+ sgPrefixes := []string{"sga", "sgb"}
+ if err := AddSyncGroupOp(nil, tx, sgPrefixes, false); err != nil {
+ return err
+ }
+ snKey, snVersion := []byte("aa"), []byte("123")
+ if err := AddSyncSnapshotOp(nil, tx, snKey, snVersion); err != nil {
+ return err
+ }
+ pvKey, pvVersion := []byte("pv"), []byte("456")
+ if err := PutVersion(nil, tx, pvKey, pvVersion); err != nil {
+ return err
+ }
+ for _, buf := range [][]byte{putKey, putVal, getKey, start, limit, delKey, snKey, snVersion, pvKey, pvVersion} {
+ buf[0] = '#'
+ }
+ sgPrefixes[0] = "zebra"
+ return nil
+ }); err != nil {
+ t.Fatalf("failed to commit txn: %v", err)
+ }
+
+ // Read first (and only) batch.
+ ler := newLogEntryReader(ist, 0)
+ numEntries, wantNumEntries := 0, 7
+ sawPut := false
+ for ler.Advance() {
+ _, entry := ler.GetEntry()
+ numEntries++
+ switch op := entry.Op.(type) {
+ case OpGet:
+ eq(t, string(op.Value.Key), "foo")
+ case OpScan:
+ eq(t, string(op.Value.Start), "aaa")
+ eq(t, string(op.Value.Limit), "bbb")
+ case OpPut:
+ if !sawPut {
+ eq(t, string(op.Value.Key), "foo")
+ sawPut = true
+ } else {
+ eq(t, string(op.Value.Key), "pv")
+ eq(t, string(op.Value.Version), "456")
+ }
+ case OpDelete:
+ eq(t, string(op.Value.Key), "foo")
+ case OpSyncGroup:
+ eq(t, op.Value.Prefixes, []string{"sga", "sgb"})
+ case OpSyncSnapshot:
+ eq(t, string(op.Value.Key), "aa")
+ eq(t, string(op.Value.Version), "123")
+ default:
+ t.Fatalf("Unexpected op type in entry: %v", entry)
+ }
+ }
+ eq(t, numEntries, wantNumEntries)
+}
diff --git a/services/syncbase/server/watchable/types.vdl b/services/syncbase/server/watchable/types.vdl
new file mode 100644
index 0000000..3f5181b
--- /dev/null
+++ b/services/syncbase/server/watchable/types.vdl
@@ -0,0 +1,77 @@
+// Copyright 2015 The Vanadium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package watchable
+
+// GetOp represents a store get operation.
+type GetOp struct {
+ Key []byte
+}
+
+// ScanOp represents a store scan operation.
+type ScanOp struct {
+ Start []byte
+ Limit []byte
+}
+
+// PutOp represents a store put operation. The new version is written instead
+// of the value to avoid duplicating the user data in the store. The version
+// is used to access the user data of that specific mutation.
+type PutOp struct {
+ Key []byte
+ Version []byte
+}
+
+// DeleteOp represents a store delete operation.
+type DeleteOp struct {
+ Key []byte
+}
+
+// SyncGroupOp represents a change in SyncGroup tracking, adding or removing
+// key prefixes to sync. SyncGroup prefixes cannot be changed; this is used
+// to track changes due to SyncGroup create/join/leave/destroy.
+type SyncGroupOp struct {
+ Prefixes []string
+ Remove bool
+}
+
+// SyncSnapshotOp represents a snapshot operation when creating and joining a
+// SyncGroup. The sync watcher needs to get a snapshot of the Database at the
+// point of creating/joining a SyncGroup. A SyncSnapshotOp entry is written to
+// the log for each Database key that falls within the SyncGroup prefixes. This
+// allows sync to initialize its metadata at the correct versions of the objects
+// when they become syncable. These log entries should be filtered by the
+// client-facing Watch interface because the user data did not actually change.
+type SyncSnapshotOp struct {
+ Key []byte
+ Version []byte
+}
+
+// Op represents a store operation.
+type Op union {
+ Get GetOp
+ Scan ScanOp
+ Put PutOp
+ Delete DeleteOp
+ SyncGroup SyncGroupOp
+ SyncSnapshot SyncSnapshotOp
+}
+
+// LogEntry represents a single store operation. This operation may have been
+// part of a transaction, as signified by the Continued boolean. Read-only
+// operations (and read-only transactions) are not logged.
+type LogEntry struct {
+ // The store operation that was performed.
+ Op Op
+
+ // Time when the operation was committed.
+ CommitTimestamp int64
+
+ // Operation came from sync (used for echo suppression).
+ FromSync bool
+
+ // If true, this entry is followed by more entries that belong to the same
+ // commit as this entry.
+ Continued bool
+}
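
A minimal sketch (not part of this change) of assembling one of these log records in Go and VOM-encoding it, mirroring what the transaction commit path does; the key, the version bytes, and the example package are hypothetical.

package example

import (
	"time"

	"v.io/syncbase/x/ref/services/syncbase/server/watchable"
	"v.io/v23/vom"
)

func encodePutEntry() ([]byte, error) {
	entry := &watchable.LogEntry{
		Op:              watchable.OpPut{Value: watchable.PutOp{Key: []byte("foo"), Version: []byte("1a2b")}},
		CommitTimestamp: time.Now().UnixNano(),
		FromSync:        false,
		Continued:       false, // last (here: only) entry of the commit
	}
	return vom.Encode(entry)
}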
diff --git a/services/syncbase/server/watchable/types.vdl.go b/services/syncbase/server/watchable/types.vdl.go
new file mode 100644
index 0000000..5fd2e04
--- /dev/null
+++ b/services/syncbase/server/watchable/types.vdl.go
@@ -0,0 +1,189 @@
+// Copyright 2015 The Vanadium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// This file was auto-generated by the vanadium vdl tool.
+// Source: types.vdl
+
+package watchable
+
+import (
+ // VDL system imports
+ "v.io/v23/vdl"
+)
+
+// GetOp represents a store get operation.
+type GetOp struct {
+ Key []byte
+}
+
+func (GetOp) __VDLReflect(struct {
+ Name string `vdl:"v.io/syncbase/x/ref/services/syncbase/server/watchable.GetOp"`
+}) {
+}
+
+// ScanOp represents a store scan operation.
+type ScanOp struct {
+ Start []byte
+ Limit []byte
+}
+
+func (ScanOp) __VDLReflect(struct {
+ Name string `vdl:"v.io/syncbase/x/ref/services/syncbase/server/watchable.ScanOp"`
+}) {
+}
+
+// PutOp represents a store put operation. The new version is written instead
+// of the value to avoid duplicating the user data in the store. The version
+// is used to access the user data of that specific mutation.
+type PutOp struct {
+ Key []byte
+ Version []byte
+}
+
+func (PutOp) __VDLReflect(struct {
+ Name string `vdl:"v.io/syncbase/x/ref/services/syncbase/server/watchable.PutOp"`
+}) {
+}
+
+// DeleteOp represents a store delete operation.
+type DeleteOp struct {
+ Key []byte
+}
+
+func (DeleteOp) __VDLReflect(struct {
+ Name string `vdl:"v.io/syncbase/x/ref/services/syncbase/server/watchable.DeleteOp"`
+}) {
+}
+
+// SyncGroupOp represents a change in SyncGroup tracking, adding or removing
+// key prefixes to sync. SyncGroup prefixes cannot be changed; this is used
+// to track changes due to SyncGroup create/join/leave/destroy.
+type SyncGroupOp struct {
+ Prefixes []string
+ Remove bool
+}
+
+func (SyncGroupOp) __VDLReflect(struct {
+ Name string `vdl:"v.io/syncbase/x/ref/services/syncbase/server/watchable.SyncGroupOp"`
+}) {
+}
+
+// SyncSnapshotOp represents a snapshot operation when creating and joining a
+// SyncGroup. The sync watcher needs to get a snapshot of the Database at the
+// point of creating/joining a SyncGroup. A SyncSnapshotOp entry is written to
+// the log for each Database key that falls within the SyncGroup prefixes. This
+// allows sync to initialize its metadata at the correct versions of the objects
+// when they become syncable. These log entries should be filtered by the
+// client-facing Watch interface because the user data did not actually change.
+type SyncSnapshotOp struct {
+ Key []byte
+ Version []byte
+}
+
+func (SyncSnapshotOp) __VDLReflect(struct {
+ Name string `vdl:"v.io/syncbase/x/ref/services/syncbase/server/watchable.SyncSnapshotOp"`
+}) {
+}
+
+type (
+ // Op represents any single field of the Op union type.
+ //
+ // Op represents a store operation.
+ Op interface {
+ // Index returns the field index.
+ Index() int
+ // Interface returns the field value as an interface.
+ Interface() interface{}
+ // Name returns the field name.
+ Name() string
+ // __VDLReflect describes the Op union type.
+ __VDLReflect(__OpReflect)
+ }
+ // OpGet represents field Get of the Op union type.
+ OpGet struct{ Value GetOp }
+ // OpScan represents field Scan of the Op union type.
+ OpScan struct{ Value ScanOp }
+ // OpPut represents field Put of the Op union type.
+ OpPut struct{ Value PutOp }
+ // OpDelete represents field Delete of the Op union type.
+ OpDelete struct{ Value DeleteOp }
+ // OpSyncGroup represents field SyncGroup of the Op union type.
+ OpSyncGroup struct{ Value SyncGroupOp }
+ // OpSyncSnapshot represents field SyncSnapshot of the Op union type.
+ OpSyncSnapshot struct{ Value SyncSnapshotOp }
+ // __OpReflect describes the Op union type.
+ __OpReflect struct {
+ Name string `vdl:"v.io/syncbase/x/ref/services/syncbase/server/watchable.Op"`
+ Type Op
+ Union struct {
+ Get OpGet
+ Scan OpScan
+ Put OpPut
+ Delete OpDelete
+ SyncGroup OpSyncGroup
+ SyncSnapshot OpSyncSnapshot
+ }
+ }
+)
+
+func (x OpGet) Index() int { return 0 }
+func (x OpGet) Interface() interface{} { return x.Value }
+func (x OpGet) Name() string { return "Get" }
+func (x OpGet) __VDLReflect(__OpReflect) {}
+
+func (x OpScan) Index() int { return 1 }
+func (x OpScan) Interface() interface{} { return x.Value }
+func (x OpScan) Name() string { return "Scan" }
+func (x OpScan) __VDLReflect(__OpReflect) {}
+
+func (x OpPut) Index() int { return 2 }
+func (x OpPut) Interface() interface{} { return x.Value }
+func (x OpPut) Name() string { return "Put" }
+func (x OpPut) __VDLReflect(__OpReflect) {}
+
+func (x OpDelete) Index() int { return 3 }
+func (x OpDelete) Interface() interface{} { return x.Value }
+func (x OpDelete) Name() string { return "Delete" }
+func (x OpDelete) __VDLReflect(__OpReflect) {}
+
+func (x OpSyncGroup) Index() int { return 4 }
+func (x OpSyncGroup) Interface() interface{} { return x.Value }
+func (x OpSyncGroup) Name() string { return "SyncGroup" }
+func (x OpSyncGroup) __VDLReflect(__OpReflect) {}
+
+func (x OpSyncSnapshot) Index() int { return 5 }
+func (x OpSyncSnapshot) Interface() interface{} { return x.Value }
+func (x OpSyncSnapshot) Name() string { return "SyncSnapshot" }
+func (x OpSyncSnapshot) __VDLReflect(__OpReflect) {}
+
+// LogEntry represents a single store operation. This operation may have been
+// part of a transaction, as signified by the Continued boolean. Read-only
+// operations (and read-only transactions) are not logged.
+type LogEntry struct {
+ // The store operation that was performed.
+ Op Op
+ // Time when the operation was committed.
+ CommitTimestamp int64
+ // Operation came from sync (used for echo suppression).
+ FromSync bool
+ // If true, this entry is followed by more entries that belong to the same
+ // commit as this entry.
+ Continued bool
+}
+
+func (LogEntry) __VDLReflect(struct {
+ Name string `vdl:"v.io/syncbase/x/ref/services/syncbase/server/watchable.LogEntry"`
+}) {
+}
+
+func init() {
+ vdl.Register((*GetOp)(nil))
+ vdl.Register((*ScanOp)(nil))
+ vdl.Register((*PutOp)(nil))
+ vdl.Register((*DeleteOp)(nil))
+ vdl.Register((*SyncGroupOp)(nil))
+ vdl.Register((*SyncSnapshotOp)(nil))
+ vdl.Register((*Op)(nil))
+ vdl.Register((*LogEntry)(nil))
+}
diff --git a/services/syncbase/server/watchable/util.go b/services/syncbase/server/watchable/util.go
new file mode 100644
index 0000000..8eb606e
--- /dev/null
+++ b/services/syncbase/server/watchable/util.go
@@ -0,0 +1,93 @@
+// Copyright 2015 The Vanadium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package watchable
+
+// TODO(sadovsky): Avoid copying back and forth between []byte's and strings.
+// We should probably convert incoming strings to []byte's as early as possible,
+// and deal exclusively in []byte's internally.
+// TODO(rdaoud): I propose we standardize on key and version being strings and
+// the value being []byte within Syncbase. We define invalid characters in the
+// key space (and reserve "$" and ":"). The lower storage engine layers are
+// free to map that to what they need internally ([]byte or string).
+
+import (
+ "fmt"
+ "math/rand"
+ "sync"
+ "time"
+
+ "v.io/syncbase/x/ref/services/syncbase/server/util"
+ "v.io/syncbase/x/ref/services/syncbase/store"
+ "v.io/v23/verror"
+)
+
+var (
+ rng *rand.Rand = rand.New(rand.NewSource(time.Now().UTC().UnixNano()))
+ rngLock sync.Mutex
+)
+
+// NewVersion returns a new version for a store entry mutation.
+func NewVersion() []byte {
+ // TODO(rdaoud): revisit the number of bits: should we use 128 bits?
+ // Note: the version has to be unique per object key, not on its own.
+ // TODO(rdaoud): move sync's rand64() to a general Syncbase spot and
+ // reuse it here.
+ rngLock.Lock()
+ num := rng.Int63()
+ rngLock.Unlock()
+
+ return []byte(fmt.Sprintf("%x", num))
+}
+
+func makeVersionKey(key []byte) []byte {
+ return []byte(join(util.VersionPrefix, string(key)))
+}
+
+func makeAtVersionKey(key, version []byte) []byte {
+ return []byte(join(string(key), string(version)))
+}
+
+func getVersion(sntx store.SnapshotOrTransaction, key []byte) ([]byte, error) {
+ return sntx.Get(makeVersionKey(key), nil)
+}
+
+func getAtVersion(st store.StoreReader, key, valbuf, version []byte) ([]byte, error) {
+ return st.Get(makeAtVersionKey(key, version), valbuf)
+}
+
+func getVersioned(sntx store.SnapshotOrTransaction, key, valbuf []byte) ([]byte, error) {
+ version, err := getVersion(sntx, key)
+ if err != nil {
+ return valbuf, err
+ }
+ return getAtVersion(sntx, key, valbuf, version)
+}
+
+func putVersioned(tx store.Transaction, key, value []byte) ([]byte, error) {
+ version := NewVersion()
+ if err := tx.Put(makeVersionKey(key), version); err != nil {
+ return nil, err
+ }
+ if err := tx.Put(makeAtVersionKey(key, version), value); err != nil {
+ return nil, err
+ }
+ return version, nil
+}
+
+func deleteVersioned(tx store.Transaction, key []byte) error {
+ return tx.Delete(makeVersionKey(key))
+}
+
+func join(parts ...string) string {
+ return util.JoinKeyParts(parts...)
+}
+
+func split(key string) []string {
+ return util.SplitKeyParts(key)
+}
+
+func convertError(err error) error {
+ return verror.Convert(verror.IDAction{}, nil, err)
+}
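
A minimal sketch (not part of this change), written as if it were inside this package, of the two records a single versioned put creates for a managed key: a version pointer at makeVersionKey(key) and the payload at makeAtVersionKey(key, version). The key and value are hypothetical.

// examplePutVersioned writes the version pointer and the payload record for
// one versioned put of a managed key.
func examplePutVersioned(tx store.Transaction) error {
	_, err := putVersioned(tx, []byte("foo"), []byte("bar"))
	return err
}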
diff --git a/services/syncbase/server/watchable/util_test.go b/services/syncbase/server/watchable/util_test.go
new file mode 100644
index 0000000..193c06d
--- /dev/null
+++ b/services/syncbase/server/watchable/util_test.go
@@ -0,0 +1,33 @@
+// Copyright 2015 The Vanadium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package watchable
+
+import (
+ "testing"
+
+ "v.io/syncbase/x/ref/services/syncbase/clock"
+)
+
+// TestGetNextLogSeq tests that the getNextLogSeq helper works on range 0..10.
+func TestGetNextLogSeq(t *testing.T) {
+ st, destroy := createStore()
+ defer destroy()
+ var mockAdapter clock.StorageAdapter = clock.MockStorageAdapter()
+ vclock := clock.NewVClockWithMockServices(mockAdapter, nil, nil)
+ st, err := Wrap(st, vclock, &Options{})
+ if err != nil {
+ t.Fatal(err)
+ }
+ for i := uint64(0); i <= uint64(10); i++ {
+ seq, err := getNextLogSeq(st)
+ if err != nil {
+ t.Fatalf("failed to get log seq: %v", err)
+ }
+ if got, want := seq, i; got != want {
+ t.Fatalf("unexpected log seq: got %v, want %v", got, want)
+ }
+ st.Put([]byte(logEntryKey(i)), nil)
+ }
+}
diff --git a/services/syncbase/server/watchable/watcher.go b/services/syncbase/server/watchable/watcher.go
new file mode 100644
index 0000000..fc0481a
--- /dev/null
+++ b/services/syncbase/server/watchable/watcher.go
@@ -0,0 +1,212 @@
+// Copyright 2015 The Vanadium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package watchable
+
+import (
+ "fmt"
+ "strconv"
+ "sync"
+
+ "v.io/syncbase/x/ref/services/syncbase/server/util"
+ "v.io/syncbase/x/ref/services/syncbase/store"
+ "v.io/v23/services/watch"
+ "v.io/v23/verror"
+ "v.io/v23/vom"
+ "v.io/x/lib/vlog"
+)
+
+// watcher maintains a state and a condition variable. The watcher sends
+// a broadcast signal every time the state changes. The state is increased
+// by 1 every time the store has new data. Initially the state equals 1.
+// If the state becomes 0, the watcher has been closed and the state will
+// never change again.
+// TODO(rogulenko): Broadcast a signal from time to time to unblock waiting
+// clients.
+type watcher struct {
+ mu *sync.RWMutex
+ cond *sync.Cond
+ state uint64
+}
+
+func newWatcher() *watcher {
+ mu := &sync.RWMutex{}
+ return &watcher{
+ mu: mu,
+ cond: sync.NewCond(mu.RLocker()),
+ state: 1,
+ }
+}
+
+// close closes the watcher.
+func (w *watcher) close() {
+ w.mu.Lock()
+ w.state = 0
+ w.cond.Broadcast()
+ w.mu.Unlock()
+}
+
+// broadcastUpdates broadcasts the update notification to watch clients.
+func (w *watcher) broadcastUpdates() {
+ w.mu.Lock()
+ if w.state != 0 {
+ w.state++
+ w.cond.Broadcast()
+ } else {
+ vlog.Error("broadcastUpdates() called on a closed watcher")
+ }
+ w.mu.Unlock()
+}
+
+// WatchUpdates returns a function that can be used to watch for changes to
+// the database. The store maintains a state (initially 1) that is increased
+// by 1 every time the store has new data. The waitForChange function takes
+// the last returned state and blocks until the state changes, returning the
+// new state. A state equal to 0 means the store is closed and no further
+// updates will come. If the waitForChange function is passed a state different
+// from the current state of the store, or the store is closed, it returns
+// immediately. It may also return a non-zero state equal to the state passed
+// as the argument; this behavior helps to unblock clients if the store doesn't
+// have updates for a long period of time.
+func WatchUpdates(st store.Store) (waitForChange func(state uint64) uint64) {
+ // TODO(rogulenko): Remove dynamic type assertion here and in other places.
+ watcher := st.(*wstore).watcher
+ return func(state uint64) uint64 {
+ watcher.cond.L.Lock()
+ defer watcher.cond.L.Unlock()
+ if watcher.state != 0 && watcher.state == state {
+ watcher.cond.Wait()
+ }
+ return watcher.state
+ }
+}
+
+// GetResumeMarker returns the ResumeMarker that points to the current end
+// of the event log.
+func GetResumeMarker(st store.StoreReader) (watch.ResumeMarker, error) {
+ seq, err := getNextLogSeq(st)
+ return watch.ResumeMarker(logEntryKey(seq)), err
+}
+
+// MakeResumeMarker converts a sequence number to the resume marker.
+func MakeResumeMarker(seq uint64) watch.ResumeMarker {
+ return watch.ResumeMarker(logEntryKey(seq))
+}
+
+func logEntryKey(seq uint64) string {
+ // Note: MaxUint64 is 0xffffffffffffffff.
+ // TODO(sadovsky): Use a more space-efficient lexicographic number encoding.
+ return join(util.LogPrefix, fmt.Sprintf("%016x", seq))
+}
+
+// ReadBatchFromLog returns a batch of watch log records (a transaction) from
+// the given database and the new resume marker at the end of the batch.
+func ReadBatchFromLog(st store.Store, resumeMarker watch.ResumeMarker) ([]*LogEntry, watch.ResumeMarker, error) {
+ seq, err := parseResumeMarker(string(resumeMarker))
+ if err != nil {
+ return nil, resumeMarker, err
+ }
+ _, scanLimit := util.ScanPrefixArgs(util.LogPrefix, "")
+ scanStart := resumeMarker
+ endOfBatch := false
+
+ // Use the store directly to scan these read-only log entries; there is no
+ // need to create a snapshot since they are never overwritten. Read and
+ // buffer a batch before processing it.
+ var logs []*LogEntry
+ stream := st.Scan(scanStart, scanLimit)
+ for stream.Advance() {
+ seq++
+ var logEnt LogEntry
+ if err := vom.Decode(stream.Value(nil), &logEnt); err != nil {
+ return nil, resumeMarker, err
+ }
+
+ logs = append(logs, &logEnt)
+
+ // Stop if this is the end of the batch.
+ if !logEnt.Continued {
+ endOfBatch = true
+ break
+ }
+ }
+
+ if err = stream.Err(); err != nil {
+ return nil, resumeMarker, err
+ }
+ if !endOfBatch {
+ if len(logs) > 0 {
+ vlog.Fatalf("end of batch not found after %d entries", len(logs))
+ }
+ return nil, resumeMarker, nil
+ }
+ return logs, watch.ResumeMarker(logEntryKey(seq)), nil
+}
+
+func parseResumeMarker(resumeMarker string) (uint64, error) {
+ parts := split(resumeMarker)
+ if len(parts) != 2 {
+ return 0, verror.New(watch.ErrUnknownResumeMarker, nil, resumeMarker)
+ }
+ seq, err := strconv.ParseUint(parts[1], 16, 64)
+ if err != nil {
+ return 0, verror.New(watch.ErrUnknownResumeMarker, nil, resumeMarker)
+ }
+ return seq, nil
+}
+
+// logEntryExists returns true iff the log contains an entry with the given
+// sequence number.
+func logEntryExists(st store.StoreReader, seq uint64) (bool, error) {
+ _, err := st.Get([]byte(logEntryKey(seq)), nil)
+ if err != nil && verror.ErrorID(err) != store.ErrUnknownKey.ID {
+ return false, err
+ }
+ return err == nil, nil
+}
+
+// getNextLogSeq returns the next sequence number to be used for a new commit.
+// NOTE: this function assumes that all sequence numbers in the log represent
+// some range [start, limit] without gaps.
+func getNextLogSeq(st store.StoreReader) (uint64, error) {
+ // Determine initial value for seq.
+ // TODO(sadovsky): Consider using a bigger seq.
+
+ // Find the beginning of the log.
+ it := st.Scan(util.ScanPrefixArgs(util.LogPrefix, ""))
+ if !it.Advance() {
+ return 0, nil
+ }
+ if it.Err() != nil {
+ return 0, it.Err()
+ }
+ seq, err := parseResumeMarker(string(it.Key(nil)))
+ if err != nil {
+ return 0, err
+ }
+ var step uint64 = 1
+ // Suppose the actual value we are looking for is S. First, we estimate the
+ // range for S. We find seq, step: seq < S <= seq + step.
+ for {
+ if ok, err := logEntryExists(st, seq+step); err != nil {
+ return 0, err
+ } else if !ok {
+ break
+ }
+ seq += step
+ step *= 2
+ }
+ // Next we keep the seq < S <= seq + step invariant, reducing step to 1.
+ for step > 1 {
+ step /= 2
+ if ok, err := logEntryExists(st, seq+step); err != nil {
+ return 0, err
+ } else if ok {
+ seq += step
+ }
+ }
+ // Now seq < S <= seq + 1, thus S = seq + 1.
+ return seq + 1, nil
+}
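
A minimal sketch (not part of this change) of a watch loop combining WatchUpdates with ReadBatchFromLog; st is assumed to be a store returned by Wrap, the example package is hypothetical, and cancellation and error handling are simplified.

package example

import (
	"v.io/syncbase/x/ref/services/syncbase/server/watchable"
	"v.io/syncbase/x/ref/services/syncbase/store"
)

func watchLoop(st store.Store) error {
	waitForChange := watchable.WatchUpdates(st)
	resmark, err := watchable.GetResumeMarker(st)
	if err != nil {
		return err
	}
	state := uint64(1)
	for state != 0 {
		logs, newResmark, err := watchable.ReadBatchFromLog(st, resmark)
		if err != nil {
			return err
		}
		if logs == nil {
			// No complete batch yet; block until the store reports new data
			// (or returns 0, meaning it was closed).
			state = waitForChange(state)
			continue
		}
		for _, entry := range logs {
			_ = entry // process one entry of the committed batch
		}
		resmark = newResmark
	}
	return nil
}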
diff --git a/services/syncbase/server/watchable/watcher_test.go b/services/syncbase/server/watchable/watcher_test.go
new file mode 100644
index 0000000..c978123
--- /dev/null
+++ b/services/syncbase/server/watchable/watcher_test.go
@@ -0,0 +1,93 @@
+// Copyright 2015 The Vanadium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package watchable
+
+import (
+ "bytes"
+ "fmt"
+ "testing"
+
+ "v.io/syncbase/x/ref/services/syncbase/server/util"
+ "v.io/syncbase/x/ref/services/syncbase/store"
+)
+
+// TestWatchLogBatch tests fetching a batch of log records.
+func TestWatchLogBatch(t *testing.T) {
+ runTest(t, []string{util.RowPrefix, util.PermsPrefix}, runWatchLogBatchTest)
+}
+
+// runWatchLogBatchTest tests fetching a batch of log records.
+func runWatchLogBatchTest(t *testing.T, st store.Store) {
+ // Create a set of batches to fill the log queue.
+ numTx, numPut := 3, 4
+
+ makeKeyVal := func(batchNum, recNum int) ([]byte, []byte) {
+ key := util.JoinKeyParts(util.RowPrefix, fmt.Sprintf("foo-%d-%d", batchNum, recNum))
+ val := fmt.Sprintf("val-%d-%d", batchNum, recNum)
+ return []byte(key), []byte(val)
+ }
+
+ for i := 0; i < numTx; i++ {
+ tx := st.NewTransaction()
+ for j := 0; j < numPut; j++ {
+ key, val := makeKeyVal(i, j)
+ if err := tx.Put(key, val); err != nil {
+ t.Errorf("cannot put %s (%s): %v", key, val, err)
+ }
+ }
+ tx.Commit()
+ }
+
+ // Fetch the batches and a few more empty fetches and verify them.
+ resmark := MakeResumeMarker(0)
+ var seq uint64
+
+ for i := 0; i < (numTx + 3); i++ {
+ logs, newResmark, err := ReadBatchFromLog(st, resmark)
+ if err != nil {
+ t.Fatalf("can't get watch log batch: %v", err)
+ }
+ if i < numTx {
+ if len(logs) != numPut {
+ t.Errorf("log fetch (i=%d) wrong log seq: %d instead of %d",
+ i, len(logs), numPut)
+ }
+
+ seq += uint64(len(logs))
+ expResmark := MakeResumeMarker(seq)
+ if !bytes.Equal(newResmark, expResmark) {
+ t.Errorf("log fetch (i=%d) wrong resmark: %s instead of %s",
+ i, newResmark, expResmark)
+ }
+
+ for j, log := range logs {
+ op := log.Op.(OpPut)
+ expKey, expVal := makeKeyVal(i, j)
+ key := op.Value.Key
+ if !bytes.Equal(key, expKey) {
+ t.Errorf("log fetch (i=%d, j=%d) bad key: %s instead of %s",
+ i, j, key, expKey)
+ }
+ tx := st.NewTransaction()
+ var val []byte
+ val, err := GetAtVersion(nil, tx, key, val, op.Value.Version)
+ if err != nil {
+ t.Errorf("log fetch (i=%d, j=%d) cannot GetAtVersion(): %v", i, j, err)
+ }
+ if !bytes.Equal(val, expVal) {
+ t.Errorf("log fetch (i=%d, j=%d) bad value: %s instead of %s",
+ i, j, val, expVal)
+ }
+ tx.Abort()
+ }
+ } else {
+ if logs != nil || !bytes.Equal(newResmark, resmark) {
+ t.Errorf("NOP log fetch (i=%d) had changes: %d logs, resmask %s",
+ i, len(logs), newResmark)
+ }
+ }
+ resmark = newResmark
+ }
+}
diff --git a/services/syncbase/signing/hashcache/hashcache.go b/services/syncbase/signing/hashcache/hashcache.go
new file mode 100644
index 0000000..26e8c94
--- /dev/null
+++ b/services/syncbase/signing/hashcache/hashcache.go
@@ -0,0 +1,77 @@
+// Copyright 2015 The Vanadium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// Package hashcache implements a simple cache intended to be indexed by hash
+// values. The keys are of type []byte. Values are arbitrary interface{}
+// values. Entries may expire if not used for a duration specified by the
+// client.
+package hashcache
+
+import "sync"
+import "time"
+
+// An internalValue is the client's data plus the data's expiry time.
+type internalValue struct {
+ data interface{}
+ expiry time.Time
+}
+
+// A Cache allows the user to store arbitrary values, keyed by the contents of
+// byte vectors. Entries may be added, deleted, and looked up. They may
+// expire if not used.
+type Cache struct {
+ expiry time.Duration
+ mu sync.Mutex // protects fields below.
+ entries map[string]*internalValue
+ insertionsSinceGC int // number of insertions since last GC
+}
+
+// New() returns a pointer to a new, empty Cache.
+// Entries may expire if not used for "expiry".
+func New(expiry time.Duration) *Cache {
+ return &Cache{expiry: expiry, entries: make(map[string]*internalValue)}
+}
+
+// Lookup() returns the data associated with key[] in *c, and whether there is
+// such a value. The client may not modify the returned data; it is shared
+// with *c.
+func (c *Cache) Lookup(key []byte) (data interface{}, isPresent bool) {
+ var value *internalValue
+ c.mu.Lock()
+ value, isPresent = c.entries[string(key)]
+ if isPresent {
+ value.expiry = time.Now().Add(c.expiry)
+ data = value.data
+ }
+ c.mu.Unlock()
+ return data, isPresent
+}
+
+// Add() associates data with key[] in *c. Any data previously associated with
+// key[] is forgotten. The implementation may discard the association at some
+// future time (governed by the expiry passed to New()) to limit the size of
+// the cache. data may not be modified after this call; it is shared with *c.
+func (c *Cache) Add(key []byte, data interface{}) {
+ c.mu.Lock()
+ now := time.Now()
+ c.entries[string(key)] = &internalValue{data: data, expiry: now.Add(c.expiry)}
+ c.insertionsSinceGC++
+ // Scan to expire stale entries if insertions since the last scan exceed 20% of the entries.
+ if c.insertionsSinceGC*5 > len(c.entries) {
+ for ik, iv := range c.entries {
+ if iv.expiry.Before(now) {
+ delete(c.entries, ik)
+ }
+ }
+ c.insertionsSinceGC = 0
+ }
+ c.mu.Unlock()
+}
+
+// Delete() removes any association of data with key[] in *c.
+func (c *Cache) Delete(key []byte) {
+ c.mu.Lock()
+ delete(c.entries, string(key))
+ c.mu.Unlock()
+}
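+
+// A minimal usage sketch (illustrative only; hashOfBlob and someValue are
+// placeholders, and the expiry duration is arbitrary):
+//
+//	c := hashcache.New(time.Minute)
+//	c.Add(hashOfBlob, someValue)           // associate a value with a hash key
+//	if v, ok := c.Lookup(hashOfBlob); ok { // a hit also refreshes the expiry
+//		_ = v // shared with the cache; must not be modified
+//	}
+//	c.Delete(hashOfBlob)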
diff --git a/services/syncbase/signing/hashcache/hashcache_test.go b/services/syncbase/signing/hashcache/hashcache_test.go
new file mode 100644
index 0000000..96ba865
--- /dev/null
+++ b/services/syncbase/signing/hashcache/hashcache_test.go
@@ -0,0 +1,83 @@
+// Copyright 2015 The Vanadium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// Package hashcache_test tests the hashcache package.
+package hashcache_test
+
+import "runtime"
+import "testing"
+import "time"
+
+import "v.io/syncbase/x/ref/services/syncbase/signing/hashcache"
+
+// checkHashesWithNoData() checks that hash[start:] have no data in the cache.
+// (The start index is passed, rather than expecting the caller to sub-slice,
+// so that error messages refer to the index.)
+func checkHashesWithNoData(t *testing.T, cache *hashcache.Cache, start int, hash [][]byte) {
+ _, _, callerLine, _ := runtime.Caller(1)
+ for i := start; i != len(hash); i++ {
+ value, found := cache.Lookup(hash[i])
+ if value != nil || found {
+ t.Errorf("line %d: unset cache entry hash[%d]=%v has value %v, but is expected not to be set", callerLine, i, hash[i], value)
+ }
+ }
+}
+
+func TestCache(t *testing.T) {
+ hash := [][]byte{
+ []byte{0x00, 0x01, 0x02, 0x3},
+ []byte{0x04, 0x05, 0x06, 0x7},
+ []byte{0x08, 0x09, 0x0a, 0xb}}
+ var value interface{}
+ var found bool
+ var want string
+
+ cache := hashcache.New(5 * time.Second)
+
+ // The cache should initially have none of the keys.
+ checkHashesWithNoData(t, cache, 0, hash)
+
+ // Add the first key, and check that it's there.
+ want = "hash0"
+ cache.Add(hash[0], want)
+ value, found = cache.Lookup(hash[0])
+ if s, ok := value.(string); !found || !ok || s != want {
+ t.Errorf("cache entry hash[%d]=%v got %v, want %v", 0, hash[0], s, want)
+ }
+ checkHashesWithNoData(t, cache, 1, hash)
+
+ // Add the second key, and check that both it and the first key are there.
+ want = "hash1"
+ cache.Add(hash[1], want)
+ value, found = cache.Lookup(hash[1])
+ if s, ok := value.(string); !ok || s != want {
+ t.Errorf("cache entry hash[%d]=%v got %v, want %v", 1, hash[1], s, want)
+ }
+ want = "hash0"
+ value, found = cache.Lookup(hash[0])
+ if s, ok := value.(string); !found || !ok || s != want {
+ t.Errorf("cache entry hash[%d]=%v got %v, want %v", 0, hash[0], s, want)
+ }
+ checkHashesWithNoData(t, cache, 2, hash)
+
+ // Wait for all entries to time out.
+ time.Sleep(6 * time.Second) // sleep past expiry time
+
+ // Add the first key again, enough times to trigger garbage collection.
+ for i := 0; i != 10; i++ {
+ want = "hash0 again"
+ cache.Add(hash[0], want)
+ value, found = cache.Lookup(hash[0])
+ if s, ok := value.(string); !found || !ok || s != want {
+ t.Errorf("cache entry hash[%d]=%v got %v, want %v", 0, hash[0], s, want)
+ }
+ }
+ // The entry for hash1 should have expired, since the expiry time has
+ // passed, and many things have been inserted into the cache.
+ checkHashesWithNoData(t, cache, 1, hash)
+
+ cache.Delete(hash[0])
+ checkHashesWithNoData(t, cache, 0, hash)
+}
diff --git a/services/syncbase/signing/krl/krl.go b/services/syncbase/signing/krl/krl.go
new file mode 100644
index 0000000..422f53e
--- /dev/null
+++ b/services/syncbase/signing/krl/krl.go
@@ -0,0 +1,39 @@
+// Copyright 2015 The Vanadium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// Package krl implements a trivial, in-memory key revocation list.
+// It is a placeholder for a real key revocation mechanism.
+package krl
+
+import "crypto/sha256"
+import "time"
+
+// A KRL is a key revocation list. It maps the hashes of keys that have been revoked
+// to revocation times.
+type KRL struct {
+ table map[[sha256.Size]byte]time.Time
+}
+
+var notYetRevoked = time.Now().Add(100 * 365 * 24 * time.Hour) // far future
+
+// New() returns a pointer to a new, empty key revocation list.
+func New() *KRL {
+ return &KRL{table: make(map[[sha256.Size]byte]time.Time)}
+}
+
+// Revoke() inserts an entry into *krl recording that key[] was revoked at time
+// "when".
+func (krl *KRL) Revoke(key []byte, when time.Time) {
+ krl.table[sha256.Sum256(key)] = when
+}
+
+// RevocationTime() returns the revocation time for key[].
+// If key[] is not in the list, a time in the far future is returned.
+func (krl *KRL) RevocationTime(key []byte) (whenRevoked time.Time) {
+ var found bool
+ if whenRevoked, found = krl.table[sha256.Sum256(key)]; !found {
+ whenRevoked = notYetRevoked
+ }
+ return whenRevoked
+}
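+
+// A minimal usage sketch (illustrative only; marshalledKey and otherKey are
+// placeholder byte slices):
+//
+//	list := krl.New()
+//	list.Revoke(marshalledKey, time.Now())
+//	if list.RevocationTime(otherKey).After(time.Now()) {
+//		// otherKey is not currently known to be revoked.
+//	}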
diff --git a/services/syncbase/signing/krl/krl_test.go b/services/syncbase/signing/krl/krl_test.go
new file mode 100644
index 0000000..73f48ad
--- /dev/null
+++ b/services/syncbase/signing/krl/krl_test.go
@@ -0,0 +1,54 @@
+// Copyright 2015 The Vanadium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// Package krl_test tests the key revocation list package.
+package krl_test
+
+import "runtime"
+import "testing"
+import "time"
+
+import "v.io/syncbase/x/ref/services/syncbase/signing/krl"
+
+// checkKeysNotRevoked() checks that key[start:] have not been revoked. (The
+// start index is passed, rather than expecting the caller to sub-slice, so
+// that error messages refer to the expected index.)
+func checkKeysNotRevoked(t *testing.T, krl *krl.KRL, start int, key [][]byte, now time.Time) {
+ _, _, callerLine, _ := runtime.Caller(1)
+ year := 365 * 24 * time.Hour
+ for i := start; i != len(key); i++ {
+ revoked := krl.RevocationTime(key[i])
+ if revoked.Before(now.Add(year)) {
+ t.Errorf("line %d: unrevoked key[%d]=%v has revocation time %v, which is not far enough in the future", callerLine, i, key[i], revoked)
+ }
+ }
+}
+
+func TestKRL(t *testing.T) {
+ now := time.Now()
+ key := [][]byte{
+ []byte{0x00, 0x01, 0x02, 0x3},
+ []byte{0x04, 0x05, 0x06, 0x7},
+ []byte{0x08, 0x09, 0x0a, 0xb}}
+ var revoked time.Time
+
+ krl := krl.New()
+
+ checkKeysNotRevoked(t, krl, 0, key, now)
+
+ krl.Revoke(key[0], now)
+ if revoked = krl.RevocationTime(key[0]); !revoked.Equal(now) {
+ t.Errorf("unrevoked key %v has revocation time %v, but expected %v", key[0], revoked, now)
+ }
+ checkKeysNotRevoked(t, krl, 1, key, now)
+
+ krl.Revoke(key[1], now)
+ if revoked = krl.RevocationTime(key[0]); !revoked.Equal(now) {
+ t.Errorf("unrevoked key %v has revocation time %v, but expected %v", key[0], revoked, now)
+ }
+ if revoked = krl.RevocationTime(key[1]); !revoked.Equal(now) {
+ t.Errorf("unrevoked key %v has revocation time %v, but expected %v", key[1], revoked, now)
+ }
+ checkKeysNotRevoked(t, krl, 2, key, now)
+}
diff --git a/services/syncbase/signing/signeddata.vdl b/services/syncbase/signing/signeddata.vdl
new file mode 100644
index 0000000..4b4ceb8
--- /dev/null
+++ b/services/syncbase/signing/signeddata.vdl
@@ -0,0 +1,72 @@
+// Copyright 2015 The Vanadium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package signing
+
+import "v.io/v23/security"
+
+// A DataWithSignature represents a signed, and possibly validated, collection
+// of Item structs.
+//
+// If IsValidated==false and the AuthorSigned signature is valid, it means:
+// The signer whose Blessings have hash BlessingsHash asserts Data.
+//
+// If IsValidated==true and both the AuthorSigned and ValidatorSigned signatures are valid,
+// it means both:
+// 1) The signer whose Blessings b have hash BlessingsHash asserts Data.
+// 2) If vd is the ValidatorData with hash ValidatorDataHash, the owner of
+// vd.PublicKey asserts that it checked that at least the names vd.Names[] were
+// valid in b.
+//
+// The sender obtains:
+// - BlessingsHash (and the wire form of the blessings) with ValidationCache.AddBlessings().
+// - ValidatorDataHash (and the wire form of the ValidatorData) with ValidationCache.AddValidatorData().
+//
+// The receiver looks up:
+// - BlessingsHash with ValidationCache.LookupBlessingsData()
+// - ValidatorDataHash with ValidationCache.LookupValidatorData()
+//
+// If not yet there, the receiver inserts the values into its ValidationCache with:
+// - ValidationCache.AddWireBlessings()
+// - ValidationCache.AddValidatorData()
+type DataWithSignature struct {
+ Data []Item
+ // BlessingsHash is a key for the validation cache; the corresponding
+ // cached value is a security.Blessings.
+ BlessingsHash []byte
+ // AuthorSigned is the signature of Data and BlessingsHash using the
+ // private key associated with the blessings hashed in BlessingsHash.
+ AuthorSigned security.Signature
+
+ IsValidated bool // Whether fields below are meaningful.
+
+ // ValidatorDataHash is a key for the validation cache returned by
+ // ValidatorData.Hash(); the corresponding cached value is the
+ // ValidatorData.
+ ValidatorDataHash []byte
+ ValidatorSigned security.Signature
+}
+
+// An Item represents either a marshalled data item or its SHA-256 hash.
+// The Data field is a []byte, rather than an "any", to make signatures
+// deterministic. VOM encoding is not deterministic for two reasons:
+// - map elements may be marshalled in any order
+// - different versions of VOM may marshal in different ways.
+// Thus, the initial producer of a data item marshals the data once, and it is
+// this marshalled form that is transmitted from device to device. If the
+// data were unmarshalled and then remarshalled, the signatures might not
+// match. The Hash field is used instead of the Data field when the recipient
+// of the DataWithSignature is not permitted to see certain Items' Data
+// fields.
+type Item union {
+ Data []byte // Marshalled form of data.
+ Hash []byte // Hash of what would have been in Data, as returned by SumByteVectorWithLength(Data).
+}
+
+// WireValidatorData is the wire form of ValidatorData.
+// It excludes the unmarshalled form of the public key.
+type WireValidatorData struct {
+ Names []string // Names of valid signing blessings in the Blessings referred to by BlessingsHash.
+ MarshalledPublicKey []byte // PublicKey, marshalled with MarshalBinary().
+}
diff --git a/services/syncbase/signing/signeddata.vdl.go b/services/syncbase/signing/signeddata.vdl.go
new file mode 100644
index 0000000..f96fe50
--- /dev/null
+++ b/services/syncbase/signing/signeddata.vdl.go
@@ -0,0 +1,128 @@
+// Copyright 2015 The Vanadium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// This file was auto-generated by the vanadium vdl tool.
+// Source: signeddata.vdl
+
+package signing
+
+import (
+ // VDL system imports
+ "v.io/v23/vdl"
+
+ // VDL user imports
+ "v.io/v23/security"
+)
+
+// A DataWithSignature represents a signed, and possibly validated, collection
+// of Item structs.
+//
+// If IsValidated==false and the AuthorSigned signature is valid, it means:
+// The signer whose Blessings have hash BlessingsHash asserts Data.
+//
+// If IsValidated==true and both the AuthorSigned and ValidatorSigned signatures are valid,
+// it means both:
+// 1) The signer whose Blessings b have hash BlessingsHash asserts Data.
+// 2) If vd is the ValidatorData with hash ValidatorDataHash, the owner of
+// vd.PublicKey asserts that it checked that at least the names vd.Names[] were
+// valid in b.
+//
+// The sender obtains:
+// - BlessingsHash (and the wire form of the blessings) with ValidationCache.AddBlessings().
+// - ValidatorDataHash (and the wire form of the ValidatorData) with ValidationCache.AddValidatorData().
+//
+// The receiver looks up:
+// - BlessingsHash with ValidationCache.LookupBlessingsData()
+// - ValidatorDataHash with ValidationCache.LookupValidatorData()
+//
+// If not yet there, the receiver inserts the values into its ValidationCache with:
+// - ValidationCache.AddWireBlessings()
+// - ValidationCache.AddValidatorData()
+type DataWithSignature struct {
+ Data []Item
+ // BlessingsHash is a key for the validation cache; the corresponding
+ // cached value is a security.Blessings.
+ BlessingsHash []byte
+ // AuthorSigned is the signature of Data and BlessingsHash using the
+ // private key associated with the blessings hashed in BlessingsHash.
+ AuthorSigned security.Signature
+ IsValidated bool // Whether fields below are meaningful.
+ // ValidatorDataHash is a key for the validation cache returned by
+ // ValidatorData.Hash(); the corresponding cached value is the
+ // ValidatorData.
+ ValidatorDataHash []byte
+ ValidatorSigned security.Signature
+}
+
+func (DataWithSignature) __VDLReflect(struct {
+ Name string `vdl:"v.io/syncbase/x/ref/services/syncbase/signing.DataWithSignature"`
+}) {
+}
+
+type (
+ // Item represents any single field of the Item union type.
+ //
+ // An Item represents either a marshalled data item or its SHA-256 hash.
+ // The Data field is a []byte, rather than an "any", to make signatures
+ // deterministic. VOM encoding is not deterministic for two reasons:
+ // - map elements may be marshalled in any order
+ // - different versions of VOM may marshal in different ways.
+ // Thus, the initial producer of a data item marshals the data once, and it is
+ // this marshalled form that is transmitted from device to device. If the
+ // data were unmarshalled and then remarshalled, the signatures might not
+ // match. The Hash field is used instead of the Data field when the recipient
+ // of the DataWithSignature is not permitted to see certain Items' Data
+ // fields.
+ Item interface {
+ // Index returns the field index.
+ Index() int
+ // Interface returns the field value as an interface.
+ Interface() interface{}
+ // Name returns the field name.
+ Name() string
+ // __VDLReflect describes the Item union type.
+ __VDLReflect(__ItemReflect)
+ }
+ // ItemData represents field Data of the Item union type.
+ ItemData struct{ Value []byte } // Marshalled form of data.
+ // ItemHash represents field Hash of the Item union type.
+ ItemHash struct{ Value []byte } // Hash of what would have been in Data, as returned by SumByteVectorWithLength(Data).
+ // __ItemReflect describes the Item union type.
+ __ItemReflect struct {
+ Name string `vdl:"v.io/syncbase/x/ref/services/syncbase/signing.Item"`
+ Type Item
+ Union struct {
+ Data ItemData
+ Hash ItemHash
+ }
+ }
+)
+
+func (x ItemData) Index() int { return 0 }
+func (x ItemData) Interface() interface{} { return x.Value }
+func (x ItemData) Name() string { return "Data" }
+func (x ItemData) __VDLReflect(__ItemReflect) {}
+
+func (x ItemHash) Index() int { return 1 }
+func (x ItemHash) Interface() interface{} { return x.Value }
+func (x ItemHash) Name() string { return "Hash" }
+func (x ItemHash) __VDLReflect(__ItemReflect) {}
+
+// WireValidatorData is the wire form of ValidatorData.
+// It excludes the unmarshalled form of the public key.
+type WireValidatorData struct {
+ Names []string // Names of valid signing blessings in the Blessings referred to by BlessingsHash.
+ MarshalledPublicKey []byte // PublicKey, marshalled with MarshalBinary().
+}
+
+func (WireValidatorData) __VDLReflect(struct {
+ Name string `vdl:"v.io/syncbase/x/ref/services/syncbase/signing.WireValidatorData"`
+}) {
+}
+
+func init() {
+ vdl.Register((*DataWithSignature)(nil))
+ vdl.Register((*Item)(nil))
+ vdl.Register((*WireValidatorData)(nil))
+}
diff --git a/services/syncbase/signing/signing.go b/services/syncbase/signing/signing.go
new file mode 100644
index 0000000..0115f37
--- /dev/null
+++ b/services/syncbase/signing/signing.go
@@ -0,0 +1,358 @@
+// Copyright 2015 The Vanadium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// Package signing signs syncbase updates using public key signatures, and
+// allows these signatures to be checked on other nodes.
+//
+// The functionality is geared specifically towards syncbase synchronization
+// because it is designed to allow a signature to remain valid during its
+// propagation across the syncgroup once it has been accepted by at least one
+// member of a syncgroup, even if the original key or its blessings are
+// invalidated in the meantime.
+//
+// There are three types of participant:
+// - an "author", which creates an update, and signs it with Sign().
+// - one or more "validators", each of which receives a change directly from
+// the author, and applies Check() to validate it.
+// - zero or more "checkers', each of whom receives a change from a validator
+// or another checker, and applied Check() to check it.
+//
+// A validator checks the signature and blessings provided by the author, and
+// then appends its own signature, vouching for the fact that the author's
+// signature was good at the time the validator saw it.
+//
+// A checker checks the signatures of both the author and validator but uses
+// weaker checks for signature validity than a validator. In particular, it
+// uses a significant grace period for key expiry so that a change admitted to
+// the syncgroup by a validator has an opportunity to propagate to all the
+// nodes in the syncgroup if the keys or blessings are revoked after the change
+// is admitted, but before it is fully propagated. The intent is that the
+// grace period be chosen to be greater than the diameter of the syncgroup
+// (measured in time). One way to ensure that is to insist that members sync
+// with a central server at least every T time units, and make the grace period
+// be 2T. The central server may sign the data anew to allow new members to pick
+// it up.
+//
+// The model is further complicated by performance concerns. An update written
+// to syncbase might be quite small (perhaps tens of bytes) but:
+// a) a public key signature or verification can take on the order of a
+// millisecond. (Currently, ECDSA signing might take a little under 1ms and
+// verification just over 2ms on a workstation. A checker performs two such
+// verifications.)
+// b) unmarshalling even a simple Blessings object can take milliseconds. (!)
+// c) marshalling a public key can take 10us.
+// d) a Blessings object is on the order of a kilobyte or more, which may
+// represent substantial space overhead if duplicated.
+//
+// Because of (a), we wish to batch syncbase updates, so that a single
+// signature check applies to several updates. Thus the Data in a
+// DataWithSignature is a vector of Item, rather than a single Item.
+//
+// However, we will not always wish to put all updates in the same batch. For
+// example, an author and a validator might share two different syncgroups with
+// different memberships. In such a case, the author might keep the batches
+// for one syncgroup separate from batches for the other syncgroup, even though
+// the author blessings and validator identities are the same for all the
+// batches. Thus, because of (b,c,d), it's worth decoupling the author's
+// Blessings data and the validator's key data from the signed batches
+// themselves, so that the blessings and validator data can be processed
+// once, even though several batches of updates are being sent. A
+// ValidationCache is used to hold this data separately, and allow it to be
+// sent just once, rather than once per signature.
+//
+// Lastly, imagine that the author sends a batch of 10 updates to a validator,
+// and the validator then syncs with a checker that is permitted to see only
+// half of the updates; perhaps ACLs prevent it from seeing the others. This
+// requires that the signature on the batch remain valid even if some of the
+// updates in the batch are removed. This is accomplished via the Item type,
+// which is a VDL union type that contains either the bytes of the marshalled
+// form of the update, or (if the update must not be sent) the SHA-256 hash of
+// the data (which can be computed with SumByteVectorWithLength()).
+package signing
+
+import "bytes"
+import "crypto/sha256"
+import "encoding/binary"
+import "hash"
+import "time"
+
+import "v.io/syncbase/x/ref/services/syncbase/signing/krl"
+import "v.io/v23/context"
+import "v.io/v23/security"
+import "v.io/v23/verror"
+
+const pkgPath = "v.io/syncbase/x/ref/services/syncbase/signing"
+
+// These are among the errors that may be returned by Check(); they indicate that the
+// operation should be retried when new data has been added to the
+// ValidationCache. The errors are public to make it easier for the client to
+// test for them.
+var (
+ ErrNeedAuthorBlessingsAndValidatorDataForHash = verror.Register(
+ pkgPath+".ErrNeedAuthorBlessingsAndValidatorDataForHash",
+ verror.RetryRefetch,
+ "{1:}{2:} The ValidationCache contains neither the author blessings nor the validator data{:_}")
+ ErrNeedAuthorBlessingsForHash = verror.Register(
+ pkgPath+".ErrNeedAuthorBlessingsForHash",
+ verror.RetryRefetch,
+ "{1:}{2:} The ValidationCache does not contain the author blessings{:_}")
+ ErrNeedValidatorDataForHash = verror.Register(
+ pkgPath+".ErrNeedValidatorDataForHash",
+ verror.RetryRefetch,
+ "{1:}{2:} The ValidationCache does not contain the validator data{:_}")
+)
+
+// These errors are less likely to be tested for, and so are not exported.
+var (
+ errAuthorKeyIsRevoked = verror.Register(
+ pkgPath+".errAuthorKeyIsRevoked",
+ verror.NoRetry,
+ "{1:}{2:} The author key has been revoked{:_}")
+ errBadAuthorSignature = verror.Register(
+ pkgPath+".errBadAuthorSignature",
+ verror.NoRetry,
+ "{1:}{2:} Author signature verification failed{:_}")
+ errBadValidatorSignature = verror.Register(
+ pkgPath+".errBadValidatorSignature",
+ verror.NoRetry,
+ "{1:}{2:} Validator signature verification failed{:_}")
+ errAuthorBlessingsHaveNoValidNames = verror.Register(
+ pkgPath+".errAuthorBlessingsHaveNoValidNames",
+ verror.NoRetry,
+ "{1:}{2:} Author Blessings have no valid names{:_}")
+ errMayNotValidateOwnSignature = verror.Register(
+ pkgPath+".errMayNotValidateOwnSignature",
+ verror.NoRetry,
+ "{1:}{2:} Author may not validate its own signature{:_}")
+ errSenderIsNotAuthor = verror.Register(
+ pkgPath+".errSenderIsNotAuthor",
+ verror.NoRetry,
+ "{1:}{2:} Author is not sender of RPC; will not validate{:_}")
+ errValidatesWrongNames = verror.Register(
+ pkgPath+".errValidatesWrongNames",
+ verror.NoRetry,
+ "{1:}{2:} The validated names are not a subset of the names sent by the checker{:_}")
+ errValidatorIsSigner = verror.Register(
+ pkgPath+".errValidatorIsSigner",
+ verror.NoRetry,
+ "{1:}{2:} The signature was validated by its author; treating as invalid{:_}")
+ errValidatorKeyIsRevoked = verror.Register(
+ pkgPath+".errValidatorKeyIsRevoked",
+ verror.NoRetry,
+ "{1:}{2:} The validator key is revoked{:_}")
+)
+
+// --------------------------------------------
+
+// SignData() uses authorPrincipal to sign data using blessings (which must be
+// associated with the authorPrincipal). A pointer to a newly constructed
+// DataWithSignature with IsValidated==false is returned. Ensures that the
+// blessings are stored in *cache. Typically, "authorPrincipal" is obtained from
+// v23.GetPrincipal(ctx).
+//
+// If a recipient of the result *d complains that it does not understand the
+// hash d.BlessingsHash, the signer should present it with
+// blessingsData.MarshalledBlessings, which will allow the recipient to
+// construct the Blessings. The Blessings are transmitted out of line because
+// they are large, and may be reused for multiple signatures.
+func SignData(ctx *context.T, cache *ValidationCache, authorPrincipal security.Principal,
+ blessings security.Blessings, data []Item) (d *DataWithSignature, blessingsData *BlessingsData, err error) {
+
+ d = new(DataWithSignature)
+ d.Data = data
+ d.BlessingsHash, blessingsData, err = cache.AddBlessings(ctx, blessings)
+ if err == nil {
+ d.AuthorSigned, err = authorPrincipal.Sign(d.authorSignatureHash())
+ }
+ return d, blessingsData, err
+}
+
+// hashByteVectorWithLength() calls hasher.Write() on a representation of
+// len(b), followed by the contents of b.
+func hashByteVectorWithLength(hasher hash.Hash, b []byte) {
+ var length [8]byte
+ binary.LittleEndian.PutUint64(length[:], uint64(len(b)))
+ hasher.Write(length[:])
+ hasher.Write(b)
+}
+
+// SumByteVectorWithLength() returns a SHA-256 hash of
+// len(b), followed by the contents of b.
+func SumByteVectorWithLength(b []byte) []byte {
+ hasher := sha256.New()
+ var length [8]byte
+ binary.LittleEndian.PutUint64(length[:], uint64(len(b)))
+ hasher.Write(length[:])
+ hasher.Write(b)
+ return hasher.Sum(nil)[:]
+}
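+
+// For example, SumByteVectorWithLength([]byte("hi")) is equivalent to the
+// following sketch (standard library only):
+//
+//	h := sha256.New()
+//	h.Write([]byte{2, 0, 0, 0, 0, 0, 0, 0}) // little-endian uint64 length of "hi"
+//	h.Write([]byte("hi"))
+//	sum := h.Sum(nil)
+//
+// The length prefix removes ambiguity when several byte vectors are hashed in
+// sequence, as authorSignatureHash() and validatorSignatureHash() below do.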
+
+// authorSignatureHash() returns the hash that the author should sign.
+func (d *DataWithSignature) authorSignatureHash() []byte {
+ hasher := sha256.New()
+ var length [8]byte
+ binary.LittleEndian.PutUint64(length[:], uint64(len(d.Data)))
+ hasher.Write(length[:])
+ for i := range d.Data {
+ if data, gotData := d.Data[i].(ItemData); gotData {
+ hasher.Write(SumByteVectorWithLength(data.Value))
+ } else if hash, gotHash := d.Data[i].(ItemHash); gotHash {
+ hasher.Write(hash.Value)
+ } else {
+ // d.Data[i] is neither a Data nor a Hash. This shouldn't
+ // happen unless the marshalled data is somehow
+ // corrupted. The signature will not match unless the
+ // original author of the data saw the same corruption.
+ hasher.Write([]byte("no data"))
+ }
+ }
+ hashByteVectorWithLength(hasher, d.BlessingsHash)
+ return hasher.Sum(nil)[:]
+}
+
+// validatorSignatureHash() returns the hash that the validator should sign,
+// given the hash that the author signed.
+func (d *DataWithSignature) validatorSignatureHash(authorSignatureHash []byte) []byte {
+ var buffer [32]byte
+ var buf []byte = buffer[:]
+ if len(d.AuthorSigned.Hash) > len(buf) {
+ buf = make([]byte, len(d.AuthorSigned.Hash))
+ }
+ hasher := sha256.New()
+ hashByteVectorWithLength(hasher, authorSignatureHash)
+ hashByteVectorWithLength(hasher, d.AuthorSigned.Purpose)
+ hashByteVectorWithLength(hasher, buf[:copy(buf, d.AuthorSigned.Hash)])
+ hashByteVectorWithLength(hasher, d.AuthorSigned.R)
+ hashByteVectorWithLength(hasher, d.AuthorSigned.S)
+ hashByteVectorWithLength(hasher, d.ValidatorDataHash)
+ return hasher.Sum(nil)[:]
+}
+
+// Check() verifies the signature(s) on *d:
+//
+// If d.IsValidated==false, checks that:
+// 1. the author's signature is available in *cache.
+// 2. the author's signature over its blessings and the data is
+// cryptographically valid.
+// 3. security.SigningBlessingNames() yields a non-empty list of names when
+// applied to the author's blessings.
+// 4. the author's public key is not known to be revoked.
+// 5. the local principal's public key (call.LocalPrincipal().PublicKey()) is not known
+// to be revoked.
+// 6. the author's public key is the public key of the RPC caller.
+// 7. the author's public key and the local public key differ.
+// If checks pass and there are no other errors:
+// - records the list of names found in check (3) in the ValidatorData
+// - adds a validation signature using the local public key (which is now the
+// validator)
+// - sets d.IsValidated
+// - returns the list of names found in check (3), and a nil error.
+// Otherwise returns a nil list of names and a non-nil error.
+//
+// If d.IsValidated==true, checks that:
+// 1. the author's signature and the validator data are available in *cache.
+// 2. the author's signature over its blessings and the data is
+// cryptographically valid.
+// 8. the list of names stored in the ValidatorData by the validator is
+// non-empty.
+// 9. the author's public key and the validator's public key differ.
+// 10. the list of names stored in the ValidatorData by the validator is a
+// subset of the list of names that the author's blessings could have
+// represented.
+// 11. the author's public key is not known to be revoked more than
+// gracePeriod ago.
+// 12. the validator's public key is not known to be revoked more than
+// gracePeriod ago.
+// 13. the validator's signature is cryptographically valid.
+// If checks pass and there are no other errors:
+// - returns the list of names in the validator's data, and a nil error.
+// Otherwise returns a nil list of names and a non-nil error.
+func (d *DataWithSignature) Check(ctx *context.T, cache *ValidationCache, call security.Call,
+ krl *krl.KRL, gracePeriod time.Duration) (names []string, err error) {
+
+ // Verify that we have the Blessings and ValidatorData.
+ var authorBlessingsData *BlessingsData = cache.LookupBlessingsData(ctx, d.BlessingsHash)
+ var validatorData *ValidatorData
+ if d.IsValidated {
+ validatorData = cache.LookupValidatorData(ctx, d.ValidatorDataHash)
+ }
+ if authorBlessingsData == nil || (validatorData == nil && d.IsValidated) { // Check (1).
+ if authorBlessingsData == nil && (validatorData == nil && d.IsValidated) {
+ err = verror.New(ErrNeedAuthorBlessingsAndValidatorDataForHash, ctx)
+ } else if authorBlessingsData == nil {
+ err = verror.New(ErrNeedAuthorBlessingsForHash, ctx)
+ } else {
+ err = verror.New(ErrNeedValidatorDataForHash, ctx)
+ }
+ }
+
+ // Check the author signature.
+ var authorSignatureHash []byte
+ if err == nil {
+ authorSignatureHash = d.authorSignatureHash()
+ if !d.AuthorSigned.Verify(authorBlessingsData.UnmarshalledBlessings.PublicKey(), authorSignatureHash) { // Check (2).
+ err = verror.New(errBadAuthorSignature, ctx)
+ }
+ }
+
+ // Check or create the validator signature.
+ now := time.Now()
+ if err != nil {
+ // err already set
+ } else if !d.IsValidated {
+ // Not yet validated, so this run will attempt to validate.
+ var validatedNames []string
+ var localKeyMarshalled []byte
+ var senderKeyMarshalled []byte
+ validatedNames, _ = security.SigningBlessingNames(ctx, call.LocalPrincipal(),
+ authorBlessingsData.UnmarshalledBlessings)
+ if len(validatedNames) == 0 { // Check (3).
+ err = verror.New(errAuthorBlessingsHaveNoValidNames, ctx)
+ } else if localKeyMarshalled, err = call.LocalPrincipal().PublicKey().MarshalBinary(); err != nil {
+ // err already set
+ } else if krl.RevocationTime(authorBlessingsData.MarshalledPublicKey).Before(now) { // Check (4).
+ err = verror.New(errAuthorKeyIsRevoked, ctx)
+ } else if krl.RevocationTime(localKeyMarshalled).Before(now) { // Check (5).
+ err = verror.New(errValidatorKeyIsRevoked, ctx)
+ } else if senderKeyMarshalled, err = call.RemoteBlessings().PublicKey().MarshalBinary(); err != nil {
+ // err already set
+ } else if !bytes.Equal(senderKeyMarshalled, authorBlessingsData.MarshalledPublicKey) { // Check (6).
+ err = verror.New(errSenderIsNotAuthor, ctx)
+ } else if bytes.Equal(localKeyMarshalled, authorBlessingsData.MarshalledPublicKey) { // Check (7).
+ err = verror.New(errMayNotValidateOwnSignature, ctx)
+ } else {
+ // Local principal is different from author, so can validate.
+ validatorData = &ValidatorData{
+ Names: validatedNames,
+ PublicKey: call.LocalPrincipal().PublicKey(),
+ MarshalledPublicKey: localKeyMarshalled,
+ }
+ d.ValidatorDataHash = cache.AddValidatorData(ctx, validatorData)
+ d.ValidatorSigned, err = call.LocalPrincipal().Sign(d.validatorSignatureHash(authorSignatureHash))
+ d.IsValidated = (err == nil)
+ }
+ } else { // Data already validated; check the validator signature.
+ if len(validatorData.Names) == 0 { // Check (8).
+ err = verror.New(errAuthorBlessingsHaveNoValidNames, ctx)
+ } else if bytes.Equal(validatorData.MarshalledPublicKey, authorBlessingsData.MarshalledPublicKey) { // Check (9).
+ err = verror.New(errValidatorIsSigner, ctx)
+ } else if !authorBlessingsData.UnmarshalledBlessings.CouldHaveNames(validatorData.Names) { // Check (10).
+ err = verror.New(errValidatesWrongNames, ctx)
+ } else if krl.RevocationTime(authorBlessingsData.MarshalledPublicKey).Before(now.Add(-gracePeriod)) { // Check (11).
+ err = verror.New(errAuthorKeyIsRevoked, ctx)
+ } else if krl.RevocationTime(validatorData.MarshalledPublicKey).Before(now.Add(-gracePeriod)) { // Check (12).
+ err = verror.New(errValidatorKeyIsRevoked, ctx)
+ } else if !d.ValidatorSigned.Verify(validatorData.PublicKey, d.validatorSignatureHash(authorSignatureHash)) { // Check (13).
+ err = verror.New(errBadValidatorSignature, ctx)
+ } // else success.
+ }
+
+ // If there were no errors, return the list of names from the validator.
+ if err == nil {
+ names = make([]string, len(validatorData.Names))
+ copy(names, validatorData.Names)
+ }
+
+ return names, err
+}
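+
+// Typical end-to-end flow (illustrative sketch only; error handling is elided,
+// and ctx, cache, call, krl, and gracePeriod are assumed to come from the
+// surrounding RPC and syncgroup machinery):
+//
+//	// Author side:
+//	d, blessingsData, _ := SignData(ctx, cache, principal, blessings, items)
+//	// ... send d (and blessingsData.MarshalledBlessings on request) ...
+//
+//	// Validator or checker side, on receiving d:
+//	names, err := d.Check(ctx, cache, call, krl, gracePeriod)
+//	// If verror.ErrorID(err) matches one of the three exported IDs above
+//	// (e.g. ErrNeedAuthorBlessingsForHash.ID), fetch the missing blessings or
+//	// validator data, add it with AddWireBlessings()/AddValidatorData(), and retry.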
diff --git a/services/syncbase/signing/signing_test.go b/services/syncbase/signing/signing_test.go
new file mode 100644
index 0000000..96881f3
--- /dev/null
+++ b/services/syncbase/signing/signing_test.go
@@ -0,0 +1,421 @@
+// Copyright 2015 The Vanadium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// Package signing_test implements a test for the package
+// v.io/syncbase/x/ref/services/syncbase/signing
+package signing_test
+
+import "crypto/sha256"
+import "testing"
+import "time"
+
+import "v.io/syncbase/x/ref/services/syncbase/signing"
+import "v.io/syncbase/x/ref/services/syncbase/signing/krl"
+import "v.io/v23/naming"
+import "v.io/v23/security"
+import "v.io/v23/vdl"
+import "v.io/v23/vom"
+import "v.io/v23/verror"
+import "v.io/x/ref/test"
+import lib_security "v.io/x/ref/lib/security"
+
+import _ "v.io/x/ref/runtime/factories/generic"
+
+// --------------------------------------
+// The following implements a fake security.Call.
+type fakeCall struct {
+ localPrincipal security.Principal
+ localBlessings security.Blessings
+ remoteBlessings security.Blessings
+}
+
+func (fc *fakeCall) Timestamp() time.Time { return time.Now() }
+func (fc *fakeCall) Method() string { return "the_method_name" }
+func (fc *fakeCall) MethodTags() []*vdl.Value { return nil }
+func (fc *fakeCall) Suffix() string { return "the_suffix" }
+func (fc *fakeCall) LocalDischarges() map[string]security.Discharge { return nil }
+func (fc *fakeCall) RemoteDischarges() map[string]security.Discharge { return nil }
+func (fc *fakeCall) LocalPrincipal() security.Principal { return fc.localPrincipal }
+func (fc *fakeCall) LocalBlessings() security.Blessings { return fc.localBlessings }
+func (fc *fakeCall) RemoteBlessings() security.Blessings { return fc.remoteBlessings }
+func (fc *fakeCall) LocalEndpoint() naming.Endpoint { return nil }
+func (fc *fakeCall) RemoteEndpoint() naming.Endpoint { return nil }
+
+// --------------------------------------
+
+// A principalDesc holds the local state of a single principal in the tests below.
+type principalDesc struct {
+ name string
+ principal security.Principal
+ blessings security.Blessings
+ krl *krl.KRL
+ authorBlessingsData *signing.BlessingsData
+ names []string
+ marshalledBlessings []byte
+ blessingsHash []byte
+ validatorData *signing.ValidatorData
+ validatorHash []byte
+ cache *signing.ValidationCache
+ data *signing.DataWithSignature
+}
+
+// makePrincipal() returns a pointer to a newly-initialized principalDesc,
+// with a unique key, and a single blessing named with its own name.
+func makePrincipal(t testing.TB, name string) (desc *principalDesc) {
+ var err error
+ desc = new(principalDesc)
+ desc.name = name
+ desc.principal, err = lib_security.NewPrincipal()
+ if err != nil {
+ t.Fatalf("security.CreatePrincipal %q failed: %v", desc.name, err)
+ }
+ desc.blessings, err = desc.principal.BlessSelf(desc.name)
+ if err != nil {
+ t.Fatalf("principal.BlessSelf %q failed: %v", desc.name, err)
+ }
+ desc.krl = krl.New()
+ desc.cache = signing.NewValidationCache(5 * time.Second)
+ return desc
+}
+
+// makePrincipals() creates one principal per name, and adds
+// the blessings of each to the roots of all.
+func makePrincipals(t testing.TB, names ...string) (principals []*principalDesc) {
+ for i := range names {
+ principals = append(principals, makePrincipal(t, names[i]))
+ }
+ for i := range principals {
+ for j := range principals {
+ principals[j].principal.AddToRoots(principals[i].blessings)
+ }
+ }
+ return principals
+}
+
+// BenchmarkHashData() measures the time taken to compute a cryptographic hash
+// of 1kB of data.
+func BenchmarkHashData(b *testing.B) {
+ var block [1024]byte
+ hasher := sha256.New()
+ b.ResetTimer()
+
+ for i := 0; i < b.N; i++ {
+ hasher.Write(block[:])
+ }
+}
+
+// BenchmarkSignData() measures the time taken to sign something with
+// signing.SignData().
+func BenchmarkSignData(b *testing.B) {
+ ctx, shutdown := test.V23Init()
+ defer shutdown()
+ var err error
+ author := makePrincipal(b, "author")
+ dataToSign := []signing.Item{signing.ItemData{Value: []byte("hello")}}
+ b.ResetTimer()
+
+ for i := 0; i < b.N; i++ {
+ author.data, author.authorBlessingsData, err =
+ signing.SignData(ctx, author.cache, author.principal, author.blessings, dataToSign)
+ }
+ if err != nil {
+ panic(err)
+ }
+}
+
+// BenchmarkSign1000Data() measures the time taken to sign 1000 small data
+// items with signing.SignData().
+func BenchmarkSign1000Data(b *testing.B) {
+ ctx, shutdown := test.V23Init()
+ defer shutdown()
+ var err error
+ author := makePrincipal(b, "author")
+ var dataToSign []signing.Item
+ for i := 0; i != 1000; i++ {
+ dataToSign = append(dataToSign, signing.ItemData{Value: []byte("hello")})
+ }
+ b.ResetTimer()
+
+ for i := 0; i < b.N; i++ {
+ author.data, author.authorBlessingsData, err =
+ signing.SignData(ctx, author.cache, author.principal, author.blessings, dataToSign)
+ }
+ if err != nil {
+ panic(err)
+ }
+}
+
+// BenchmarkCheckData() measures the time taken to check a validated signature
+// with DataWithSignature.Check().
+func BenchmarkCheckData(b *testing.B) {
+ ctx, shutdown := test.V23Init()
+ defer shutdown()
+ var err error
+
+ principals := makePrincipals(b, "author", "validator", "checker")
+ author := principals[0]
+ validator := principals[1]
+ checker := principals[2]
+
+ dataToSign := []signing.Item{signing.ItemData{Value: []byte("hello")}}
+ author.data, author.authorBlessingsData, err =
+ signing.SignData(ctx, author.cache, author.principal, author.blessings, dataToSign)
+ if err != nil {
+ panic(err)
+ }
+ callToValidator := fakeCall{
+ localPrincipal: validator.principal,
+ localBlessings: validator.blessings,
+ remoteBlessings: author.blessings,
+ }
+ validator.names, err = author.data.Check(ctx, author.cache, &callToValidator, validator.krl, 24*30*time.Hour)
+ if err != nil {
+ panic(err)
+ }
+ callToChecker := fakeCall{
+ localPrincipal: checker.principal,
+ localBlessings: checker.blessings,
+ remoteBlessings: validator.blessings,
+ }
+ b.ResetTimer()
+ for i := 0; i < b.N; i++ {
+ checker.names, err = author.data.Check(ctx, author.cache, &callToChecker, checker.krl, 24*30*time.Hour)
+ }
+}
+
+// BenchmarkCheck1000Data() measures the time taken to check a validated
+// signature over 1000 small data items with DataWithSignature.Check().
+func BenchmarkCheck1000Data(b *testing.B) {
+ ctx, shutdown := test.V23Init()
+ defer shutdown()
+ var err error
+
+ principals := makePrincipals(b, "author", "validator", "checker")
+ author := principals[0]
+ validator := principals[1]
+ checker := principals[2]
+
+ var dataToSign []signing.Item
+ for i := 0; i != 1000; i++ {
+ dataToSign = append(dataToSign, signing.ItemData{Value: []byte("hello")})
+ }
+ author.data, author.authorBlessingsData, err =
+ signing.SignData(ctx, author.cache, author.principal, author.blessings, dataToSign)
+ if err != nil {
+ panic(err)
+ }
+ callToValidator := fakeCall{
+ localPrincipal: validator.principal,
+ localBlessings: validator.blessings,
+ remoteBlessings: author.blessings,
+ }
+ validator.names, err = author.data.Check(ctx, author.cache, &callToValidator, validator.krl, 24*30*time.Hour)
+ if err != nil {
+ panic(err)
+ }
+ callToChecker := fakeCall{
+ localPrincipal: checker.principal,
+ localBlessings: checker.blessings,
+ remoteBlessings: validator.blessings,
+ }
+ b.ResetTimer()
+ for i := 0; i < b.N; i++ {
+ checker.names, err = author.data.Check(ctx, author.cache, &callToChecker, checker.krl, 24*30*time.Hour)
+ }
+}
+
+// BenchmarkMarshallBlessings() measures the time taken to marshal a Blessings.
+func BenchmarkMarshallBlessings(b *testing.B) {
+ var err error
+ author := makePrincipal(b, "author")
+ b.ResetTimer()
+ for i := 0; i < b.N; i++ {
+ author.marshalledBlessings, err = vom.Encode(author.blessings)
+ }
+ if err != nil {
+ b.Fatalf("vom.Encode failed: %v", err)
+ }
+}
+
+// BenchmarkUnmarshallBlessings() measures the time taken to unmarshal a Blessings.
+func BenchmarkUnmarshallBlessings(b *testing.B) {
+ var err error
+ author := makePrincipal(b, "author")
+ author.marshalledBlessings, err = vom.Encode(author.blessings)
+ if err != nil {
+ b.Fatalf("vom.Encode failed: %v", err)
+ }
+ var blessings security.Blessings
+ b.ResetTimer()
+ for i := 0; i < b.N; i++ {
+ err = vom.Decode(author.marshalledBlessings, &blessings)
+ }
+ if err != nil {
+ b.Fatalf("vom.Encode failed: %v", err)
+ }
+}
+
+// BenchmarkMarshallPublicKey() measures the time taken to marshal a PublicKey.
+func BenchmarkMarshallPublicKey(b *testing.B) {
+ var err error
+ author := makePrincipal(b, "author")
+ b.ResetTimer()
+ for i := 0; i < b.N; i++ {
+ _, err = author.principal.PublicKey().MarshalBinary()
+ }
+ if err != nil {
+ b.Fatalf("MarshalBinary() failed: %v", err)
+ }
+}
+
+// BenchmarkUnmarshallPublicKey() measures the time taken to unmarshal a PublicKey.
+func BenchmarkUnmarshallPublicKey(b *testing.B) {
+ var err error
+ author := makePrincipal(b, "author")
+ var marshalledKey []byte
+ marshalledKey, err = author.principal.PublicKey().MarshalBinary()
+ if err != nil {
+ b.Fatalf("MarshalBinary() failed: %v", err)
+ }
+ b.ResetTimer()
+ for i := 0; i < b.N; i++ {
+ _, err = security.UnmarshalPublicKey(marshalledKey)
+ }
+ if err != nil {
+ b.Fatalf("MarshalBinary() failed: %v", err)
+ }
+}
+
+// TestSignData() tests that a complete flow of signing, validating, and
+// checking works on a DataWithSignature.
+func TestSignData(t *testing.T) {
+ ctx, shutdown := test.V23Init()
+ defer shutdown()
+
+ var err error
+
+ principals := makePrincipals(t, "author", "validator", "checker")
+ author := principals[0]
+ validator := principals[1]
+ checker := principals[2]
+
+ // Add each principal's blessings to each principal's roots.
+ pdList := []*principalDesc{author, validator, checker}
+ for i := 0; i != len(pdList); i++ {
+ for j := 0; j != len(pdList); j++ {
+ pdList[j].principal.AddToRoots(pdList[i].blessings)
+ }
+ }
+
+ // --------------------------------------
+ // Author
+ // Sign some data.
+ dataToSign := []signing.Item{
+ signing.ItemData{Value: []byte("hello")},
+ signing.ItemData{Value: []byte("world")},
+ signing.ItemData{Value: []byte("!")},
+ }
+ author.data, author.authorBlessingsData, err =
+ signing.SignData(ctx, author.cache, author.principal, author.blessings, dataToSign)
+ if err != nil {
+ t.Fatalf("signing.SignData failed: %v", err)
+ }
+ if author.data.IsValidated {
+ t.Fatalf("signing.SignData generated data with IsValidated set")
+ }
+
+ // --------------------------------------
+ // Validator
+ callToValidator := fakeCall{
+ localPrincipal: validator.principal,
+ localBlessings: validator.blessings,
+ remoteBlessings: author.blessings,
+ }
+ // The validator receives author.data from the author.
+ validator.data = new(signing.DataWithSignature)
+ *validator.data = *author.data
+ // Initially the validator doesn't have the author BlessingsData.
+ validator.authorBlessingsData = validator.cache.LookupBlessingsData(ctx, validator.data.BlessingsHash)
+ if validator.authorBlessingsData != nil {
+ t.Errorf("found non-nil BlessingsData for validator.data.BlessingsHash in validator's ValidationCache")
+ }
+ validator.names, err = validator.data.Check(ctx, validator.cache, &callToValidator, validator.krl, 24*30*time.Hour)
+ if verror.ErrorID(err) != signing.ErrNeedAuthorBlessingsForHash.ID {
+ t.Fatalf("validator.data.Check got err %v, want %s", err, signing.ErrNeedAuthorBlessingsForHash.ID)
+ }
+
+ // The validator receives the author's marshalled blessings from the author.
+ validator.marshalledBlessings = author.authorBlessingsData.MarshalledBlessings
+ validator.blessingsHash, validator.authorBlessingsData, err = validator.cache.AddWireBlessings(ctx, validator.marshalledBlessings)
+ if err != nil {
+ t.Fatalf("validator can't add author's marshalled belssings to its ValidationCache: %v", err)
+ }
+
+ validator.names, err = validator.data.Check(ctx, validator.cache, &callToValidator, validator.krl, 24*30*time.Hour)
+ if err != nil {
+ t.Fatalf("validator error calling Check() on data: %v", err)
+ }
+ if !validator.data.IsValidated {
+ t.Fatalf("signing.Check didn't set IsValidated")
+ }
+ // Validator's cache should now have the author's BlessingData, and the validator's ValidatorData.
+ validator.authorBlessingsData = validator.cache.LookupBlessingsData(ctx, validator.data.BlessingsHash)
+ if validator.authorBlessingsData == nil {
+ t.Errorf("didn't finf BlessingsData for validator.data.BlessingsHash in validator's ValidationCache")
+ }
+ validator.validatorData = validator.cache.LookupValidatorData(ctx, validator.data.ValidatorDataHash)
+
+ // --------------------------------------
+ // Checker
+ callToChecker := fakeCall{
+ localPrincipal: checker.principal,
+ localBlessings: checker.blessings,
+ remoteBlessings: validator.blessings,
+ }
+ // The checker receives validator.data from the validator, except that
+ // data item 1 is replaced by its hash, because (for example) the
+ // checker is not allowed to see it.
+ checker.data = new(signing.DataWithSignature)
+ *checker.data = *validator.data
+ checker.data.Data[1] = signing.ItemHash{Value: signing.SumByteVectorWithLength(checker.data.Data[1].(signing.ItemData).Value)}
+
+ // Initially the checker doesn't have the author BlessingsData, or the validator ValidatorData.
+ checker.authorBlessingsData = checker.cache.LookupBlessingsData(ctx, checker.data.BlessingsHash)
+ if checker.authorBlessingsData != nil {
+ t.Errorf("found non-nil blessings data for checker.data.BlessingsHash hash in checker's ValidationCache")
+ }
+ checker.names, err = checker.data.Check(ctx, checker.cache, &callToChecker, checker.krl, 24*30*time.Hour)
+ if verror.ErrorID(err) != signing.ErrNeedAuthorBlessingsAndValidatorDataForHash.ID {
+ t.Fatalf("checker.data.Check got err %v, want %s", err, signing.ErrNeedAuthorBlessingsAndValidatorDataForHash.ID)
+ }
+
+ // The checker receives the author's marshalled blessings from the validator.
+ checker.marshalledBlessings = validator.marshalledBlessings
+ checker.blessingsHash, checker.authorBlessingsData, err = checker.cache.AddWireBlessings(ctx, checker.marshalledBlessings)
+ if err != nil {
+ t.Fatalf("checker can't add author's marshalled belssings to its ValidationCache: %v", err)
+ }
+ checker.names, err = checker.data.Check(ctx, checker.cache, &callToChecker, checker.krl, 24*30*time.Hour)
+ if verror.ErrorID(err) != signing.ErrNeedValidatorDataForHash.ID {
+ t.Fatalf("checker.data.Check got err %v, want %s", err, signing.ErrNeedValidatorDataForHash.ID)
+ }
+
+ // The checker receives the validator's data from the validator, passing through the wire format.
+ wvd := signing.ToWireValidatorData(validator.validatorData)
+ var vd signing.ValidatorData
+ vd, err = signing.FromWireValidatorData(&wvd)
+ if err != nil {
+ t.Fatalf("signing.FromWireValidatorData got error: %v", err)
+ }
+ checker.validatorData = &vd
+
+ // The checker adds the ValidatorData to its cache.
+ checker.validatorHash = checker.cache.AddValidatorData(ctx, checker.validatorData)
+
+ // And now the Check() operation should work.
+ checker.names, err = checker.data.Check(ctx, checker.cache, &callToChecker, checker.krl, 24*30*time.Hour)
+ if err != nil {
+ t.Fatalf("checker.data.Check got unexpected err %v", err)
+ }
+}
diff --git a/services/syncbase/signing/validationcache.go b/services/syncbase/signing/validationcache.go
new file mode 100644
index 0000000..1c95bd0
--- /dev/null
+++ b/services/syncbase/signing/validationcache.go
@@ -0,0 +1,190 @@
+// Copyright 2015 The Vanadium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// This module implements a cache of data associated with the
+// signatures, keyed by hash values of the data. The intent is that
+// communicating devices will refer to the data using hashes, and transmit the
+// data itself only if the device on the other side does not have the data in
+// its cache.
+
+package signing
+
+import "crypto/sha256"
+import "encoding/binary"
+import "time"
+
+import "v.io/syncbase/x/ref/services/syncbase/signing/hashcache"
+import "v.io/v23/context"
+import "v.io/v23/security"
+import "v.io/v23/vom"
+
+// --------------------------------------------
+
+// A BlessingsData contains information about a security.Blessings object. The
+// object itself is referred to by UnmarshalledBlessings. The implementation
+// constructs all instances; the client should not modify fields.
+type BlessingsData struct {
+ UnmarshalledBlessings security.Blessings // The Blessings.
+ MarshalledBlessings []byte // VOM encoded Blessings.
+ MarshalledPublicKey []byte // Value from blessings.PublicKey().MarshalBinary().
+}
+
+// A ValidatorData is the extra data that a validator signs when validating and
+// signing a DataWithSignature. Clients may construct instances to pass to
+// AddValidatorData(), but should not modify the fields of a constructed
+// ValidatorData.
+type ValidatorData struct {
+ Names []string // Names of valid signing blessings in the Blessings referred to by BlessingsHash.
+ PublicKey security.PublicKey // The key used to create ValidatorSigned.
+ MarshalledPublicKey []byte // PublicKey, marshalled with MarshalBinary().
+}
+
+// hash() returns the hash of *vd. This hash should be used in the
+// ValidatorDataHash field of DataWithSignature, and as the cache key of *vd
+// in a ValidationCache.
+func (vd *ValidatorData) hash() []byte {
+ hasher := sha256.New()
+ var buffer [256]byte
+ var buf []byte = buffer[:]
+ binary.LittleEndian.PutUint64(buf[:], uint64(len(vd.Names)))
+ hasher.Write(buf[:8])
+ for i := range vd.Names {
+ if len(vd.Names[i]) > len(buf) {
+ buf = make([]byte, len(vd.Names[i])+256)
+ }
+ hashByteVectorWithLength(hasher, []byte(vd.Names[i]))
+ }
+ hashByteVectorWithLength(hasher, vd.MarshalledPublicKey)
+ return hasher.Sum(nil)[:]
+}
+
+// A ValidationCache records recently-seen instances of BlessingsData and
+// ValidatorData values, keyed by hashes of the blessings and validator keys
+// respectively. Values may expire from the cache if unused for a duration
+// specified with NewValidationCache().
+type ValidationCache struct {
+ blessingsCache *hashcache.Cache
+ validatorCache *hashcache.Cache
+}
+
+// NewValidationCache() returns a pointer to a new, empty ValidationCache with
+// the specified expiry duration.
+func NewValidationCache(expiry time.Duration) *ValidationCache {
+ return &ValidationCache{
+ blessingsCache: hashcache.New(expiry),
+ validatorCache: hashcache.New(expiry)}
+}
+
+// LookupBlessingsData() returns a pointer to the BlessingsData associated with
+// blessingsHash in *vc. blessingsHash should have been returned by a previous
+// call to AddBlessings() or AddWireBlessings() (possibly on another machine).
+// nil is returned if the data is not present. The client should not modify
+// *result, since it is shared with *vc.
+func (vc *ValidationCache) LookupBlessingsData(ctx *context.T, blessingsHash []byte) (result *BlessingsData) {
+ value, found := vc.blessingsCache.Lookup(blessingsHash)
+ if found {
+ result = value.(*BlessingsData)
+ }
+ return result
+}
+
+// addBlessings() adds a BlessingsData for blessings to *vc, and returns a hash
+// value, which if passed to LookupBlessingsData() will yield a pointer to the
+// BlessingsData, or a non-nil error. The fields of BlessingsData other than
+// MarshalledBlessings and UnmarshalledBlessings are constructed by this
+// routine. Requires that blessings and marshalledBlessings represent the same
+// data, or that marshalledBlessings be nil.
+func (vc *ValidationCache) addBlessings(ctx *context.T, blessings security.Blessings,
+ marshalledBlessings []byte) (blessingsHash []byte, data *BlessingsData, err error) {
+
+ blessingsHash = blessings.UniqueID()
+ if value, found := vc.blessingsCache.Lookup(blessingsHash); found {
+ data = value.(*BlessingsData)
+ } else { // not found
+ var marshalledKey []byte
+ if marshalledBlessings == nil {
+ marshalledBlessings, err = vom.Encode(blessings)
+ }
+ if err == nil {
+ marshalledKey, err = blessings.PublicKey().MarshalBinary()
+ }
+ if err == nil {
+ data = &BlessingsData{
+ UnmarshalledBlessings: blessings,
+ MarshalledBlessings: marshalledBlessings,
+ MarshalledPublicKey: marshalledKey}
+ vc.blessingsCache.Add(blessingsHash, data)
+ }
+ }
+ return blessingsHash, data, err
+}
+
+// AddBlessings() adds a BlessingsData for blessings to *vc, and
+// returns a hash value, which if passed to LookupBlessingsData() will yield a
+// pointer to the BlessingsData, or a non-nil error. The fields of
+// BlessingsData other than UnmarshalledBlessings are constructed by this
+// routine.
+func (vc *ValidationCache) AddBlessings(ctx *context.T, blessings security.Blessings) (blessingsHash []byte, data *BlessingsData, err error) {
+ return vc.addBlessings(ctx, blessings, nil)
+}
+
+// AddWireBlessings() adds a BlessingsData for blessings to *vc and returns a
+// hash value which, if passed to LookupBlessingsData(), will yield a pointer
+// to the BlessingsData, or a non-nil error on failure. The fields of
+// BlessingsData other than MarshalledBlessings are constructed by this
+// routine.
+func (vc *ValidationCache) AddWireBlessings(ctx *context.T,
+ marshalledBlessings []byte) (blessingsHash []byte, data *BlessingsData, err error) {
+
+ var blessings security.Blessings
+ err = vom.Decode(marshalledBlessings, &blessings)
+ if err == nil {
+ blessingsHash, data, err = vc.addBlessings(ctx, blessings, marshalledBlessings)
+ }
+ return blessingsHash, data, err
+}
+
+// LookupValidatorData() returns a pointer to the ValidatorData associated with
+// hash validatorHash in *vc. validatorHash should have been returned by a
+// previous call to AddValidatorData() (possibly on another machine). nil is
+// returned if the data is not present. The client should not modify *result,
+// since it is shared with *vc.
+func (vc *ValidationCache) LookupValidatorData(ctx *context.T, validatorHash []byte) (result *ValidatorData) {
+ value, found := vc.validatorCache.Lookup(validatorHash)
+ if found {
+ result = value.(*ValidatorData)
+ }
+ return result
+}
+
+// AddValidatorData() adds a ValidatorData *vd to cache *vc, and returns a hash
+// value, which if passed to LookupValidatorData() will yield a pointer to the
+// ValidatorData. The client should not modify *vd after the call, since it is
+// shared with *vc.
+func (vc *ValidationCache) AddValidatorData(ctx *context.T, vd *ValidatorData) (validatorDataHash []byte) {
+ validatorDataHash = vd.hash()
+ vc.validatorCache.Add(validatorDataHash, vd)
+ return validatorDataHash
+}
+
+// ToWireValidatorData() returns the wire form of ValidatorData *vd.
+func ToWireValidatorData(vd *ValidatorData) (wvd WireValidatorData) {
+ wvd.Names = make([]string, len(vd.Names))
+ copy(wvd.Names, vd.Names)
+ wvd.MarshalledPublicKey = make([]byte, len(vd.MarshalledPublicKey))
+ copy(wvd.MarshalledPublicKey, vd.MarshalledPublicKey)
+ return wvd
+}
+
+// FromWireValidatorData() returns the in-memory form of WireValidatorData *wvd.
+func FromWireValidatorData(wvd *WireValidatorData) (vd ValidatorData, err error) {
+ vd.PublicKey, err = security.UnmarshalPublicKey(wvd.MarshalledPublicKey)
+ if err == nil {
+ vd.Names = make([]string, len(wvd.Names))
+ copy(vd.Names, wvd.Names)
+ vd.MarshalledPublicKey = make([]byte, len(wvd.MarshalledPublicKey))
+ copy(vd.MarshalledPublicKey, wvd.MarshalledPublicKey)
+ }
+ return vd, err
+}
diff --git a/services/syncbase/signing/validationcache_test.go b/services/syncbase/signing/validationcache_test.go
new file mode 100644
index 0000000..a45835b
--- /dev/null
+++ b/services/syncbase/signing/validationcache_test.go
@@ -0,0 +1,189 @@
+// Copyright 2015 The Vanadium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// This file tests the validationcache.go module.
+
+package signing_test
+
+import "bytes"
+import "testing"
+import "time"
+
+import "v.io/syncbase/x/ref/services/syncbase/signing"
+import "v.io/v23/security"
+import "v.io/x/ref/test"
+import _ "v.io/x/ref/runtime/factories/generic"
+import lib_security "v.io/x/ref/lib/security"
+
+// A principalVDesc holds the local state of a single principal in the tests below.
+type principalVDesc struct {
+ name string
+ principal security.Principal
+ blessings security.Blessings
+ blessingsHash []byte
+ blessingsData *signing.BlessingsData
+ validatorHash []byte
+ validatorData *signing.ValidatorData
+ cache *signing.ValidationCache
+}
+
+// makePrincipalVDesc() returns a pointer to a newly-initialized principalVDesc
+// with a unique key and a single self-blessing bearing its own name.
+func makePrincipalVDesc(t *testing.T, name string) (desc *principalVDesc) {
+ var err error
+ desc = new(principalVDesc)
+ desc.name = name
+ desc.principal, err = lib_security.NewPrincipal()
+ if err != nil {
+ t.Fatalf("lib_security.NewPrincipal %q failed: %v", desc.name, err)
+ }
+ desc.blessings, err = desc.principal.BlessSelf(desc.name)
+ if err != nil {
+ t.Fatalf("principal.BlessSelf %q failed: %v", desc.name, err)
+ }
+ desc.cache = signing.NewValidationCache(5 * time.Second)
+ return desc
+}
+
+func TestValidationCache(t *testing.T) {
+ ctx, shutdown := test.V23Init()
+ defer shutdown()
+
+ var err error
+
+ // Make a principalVDesc for each of the author, validator, and checker.
+ // (The author creates a signed change; the validator is a device the
+ // author syncs with; the checker is a device the validator syncs with.)
+ author := makePrincipalVDesc(t, "author")
+ validator := makePrincipalVDesc(t, "validator")
+ checker := makePrincipalVDesc(t, "checker")
+
+ // Add each principal's blessings to each principal's roots.
+ pdList := []*principalVDesc{author, validator, checker}
+ for i := 0; i != len(pdList); i++ {
+ for j := 0; j != len(pdList); j++ {
+ pdList[j].principal.AddToRoots(pdList[i].blessings)
+ }
+ }
+
+ // --------------------------------------
+ // Author
+ arbitraryBlessingsData := author.cache.LookupBlessingsData(ctx, []byte{0x00})
+ if arbitraryBlessingsData != nil {
+ t.Errorf("found non-nil blessings data for nonsense hash in author's ValidationCache")
+ }
+ author.blessingsHash, author.blessingsData, err = author.cache.AddBlessings(ctx, author.blessings)
+ if err != nil {
+ t.Fatalf("error from author.cache.AddBlessings(): %v", err)
+ }
+ // Check that the author's data is as we expect.
+ if author.cache.LookupBlessingsData(ctx, author.blessingsHash) != author.blessingsData {
+ t.Fatalf("found wrong blessings data for hash in author's ValidationCache: %v vs %v",
+ author.cache.LookupBlessingsData(ctx, author.blessingsHash), author.blessingsData)
+ }
+
+ // --------------------------------------
+ // Validator
+ // The validator receives author.blessingsHash from the author.
+ // Initially the validator doesn't have the author's BlessingsData.
+ authorBlessingsData := validator.cache.LookupBlessingsData(ctx, author.blessingsHash)
+ if authorBlessingsData != nil {
+ t.Errorf("found non-nil blessings data for author.blessingsHash hash in validator's ValidationCache")
+ }
+ // The validator receives the author's marshalled blessings from the author.
+ validator.blessingsHash, validator.blessingsData, err =
+ validator.cache.AddWireBlessings(ctx, author.blessingsData.MarshalledBlessings)
+ if err != nil {
+ t.Fatalf("validator can't add author's marshalled blessings to its ValidationCache: %v", err)
+ }
+ if !bytes.Equal(author.blessingsHash, validator.blessingsHash) {
+ t.Errorf("validator's copy of the blessingsHash different from author's")
+ }
+ // Check that we could have got the blessingsData with a lookup if this were the second time.
+ if validator.cache.LookupBlessingsData(ctx, validator.blessingsHash) != validator.blessingsData {
+ t.Fatalf("found wrong blessings data for hash in validator's ValidationCache")
+ }
+ var marshalledPublicKey []byte
+ marshalledPublicKey, err = validator.principal.PublicKey().MarshalBinary()
+ if err != nil {
+ t.Fatalf("validator.principal.PublicKey().MarshalBinary() got error: %v", err)
+ }
+
+ var validatedNames []string
+ validatedNames, _ = security.SigningBlessingNames(ctx, validator.principal,
+ validator.blessingsData.UnmarshalledBlessings)
+ validator.validatorData = &signing.ValidatorData{
+ Names: validatedNames,
+ PublicKey: validator.principal.PublicKey(),
+ MarshalledPublicKey: marshalledPublicKey}
+ validator.validatorHash = validator.cache.AddValidatorData(ctx, validator.validatorData)
+ if validator.cache.LookupValidatorData(ctx, validator.validatorHash) != validator.validatorData {
+ t.Fatalf("LookupValidatorData returned wrong ValidatorData pointer in validator")
+ }
+
+ // --------------------------------------
+ // Checker
+ // The checker receives validator.blessingsHash from the validator.
+ // Initially the checker doesn't have the author's BlessingsData.
+ authorBlessingsData = checker.cache.LookupBlessingsData(ctx, validator.blessingsHash)
+ if authorBlessingsData != nil {
+ t.Errorf("found non-nil blessings data for author.blessingsHash hash in checker's ValidationCache")
+ }
+ // The checker receives the author's marshalled blessings from the validator.
+ checker.blessingsHash, checker.blessingsData, err =
+ checker.cache.AddWireBlessings(ctx, validator.blessingsData.MarshalledBlessings)
+ if err != nil {
+ t.Fatalf("checker can't add author's marshalled blessings (from validator) to ValidationCache: %v", err)
+ }
+ if !bytes.Equal(author.blessingsHash, checker.blessingsHash) {
+ t.Errorf("checker's copy of the blessingsHash different from author's")
+ }
+ // Check that we could have got the blessingsData with a lookup if this were the second time.
+ if checker.cache.LookupBlessingsData(ctx, checker.blessingsHash) != checker.blessingsData {
+ t.Fatalf("found wrong blessings data for hash in checker's ValidationCache")
+ }
+ // The checker receives validator.validatorHash from the validator.
+ // Initially the checker doesn't have the ValidatorData.
+ validatorData := checker.cache.LookupValidatorData(ctx, validator.validatorHash)
+ if validatorData != nil {
+ t.Errorf("found non-nil validator data for validator.validatorHash hash in checker's ValidationCache")
+ }
+ // The checker receives the validator's data from the validator (or another checker).
+ checker.validatorHash = checker.cache.AddValidatorData(ctx, validator.validatorData)
+ if !bytes.Equal(validator.validatorHash, checker.validatorHash) {
+ t.Fatalf("checker's copy of the validatorHash different from validator's")
+ }
+ // Get the validatorData
+ checker.validatorData = checker.cache.LookupValidatorData(ctx, checker.validatorHash)
+ if checker.validatorData == nil {
+ t.Fatalf("found nil valdidatorData for checker.validatorHash hash in checker's ValidationCache")
+ }
+}
+
+func TestWireValidatorData(t *testing.T) {
+ var err error
+
+ pDesc := makePrincipalVDesc(t, "some_principal")
+
+ var vd signing.ValidatorData
+ vd.Names = []string{"wombat", "foo"}
+ vd.PublicKey = pDesc.principal.PublicKey()
+ vd.MarshalledPublicKey, err = vd.PublicKey.MarshalBinary()
+ if err != nil {
+ t.Fatalf("failed to marshel public key: %v\n", err)
+ }
+
+ var wvd signing.WireValidatorData
+ var vd2 signing.ValidatorData
+
+ wvd = signing.ToWireValidatorData(&vd)
+ vd2, err = signing.FromWireValidatorData(&wvd)
+ if err != nil {
+ t.Fatalf("FromWireValidatorData failed: %v\n", err)
+ }
+ if len(vd.Names) != len(vd2.Names) {
+ t.Fatalf("ToWireValidatorData/FromWireValidatorData failed to transfer Names list correctly:\nold\n%v\n\nnew\n%v\n\nwire\n%v\n",
+ vd, vd2, wvd)
+ }
+}
diff --git a/services/syncbase/store/benchmark/benchmark.go b/services/syncbase/store/benchmark/benchmark.go
new file mode 100644
index 0000000..c794fa9
--- /dev/null
+++ b/services/syncbase/store/benchmark/benchmark.go
@@ -0,0 +1,144 @@
+// Copyright 2015 The Vanadium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package benchmark
+
+import (
+ "fmt"
+ "math/rand"
+ "testing"
+
+ "v.io/syncbase/x/ref/services/syncbase/store"
+)
+
+// RandomGenerator is a helper for generating random data.
+type RandomGenerator struct {
+ rand.Rand
+ data []byte
+ pos int
+}
+
+// NewRandomGenerator returns a new generator of pseudo-random byte sequences
+// seeded with the given value. Every N bytes produced by this generator can be
+// compressed to (compressionRatio * N) bytes.
+func NewRandomGenerator(seed int64, compressionRatio float64) *RandomGenerator {
+ gen := &RandomGenerator{
+ *rand.New(rand.NewSource(seed)),
+ []byte{},
+ 0,
+ }
+ for len(gen.data) < 1000*1000 {
+ // We generate compressible byte sequences to test the Snappy compression
+ // engine used by LevelDB.
+ gen.data = append(gen.data, gen.compressibleBytes(100, compressionRatio)...)
+ }
+ return gen
+}
+
+// randomBytes generates n pseudo-random bytes from range [' '..'~'].
+func (r *RandomGenerator) randomBytes(n int) (bytes []byte) {
+ for i := 0; i < n; i++ {
+ bytes = append(bytes, byte(' '+r.Intn(95))) // ' ' .. '~'
+ }
+ return
+}
+
+// compressibleBytes generates a sequence of n pseudo-random bytes that can
+// be compressed to ~(compressionRatio * n) bytes.
+func (r *RandomGenerator) compressibleBytes(n int, compressionRatio float64) (bytes []byte) {
+ raw := int(float64(n) * compressionRatio)
+ if raw < 1 {
+ raw = 1
+ }
+ rawData := r.randomBytes(raw)
+ // Duplicate the random data until we have filled n bytes.
+ for len(bytes) < n {
+ bytes = append(bytes, rawData...)
+ }
+ return bytes[0:n]
+}
+
+// generate returns a sequence of n pseudo-random bytes.
+func (r *RandomGenerator) generate(n int) []byte {
+ if r.pos+n > len(r.data) {
+ r.pos = 0
+ if n >= len(r.data) {
+ panic(fmt.Sprintf("length(%d) is too big", n))
+ }
+ }
+ r.pos += n
+ return r.data[r.pos-n : r.pos]
+}
+
+// Config is a set of settings required to run a benchmark.
+type Config struct {
+ Rand *RandomGenerator
+ // St is the database to use. Initially it should be empty.
+ St store.Store
+ KeyLen int // size of each key
+ ValueLen int // size of each value
+}
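+
+// Illustrative wiring sketch for a storage engine's benchmark test (it mirrors
+// the engine-specific tests added in this change; newTestStore is an assumed
+// helper that returns an empty store.Store):
+//
+//	func BenchmarkWriteSequential(b *testing.B) {
+//		st := newTestStore()
+//		defer st.Close()
+//		WriteSequential(b, &Config{
+//			Rand:     NewRandomGenerator(23917, 0.5),
+//			St:       st,
+//			KeyLen:   20,
+//			ValueLen: 100,
+//		})
+//	}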
+
+// WriteSequential writes b.N values in sequential key order.
+func WriteSequential(b *testing.B, config *Config) {
+ doWrite(b, config, true)
+}
+
+// WriteRandom writes b.N values in random key order.
+func WriteRandom(b *testing.B, config *Config) {
+ doWrite(b, config, false)
+}
+
+func doWrite(b *testing.B, config *Config, seq bool) {
+ b.ResetTimer()
+ for i := 0; i < b.N; i++ {
+ var k int
+ if seq {
+ k = i
+ } else {
+ k = config.Rand.Intn(b.N)
+ }
+ // Zero-pad the numeric key to KeyLen digits so keys sort in numeric order.
+ key := []byte(fmt.Sprintf("%0[2]*[1]d", k, config.KeyLen))
+ if err := config.St.Put(key, config.Rand.generate(config.ValueLen)); err != nil {
+ b.Fatalf("put error: %v", err)
+ }
+ }
+}
+
+// ReadSequential reads b.N values in sequential key order.
+func ReadSequential(b *testing.B, config *Config) {
+ WriteSequential(b, config)
+ b.ResetTimer()
+ s := config.St.Scan([]byte("0"), []byte("z"))
+ var key, value []byte
+ for i := 0; i < b.N; i++ {
+ if !s.Advance() {
+ b.Fatalf("can't read next value: %v", s.Err())
+ }
+ key = s.Key(key)
+ value = s.Value(value)
+ }
+ s.Cancel()
+}
+
+// ReadRandom reads b.N values in random key order.
+func ReadRandom(b *testing.B, config *Config) {
+ WriteSequential(b, config)
+ b.ResetTimer()
+ var value []byte
+ var err error
+ for i := 0; i < b.N; i++ {
+ key := []byte(fmt.Sprintf("%0[2]*[1]d", config.Rand.Intn(b.N), config.KeyLen))
+ if value, err = config.St.Get(key, value); err != nil {
+ b.Fatalf("can't read value for key %s: %v", key, err)
+ }
+ }
+}
+
+// Overwrite overwrites b.N values in random key order.
+func Overwrite(b *testing.B, config *Config) {
+ WriteSequential(b, config)
+ b.ResetTimer()
+ WriteRandom(b, config)
+}
diff --git a/services/syncbase/store/constants.go b/services/syncbase/store/constants.go
new file mode 100644
index 0000000..26551fa
--- /dev/null
+++ b/services/syncbase/store/constants.go
@@ -0,0 +1,15 @@
+// Copyright 2015 The Vanadium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package store
+
+// TODO(sadovsky): Maybe define verrors for these.
+const (
+ ErrMsgClosedStore = "closed store"
+ ErrMsgAbortedSnapshot = "aborted snapshot"
+ ErrMsgCanceledStream = "canceled stream"
+ ErrMsgCommittedTxn = "already called commit"
+ ErrMsgAbortedTxn = "already called abort"
+ ErrMsgExpiredTxn = "expired transaction"
+)
diff --git a/services/syncbase/store/invalid_types.go b/services/syncbase/store/invalid_types.go
new file mode 100644
index 0000000..a230684
--- /dev/null
+++ b/services/syncbase/store/invalid_types.go
@@ -0,0 +1,121 @@
+// Copyright 2015 The Vanadium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package store
+
+import (
+ "v.io/v23/verror"
+)
+
+// InvalidSnapshot is a Snapshot for which all methods return errors.
+type InvalidSnapshot struct {
+ SnapshotSpecImpl
+ Error error // returned by all methods
+}
+
+// InvalidStream is a Stream for which all methods return errors.
+type InvalidStream struct {
+ Error error // returned by all methods
+}
+
+// InvalidTransaction is a Transaction for which all methods return errors.
+type InvalidTransaction struct {
+ Error error // returned by all methods
+}
+
+var (
+ _ Snapshot = (*InvalidSnapshot)(nil)
+ _ Stream = (*InvalidStream)(nil)
+ _ Transaction = (*InvalidTransaction)(nil)
+)
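+
+// Illustrative usage sketch (someStore is an assumed receiver type): a store
+// implementation that has been closed returns these invalid values instead of
+// panicking, as the LevelDB and memstore implementations in this change do:
+//
+//	func (d *someStore) Scan(start, limit []byte) Stream {
+//		if d.err != nil {
+//			return &InvalidStream{Error: d.err}
+//		}
+//		// ... normal scan path ...
+//	}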
+
+////////////////////////////////////////////////////////////
+// InvalidSnapshot
+
+// Abort implements the store.Snapshot interface.
+func (s *InvalidSnapshot) Abort() error {
+ return convertError(s.Error)
+}
+
+// Get implements the store.StoreReader interface.
+func (s *InvalidSnapshot) Get(key, valbuf []byte) ([]byte, error) {
+ return valbuf, convertError(s.Error)
+}
+
+// Scan implements the store.StoreReader interface.
+func (s *InvalidSnapshot) Scan(start, limit []byte) Stream {
+ return &InvalidStream{s.Error}
+}
+
+////////////////////////////////////////////////////////////
+// InvalidStream
+
+// Advance implements the store.Stream interface.
+func (s *InvalidStream) Advance() bool {
+ return false
+}
+
+// Key implements the store.Stream interface.
+func (s *InvalidStream) Key(keybuf []byte) []byte {
+ panic(s.Error)
+}
+
+// Value implements the store.Stream interface.
+func (s *InvalidStream) Value(valbuf []byte) []byte {
+ panic(s.Error)
+}
+
+// Err implements the store.Stream interface.
+func (s *InvalidStream) Err() error {
+ return convertError(s.Error)
+}
+
+// Cancel implements the store.Stream interface.
+func (s *InvalidStream) Cancel() {
+}
+
+////////////////////////////////////////////////////////////
+// InvalidTransaction
+
+// ResetForRetry implements the store.Transaction interface.
+func (tx *InvalidTransaction) ResetForRetry() {
+ panic(tx.Error)
+}
+
+// Get implements the store.StoreReader interface.
+func (tx *InvalidTransaction) Get(key, valbuf []byte) ([]byte, error) {
+ return valbuf, convertError(tx.Error)
+}
+
+// Scan implements the store.StoreReader interface.
+func (tx *InvalidTransaction) Scan(start, limit []byte) Stream {
+ return &InvalidStream{tx.Error}
+}
+
+// Put implements the store.StoreWriter interface.
+func (tx *InvalidTransaction) Put(key, value []byte) error {
+ return convertError(tx.Error)
+}
+
+// Delete implements the store.StoreWriter interface.
+func (tx *InvalidTransaction) Delete(key []byte) error {
+ return convertError(tx.Error)
+}
+
+// Commit implements the store.Transaction interface.
+func (tx *InvalidTransaction) Commit() error {
+ return convertError(tx.Error)
+}
+
+// Abort implements the store.Transaction interface.
+func (tx *InvalidTransaction) Abort() error {
+ return convertError(tx.Error)
+}
+
+////////////////////////////////////////////////////////////
+// Internal helpers
+
+func convertError(err error) error {
+ return verror.Convert(verror.IDAction{}, nil, err)
+}
diff --git a/services/syncbase/store/leveldb/benchmark_test.go b/services/syncbase/store/leveldb/benchmark_test.go
new file mode 100644
index 0000000..7de0062
--- /dev/null
+++ b/services/syncbase/store/leveldb/benchmark_test.go
@@ -0,0 +1,52 @@
+// Copyright 2015 The Vanadium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package leveldb
+
+import (
+ "testing"
+
+ "v.io/syncbase/x/ref/services/syncbase/store"
+ "v.io/syncbase/x/ref/services/syncbase/store/benchmark"
+)
+
+func testConfig(db store.Store) *benchmark.Config {
+ return &benchmark.Config{
+ Rand: benchmark.NewRandomGenerator(23917, 0.5),
+ St: db,
+ KeyLen: 20,
+ ValueLen: 100,
+ }
+}
+
+func runBenchmark(b *testing.B, f func(*testing.B, *benchmark.Config)) {
+ db, dbPath := newDB()
+ defer destroyDB(db, dbPath)
+ f(b, testConfig(db))
+}
+
+// BenchmarkWriteSequential writes b.N values in sequential key order.
+func BenchmarkWriteSequential(b *testing.B) {
+ runBenchmark(b, benchmark.WriteSequential)
+}
+
+// BenchmarkWriteRandom writes b.N values in random key order.
+func BenchmarkWriteRandom(b *testing.B) {
+ runBenchmark(b, benchmark.WriteRandom)
+}
+
+// BenchmarkOverwrite overwrites b.N values in random key order.
+func BenchmarkOverwrite(b *testing.B) {
+ runBenchmark(b, benchmark.Overwrite)
+}
+
+// BenchmarkReadSequential reads b.N values in sequential key order.
+func BenchmarkReadSequential(b *testing.B) {
+ runBenchmark(b, benchmark.ReadSequential)
+}
+
+// BenchmarkReadRandom reads b.N values in random key order.
+func BenchmarkReadRandom(b *testing.B) {
+ runBenchmark(b, benchmark.ReadRandom)
+}
diff --git a/services/syncbase/store/leveldb/db.go b/services/syncbase/store/leveldb/db.go
new file mode 100644
index 0000000..0430561
--- /dev/null
+++ b/services/syncbase/store/leveldb/db.go
@@ -0,0 +1,176 @@
+// Copyright 2015 The Vanadium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// Package leveldb provides a LevelDB-based implementation of store.Store.
+package leveldb
+
+// #cgo LDFLAGS: -lleveldb -lsnappy
+// #include <stdlib.h>
+// #include "leveldb/c.h"
+// #include "syncbase_leveldb.h"
+import "C"
+import (
+ "fmt"
+ "sync"
+ "unsafe"
+
+ "v.io/syncbase/x/ref/services/syncbase/store"
+ "v.io/syncbase/x/ref/services/syncbase/store/transactions"
+ "v.io/v23/verror"
+)
+
+// db is a wrapper around LevelDB that implements the transactions.BatchStore
+// interface.
+type db struct {
+ // mu protects the state of the db.
+ mu sync.RWMutex
+ node *store.ResourceNode
+ cDb *C.leveldb_t
+ // Default read/write options.
+ readOptions *C.leveldb_readoptions_t
+ writeOptions *C.leveldb_writeoptions_t
+ err error
+}
+
+// OpenOptions configures how Open creates or opens a database.
+type OpenOptions struct {
+ // CreateIfMissing specifies whether to create the database if it does not
+ // already exist.
+ CreateIfMissing bool
+ // ErrorIfExists specifies whether to fail if the database already exists.
+ ErrorIfExists bool
+}
+
+// Open opens the database located at the given path.
+func Open(path string, opts OpenOptions) (store.Store, error) {
+ var cError *C.char
+ cPath := C.CString(path)
+ defer C.free(unsafe.Pointer(cPath))
+
+ var cOptsCreateIfMissing, cOptsErrorIfExists C.uchar
+ if opts.CreateIfMissing {
+ cOptsCreateIfMissing = 1
+ }
+ if opts.ErrorIfExists {
+ cOptsErrorIfExists = 1
+ }
+
+ cOpts := C.leveldb_options_create()
+ C.leveldb_options_set_create_if_missing(cOpts, cOptsCreateIfMissing)
+ C.leveldb_options_set_error_if_exists(cOpts, cOptsErrorIfExists)
+ C.leveldb_options_set_paranoid_checks(cOpts, 1)
+ defer C.leveldb_options_destroy(cOpts)
+
+ cDb := C.leveldb_open(cOpts, cPath, &cError)
+ if err := goError(cError); err != nil {
+ return nil, err
+ }
+ readOptions := C.leveldb_readoptions_create()
+ C.leveldb_readoptions_set_verify_checksums(readOptions, 1)
+ return transactions.Wrap(&db{
+ node: store.NewResourceNode(),
+ cDb: cDb,
+ readOptions: readOptions,
+ writeOptions: C.leveldb_writeoptions_create(),
+ }), nil
+}
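+
+// Illustrative usage sketch (the path is a placeholder; db_test.go in this
+// change exercises the full open/close/destroy cycle):
+//
+//	st, err := Open("/path/to/db", OpenOptions{CreateIfMissing: true})
+//	if err != nil {
+//		// handle the error
+//	}
+//	defer st.Close()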
+
+// Close implements the store.Store interface.
+func (d *db) Close() error {
+ d.mu.Lock()
+ defer d.mu.Unlock()
+ if d.err != nil {
+ return store.ConvertError(d.err)
+ }
+ d.node.Close()
+ C.leveldb_close(d.cDb)
+ d.cDb = nil
+ C.leveldb_readoptions_destroy(d.readOptions)
+ d.readOptions = nil
+ C.leveldb_writeoptions_destroy(d.writeOptions)
+ d.writeOptions = nil
+ d.err = verror.New(verror.ErrCanceled, nil, store.ErrMsgClosedStore)
+ return nil
+}
+
+// Destroy removes all physical data of the database located at the given path.
+func Destroy(path string) error {
+ var cError *C.char
+ cPath := C.CString(path)
+ defer C.free(unsafe.Pointer(cPath))
+ cOpts := C.leveldb_options_create()
+ defer C.leveldb_options_destroy(cOpts)
+ C.leveldb_destroy_db(cOpts, cPath, &cError)
+ return goError(cError)
+}
+
+// Get implements the store.StoreReader interface.
+func (d *db) Get(key, valbuf []byte) ([]byte, error) {
+ return d.getWithOpts(key, valbuf, d.readOptions)
+}
+
+// Scan implements the store.StoreReader interface.
+func (d *db) Scan(start, limit []byte) store.Stream {
+ d.mu.RLock()
+ defer d.mu.RUnlock()
+ if d.err != nil {
+ return &store.InvalidStream{Error: d.err}
+ }
+ return newStream(d, d.node, start, limit, d.readOptions)
+}
+
+// NewSnapshot implements the store.Store interface.
+func (d *db) NewSnapshot() store.Snapshot {
+ d.mu.RLock()
+ defer d.mu.RUnlock()
+ if d.err != nil {
+ return &store.InvalidSnapshot{Error: d.err}
+ }
+ return newSnapshot(d, d.node)
+}
+
+// WriteBatch implements the transactions.BatchStore interface.
+func (d *db) WriteBatch(batch ...transactions.WriteOp) error {
+ d.mu.Lock()
+ defer d.mu.Unlock()
+ if d.err != nil {
+ return d.err
+ }
+ cBatch := C.leveldb_writebatch_create()
+ defer C.leveldb_writebatch_destroy(cBatch)
+ for _, write := range batch {
+ switch write.T {
+ case transactions.PutOp:
+ cKey, cKeyLen := cSlice(write.Key)
+ cVal, cValLen := cSlice(write.Value)
+ C.leveldb_writebatch_put(cBatch, cKey, cKeyLen, cVal, cValLen)
+ case transactions.DeleteOp:
+ cKey, cKeyLen := cSlice(write.Key)
+ C.leveldb_writebatch_delete(cBatch, cKey, cKeyLen)
+ default:
+ panic(fmt.Sprintf("unknown write operation type: %v", write.T))
+ }
+ }
+ var cError *C.char
+ C.leveldb_write(d.cDb, d.writeOptions, cBatch, &cError)
+ return goError(cError)
+}
+
+// getWithOpts returns the value for the given key.
+// cOpts may contain a pointer to a snapshot.
+func (d *db) getWithOpts(key, valbuf []byte, cOpts *C.leveldb_readoptions_t) ([]byte, error) {
+ d.mu.RLock()
+ defer d.mu.RUnlock()
+ if d.err != nil {
+ return valbuf, store.ConvertError(d.err)
+ }
+ var cError *C.char
+ var valLen C.size_t
+ cStr, cLen := cSlice(key)
+ val := C.leveldb_get(d.cDb, cOpts, cStr, cLen, &valLen, &cError)
+ if err := goError(cError); err != nil {
+ return valbuf, err
+ }
+ if val == nil {
+ return valbuf, verror.New(store.ErrUnknownKey, nil, string(key))
+ }
+ defer C.leveldb_free(unsafe.Pointer(val))
+ return store.CopyBytes(valbuf, goBytes(val, valLen)), nil
+}
diff --git a/services/syncbase/store/leveldb/db_test.go b/services/syncbase/store/leveldb/db_test.go
new file mode 100644
index 0000000..88cddc2
--- /dev/null
+++ b/services/syncbase/store/leveldb/db_test.go
@@ -0,0 +1,123 @@
+// Copyright 2015 The Vanadium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package leveldb
+
+import (
+ "fmt"
+ "io/ioutil"
+ "runtime"
+ "testing"
+
+ "v.io/syncbase/x/ref/services/syncbase/store"
+ "v.io/syncbase/x/ref/services/syncbase/store/test"
+)
+
+func init() {
+ runtime.GOMAXPROCS(10)
+}
+
+func TestStream(t *testing.T) {
+ runTest(t, test.RunStreamTest)
+}
+
+func TestSnapshot(t *testing.T) {
+ runTest(t, test.RunSnapshotTest)
+}
+
+func TestStoreState(t *testing.T) {
+ runTest(t, test.RunStoreStateTest)
+}
+
+func TestClose(t *testing.T) {
+ runTest(t, test.RunCloseTest)
+}
+
+func TestReadWriteBasic(t *testing.T) {
+ runTest(t, test.RunReadWriteBasicTest)
+}
+
+func TestReadWriteRandom(t *testing.T) {
+ runTest(t, test.RunReadWriteRandomTest)
+}
+
+func TestConcurrentTransactions(t *testing.T) {
+ runTest(t, test.RunConcurrentTransactionsTest)
+}
+
+func TestTransactionState(t *testing.T) {
+ runTest(t, test.RunTransactionStateTest)
+}
+
+func TestTransactionsWithGet(t *testing.T) {
+ runTest(t, test.RunTransactionsWithGetTest)
+}
+
+func TestOpenOptions(t *testing.T) {
+ path, err := ioutil.TempDir("", "syncbase_leveldb")
+ if err != nil {
+ t.Fatalf("can't create temp dir: %v", err)
+ }
+ // DB is missing => call should fail.
+ st, err := Open(path, OpenOptions{CreateIfMissing: false, ErrorIfExists: false})
+ if err == nil {
+ t.Fatalf("open should've failed")
+ }
+ // DB is missing => call should succeed.
+ st, err = Open(path, OpenOptions{CreateIfMissing: true, ErrorIfExists: false})
+ if err != nil {
+ t.Fatalf("open failed: %v", err)
+ }
+ st.Close()
+ // DB exists => call should succeed.
+ st, err = Open(path, OpenOptions{CreateIfMissing: false, ErrorIfExists: false})
+ if err != nil {
+ t.Fatalf("open failed: %v", err)
+ }
+ st.Close()
+ // DB exists => call should fail.
+ st, err = Open(path, OpenOptions{CreateIfMissing: false, ErrorIfExists: true})
+ if err == nil {
+ t.Fatalf("open should've failed")
+ }
+ // DB exists => call should fail.
+ st, err = Open(path, OpenOptions{CreateIfMissing: true, ErrorIfExists: true})
+ if err == nil {
+ t.Fatalf("open should've failed")
+ }
+ // DB exists => call should succeed.
+ st, err = Open(path, OpenOptions{CreateIfMissing: true, ErrorIfExists: false})
+ if err != nil {
+ t.Fatalf("open failed: %v", err)
+ }
+ st.Close()
+ if err := Destroy(path); err != nil {
+ t.Fatalf("destroy failed: %v", err)
+ }
+}
+
+func runTest(t *testing.T, f func(t *testing.T, st store.Store)) {
+ st, dbPath := newDB()
+ defer destroyDB(st, dbPath)
+ f(t, st)
+}
+
+func newDB() (store.Store, string) {
+ path, err := ioutil.TempDir("", "syncbase_leveldb")
+ if err != nil {
+ panic(fmt.Sprintf("can't create temp dir: %v", err))
+ }
+ st, err := Open(path, OpenOptions{CreateIfMissing: true, ErrorIfExists: true})
+ if err != nil {
+ panic(fmt.Sprintf("can't open db at %v: %v", path, err))
+ }
+ return st, path
+}
+
+func destroyDB(st store.Store, path string) {
+ st.Close()
+ if err := Destroy(path); err != nil {
+ panic(fmt.Sprintf("can't destroy db at %v: %v", path, err))
+ }
+}
diff --git a/services/syncbase/store/leveldb/snapshot.go b/services/syncbase/store/leveldb/snapshot.go
new file mode 100644
index 0000000..a403127
--- /dev/null
+++ b/services/syncbase/store/leveldb/snapshot.go
@@ -0,0 +1,82 @@
+// Copyright 2015 The Vanadium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package leveldb
+
+// #include "leveldb/c.h"
+import "C"
+import (
+ "sync"
+
+ "v.io/syncbase/x/ref/services/syncbase/store"
+ "v.io/v23/verror"
+)
+
+// snapshot is a wrapper around a LevelDB snapshot that implements
+// the store.Snapshot interface.
+type snapshot struct {
+ store.SnapshotSpecImpl
+ // mu protects the state of the snapshot.
+ mu sync.RWMutex
+ node *store.ResourceNode
+ d *db
+ cSnapshot *C.leveldb_snapshot_t
+ cOpts *C.leveldb_readoptions_t
+ err error
+}
+
+var _ store.Snapshot = (*snapshot)(nil)
+
+func newSnapshot(d *db, parent *store.ResourceNode) *snapshot {
+ cSnapshot := C.leveldb_create_snapshot(d.cDb)
+ cOpts := C.leveldb_readoptions_create()
+ C.leveldb_readoptions_set_verify_checksums(cOpts, 1)
+ C.leveldb_readoptions_set_snapshot(cOpts, cSnapshot)
+ s := &snapshot{
+ node: store.NewResourceNode(),
+ d: d,
+ cSnapshot: cSnapshot,
+ cOpts: cOpts,
+ }
+ parent.AddChild(s.node, func() {
+ s.Abort()
+ })
+ return s
+}
+
+// Abort implements the store.Snapshot interface.
+func (s *snapshot) Abort() error {
+ s.mu.Lock()
+ defer s.mu.Unlock()
+ if s.err != nil {
+ return store.ConvertError(s.err)
+ }
+ s.node.Close()
+ C.leveldb_readoptions_destroy(s.cOpts)
+ s.cOpts = nil
+ C.leveldb_release_snapshot(s.d.cDb, s.cSnapshot)
+ s.cSnapshot = nil
+ s.err = verror.New(verror.ErrCanceled, nil, store.ErrMsgAbortedSnapshot)
+ return nil
+}
+
+// Get implements the store.StoreReader interface.
+func (s *snapshot) Get(key, valbuf []byte) ([]byte, error) {
+ s.mu.RLock()
+ defer s.mu.RUnlock()
+ if s.err != nil {
+ return valbuf, store.ConvertError(s.err)
+ }
+ return s.d.getWithOpts(key, valbuf, s.cOpts)
+}
+
+// Scan implements the store.StoreReader interface.
+func (s *snapshot) Scan(start, limit []byte) store.Stream {
+ s.mu.RLock()
+ defer s.mu.RUnlock()
+ if s.err != nil {
+ return &store.InvalidStream{Error: s.err}
+ }
+ return newStream(s.d, s.node, start, limit, s.cOpts)
+}
diff --git a/services/syncbase/store/leveldb/stream.go b/services/syncbase/store/leveldb/stream.go
new file mode 100644
index 0000000..2d592b4
--- /dev/null
+++ b/services/syncbase/store/leveldb/stream.go
@@ -0,0 +1,150 @@
+// Copyright 2015 The Vanadium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package leveldb
+
+// #include "leveldb/c.h"
+// #include "syncbase_leveldb.h"
+import "C"
+import (
+ "bytes"
+ "sync"
+
+ "v.io/syncbase/x/ref/services/syncbase/store"
+ "v.io/v23/verror"
+)
+
+// stream is a wrapper around a LevelDB iterator that implements
+// the store.Stream interface.
+type stream struct {
+ // mu protects the state of the stream.
+ mu sync.Mutex
+ node *store.ResourceNode
+ cIter *C.syncbase_leveldb_iterator_t
+ limit []byte
+
+ hasAdvanced bool
+ err error
+
+ // hasValue is true iff a value has been staged. If hasValue is true,
+ // key and value point to the staged key/value pair. The underlying buffers
+ // of key and value are allocated on the C heap until Cancel is called,
+ // at which point they are copied to the Go heap.
+ hasValue bool
+ key []byte
+ value []byte
+}
+
+var _ store.Stream = (*stream)(nil)
+
+func newStream(d *db, parent *store.ResourceNode, start, limit []byte, cOpts *C.leveldb_readoptions_t) *stream {
+ cStr, size := cSlice(start)
+ cIter := C.syncbase_leveldb_create_iterator(d.cDb, cOpts, cStr, size)
+ s := &stream{
+ node: store.NewResourceNode(),
+ cIter: cIter,
+ limit: limit,
+ }
+ parent.AddChild(s.node, func() {
+ s.Cancel()
+ })
+ return s
+}
+
+// destroyLeveldbIter destroys the underlying C iterator.
+// Assumes mu is held.
+func (s *stream) destroyLeveldbIter() {
+ s.node.Close()
+ C.syncbase_leveldb_iter_destroy(s.cIter)
+ s.cIter = nil
+}
+
+// Advance implements the store.Stream interface.
+func (s *stream) Advance() bool {
+ s.mu.Lock()
+ defer s.mu.Unlock()
+ s.hasValue = false
+ if s.cIter == nil {
+ return false
+ }
+ // The C iterator starts out initialized, pointing at the first value; we
+ // shouldn't move it during the first Advance() call.
+ if !s.hasAdvanced {
+ s.hasAdvanced = true
+ } else {
+ C.syncbase_leveldb_iter_next(s.cIter)
+ }
+ if s.cIter.is_valid != 0 && (len(s.limit) == 0 || bytes.Compare(s.cKey(), s.limit) < 0) {
+ s.hasValue = true
+ s.key = s.cKey()
+ s.value = s.cVal()
+ return true
+ }
+
+ var cError *C.char
+ C.syncbase_leveldb_iter_get_error(s.cIter, &cError)
+ s.err = goError(cError)
+ s.destroyLeveldbIter()
+ return false
+}
+
+// Key implements the store.Stream interface.
+func (s *stream) Key(keybuf []byte) []byte {
+ s.mu.Lock()
+ defer s.mu.Unlock()
+ if !s.hasValue {
+ panic("nothing staged")
+ }
+ return store.CopyBytes(keybuf, s.key)
+}
+
+// Value implements the store.Stream interface.
+func (s *stream) Value(valbuf []byte) []byte {
+ s.mu.Lock()
+ defer s.mu.Unlock()
+ if !s.hasValue {
+ panic("nothing staged")
+ }
+ return store.CopyBytes(valbuf, s.value)
+}
+
+// Err implements the store.Stream interface.
+func (s *stream) Err() error {
+ s.mu.Lock()
+ defer s.mu.Unlock()
+ return store.ConvertError(s.err)
+}
+
+// Cancel implements the store.Stream interface.
+// TODO(rogulenko): make Cancel non-blocking.
+func (s *stream) Cancel() {
+ s.mu.Lock()
+ defer s.mu.Unlock()
+ if s.cIter == nil {
+ return
+ }
+ // s.hasValue will be false if Advance has never been called.
+ if s.hasValue {
+ // We copy the key and the value from the C heap to the Go heap before
+ // deallocating the C iterator.
+ s.key = store.CopyBytes(nil, s.cKey())
+ s.value = store.CopyBytes(nil, s.cVal())
+ }
+ s.err = verror.New(verror.ErrCanceled, nil, store.ErrMsgCanceledStream)
+ s.destroyLeveldbIter()
+}
+
+// cKey returns the current key.
+// The returned []byte points to a buffer allocated on the C heap. This buffer
+// is valid until the next call to Advance or Cancel.
+func (it *stream) cKey() []byte {
+ return goBytes(it.cIter.key, it.cIter.key_len)
+}
+
+// cVal returns the current value.
+// The returned []byte points to a buffer allocated on the C heap. This buffer
+// is valid until the next call to Advance or Cancel.
+func (it *stream) cVal() []byte {
+ return goBytes(it.cIter.val, it.cIter.val_len)
+}
diff --git a/services/syncbase/store/leveldb/syncbase_leveldb.cc b/services/syncbase/store/leveldb/syncbase_leveldb.cc
new file mode 100644
index 0000000..8c6f7e6
--- /dev/null
+++ b/services/syncbase/store/leveldb/syncbase_leveldb.cc
@@ -0,0 +1,47 @@
+// Copyright 2015 The Vanadium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// This file is intended to be C++ so that we can access the C++ LevelDB
+// interface directly if necessary.
+
+#include "syncbase_leveldb.h"
+
+extern "C" {
+
+static void PopulateIteratorFields(syncbase_leveldb_iterator_t* iter) {
+ iter->is_valid = leveldb_iter_valid(iter->rep);
+ if (!iter->is_valid) {
+ return;
+ }
+ iter->key = leveldb_iter_key(iter->rep, &iter->key_len);
+ iter->val = leveldb_iter_value(iter->rep, &iter->val_len);
+}
+
+syncbase_leveldb_iterator_t* syncbase_leveldb_create_iterator(
+ leveldb_t* db,
+ const leveldb_readoptions_t* options,
+ const char* start, size_t start_len) {
+ syncbase_leveldb_iterator_t* result = new syncbase_leveldb_iterator_t;
+ result->rep = leveldb_create_iterator(db, options);
+ leveldb_iter_seek(result->rep, start, start_len);
+ PopulateIteratorFields(result);
+ return result;
+}
+
+void syncbase_leveldb_iter_destroy(syncbase_leveldb_iterator_t* iter) {
+ leveldb_iter_destroy(iter->rep);
+ delete iter;
+}
+
+void syncbase_leveldb_iter_next(syncbase_leveldb_iterator_t* iter) {
+ leveldb_iter_next(iter->rep);
+ PopulateIteratorFields(iter);
+}
+
+void syncbase_leveldb_iter_get_error(
+ const syncbase_leveldb_iterator_t* iter, char** errptr) {
+ leveldb_iter_get_error(iter->rep, errptr);
+}
+
+} // end extern "C"
diff --git a/services/syncbase/store/leveldb/syncbase_leveldb.h b/services/syncbase/store/leveldb/syncbase_leveldb.h
new file mode 100644
index 0000000..d2faa82
--- /dev/null
+++ b/services/syncbase/store/leveldb/syncbase_leveldb.h
@@ -0,0 +1,61 @@
+// Copyright 2015 The Vanadium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+//
+// This file contains helpers to minimize the number of cgo calls, which have
+// some overhead.
+// Some conventions:
+//
+// Errors are represented by a null-terminated C string. NULL means no error.
+// All operations that can raise an error are passed a "char** errptr" as the
+// last argument. *errptr should be NULL.
+// On failure, leveldb sets *errptr to a malloc()ed error message.
+//
+// All of the pointer arguments must be non-NULL.
+
+#ifndef V_IO_SYNCBASE_X_REF_SERVICES_SYNCBASE_STORE_LEVELDB_SYNCBASE_LEVELDB_H_
+#define V_IO_SYNCBASE_X_REF_SERVICES_SYNCBASE_STORE_LEVELDB_SYNCBASE_LEVELDB_H_
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#include "leveldb/c.h"
+
+// Fields of this struct are accessed from Go directly, without cgo calls.
+struct syncbase_leveldb_iterator_t {
+ leveldb_iterator_t* rep;
+ unsigned char is_valid;
+ char const* key;
+ size_t key_len;
+ char const* val;
+ size_t val_len;
+};
+
+typedef struct syncbase_leveldb_iterator_t syncbase_leveldb_iterator_t;
+
+// Returns an iterator that points to the first key that is not less than
+// |start|.
+// The returned iterator must be passed to |syncbase_leveldb_iter_destroy|
+// when finished.
+syncbase_leveldb_iterator_t* syncbase_leveldb_create_iterator(
+ leveldb_t* db,
+ const leveldb_readoptions_t* options,
+ const char* start, size_t start_len);
+
+// Deallocates iterator returned by |syncbase_leveldb_create_iterator|.
+void syncbase_leveldb_iter_destroy(syncbase_leveldb_iterator_t*);
+
+// Moves to the next entry in the source. After this call, |is_valid| is
+// true iff the iterator was not positioned at the last entry in the source.
+// REQUIRES: |is_valid| is true.
+void syncbase_leveldb_iter_next(syncbase_leveldb_iterator_t* iter);
+
+// Reports any error encountered by the iterator via *errptr, following the
+// error conventions described at the top of this file.
+void syncbase_leveldb_iter_get_error(
+ const syncbase_leveldb_iterator_t* iter, char** errptr);
+
+#ifdef __cplusplus
+} // end extern "C"
+#endif
+
+#endif // V_IO_SYNCBASE_X_REF_SERVICES_SYNCBASE_STORE_LEVELDB_SYNCBASE_LEVELDB_H_
diff --git a/services/syncbase/store/leveldb/util.go b/services/syncbase/store/leveldb/util.go
new file mode 100644
index 0000000..dce69bb
--- /dev/null
+++ b/services/syncbase/store/leveldb/util.go
@@ -0,0 +1,46 @@
+// Copyright 2015 The Vanadium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package leveldb
+
+// #include "leveldb/c.h"
+import "C"
+import (
+ "reflect"
+ "unsafe"
+
+ "v.io/v23/verror"
+)
+
+// goError copies a C error message into the Go heap and frees the C buffer.
+func goError(cError *C.char) error {
+ if cError == nil {
+ return nil
+ }
+ err := verror.New(verror.ErrInternal, nil, C.GoString(cError))
+ C.leveldb_free(unsafe.Pointer(cError))
+ return err
+}
+
+// cSlice converts Go []byte to C string without copying the data.
+// This function behaves similarly to standard Go slice copying or sub-slicing,
+// in that the caller need not worry about ownership or garbage collection.
+func cSlice(str []byte) (*C.char, C.size_t) {
+ if len(str) == 0 {
+ return nil, 0
+ }
+ data := unsafe.Pointer((*reflect.StringHeader)(unsafe.Pointer(&str)).Data)
+ return (*C.char)(data), C.size_t(len(str))
+}
+
+// goBytes converts C string to Go []byte without copying the data.
+// This function behaves similarly to cSlice.
+func goBytes(str *C.char, size C.size_t) []byte {
+ ptr := unsafe.Pointer(&reflect.SliceHeader{
+ Data: uintptr(unsafe.Pointer(str)),
+ Len: int(size),
+ Cap: int(size),
+ })
+ return *(*[]byte)(ptr)
+}
diff --git a/services/syncbase/store/memstore/snapshot.go b/services/syncbase/store/memstore/snapshot.go
new file mode 100644
index 0000000..310f6e2
--- /dev/null
+++ b/services/syncbase/store/memstore/snapshot.go
@@ -0,0 +1,74 @@
+// Copyright 2015 The Vanadium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package memstore
+
+import (
+ "sync"
+
+ "v.io/syncbase/x/ref/services/syncbase/store"
+ "v.io/v23/verror"
+)
+
+type snapshot struct {
+ store.SnapshotSpecImpl
+ mu sync.Mutex
+ node *store.ResourceNode
+ data map[string][]byte
+ err error
+}
+
+var _ store.Snapshot = (*snapshot)(nil)
+
+// Assumes st lock is held.
+func newSnapshot(st *memstore, parent *store.ResourceNode) *snapshot {
+ dataCopy := make(map[string][]byte, len(st.data))
+ for k, v := range st.data {
+ dataCopy[k] = v
+ }
+ s := &snapshot{
+ node: store.NewResourceNode(),
+ data: dataCopy,
+ }
+ parent.AddChild(s.node, func() {
+ s.Abort()
+ })
+ return s
+}
+
+// Abort implements the store.Snapshot interface.
+func (s *snapshot) Abort() error {
+ s.mu.Lock()
+ defer s.mu.Unlock()
+ if s.err != nil {
+ return store.ConvertError(s.err)
+ }
+ s.node.Close()
+ s.err = verror.New(verror.ErrCanceled, nil, store.ErrMsgAbortedSnapshot)
+ return nil
+}
+
+// Get implements the store.StoreReader interface.
+func (s *snapshot) Get(key, valbuf []byte) ([]byte, error) {
+ s.mu.Lock()
+ defer s.mu.Unlock()
+ if s.err != nil {
+ return valbuf, store.ConvertError(s.err)
+ }
+ value, ok := s.data[string(key)]
+ if !ok {
+ return valbuf, verror.New(store.ErrUnknownKey, nil, string(key))
+ }
+ return store.CopyBytes(valbuf, value), nil
+}
+
+// Scan implements the store.StoreReader interface.
+func (s *snapshot) Scan(start, limit []byte) store.Stream {
+ s.mu.Lock()
+ defer s.mu.Unlock()
+ if s.err != nil {
+ return &store.InvalidStream{Error: s.err}
+ }
+ return newStream(s, s.node, start, limit)
+}
diff --git a/services/syncbase/store/memstore/store.go b/services/syncbase/store/memstore/store.go
new file mode 100644
index 0000000..15a2988
--- /dev/null
+++ b/services/syncbase/store/memstore/store.go
@@ -0,0 +1,98 @@
+// Copyright 2015 The Vanadium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// Package memstore provides a simple, in-memory implementation of store.Store.
+// Since it's a prototype implementation, it makes no attempt to be performant.
+package memstore
+
+import (
+ "fmt"
+ "sync"
+
+ "v.io/syncbase/x/ref/services/syncbase/store"
+ "v.io/syncbase/x/ref/services/syncbase/store/transactions"
+ "v.io/v23/verror"
+)
+
+type memstore struct {
+ mu sync.Mutex
+ node *store.ResourceNode
+ data map[string][]byte
+ err error
+}
+
+// New creates a new memstore.
+func New() store.Store {
+ return transactions.Wrap(&memstore{
+ data: map[string][]byte{},
+ node: store.NewResourceNode(),
+ })
+}
+
+// Close implements the store.Store interface.
+func (st *memstore) Close() error {
+ st.mu.Lock()
+ defer st.mu.Unlock()
+ if st.err != nil {
+ return store.ConvertError(st.err)
+ }
+ st.node.Close()
+ st.err = verror.New(verror.ErrCanceled, nil, store.ErrMsgClosedStore)
+ return nil
+}
+
+// Get implements the store.StoreReader interface.
+func (st *memstore) Get(key, valbuf []byte) ([]byte, error) {
+ st.mu.Lock()
+ defer st.mu.Unlock()
+ if st.err != nil {
+ return valbuf, store.ConvertError(st.err)
+ }
+ value, ok := st.data[string(key)]
+ if !ok {
+ return valbuf, verror.New(store.ErrUnknownKey, nil, string(key))
+ }
+ return store.CopyBytes(valbuf, value), nil
+}
+
+// Scan implements the store.StoreReader interface.
+func (st *memstore) Scan(start, limit []byte) store.Stream {
+ st.mu.Lock()
+ defer st.mu.Unlock()
+ if st.err != nil {
+ return &store.InvalidStream{Error: st.err}
+ }
+ // TODO(sadovsky): Close snapshot once stream is closed or canceled.
+ return newSnapshot(st, st.node).Scan(start, limit)
+}
+
+// NewSnapshot implements the store.Store interface.
+func (st *memstore) NewSnapshot() store.Snapshot {
+ st.mu.Lock()
+ defer st.mu.Unlock()
+ if st.err != nil {
+ return &store.InvalidSnapshot{Error: st.err}
+ }
+ return newSnapshot(st, st.node)
+}
+
+// WriteBatch implements the transactions.BatchStore interface.
+func (st *memstore) WriteBatch(batch ...transactions.WriteOp) error {
+ st.mu.Lock()
+ defer st.mu.Unlock()
+ if st.err != nil {
+ return store.ConvertError(st.err)
+ }
+ for _, write := range batch {
+ switch write.T {
+ case transactions.PutOp:
+ st.data[string(write.Key)] = write.Value
+ case transactions.DeleteOp:
+ delete(st.data, string(write.Key))
+ default:
+ panic(fmt.Sprintf("unknown write operation type: %v", write.T))
+ }
+ }
+ return nil
+}
diff --git a/services/syncbase/store/memstore/store_test.go b/services/syncbase/store/memstore/store_test.go
new file mode 100644
index 0000000..0b04032
--- /dev/null
+++ b/services/syncbase/store/memstore/store_test.go
@@ -0,0 +1,59 @@
+// Copyright 2015 The Vanadium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package memstore
+
+import (
+ "runtime"
+ "testing"
+
+ "v.io/syncbase/x/ref/services/syncbase/store"
+ "v.io/syncbase/x/ref/services/syncbase/store/test"
+)
+
+func init() {
+ runtime.GOMAXPROCS(10)
+}
+
+func TestStream(t *testing.T) {
+ runTest(t, test.RunStreamTest)
+}
+
+func TestSnapshot(t *testing.T) {
+ runTest(t, test.RunSnapshotTest)
+}
+
+func TestStoreState(t *testing.T) {
+ runTest(t, test.RunStoreStateTest)
+}
+
+func TestClose(t *testing.T) {
+ runTest(t, test.RunCloseTest)
+}
+
+func TestReadWriteBasic(t *testing.T) {
+ runTest(t, test.RunReadWriteBasicTest)
+}
+
+func TestReadWriteRandom(t *testing.T) {
+ runTest(t, test.RunReadWriteRandomTest)
+}
+
+func TestConcurrentTransactions(t *testing.T) {
+ runTest(t, test.RunConcurrentTransactionsTest)
+}
+
+func TestTransactionState(t *testing.T) {
+ runTest(t, test.RunTransactionStateTest)
+}
+
+func TestTransactionsWithGet(t *testing.T) {
+ runTest(t, test.RunTransactionsWithGetTest)
+}
+
+func runTest(t *testing.T, f func(t *testing.T, st store.Store)) {
+ st := New()
+ defer st.Close()
+ f(t, st)
+}
diff --git a/services/syncbase/store/memstore/stream.go b/services/syncbase/store/memstore/stream.go
new file mode 100644
index 0000000..345ea93
--- /dev/null
+++ b/services/syncbase/store/memstore/stream.go
@@ -0,0 +1,103 @@
+// Copyright 2015 The Vanadium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package memstore
+
+import (
+ "sort"
+ "sync"
+
+ "v.io/syncbase/x/ref/services/syncbase/store"
+ "v.io/v23/verror"
+)
+
+type stream struct {
+ mu sync.Mutex
+ node *store.ResourceNode
+ sn *snapshot
+ keys []string
+ currIndex int
+ currKey *string
+ err error
+ done bool
+}
+
+var _ store.Stream = (*stream)(nil)
+
+func newStream(sn *snapshot, parent *store.ResourceNode, start, limit []byte) *stream {
+ keys := []string{}
+ for k := range sn.data {
+ if k >= string(start) && (len(limit) == 0 || k < string(limit)) {
+ keys = append(keys, k)
+ }
+ }
+ sort.Strings(keys)
+ s := &stream{
+ node: store.NewResourceNode(),
+ sn: sn,
+ keys: keys,
+ currIndex: -1,
+ }
+ parent.AddChild(s.node, func() {
+ s.Cancel()
+ })
+ return s
+}
+
+// Advance implements the store.Stream interface.
+func (s *stream) Advance() bool {
+ s.mu.Lock()
+ defer s.mu.Unlock()
+ s.currKey = nil
+ if s.done {
+ return false
+ }
+ s.currIndex++
+ if s.currIndex < len(s.keys) {
+ s.currKey = &s.keys[s.currIndex]
+ } else {
+ s.done = true
+ s.currKey = nil
+ }
+ return !s.done
+}
+
+// Key implements the store.Stream interface.
+func (s *stream) Key(keybuf []byte) []byte {
+ s.mu.Lock()
+ defer s.mu.Unlock()
+ if s.currKey == nil {
+ panic("nothing staged")
+ }
+ return store.CopyBytes(keybuf, []byte(*s.currKey))
+}
+
+// Value implements the store.Stream interface.
+func (s *stream) Value(valbuf []byte) []byte {
+ s.mu.Lock()
+ defer s.mu.Unlock()
+ if s.currKey == nil {
+ panic("nothing staged")
+ }
+ return store.CopyBytes(valbuf, s.sn.data[*s.currKey])
+}
+
+// Err implements the store.Stream interface.
+func (s *stream) Err() error {
+ s.mu.Lock()
+ defer s.mu.Unlock()
+ return store.ConvertError(s.err)
+}
+
+// Cancel implements the store.Stream interface.
+func (s *stream) Cancel() {
+ s.mu.Lock()
+ defer s.mu.Unlock()
+ if s.done {
+ return
+ }
+ s.done = true
+ s.node.Close()
+ s.err = verror.New(verror.ErrCanceled, nil, store.ErrMsgCanceledStream)
+}
diff --git a/services/syncbase/store/model.go b/services/syncbase/store/model.go
new file mode 100644
index 0000000..be7265d
--- /dev/null
+++ b/services/syncbase/store/model.go
@@ -0,0 +1,147 @@
+// Copyright 2015 The Vanadium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// Package store defines the API for the syncbase storage engine.
+// Currently, this API and its implementations are meant to be internal.
+package store
+
+// TODO(sadovsky): Decide whether to defensively copy passed-in []byte's vs.
+// requiring clients not to modify passed-in []byte's.
+
+// StoreReader reads data from a CRUD-capable storage engine.
+type StoreReader interface {
+ // Get returns the value for the given key. The returned slice may be a
+ // sub-slice of valbuf if valbuf was large enough to hold the entire value.
+ // Otherwise, a newly allocated slice will be returned. It is valid to pass a
+ // nil valbuf.
+ // If the given key is unknown, valbuf is returned unchanged and the function
+ // fails with ErrUnknownKey.
+ Get(key, valbuf []byte) ([]byte, error)
+
+ // Scan returns all rows with keys in range [start, limit). If limit is "",
+ // all rows with keys >= start are included.
+ // Concurrency semantics: It is legal to perform writes concurrently with
+ // Scan. The returned stream may or may not reflect subsequent writes to keys
+ // not yet reached by the stream.
+ Scan(start, limit []byte) Stream
+}
+
+// StoreWriter writes data to a CRUD-capable storage engine.
+type StoreWriter interface {
+ // Put writes the given value for the given key.
+ Put(key, value []byte) error
+
+ // Delete deletes the entry for the given key.
+ // Succeeds (no-op) if the given key is unknown.
+ Delete(key []byte) error
+}
+
+// storeReadWriter combines StoreReader and StoreWriter.
+type storeReadWriter interface {
+ StoreReader
+ StoreWriter
+}
+
+// Store is a CRUD-capable storage engine that supports transactions.
+type Store interface {
+ storeReadWriter
+
+ // Close closes the store.
+ Close() error
+
+ // NewTransaction creates a transaction.
+ // TODO(rogulenko): add transaction options.
+ NewTransaction() Transaction
+
+ // NewSnapshot creates a snapshot.
+ // TODO(rogulenko): add snapshot options.
+ NewSnapshot() Snapshot
+}
+
+// SnapshotOrTransaction represents a Snapshot or a Transaction.
+type SnapshotOrTransaction interface {
+ StoreReader
+
+ // Abort closes the snapshot or transaction.
+ // Any subsequent method calls will fail.
+ // NOTE: this method is also used to distinguish between StoreReader and
+ // SnapshotOrTransaction.
+ Abort() error
+}
+
+// Snapshot is a handle to a particular state in time of a Store.
+//
+// All read operations are executed against a consistent view of Store commit
+// history. Snapshots don't acquire locks and thus don't block transactions.
+type Snapshot interface {
+ SnapshotOrTransaction
+
+ // __snapshotSpec is a utility method to distinguish between Snapshot and
+ // SnapshotOrTransaction. This is a no-op.
+ __snapshotSpec()
+}
+
+// Transaction provides a mechanism for atomic reads and writes. Instead of
+// creating transactions directly via NewTransaction(), clients are encouraged
+// to use the RunInTransaction() helper function, which detects "concurrent
+// transaction" errors and handles retries internally.
+//
+// Default concurrency semantics:
+// - Reads (e.g. gets, scans) inside a transaction operate over a consistent
+// snapshot taken during NewTransaction(), and will see the effects of prior
+// writes performed inside the transaction.
+// - Commit() may fail with ErrConcurrentTransaction, indicating that after
+// NewTransaction() but before Commit(), some concurrent routine wrote to a
+// key that matches a key or row-range read inside this transaction.
+// - Other methods will never fail with error ErrConcurrentTransaction, even if
+// it is known that Commit() will fail with this error.
+//
+// Once a transaction has been committed or aborted, subsequent method calls
+// will fail with no effect.
+type Transaction interface {
+ SnapshotOrTransaction
+ StoreWriter
+
+ // Commit commits the transaction.
+ // Fails if writes from outside this transaction conflict with reads from
+ // within this transaction.
+ Commit() error
+}
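+
+// Minimal usage sketch (illustrative only; st is assumed to be a Store, and
+// real callers should prefer the RunInTransaction() helper mentioned above):
+//
+//	tx := st.NewTransaction()
+//	if err := tx.Put([]byte("k"), []byte("v")); err != nil {
+//		tx.Abort()
+//		return err
+//	}
+//	if err := tx.Commit(); err != nil {
+//		// err may be ErrConcurrentTransaction; retry or surface it.
+//		return err
+//	}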
+
+// Stream is an interface for iterating through a collection of key-value pairs.
+type Stream interface {
+ // Advance stages an element so the client can retrieve it with Key or Value.
+ // Advance returns true iff there is an element to retrieve. The client must
+ // call Advance before calling Key or Value. The client must call Cancel if it
+ // does not iterate through all elements (i.e. until Advance returns false).
+ // Advance may block if an element is not immediately available.
+ Advance() bool
+
+ // Key returns the key of the element that was staged by Advance. The returned
+ // slice may be a sub-slice of keybuf if keybuf was large enough to hold the
+ // entire key. Otherwise, a newly allocated slice will be returned. It is
+ // valid to pass a nil keybuf.
+ // Key may panic if Advance returned false or was not called at all.
+ // Key does not block.
+ Key(keybuf []byte) []byte
+
+ // Value returns the value of the element that was staged by Advance. The
+ // returned slice may be a sub-slice of valbuf if valbuf was large enough to
+ // hold the entire value. Otherwise, a newly allocated slice will be returned.
+ // It is valid to pass a nil valbuf.
+ // Value may panic if Advance returned false or was not called at all.
+ // Value does not block.
+ Value(valbuf []byte) []byte
+
+ // Err returns a non-nil error iff the stream encountered any errors. Err does
+ // not block.
+ Err() error
+
+ // Cancel notifies the stream provider that it can stop producing elements.
+ // The client must call Cancel if it does not iterate through all elements
+ // (i.e. until Advance returns false). Cancel is idempotent and can be called
+ // concurrently with a goroutine that is iterating via Advance/Key/Value.
+ // Cancel causes Advance to subsequently return false. Cancel does not block.
+ Cancel()
+}
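+
+// Typical iteration sketch over a Stream s (illustrative only; a caller that
+// stops early must call Cancel instead of draining the stream):
+//
+//	for s.Advance() {
+//		key := s.Key(nil)
+//		value := s.Value(nil)
+//		// ... use key and value ...
+//	}
+//	if err := s.Err(); err != nil {
+//		// handle the error
+//	}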
diff --git a/services/syncbase/store/model.vdl b/services/syncbase/store/model.vdl
new file mode 100644
index 0000000..6a56e66
--- /dev/null
+++ b/services/syncbase/store/model.vdl
@@ -0,0 +1,14 @@
+// Copyright 2015 The Vanadium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package store
+
+error (
+ // ConcurrentTransaction means that the current transaction failed to commit
+ // because its read set was invalidated by some other transaction.
+ ConcurrentTransaction() {"en":"Concurrent transaction{:_}"}
+
+ // UnknownKey means the given key does not exist in the store.
+ UnknownKey() {"en":"Unknown key{:_}"}
+)
diff --git a/services/syncbase/store/model.vdl.go b/services/syncbase/store/model.vdl.go
new file mode 100644
index 0000000..eec8747
--- /dev/null
+++ b/services/syncbase/store/model.vdl.go
@@ -0,0 +1,38 @@
+// Copyright 2015 The Vanadium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// This file was auto-generated by the vanadium vdl tool.
+// Source: model.vdl
+
+package store
+
+import (
+ // VDL system imports
+ "v.io/v23/context"
+ "v.io/v23/i18n"
+ "v.io/v23/verror"
+)
+
+var (
+ // ConcurrentTransaction means that the current transaction failed to commit
+ // because its read set was invalidated by some other transaction.
+ ErrConcurrentTransaction = verror.Register("v.io/syncbase/x/ref/services/syncbase/store.ConcurrentTransaction", verror.NoRetry, "{1:}{2:} Concurrent transaction{:_}")
+ // UnknownKey means the given key does not exist in the store.
+ ErrUnknownKey = verror.Register("v.io/syncbase/x/ref/services/syncbase/store.UnknownKey", verror.NoRetry, "{1:}{2:} Unknown key{:_}")
+)
+
+func init() {
+ i18n.Cat().SetWithBase(i18n.LangID("en"), i18n.MsgID(ErrConcurrentTransaction.ID), "{1:}{2:} Concurrent transaction{:_}")
+ i18n.Cat().SetWithBase(i18n.LangID("en"), i18n.MsgID(ErrUnknownKey.ID), "{1:}{2:} Unknown key{:_}")
+}
+
+// NewErrConcurrentTransaction returns an error with the ErrConcurrentTransaction ID.
+func NewErrConcurrentTransaction(ctx *context.T) error {
+ return verror.New(ErrConcurrentTransaction, ctx)
+}
+
+// NewErrUnknownKey returns an error with the ErrUnknownKey ID.
+func NewErrUnknownKey(ctx *context.T) error {
+ return verror.New(ErrUnknownKey, ctx)
+}
diff --git a/services/syncbase/store/resource_node.go b/services/syncbase/store/resource_node.go
new file mode 100644
index 0000000..c7228b2
--- /dev/null
+++ b/services/syncbase/store/resource_node.go
@@ -0,0 +1,73 @@
+// Copyright 2015 The Vanadium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package store
+
+import (
+ "sync"
+)
+
+// ResourceNode is a node in a dependency graph. This graph is used to ensure
+// that when a resource is freed, downstream resources are also freed. For
+// example, closing a store closes all downstream transactions, snapshots and
+// streams.
+type ResourceNode struct {
+ mu sync.Mutex
+ parent *ResourceNode
+ children map[*ResourceNode]func()
+}
+
+// NewResourceNode creates a new isolated node in the dependency graph.
+func NewResourceNode() *ResourceNode {
+ return &ResourceNode{
+ children: make(map[*ResourceNode]func()),
+ }
+}
+
+// AddChild adds a parent-child relation between this node and the provided
+// node. The provided function is called to close the child when this node is
+// closed.
+func (r *ResourceNode) AddChild(node *ResourceNode, closefn func()) {
+ r.mu.Lock()
+ defer r.mu.Unlock()
+ if r.children == nil {
+ panic("already closed")
+ }
+ node.parent = r
+ r.children[node] = closefn
+}
+
+// removeChild removes the parent-child relation between this node and the
+// provided node, enabling Go's garbage collector to free the resources
+// associated with the child node if there are no more references to it.
+func (r *ResourceNode) removeChild(node *ResourceNode) {
+ r.mu.Lock()
+ defer r.mu.Unlock()
+ if r.children == nil {
+ // Already closed.
+ return
+ }
+ delete(r.children, node)
+}
+
+// Close closes this node and detaches it from its parent. All of this node's
+// children are closed using close functions provided to AddChild.
+func (r *ResourceNode) Close() {
+ r.mu.Lock()
+ if r.parent != nil {
+ // If there is a node V with parent P and we decide to explicitly close V,
+ // then we need to remove V from P's children list so that we don't close
+ // V again when P is closed.
+ r.parent.removeChild(r)
+ r.parent = nil
+ }
+ // Copy the children map to a local variable so that the removeChild step
+ // executed from children won't affect the map while we iterate through it.
+ children := r.children
+ r.children = nil
+ r.mu.Unlock()
+ for _, closefn := range children {
+ closefn()
+ }
+}
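+
+// Illustrative sketch of the intended wiring (the close function passed to
+// AddChild typically invokes the child resource's own close/abort method):
+//
+//   storeNode := NewResourceNode()
+//   snapNode := NewResourceNode()
+//   storeNode.AddChild(snapNode, func() { /* release the snapshot */ })
+//   ...
+//   storeNode.Close() // also closes snapNode via the provided function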
diff --git a/services/syncbase/store/test/snapshot.go b/services/syncbase/store/test/snapshot.go
new file mode 100644
index 0000000..04dee18
--- /dev/null
+++ b/services/syncbase/store/test/snapshot.go
@@ -0,0 +1,42 @@
+// Copyright 2015 The Vanadium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package test
+
+import (
+ "testing"
+
+ "v.io/syncbase/x/ref/services/syncbase/store"
+ "v.io/v23/verror"
+)
+
+// RunSnapshotTest verifies store.Snapshot operations.
+func RunSnapshotTest(t *testing.T, st store.Store) {
+ key1, value1 := []byte("key1"), []byte("value1")
+ st.Put(key1, value1)
+ snapshot := st.NewSnapshot()
+ key2, value2 := []byte("key2"), []byte("value2")
+ st.Put(key2, value2)
+
+ // Test Get and Scan.
+ verifyGet(t, snapshot, key1, value1)
+ verifyGet(t, snapshot, key2, nil)
+ s := snapshot.Scan([]byte("a"), []byte("z"))
+ verifyAdvance(t, s, key1, value1)
+ verifyAdvance(t, s, nil, nil)
+
+ // Test functions after Abort.
+ if err := snapshot.Abort(); err != nil {
+ t.Fatalf("can't abort the snapshot: %v", err)
+ }
+ expectedErrMsg := store.ErrMsgAbortedSnapshot
+ verifyError(t, snapshot.Abort(), verror.ErrCanceled.ID, expectedErrMsg)
+
+ _, err := snapshot.Get(key1, nil)
+ verifyError(t, err, verror.ErrCanceled.ID, expectedErrMsg)
+
+ s = snapshot.Scan([]byte("a"), []byte("z"))
+ verifyAdvance(t, s, nil, nil)
+ verifyError(t, s.Err(), verror.ErrCanceled.ID, expectedErrMsg)
+}
diff --git a/services/syncbase/store/test/store.go b/services/syncbase/store/test/store.go
new file mode 100644
index 0000000..48022f9
--- /dev/null
+++ b/services/syncbase/store/test/store.go
@@ -0,0 +1,239 @@
+// Copyright 2015 The Vanadium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package test
+
+import (
+ "fmt"
+ "math/rand"
+ "testing"
+
+ "v.io/syncbase/x/ref/services/syncbase/store"
+ "v.io/v23/verror"
+)
+
+type operation int
+
+const (
+ Put operation = 0
+ Delete operation = 1
+)
+
+type testStep struct {
+ op operation
+ key int
+}
+
+func randomBytes(rnd *rand.Rand, length int) []byte {
+ var res []byte
+ for i := 0; i < length; i++ {
+ res = append(res, '0'+byte(rnd.Intn(10)))
+ }
+ return res
+}
+
+// storeState is the in-memory representation of the store state.
+type storeState struct {
+ // We assume that the database has keys [0..size).
+ size int
+ rnd *rand.Rand
+ memtable map[string][]byte
+}
+
+func newStoreState(size int) *storeState {
+ return &storeState{
+ size,
+ rand.New(rand.NewSource(239017)),
+ make(map[string][]byte),
+ }
+}
+
+func (s *storeState) clone() *storeState {
+ other := &storeState{
+ s.size,
+ s.rnd,
+ make(map[string][]byte),
+ }
+ for k, v := range s.memtable {
+ other.memtable[k] = v
+ }
+ return other
+}
+
+// lowerBound returns the smallest key in the store that is not less than the
+// provided key. If there is no such key, returns size.
+func (s *storeState) lowerBound(key int) int {
+ for key < s.size {
+ if _, ok := s.memtable[fmt.Sprintf("%05d", key)]; ok {
+ return key
+ }
+ key++
+ }
+ return key
+}
+
+// verify checks that various read operations on store.Store and memtable return
+// the same results.
+func (s *storeState) verify(t *testing.T, st store.StoreReader) {
+ // Verify Get().
+ for i := 0; i < s.size; i++ {
+ keystr := fmt.Sprintf("%05d", i)
+ answer, ok := s.memtable[keystr]
+ if ok {
+ verifyGet(t, st, []byte(keystr), answer)
+ } else {
+ verifyGet(t, st, []byte(keystr), nil)
+ }
+ }
+ // Verify 10 random Scan() calls.
+ for i := 0; i < 10; i++ {
+ start, limit := s.rnd.Intn(s.size), s.rnd.Intn(s.size)
+ if start > limit {
+ start, limit = limit, start
+ }
+ limit++
+ stream := st.Scan([]byte(fmt.Sprintf("%05d", start)), []byte(fmt.Sprintf("%05d", limit)))
+ for start = s.lowerBound(start); start < limit; start = s.lowerBound(start + 1) {
+ keystr := fmt.Sprintf("%05d", start)
+ verifyAdvance(t, stream, []byte(keystr), s.memtable[keystr])
+ }
+ verifyAdvance(t, stream, nil, nil)
+ }
+}
+
+// runReadWriteTest verifies read/write/snapshot operations.
+func runReadWriteTest(t *testing.T, st store.Store, size int, steps []testStep) {
+ s := newStoreState(size)
+ // We verify database state no more than ~100 times to prevent the test from
+ // being slow.
+ frequency := (len(steps) + 99) / 100
+ var states []*storeState
+ var snapshots []store.Snapshot
+ for i, step := range steps {
+ if step.key < 0 || step.key >= s.size {
+ t.Fatalf("invalid test step %v", step)
+ }
+ key := fmt.Sprintf("%05d", step.key)
+ switch step.op {
+ case Put:
+ value := randomBytes(s.rnd, 100)
+ s.memtable[key] = value
+ st.Put([]byte(key), value)
+ case Delete:
+ if _, ok := s.memtable[key]; ok {
+ delete(s.memtable, key)
+ st.Delete([]byte(key))
+ }
+ default:
+ t.Fatalf("invalid test step %v", step)
+ }
+ if i%frequency == 0 {
+ s.verify(t, st)
+ states = append(states, s.clone())
+ snapshots = append(snapshots, st.NewSnapshot())
+ }
+ }
+ s.verify(t, st)
+ for i := 0; i < len(states); i++ {
+ states[i].verify(t, snapshots[i])
+ snapshots[i].Abort()
+ }
+}
+
+// RunReadWriteBasicTest runs a basic test that verifies reads, writes and
+// snapshots.
+func RunReadWriteBasicTest(t *testing.T, st store.Store) {
+ runReadWriteTest(t, st, 3, []testStep{
+ testStep{Put, 1},
+ testStep{Put, 2},
+ testStep{Delete, 1},
+ testStep{Put, 1},
+ testStep{Put, 2},
+ })
+}
+
+// RunReadWriteRandomTest runs a randomly generated test that verifies reads,
+// writes and snapshots.
+func RunReadWriteRandomTest(t *testing.T, st store.Store) {
+ rnd := rand.New(rand.NewSource(239017))
+ var steps []testStep
+ size := 50
+ for i := 0; i < 10000; i++ {
+ steps = append(steps, testStep{operation(rnd.Intn(2)), rnd.Intn(size)})
+ }
+ runReadWriteTest(t, st, size, steps)
+}
+
+// RunStoreStateTest verifies operations that modify the state of a store.Store.
+func RunStoreStateTest(t *testing.T, st store.Store) {
+ key1, value1 := []byte("key1"), []byte("value1")
+ st.Put(key1, value1)
+ key2 := []byte("key2")
+
+ // Test Get and Scan.
+ verifyGet(t, st, key1, value1)
+ verifyGet(t, st, key2, nil)
+ s := st.Scan([]byte("a"), []byte("z"))
+ verifyAdvance(t, s, key1, value1)
+ verifyAdvance(t, s, nil, nil)
+
+ // Test functions after Close.
+ if err := st.Close(); err != nil {
+ t.Fatalf("can't close the store: %v", err)
+ }
+ expectedErrMsg := store.ErrMsgClosedStore
+ verifyError(t, st.Close(), verror.ErrCanceled.ID, expectedErrMsg)
+
+ s = st.Scan([]byte("a"), []byte("z"))
+ verifyAdvance(t, s, nil, nil)
+ verifyError(t, s.Err(), verror.ErrCanceled.ID, expectedErrMsg)
+
+ snapshot := st.NewSnapshot()
+ _, err := snapshot.Get(key1, nil)
+ verifyError(t, err, verror.ErrCanceled.ID, expectedErrMsg)
+
+ tx := st.NewTransaction()
+ _, err = tx.Get(key1, nil)
+ verifyError(t, err, verror.ErrCanceled.ID, expectedErrMsg)
+
+ _, err = st.Get(key1, nil)
+ verifyError(t, err, verror.ErrCanceled.ID, expectedErrMsg)
+ verifyError(t, st.Put(key1, value1), verror.ErrCanceled.ID, expectedErrMsg)
+ verifyError(t, st.Delete(key1), verror.ErrCanceled.ID, expectedErrMsg)
+}
+
+// RunCloseTest verifies that child objects are closed when the parent object is
+// closed.
+func RunCloseTest(t *testing.T, st store.Store) {
+ key1, value1 := []byte("key1"), []byte("value1")
+ st.Put(key1, value1)
+
+ var streams []store.Stream
+ var snapshots []store.Snapshot
+ var transactions []store.Transaction
+ for i := 0; i < 10; i++ {
+ streams = append(streams, st.Scan([]byte("a"), []byte("z")))
+ snapshot := st.NewSnapshot()
+ tx := st.NewTransaction()
+ for j := 0; j < 10; j++ {
+ streams = append(streams, snapshot.Scan([]byte("a"), []byte("z")))
+ streams = append(streams, tx.Scan([]byte("a"), []byte("z")))
+ }
+ snapshots = append(snapshots, snapshot)
+ transactions = append(transactions, tx)
+ }
+ st.Close()
+
+ for _, stream := range streams {
+ verifyError(t, stream.Err(), verror.ErrCanceled.ID, store.ErrMsgCanceledStream)
+ }
+ for _, snapshot := range snapshots {
+ _, err := snapshot.Get(key1, nil)
+ verifyError(t, err, verror.ErrCanceled.ID, store.ErrMsgAbortedSnapshot)
+ }
+ for _, tx := range transactions {
+ _, err := tx.Get(key1, nil)
+ verifyError(t, err, verror.ErrCanceled.ID, store.ErrMsgAbortedTxn)
+ }
+}
diff --git a/services/syncbase/store/test/stream.go b/services/syncbase/store/test/stream.go
new file mode 100644
index 0000000..e058fc0
--- /dev/null
+++ b/services/syncbase/store/test/stream.go
@@ -0,0 +1,53 @@
+// Copyright 2015 The Vanadium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package test
+
+import (
+ "bytes"
+ "testing"
+
+ "v.io/syncbase/x/ref/services/syncbase/store"
+ "v.io/v23/verror"
+)
+
+// RunStreamTest verifies store.Stream operations.
+func RunStreamTest(t *testing.T, st store.Store) {
+ // Test that advancing or canceling a stream that has reached its end
+ // doesn't cause a panic.
+ s := st.Scan([]byte("a"), []byte("z"))
+ verifyAdvance(t, s, nil, nil)
+ verifyAdvance(t, s, nil, nil)
+ if s.Err() != nil {
+ t.Fatalf("unexpected error: %v", s.Err())
+ }
+ s.Cancel()
+ if s.Err() != nil {
+ t.Fatalf("unexpected error: %v", s.Err())
+ }
+
+ key1, value1 := []byte("key1"), []byte("value1")
+ st.Put(key1, value1)
+ key2, value2 := []byte("key2"), []byte("value2")
+ st.Put(key2, value2)
+ key3, value3 := []byte("key3"), []byte("value3")
+ st.Put(key3, value3)
+ s = st.Scan([]byte("a"), []byte("z"))
+ verifyAdvance(t, s, key1, value1)
+ if !s.Advance() {
+ t.Fatalf("can't advance the stream")
+ }
+ s.Cancel()
+ for i := 0; i < 2; i++ {
+ var key, value []byte
+ if key = s.Key(key); !bytes.Equal(key, key2) {
+ t.Fatalf("unexpected key: got %q, want %q", key, key2)
+ }
+ if value = s.Value(value); !bytes.Equal(value, value2) {
+ t.Fatalf("unexpected value: got %q, want %q", value, value2)
+ }
+ }
+ verifyAdvance(t, s, nil, nil)
+ verifyError(t, s.Err(), verror.ErrCanceled.ID, store.ErrMsgCanceledStream)
+}
diff --git a/services/syncbase/store/test/transaction.go b/services/syncbase/store/test/transaction.go
new file mode 100644
index 0000000..6cf26e8
--- /dev/null
+++ b/services/syncbase/store/test/transaction.go
@@ -0,0 +1,216 @@
+// Copyright 2015 The Vanadium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package test
+
+import (
+ "bytes"
+ "fmt"
+ "math/rand"
+ "strconv"
+ "sync"
+ "testing"
+
+ "v.io/syncbase/x/ref/services/syncbase/store"
+ "v.io/v23/verror"
+)
+
+// RunTransactionStateTest verifies operations that modify the state of a
+// store.Transaction.
+func RunTransactionStateTest(t *testing.T, st store.Store) {
+ finalizeFns := []func(t *testing.T, tx store.Transaction) (verror.ID, string){
+ func(t *testing.T, tx store.Transaction) (verror.ID, string) {
+ if err := tx.Abort(); err != nil {
+ Fatalf(t, "can't abort the transaction: %v", err)
+ }
+ return verror.ErrCanceled.ID, store.ErrMsgAbortedTxn
+ },
+ func(t *testing.T, tx store.Transaction) (verror.ID, string) {
+ if err := tx.Commit(); err != nil {
+ Fatalf(t, "can't commit the transaction: %v", err)
+ }
+ return verror.ErrBadState.ID, store.ErrMsgCommittedTxn
+ },
+ }
+ for _, fn := range finalizeFns {
+ key1, value1 := []byte("key1"), []byte("value1")
+ st.Put(key1, value1)
+ key2 := []byte("key2")
+ tx := st.NewTransaction()
+
+ // Test Get and Scan.
+ verifyGet(t, tx, key1, value1)
+ verifyGet(t, tx, key2, nil)
+ s := tx.Scan([]byte("a"), []byte("z"))
+ verifyAdvance(t, s, key1, value1)
+ verifyAdvance(t, s, nil, nil)
+
+ // Test Put then Get & Scan inside the transaction.
+ key3, value3 := []byte("key3"), []byte("value3")
+ tx.Put(key3, value3)
+ verifyGet(t, tx, key3, value3)
+ s = tx.Scan([]byte("a"), []byte("z"))
+ verifyAdvance(t, s, key1, value1)
+ verifyAdvance(t, s, key3, value3)
+ verifyAdvance(t, s, nil, nil)
+
+ // Test Delete of old key then Get inside the transaction.
+ tx.Delete(key1)
+ verifyGet(t, tx, key1, nil)
+
+ // Test Delete of new key then Get inside the transaction.
+ tx.Delete(key3)
+ verifyGet(t, tx, key3, nil)
+
+ // Test functions after finalize.
+ expectedID, expectedErrMsg := fn(t, tx)
+ verifyError(t, tx.Abort(), expectedID, expectedErrMsg)
+ verifyError(t, tx.Commit(), expectedID, expectedErrMsg)
+
+ s = tx.Scan([]byte("a"), []byte("z"))
+ verifyAdvance(t, s, nil, nil)
+ verifyError(t, s.Err(), expectedID, expectedErrMsg)
+
+ _, err := tx.Get(key1, nil)
+ verifyError(t, err, expectedID, expectedErrMsg)
+ verifyError(t, tx.Put(key1, value1), expectedID, expectedErrMsg)
+ verifyError(t, tx.Delete(key1), expectedID, expectedErrMsg)
+ }
+}
+
+// RunConcurrentTransactionsTest verifies that concurrent transactions
+// invalidate each other as expected.
+func RunConcurrentTransactionsTest(t *testing.T, st store.Store) {
+ st.Put([]byte("a"), []byte("0"))
+ st.Put([]byte("b"), []byte("0"))
+ st.Put([]byte("c"), []byte("0"))
+ // Test that a read via Get is invalidated by a concurrent committed write.
+ txA := st.NewTransaction()
+ txB := st.NewTransaction()
+ txA.Get([]byte("a"), nil)
+ txB.Get([]byte("a"), nil)
+ txA.Put([]byte("a"), []byte("a"))
+ txB.Put([]byte("a"), []byte("b"))
+ if err := txA.Commit(); err != nil {
+ t.Fatalf("can't commit the transaction: %v", err)
+ }
+ if err := txB.Commit(); verror.ErrorID(err) != store.ErrConcurrentTransaction.ID {
+ t.Fatalf("unexpected commit error: %v", err)
+ }
+ if value, _ := st.Get([]byte("a"), nil); !bytes.Equal(value, []byte("a")) {
+ t.Fatalf("unexpected value: got %q, want %q", value, "a")
+ }
+ // Test that a read via Scan is invalidated by a concurrent committed write.
+ txA = st.NewTransaction()
+ txB = st.NewTransaction()
+ txA.Scan([]byte("a"), []byte("z"))
+ txB.Scan([]byte("a"), []byte("z"))
+ txA.Put([]byte("aa"), []byte("a"))
+ txB.Put([]byte("bb"), []byte("b"))
+ if err := txA.Commit(); err != nil {
+ t.Fatalf("can't commit the transaction: %v", err)
+ }
+ if err := txB.Commit(); verror.ErrorID(err) != store.ErrConcurrentTransaction.ID {
+ t.Fatalf("unexpected commit error: %v", err)
+ }
+ if value, _ := st.Get([]byte("aa"), nil); !bytes.Equal(value, []byte("a")) {
+ t.Fatalf("unexpected value: got %q, want %q", value, "a")
+ }
+ // Test that both transactions commit when neither writes a key read by the other.
+ txA = st.NewTransaction()
+ txB = st.NewTransaction()
+ txA.Scan([]byte("a"), []byte("b"))
+ txB.Scan([]byte("b"), []byte("c"))
+ txA.Get([]byte("c"), nil)
+ txB.Get([]byte("c"), nil)
+ txA.Put([]byte("a"), []byte("a"))
+ txB.Put([]byte("b"), []byte("b"))
+ if err := txA.Commit(); err != nil {
+ t.Fatalf("can't commit the transaction: %v", err)
+ }
+ if err := txB.Commit(); err != nil {
+ t.Fatalf("can't commit the transaction: %v", err)
+ }
+}
+
+// RunTransactionsWithGetTest tests transactions that use Put and Get
+// operations.
+// NOTE: consider setting GOMAXPROCS to something greater than 1.
+func RunTransactionsWithGetTest(t *testing.T, st store.Store) {
+ // Invariant: the value mapped to key n is the sum of the values mapped to
+ // keys 0..n-1. Each of the k transactions picks m distinct random keys in
+ // 0..n-1, adds 1 to each of their values, and adds m to the value mapped to
+ // n. The correctness of the sums is checked after all transactions have
+ // committed.
+ n, m, k := 10, 3, 100
+ for i := 0; i <= n; i++ {
+ if err := st.Put([]byte(fmt.Sprintf("%05d", i)), []byte{'0'}); err != nil {
+ t.Fatalf("can't write to database")
+ }
+ }
+ var wg sync.WaitGroup
+ wg.Add(k)
+ for i := 0; i < k; i++ {
+ go func(idx int) {
+ rnd := rand.New(rand.NewSource(239017 * int64(idx)))
+ perm := rnd.Perm(n)
+ if err := store.RunInTransaction(st, func(tx store.Transaction) error {
+ for j := 0; j <= m; j++ {
+ var keystr string
+ if j < m {
+ keystr = fmt.Sprintf("%05d", perm[j])
+ } else {
+ keystr = fmt.Sprintf("%05d", n)
+ }
+ key := []byte(keystr)
+ val, err := tx.Get(key, nil)
+ if err != nil {
+ return fmt.Errorf("can't get key %q: %v", key, err)
+ }
+ intValue, err := strconv.ParseInt(string(val), 10, 64)
+ if err != nil {
+ return fmt.Errorf("can't parse int from %q: %v", val, err)
+ }
+ var newValue int64
+ if j < m {
+ newValue = intValue + 1
+ } else {
+ newValue = intValue + int64(m)
+ }
+ if err := tx.Put(key, []byte(fmt.Sprintf("%d", newValue))); err != nil {
+ return fmt.Errorf("can't put {%q: %v}: %v", key, newValue, err)
+ }
+ }
+ return nil
+ }); err != nil {
+ panic(fmt.Errorf("can't commit transaction: %v", err))
+ }
+ wg.Done()
+ }(i)
+ }
+ wg.Wait()
+ var sum int64
+ for j := 0; j <= n; j++ {
+ keystr := fmt.Sprintf("%05d", j)
+ key := []byte(keystr)
+ val, err := st.Get(key, nil)
+ if err != nil {
+ t.Fatalf("can't get key %q: %v", key, err)
+ }
+ intValue, err := strconv.ParseInt(string(val), 10, 64)
+ if err != nil {
+ t.Fatalf("can't parse int from %q: %v", val, err)
+ }
+ if j < n {
+ sum += intValue
+ } else {
+ if intValue != int64(m*k) {
+ t.Fatalf("invalid sum value in the database: got %d, want %d", intValue, m*k)
+ }
+ }
+ }
+ if sum != int64(m*k) {
+ t.Fatalf("invalid sum of values in the database: got %d, want %d", sum, m*k)
+ }
+}
diff --git a/services/syncbase/store/test/util.go b/services/syncbase/store/test/util.go
new file mode 100644
index 0000000..55b886a
--- /dev/null
+++ b/services/syncbase/store/test/util.go
@@ -0,0 +1,79 @@
+// Copyright 2015 The Vanadium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package test
+
+import (
+ "bytes"
+ "runtime/debug"
+ "strings"
+ "testing"
+
+ "v.io/syncbase/x/ref/services/syncbase/store"
+ "v.io/v23/verror"
+)
+
+// verifyGet verifies that st.Get(key) == value. If value is nil, verifies that
+// the key is not found.
+func verifyGet(t *testing.T, st store.StoreReader, key, value []byte) {
+ valbuf := []byte("tmp")
+ var err error
+ if value != nil {
+ if valbuf, err = st.Get(key, valbuf); err != nil {
+ Fatalf(t, "can't get value of %q: %v", key, err)
+ }
+ if !bytes.Equal(valbuf, value) {
+ Fatalf(t, "unexpected value: got %q, want %q", valbuf, value)
+ }
+ } else {
+ valbuf, err = st.Get(key, valbuf)
+ verifyError(t, err, store.ErrUnknownKey.ID, string(key))
+ valcopy := []byte("tmp")
+ // Verify that valbuf is not modified if the key is not found.
+ if !bytes.Equal(valbuf, valcopy) {
+ Fatalf(t, "unexpected value: got %q, want %q", valbuf, valcopy)
+ }
+ }
+}
+
+// verifyAdvance verifies the next key/value pair of the provided stream.
+// If key is nil, verifies that the next Advance call on the stream returns false.
+func verifyAdvance(t *testing.T, s store.Stream, key, value []byte) {
+ ok := s.Advance()
+ if key == nil {
+ if ok {
+ Fatalf(t, "advance returned true unexpectedly")
+ }
+ return
+ }
+ if !ok {
+ Fatalf(t, "can't advance the stream")
+ }
+ var k, v []byte
+ for i := 0; i < 2; i++ {
+ if k = s.Key(k); !bytes.Equal(k, key) {
+ Fatalf(t, "unexpected key: got %q, want %q", k, key)
+ }
+ if v = s.Value(v); !bytes.Equal(v, value) {
+ Fatalf(t, "unexpected value: got %q, want %q", v, value)
+ }
+ }
+}
+
+// verifyError verifies that the given error has the given errorID and that the
+// error string contains the given substr. Pass an empty substr to skip the
+// substr check.
+func verifyError(t *testing.T, err error, errorID verror.ID, substr string) {
+ if got := verror.ErrorID(err); got != errorID {
+ Fatalf(t, "unexpected error ID: got %v, want %v", got, errorID)
+ }
+ if !strings.Contains(err.Error(), substr) {
+ Fatalf(t, "unexpected error: %q not found in %q", substr, err)
+ }
+}
+
+func Fatalf(t *testing.T, format string, args ...interface{}) {
+ debug.PrintStack()
+ t.Fatalf(format, args...)
+}
diff --git a/services/syncbase/store/transactions/manager.go b/services/syncbase/store/transactions/manager.go
new file mode 100644
index 0000000..254812f
--- /dev/null
+++ b/services/syncbase/store/transactions/manager.go
@@ -0,0 +1,194 @@
+// Copyright 2015 The Vanadium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package transactions
+
+import (
+ "container/list"
+ "sync"
+
+ "v.io/syncbase/x/ref/services/syncbase/store"
+ "v.io/v23/verror"
+)
+
+// BatchStore is a CRUD-capable storage engine that supports atomic batch
+// writes. BatchStore doesn't support transactions.
+// This interface is a Go version of the C++ LevelDB interface. It serves as
+// an intermediate layer between store.Store and the LevelDB API.
+type BatchStore interface {
+ store.StoreReader
+
+ // WriteBatch atomically writes a list of write operations to the database.
+ WriteBatch(batch ...WriteOp) error
+
+ // Close closes the store.
+ Close() error
+
+ // NewSnapshot creates a snapshot.
+ NewSnapshot() store.Snapshot
+}
+
+// manager handles transaction-related operations of the store.
+type manager struct {
+ BatchStore
+ // mu protects the variables below, and is also held during transaction
+ // commits. It must always be acquired before the store-level lock.
+ mu sync.Mutex
+ // events is a queue of create/commit transaction events.
+ events *list.List
+ seq uint64
+ // txTable is a set of keys written by recent transactions. This set
+ // includes all write sets of transactions committed after the oldest living
+ // (in-flight) transaction.
+ txTable *trie
+}
+
+// committedTransaction is only used as an element of manager.events.
+type committedTransaction struct {
+ seq uint64
+ batch [][]byte
+}
+
+// Wrap wraps the BatchStore with transaction functionality.
+func Wrap(bs BatchStore) store.Store {
+ return &manager{
+ BatchStore: bs,
+ events: list.New(),
+ txTable: newTrie(),
+ }
+}
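+
+// Illustrative sketch (bs is an assumed BatchStore implementation, e.g. a
+// LevelDB-backed engine; openBatchStore is hypothetical):
+//
+//   bs := openBatchStore()
+//   st := Wrap(bs) // st implements store.Store with transaction support
+//   tx := st.NewTransaction()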
+
+// Close implements the store.Store interface.
+func (mg *manager) Close() error {
+ mg.mu.Lock()
+ defer mg.mu.Unlock()
+ if mg.txTable == nil {
+ return verror.New(verror.ErrCanceled, nil, store.ErrMsgClosedStore)
+ }
+ mg.BatchStore.Close()
+ for event := mg.events.Front(); event != nil; event = event.Next() {
+ if tx, ok := event.Value.(*transaction); ok {
+ // tx.Abort() internally removes tx from the mg.events list under
+ // the mg.mu lock, which is already held here. To break that deadlock,
+ // we set tx.event to nil so that the removal becomes a no-op.
+ tx.mu.Lock()
+ tx.event = nil
+ tx.mu.Unlock()
+ tx.Abort()
+ }
+ }
+ mg.events = nil
+ mg.txTable = nil
+ return nil
+}
+
+// NewTransaction implements the store.Store interface.
+func (mg *manager) NewTransaction() store.Transaction {
+ mg.mu.Lock()
+ defer mg.mu.Unlock()
+ if mg.txTable == nil {
+ return &store.InvalidTransaction{
+ Error: verror.New(verror.ErrCanceled, nil, store.ErrMsgClosedStore),
+ }
+ }
+ return newTransaction(mg)
+}
+
+// Put implements the store.StoreWriter interface.
+func (mg *manager) Put(key, value []byte) error {
+ mg.mu.Lock()
+ defer mg.mu.Unlock()
+ if mg.txTable == nil {
+ return verror.New(verror.ErrCanceled, nil, store.ErrMsgClosedStore)
+ }
+ write := WriteOp{
+ T: PutOp,
+ Key: key,
+ Value: value,
+ }
+ if err := mg.BatchStore.WriteBatch(write); err != nil {
+ return err
+ }
+ mg.trackBatch(write)
+ return nil
+}
+
+// Delete implements the store.StoreWriter interface.
+func (mg *manager) Delete(key []byte) error {
+ mg.mu.Lock()
+ defer mg.mu.Unlock()
+ if mg.txTable == nil {
+ return verror.New(verror.ErrCanceled, nil, store.ErrMsgClosedStore)
+ }
+ write := WriteOp{
+ T: DeleteOp,
+ Key: key,
+ }
+ if err := mg.BatchStore.WriteBatch(write); err != nil {
+ return err
+ }
+ mg.trackBatch(write)
+ return nil
+}
+
+// trackBatch writes the batch to txTable and adds a commit event to
+// the events queue.
+// Assumes mu is held.
+func (mg *manager) trackBatch(batch ...WriteOp) {
+ if mg.events.Len() == 0 {
+ return
+ }
+ // TODO(rogulenko): do GC.
+ mg.seq++
+ var keys [][]byte
+ for _, write := range batch {
+ mg.txTable.add(write.Key, mg.seq)
+ keys = append(keys, write.Key)
+ }
+ tx := &committedTransaction{
+ seq: mg.seq,
+ batch: keys,
+ }
+ mg.events.PushBack(tx)
+}
+
+//////////////////////////////////////////////////////////////
+// Read and Write types used for storing transaction reads
+// and uncommitted writes.
+
+type WriteType int
+
+const (
+ PutOp WriteType = iota
+ DeleteOp
+)
+
+type WriteOp struct {
+ T WriteType
+ Key []byte
+ Value []byte
+}
+
+type scanRange struct {
+ Start, Limit []byte
+}
+
+type readSet struct {
+ Keys [][]byte
+ Ranges []scanRange
+}
+
+type writeOpArray []WriteOp
+
+func (a writeOpArray) Len() int {
+ return len(a)
+}
+
+func (a writeOpArray) Less(i, j int) bool {
+ return string(a[i].Key) < string(a[j].Key)
+}
+
+func (a writeOpArray) Swap(i, j int) {
+ a[i], a[j] = a[j], a[i]
+}
diff --git a/services/syncbase/store/transactions/merged_stream.go b/services/syncbase/store/transactions/merged_stream.go
new file mode 100644
index 0000000..4ab10be
--- /dev/null
+++ b/services/syncbase/store/transactions/merged_stream.go
@@ -0,0 +1,149 @@
+// Copyright 2015 The Vanadium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package transactions
+
+import (
+ "sort"
+
+ "v.io/syncbase/x/ref/services/syncbase/store"
+)
+
+//////////////////////////////////////////////////////////////
+// mergedStream implementation of Stream
+//
+// This implementation of Stream must overlay the transaction's own
+// uncommitted writes, performed after its snapshot was taken, on top of
+// the snapshot stream.
+//
+// The mergeWritesWithStream() function requires the uncommitted
+// changes to be passed in as an array of WriteOp.
+
+// mergeWritesWithStream returns a new stream that merges a snapshot stream
+// with an array of uncommitted write operations.
+func mergeWritesWithStream(sn store.Snapshot, w []WriteOp, start, limit []byte) store.Stream {
+ // Collect writes with the range specified, then sort them.
+ // Note: Writes could contain more than one write for a given key.
+ // The last write is the current state.
+ writesMap := map[string]WriteOp{}
+ for _, write := range w {
+ if string(write.Key) >= string(start) && (string(limit) == "" || string(write.Key) < string(limit)) {
+ writesMap[string(write.Key)] = write
+ }
+ }
+ var writesArray writeOpArray
+ for _, writeOp := range writesMap {
+ writesArray = append(writesArray, writeOp)
+ }
+ sort.Sort(writesArray)
+ return &mergedStream{
+ snapshotStream: sn.Scan(start, limit),
+ writesArray: writesArray,
+ writesCursor: 0,
+ unusedSnapshotValue: false,
+ snapshotStreamEOF: false,
+ hasValue: false,
+ }
+}
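+
+// Worked example (sketch): if the snapshot contains {a:1, b:2, c:3} and the
+// uncommitted writes are [Put(b,20), Delete(c), Put(d,4)], then scanning
+// ["a","z") through the merged stream yields a:1, b:20, d:4. The put on "b"
+// overrides the snapshot value, the delete hides "c", and "d" is added.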
+
+type mergedStream struct {
+ snapshotStream store.Stream
+ writesArray []WriteOp
+ writesCursor int
+ unusedSnapshotValue bool
+ snapshotStreamEOF bool
+ hasValue bool // if true, Key() and Value() can be called
+ key []byte
+ value []byte
+}
+
+// writesArrayEOF reports whether the cursor has moved past the end of
+// writesArray.
+func (s *mergedStream) writesArrayEOF() bool {
+ return s.writesCursor >= len(s.writesArray)
+}
+
+// stageSnapshotKeyValue ensures a key-value pair from the snapshot stream is
+// staged: if none is on deck, it calls Advance on the snapshot stream and sets
+// unusedSnapshotValue. If EOF is encountered, it sets snapshotStreamEOF; if an
+// error is encountered, it returns it.
+func (s *mergedStream) stageSnapshotKeyValue() error {
+ if !s.snapshotStreamEOF && !s.unusedSnapshotValue {
+ if !s.snapshotStream.Advance() {
+ s.snapshotStreamEOF = true
+ if err := s.snapshotStream.Err(); err != nil {
+ return err
+ }
+ }
+ s.unusedSnapshotValue = true
+ }
+ return nil
+}
+
+// pickKeyValue stages the next key-value pair from either the snapshot stream
+// or the uncommitted writes array and reports whether a pair was staged; an
+// uncommitted write with the same key as the current snapshot entry overrides
+// that entry. If the picked write is a delete, it is skipped (the cursor
+// advances, and the snapshot entry with the same key, if any, is consumed)
+// and false is returned so that Advance can retry.
+func (s *mergedStream) pickKeyValue() bool {
+ if !s.snapshotStreamEOF && (s.writesArrayEOF() || string(s.writesArray[s.writesCursor].Key) > string(s.snapshotStream.Key(nil))) {
+ s.key = s.snapshotStream.Key(s.key)
+ s.value = s.snapshotStream.Value(s.value)
+ s.unusedSnapshotValue = false
+ return true
+ }
+ if !s.snapshotStreamEOF && string(s.writesArray[s.writesCursor].Key) == string(s.snapshotStream.Key(nil)) {
+ s.unusedSnapshotValue = false
+ }
+ if s.writesArrayEOF() || s.writesArray[s.writesCursor].T == DeleteOp {
+ s.writesCursor++
+ return false
+ }
+ s.key = store.CopyBytes(s.key, s.writesArray[s.writesCursor].Key)
+ s.value = store.CopyBytes(s.value, s.writesArray[s.writesCursor].Value)
+ s.writesCursor++
+ return true
+}
+
+func (s *mergedStream) Advance() bool {
+ s.hasValue = false
+ for true {
+ if err := s.stageSnapshotKeyValue(); err != nil {
+ return false
+ }
+ if s.snapshotStreamEOF && s.writesArrayEOF() {
+ return false
+ }
+ if s.pickKeyValue() {
+ s.hasValue = true
+ return true
+ }
+ }
+ return false // compiler insists on this line
+}
+
+// Key implements the Stream interface.
+func (s *mergedStream) Key(keybuf []byte) []byte {
+ if !s.hasValue {
+ panic("nothing staged")
+ }
+ return store.CopyBytes(keybuf, s.key)
+}
+
+// Value implements the Stream interface.
+func (s *mergedStream) Value(valbuf []byte) []byte {
+ if !s.hasValue {
+ panic("nothing staged")
+ }
+ return store.CopyBytes(valbuf, s.value)
+}
+
+// Err implements the Stream interface.
+func (s *mergedStream) Err() error {
+ return s.snapshotStream.Err()
+}
+
+// Cancel implements the Stream interface.
+func (s *mergedStream) Cancel() {
+ s.snapshotStream.Cancel()
+ s.hasValue = false
+ s.snapshotStreamEOF = true
+ s.writesCursor = len(s.writesArray)
+}
diff --git a/services/syncbase/store/transactions/transaction.go b/services/syncbase/store/transactions/transaction.go
new file mode 100644
index 0000000..dcf7569
--- /dev/null
+++ b/services/syncbase/store/transactions/transaction.go
@@ -0,0 +1,193 @@
+// Copyright 2015 The Vanadium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package transactions
+
+import (
+ "bytes"
+ "container/list"
+ "sync"
+
+ "v.io/syncbase/x/ref/services/syncbase/store"
+ "v.io/v23/verror"
+ "v.io/x/lib/vlog"
+)
+
+// transaction is a wrapper on top of a BatchStore and a store.Snapshot that
+// implements the store.Transaction interface.
+type transaction struct {
+ // mu protects the state of the transaction.
+ mu sync.Mutex
+ mg *manager
+ seq uint64
+ event *list.Element // pointer to element of mg.events
+ snapshot store.Snapshot
+ reads readSet
+ writes []WriteOp
+ err error
+}
+
+var _ store.Transaction = (*transaction)(nil)
+
+func newTransaction(mg *manager) *transaction {
+ tx := &transaction{
+ mg: mg,
+ snapshot: mg.BatchStore.NewSnapshot(),
+ seq: mg.seq,
+ }
+ tx.event = mg.events.PushFront(tx)
+ return tx
+}
+
+// close removes this transaction from the mg.events queue and aborts
+// the underlying snapshot.
+// Assumes mu is held.
+func (tx *transaction) close() {
+ tx.removeEvent()
+ tx.snapshot.Abort()
+}
+
+// removeEvent removes this transaction from the mg.events queue.
+// Assumes mu is held.
+func (tx *transaction) removeEvent() {
+ // tx.event may already be nil, e.g. if the transaction was committed
+ // (Commit() explicitly calls removeEvent) or if the store was closed.
+ if tx.event == nil {
+ return
+ }
+ tx.mg.mu.Lock()
+ tx.mg.events.Remove(tx.event)
+ tx.mg.mu.Unlock()
+ tx.event = nil
+}
+
+// Get implements the store.StoreReader interface.
+func (tx *transaction) Get(key, valbuf []byte) ([]byte, error) {
+ tx.mu.Lock()
+ defer tx.mu.Unlock()
+ if tx.err != nil {
+ return valbuf, store.ConvertError(tx.err)
+ }
+ tx.reads.Keys = append(tx.reads.Keys, key)
+
+ // Reflect the state of the transaction: the "writes" (puts and
+ // deletes) override the values in the transaction snapshot.
+ // Find the last "writes" entry for this key, if one exists.
+ // Note: this step could be optimized by using maps (puts and
+ // deletes) instead of an array.
+ for i := len(tx.writes) - 1; i >= 0; i-- {
+ op := &tx.writes[i]
+ if bytes.Equal(op.Key, key) {
+ if op.T == PutOp {
+ return op.Value, nil
+ }
+ return valbuf, verror.New(store.ErrUnknownKey, nil, string(key))
+ }
+ }
+
+ return tx.snapshot.Get(key, valbuf)
+}
+
+// Scan implements the store.StoreReader interface.
+func (tx *transaction) Scan(start, limit []byte) store.Stream {
+ tx.mu.Lock()
+ defer tx.mu.Unlock()
+ if tx.err != nil {
+ return &store.InvalidStream{Error: tx.err}
+ }
+
+ tx.reads.Ranges = append(tx.reads.Ranges, scanRange{
+ Start: start,
+ Limit: limit,
+ })
+
+ // Return a stream that merges the snapshot stream with the uncommitted changes.
+ return mergeWritesWithStream(tx.snapshot, tx.writes, start, limit)
+}
+
+// Put implements the store.StoreWriter interface.
+func (tx *transaction) Put(key, value []byte) error {
+ tx.mu.Lock()
+ defer tx.mu.Unlock()
+ if tx.err != nil {
+ return store.ConvertError(tx.err)
+ }
+ tx.writes = append(tx.writes, WriteOp{
+ T: PutOp,
+ Key: key,
+ Value: value,
+ })
+ return nil
+}
+
+// Delete implements the store.StoreWriter interface.
+func (tx *transaction) Delete(key []byte) error {
+ tx.mu.Lock()
+ defer tx.mu.Unlock()
+ if tx.err != nil {
+ return store.ConvertError(tx.err)
+ }
+ tx.writes = append(tx.writes, WriteOp{
+ T: DeleteOp,
+ Key: key,
+ })
+ return nil
+}
+
+// validateReadSet returns true iff the read set of this transaction has not
+// been invalidated by other transactions.
+// Assumes tx.mg.mu is held.
+func (tx *transaction) validateReadSet() bool {
+ for _, key := range tx.reads.Keys {
+ if tx.mg.txTable.get(key) > tx.seq {
+ vlog.VI(3).Infof("key conflict: %q", key)
+ return false
+ }
+ }
+ for _, r := range tx.reads.Ranges {
+ if tx.mg.txTable.rangeMax(r.Start, r.Limit) > tx.seq {
+ vlog.VI(3).Infof("range conflict: {%q, %q}", r.Start, r.Limit)
+ return false
+ }
+
+ }
+ return true
+}
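+
+// Worked example (sketch): suppose tx started at seq 5 and read key "a", and a
+// concurrent transaction later committed a write to "a", so trackBatch
+// recorded that key in txTable at seq 6. At commit time txTable.get("a") == 6,
+// which is greater than 5, so validateReadSet returns false and Commit()
+// returns ErrConcurrentTransaction.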
+
+// Commit implements the store.Transaction interface.
+func (tx *transaction) Commit() error {
+ tx.mu.Lock()
+ defer tx.mu.Unlock()
+ if tx.err != nil {
+ return store.ConvertError(tx.err)
+ }
+ tx.err = verror.New(verror.ErrBadState, nil, store.ErrMsgCommittedTxn)
+ // Explicitly remove this transaction from the event queue. If this was the
+ // only active transaction, the event queue becomes empty and trackBatch will
+ // not add this transaction's write set to txTable.
+ tx.removeEvent()
+ defer tx.close()
+ tx.mg.mu.Lock()
+ defer tx.mg.mu.Unlock()
+ if !tx.validateReadSet() {
+ return store.NewErrConcurrentTransaction(nil)
+ }
+ if err := tx.mg.BatchStore.WriteBatch(tx.writes...); err != nil {
+ return err
+ }
+ tx.mg.trackBatch(tx.writes...)
+ return nil
+}
+
+// Abort implements the store.Transaction interface.
+func (tx *transaction) Abort() error {
+ tx.mu.Lock()
+ defer tx.mu.Unlock()
+ if tx.err != nil {
+ return store.ConvertError(tx.err)
+ }
+ tx.err = verror.New(verror.ErrCanceled, nil, store.ErrMsgAbortedTxn)
+ tx.close()
+ return nil
+}
diff --git a/services/syncbase/store/transactions/trie.go b/services/syncbase/store/transactions/trie.go
new file mode 100644
index 0000000..51a99a3
--- /dev/null
+++ b/services/syncbase/store/transactions/trie.go
@@ -0,0 +1,75 @@
+// Copyright 2015 The Vanadium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package transactions
+
+import (
+ "fmt"
+)
+
+// trie is an in-memory data structure that keeps track of recently-written
+// keys, and exposes an interface for asking when a key or key range was most
+// recently written to. It is used to check whether the read set of a
+// transaction pending commit is still valid. The transaction can be committed
+// iff its read set is valid.
+// TODO(rogulenko): replace this dummy implementation with an actual trie.
+type trie struct {
+ seqs map[string]uint64
+}
+
+func newTrie() *trie {
+ return &trie{
+ seqs: make(map[string]uint64),
+ }
+}
+
+// add updates the given key to the given seq, which must not be less than the
+// current seq for that key (if one exists). Seqs of subsequent calls must be
+// in ascending order.
+func (t *trie) add(key []byte, seq uint64) {
+ keystr := string(key)
+ if oldSeq, ok := t.seqs[keystr]; ok && seq < oldSeq {
+ panic(fmt.Sprintf("seq for key %q should be at least %d, but got %d", key, oldSeq, seq))
+ }
+ t.seqs[keystr] = seq
+}
+
+// remove reverts effect of add(key, seq).
+// Seqs of subsequent calls must be in ascending order.
+func (t *trie) remove(key []byte, seq uint64) {
+ keystr := string(key)
+ oldSeq, ok := t.seqs[keystr]
+ if !ok {
+ panic(fmt.Sprintf("key %q was not found", key))
+ }
+ if oldSeq > seq {
+ return
+ } else if oldSeq == seq {
+ delete(t.seqs, keystr)
+ } else {
+ panic(fmt.Sprintf("seq for key %q is too big: got %v, want %v", keystr, seq, oldSeq))
+ }
+}
+
+// get returns the seq associated with the given key.
+func (t *trie) get(key []byte) uint64 {
+ keystr := string(key)
+ if seq, ok := t.seqs[keystr]; ok {
+ return seq
+ }
+ return 0
+}
+
+// rangeMax returns the max seq associated with keys in range
+// [start, limit). Empty limit means no limit.
+func (t *trie) rangeMax(start, limit []byte) uint64 {
+ var result uint64 = 0
+ s, e := string(start), string(limit)
+ for key, seq := range t.seqs {
+ if key >= s && (e == "" || key < e) && seq > result {
+ result = seq
+ }
+ }
+ return result
+}
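+
+// Illustrative sketch of the intended semantics (t is a fresh trie):
+//
+//   t.add([]byte("bar"), 3)
+//   t.add([]byte("baz"), 5)
+//   t.get([]byte("bar"))                  // 3
+//   t.get([]byte("qux"))                  // 0 (never written)
+//   t.rangeMax([]byte("b"), []byte("c"))  // 5
+//   t.rangeMax([]byte("a"), nil)          // 5 (empty limit means no limit)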
diff --git a/services/syncbase/store/util.go b/services/syncbase/store/util.go
new file mode 100644
index 0000000..19695ed
--- /dev/null
+++ b/services/syncbase/store/util.go
@@ -0,0 +1,61 @@
+// Copyright 2015 The Vanadium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package store
+
+import (
+ "v.io/v23/verror"
+)
+
+type SnapshotSpecImpl struct{}
+
+func (s *SnapshotSpecImpl) __snapshotSpec() {}
+
+// RunInTransaction runs the given fn in a transaction, managing retries and
+// commit/abort.
+func RunInTransaction(st Store, fn func(tx Transaction) error) error {
+ // TODO(rogulenko): Make the number of attempts configurable.
+ // TODO(rogulenko): Change the default number of attempts to 3. Currently,
+ // some storage engine tests fail when the number of attempts is that low.
+ var err error
+ for i := 0; i < 100; i++ {
+ // TODO(sadovsky): Should NewTransaction return an error? If not, how will
+ // we deal with RPC errors when talking to remote storage engines? (Note,
+ // client-side BeginBatch returns an error.)
+ tx := st.NewTransaction()
+ if err = fn(tx); err != nil {
+ tx.Abort()
+ return err
+ }
+ // TODO(sadovsky): Commit() can fail for a number of reasons, e.g. RPC
+ // failure or ErrConcurrentTransaction. Depending on the cause of failure,
+ // it may be desirable to retry the Commit() and/or to call Abort().
+ if err = tx.Commit(); verror.ErrorID(err) != ErrConcurrentTransaction.ID {
+ return err
+ }
+ }
+ return err
+}
+
+// CopyBytes copies elements from a source slice into a destination slice.
+// The returned slice may be a sub-slice of dst if dst was large enough to hold
+// src. Otherwise, a newly allocated slice will be returned.
+// TODO(rogulenko): add some tests.
+func CopyBytes(dst, src []byte) []byte {
+ if cap(dst) < len(src) {
+ newlen := cap(dst)*2 + 2
+ if newlen < len(src) {
+ newlen = len(src)
+ }
+ dst = make([]byte, newlen)
+ }
+ dst = dst[:len(src)]
+ copy(dst, src)
+ return dst
+}
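+
+// Illustrative sketch of the reuse semantics (a dst with sufficient capacity
+// is reused, otherwise a new slice is allocated):
+//
+//   buf := make([]byte, 0, 64)
+//   buf = CopyBytes(buf, []byte("abc"))     // reuses buf's backing array
+//   buf = CopyBytes(buf, make([]byte, 128)) // allocates a new, larger slice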
+
+// ConvertError returns a copy of the verror, appending the current stack to it.
+func ConvertError(err error) error {
+ return verror.Convert(verror.IDAction{}, nil, err)
+}
diff --git a/services/syncbase/syncbased/main.go b/services/syncbase/syncbased/main.go
new file mode 100644
index 0000000..8def540
--- /dev/null
+++ b/services/syncbase/syncbased/main.go
@@ -0,0 +1,81 @@
+// Copyright 2015 The Vanadium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package main
+
+import (
+ "flag"
+
+ "v.io/syncbase/x/ref/services/syncbase/server"
+ "v.io/v23"
+ "v.io/v23/context"
+ "v.io/v23/rpc"
+ "v.io/v23/security"
+ "v.io/v23/security/access"
+ "v.io/x/lib/vlog"
+ "v.io/x/ref/lib/security/securityflag"
+ _ "v.io/x/ref/runtime/factories/roaming"
+)
+
+var (
+ name = flag.String("name", "", "Name to mount at.")
+ rootDir = flag.String("root-dir", "/var/lib/syncbase", "Root dir for storage engines and other data")
+ engine = flag.String("engine", "leveldb", "Storage engine to use. Currently supported: memstore and leveldb.")
+)
+
+// defaultPerms returns a permissions object that grants all permissions to the
+// provided blessing patterns.
+func defaultPerms(blessingPatterns []security.BlessingPattern) access.Permissions {
+ perms := access.Permissions{}
+ for _, tag := range access.AllTypicalTags() {
+ for _, bp := range blessingPatterns {
+ perms.Add(bp, string(tag))
+ }
+ }
+ return perms
+}
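+
+// Illustrative sketch: defaultPerms([]security.BlessingPattern{"root/alice"})
+// returns a Permissions map that lists "root/alice" under every typical tag
+// (e.g. Admin, Read, Write).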
+
+// TODO(sadovsky): We return rpc.Server and rpc.Dispatcher as a quick hack to
+// support Mojo.
+func Serve(ctx *context.T) (rpc.Server, rpc.Dispatcher) {
+ s, err := v23.NewServer(ctx)
+ if err != nil {
+ vlog.Fatal("v23.NewServer() failed: ", err)
+ }
+ if _, err := s.Listen(v23.GetListenSpec(ctx)); err != nil {
+ vlog.Fatal("s.Listen() failed: ", err)
+ }
+
+ perms, err := securityflag.PermissionsFromFlag()
+ if err != nil {
+ vlog.Fatal("securityflag.PermissionsFromFlag() failed: ", err)
+ }
+ if perms != nil {
+ vlog.Info("Using perms from command line flag.")
+ } else {
+ vlog.Info("Perms flag not set. Giving local principal all perms.")
+ perms = defaultPerms(security.DefaultBlessingPatterns(v23.GetPrincipal(ctx)))
+ }
+ vlog.Infof("Perms: %v", perms)
+ service, err := server.NewService(ctx, nil, server.ServiceOptions{
+ Perms: perms,
+ RootDir: *rootDir,
+ Engine: *engine,
+ Server: s,
+ })
+ if err != nil {
+ vlog.Fatal("server.NewService() failed: ", err)
+ }
+ d := server.NewDispatcher(service)
+
+ // Publish the service in the mount table.
+ if err := s.ServeDispatcher(*name, d); err != nil {
+ vlog.Fatal("s.ServeDispatcher() failed: ", err)
+ }
+ if *name != "" {
+ vlog.Info("Mounted at: ", *name)
+ }
+
+ return s, d
+}
diff --git a/services/syncbase/syncbased/mojo_main.go b/services/syncbase/syncbased/mojo_main.go
new file mode 100644
index 0000000..74a247f
--- /dev/null
+++ b/services/syncbase/syncbased/mojo_main.go
@@ -0,0 +1,79 @@
+// Copyright 2015 The Vanadium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// +build mojo
+
+package main
+
+// To build:
+// cd $V23_ROOT/experimental/projects/ether
+// make build
+
+import (
+ "log"
+
+ "mojo/public/go/application"
+ "mojo/public/go/bindings"
+ "mojo/public/go/system"
+
+ "mojom/syncbase"
+
+ "v.io/syncbase/x/ref/services/syncbase/server"
+ "v.io/v23"
+ "v.io/v23/context"
+ "v.io/v23/rpc"
+)
+
+//#include "mojo/public/c/system/types.h"
+import "C"
+
+type delegate struct {
+ ctx *context.T
+ srv rpc.Server
+ disp rpc.Dispatcher
+ stubs []*bindings.Stub
+}
+
+func (d *delegate) Initialize(ctx application.Context) {
+ d.srv, d.disp = Serve(d.ctx)
+}
+
+func (d *delegate) Create(req syncbase.Syncbase_Request) {
+ impl := server.NewMojoImpl(d.ctx, d.srv, d.disp)
+ stub := syncbase.NewSyncbaseStub(req, impl, bindings.GetAsyncWaiter())
+ d.stubs = append(d.stubs, stub)
+ go func() {
+ for {
+ if err := stub.ServeRequest(); err != nil {
+ connErr, ok := err.(*bindings.ConnectionError)
+ if !ok || !connErr.Closed() {
+ log.Println(err)
+ }
+ break
+ }
+ }
+ }()
+}
+
+func (d *delegate) AcceptConnection(conn *application.Connection) {
+ conn.ProvideServices(&syncbase.Syncbase_ServiceFactory{d})
+}
+
+func (d *delegate) Quit() {
+ for _, stub := range d.stubs {
+ stub.Close()
+ }
+}
+
+//export MojoMain
+func MojoMain(handle C.MojoHandle) C.MojoResult {
+ ctx, shutdown := v23.Init()
+ defer shutdown()
+ application.Run(&delegate{ctx: ctx}, system.MojoHandle(handle))
+ return C.MOJO_RESULT_OK
+}
+
+// NOTE(nlacasse): Mojo runs Go code by calling MojoMain(). The main() method
+// below is still needed because the Go tool won't build without it.
+func main() {}
diff --git a/services/syncbase/syncbased/v23_main.go b/services/syncbase/syncbased/v23_main.go
new file mode 100644
index 0000000..1651dc1
--- /dev/null
+++ b/services/syncbase/syncbased/v23_main.go
@@ -0,0 +1,23 @@
+// Copyright 2015 The Vanadium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// +build !mojo
+
+// syncbased is a syncbase daemon.
+package main
+
+// Example invocation:
+// syncbased --veyron.tcp.address="127.0.0.1:0" --name=syncbased
+
+import (
+ "v.io/v23"
+ "v.io/x/ref/lib/signals"
+)
+
+func main() {
+ ctx, shutdown := v23.Init()
+ defer shutdown()
+ Serve(ctx)
+ <-signals.ShutdownOnSignals(ctx)
+}
diff --git a/services/syncbase/vsync/blob.go b/services/syncbase/vsync/blob.go
new file mode 100644
index 0000000..ff04066
--- /dev/null
+++ b/services/syncbase/vsync/blob.go
@@ -0,0 +1,429 @@
+// Copyright 2015 The Vanadium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package vsync
+
+import (
+ "io"
+
+ wire "v.io/syncbase/v23/services/syncbase/nosql"
+ blob "v.io/syncbase/x/ref/services/syncbase/localblobstore"
+ "v.io/syncbase/x/ref/services/syncbase/server/interfaces"
+ "v.io/syncbase/x/ref/services/syncbase/server/util"
+ "v.io/v23/context"
+ "v.io/v23/naming"
+ "v.io/v23/rpc"
+ "v.io/v23/verror"
+ "v.io/x/lib/vlog"
+)
+
+const (
+ chunkSize = 8 * 1024
+)
+
+// blobLocInfo contains the location information about a BlobRef. This location
+// information is merely a hint used to search for the blob.
+type blobLocInfo struct {
+ peer string // Syncbase from which the presence of this BlobRef was first learned.
+ source string // Syncbase that originated this blob.
+ sgIds map[interfaces.GroupId]struct{} // SyncGroups through which the BlobRef was learned.
+}
+
+////////////////////////////////////////////////////////////
+// RPCs for managing blobs between Syncbase and its clients.
+
+func (sd *syncDatabase) CreateBlob(ctx *context.T, call rpc.ServerCall) (wire.BlobRef, error) {
+ vlog.VI(2).Infof("sync: CreateBlob: begin")
+ defer vlog.VI(2).Infof("sync: CreateBlob: end")
+
+ // Get this Syncbase's blob store handle.
+ ss := sd.sync.(*syncService)
+ bst := ss.bst
+
+ writer, err := bst.NewBlobWriter(ctx, "")
+ if err != nil {
+ return wire.NullBlobRef, err
+ }
+ defer writer.CloseWithoutFinalize()
+
+ name := writer.Name()
+ vlog.VI(4).Infof("sync: CreateBlob: blob ref %s", name)
+ return wire.BlobRef(name), nil
+}
+
+func (sd *syncDatabase) PutBlob(ctx *context.T, call wire.BlobManagerPutBlobServerCall, br wire.BlobRef) error {
+ vlog.VI(2).Infof("sync: PutBlob: begin br %v", br)
+ defer vlog.VI(2).Infof("sync: PutBlob: end br %v", br)
+
+ // Get this Syncbase's blob store handle.
+ ss := sd.sync.(*syncService)
+ bst := ss.bst
+
+ writer, err := bst.ResumeBlobWriter(ctx, string(br))
+ if err != nil {
+ return err
+ }
+ defer writer.CloseWithoutFinalize()
+
+ stream := call.RecvStream()
+ for stream.Advance() {
+ item := blob.BlockOrFile{Block: stream.Value()}
+ if err = writer.AppendFragment(item); err != nil {
+ return err
+ }
+ }
+ return stream.Err()
+}
+
+func (sd *syncDatabase) CommitBlob(ctx *context.T, call rpc.ServerCall, br wire.BlobRef) error {
+ vlog.VI(2).Infof("sync: CommitBlob: begin br %v", br)
+ defer vlog.VI(2).Infof("sync: CommitBlob: end br %v", br)
+
+ // Get this Syncbase's blob store handle.
+ ss := sd.sync.(*syncService)
+ bst := ss.bst
+
+ writer, err := bst.ResumeBlobWriter(ctx, string(br))
+ if err != nil {
+ return err
+ }
+ return writer.Close()
+}
+
+func (sd *syncDatabase) GetBlobSize(ctx *context.T, call rpc.ServerCall, br wire.BlobRef) (int64, error) {
+ vlog.VI(2).Infof("sync: GetBlobSize: begin br %v", br)
+ defer vlog.VI(2).Infof("sync: GetBlobSize: end br %v", br)
+
+ // Get this Syncbase's blob store handle.
+ ss := sd.sync.(*syncService)
+ bst := ss.bst
+
+ reader, err := bst.NewBlobReader(ctx, string(br))
+ if err != nil {
+ return 0, err
+ }
+ defer reader.Close()
+
+ return reader.Size(), nil
+}
+
+func (sd *syncDatabase) DeleteBlob(ctx *context.T, call rpc.ServerCall, br wire.BlobRef) error {
+ return verror.NewErrNotImplemented(ctx)
+}
+
+func (sd *syncDatabase) GetBlob(ctx *context.T, call wire.BlobManagerGetBlobServerCall, br wire.BlobRef, offset int64) error {
+ vlog.VI(2).Infof("sync: GetBlob: begin br %v", br)
+ defer vlog.VI(2).Infof("sync: GetBlob: end br %v", br)
+
+ // First get the blob locally if available.
+ ss := sd.sync.(*syncService)
+ err := getLocalBlob(ctx, call.SendStream(), ss.bst, br, offset)
+ if err == nil || verror.ErrorID(err) == wire.ErrBlobNotCommitted.ID {
+ return err
+ }
+
+ return sd.fetchBlobRemote(ctx, br, nil, call, offset)
+}
+
+func (sd *syncDatabase) FetchBlob(ctx *context.T, call wire.BlobManagerFetchBlobServerCall, br wire.BlobRef, priority uint64) error {
+ vlog.VI(2).Infof("sync: FetchBlob: begin br %v", br)
+ defer vlog.VI(2).Infof("sync: FetchBlob: end br %v", br)
+
+ clientStream := call.SendStream()
+
+ // Check if BlobRef already exists locally.
+ ss := sd.sync.(*syncService)
+ bst := ss.bst
+
+ bReader, err := bst.NewBlobReader(ctx, string(br))
+ if err == nil {
+ finalized := bReader.IsFinalized()
+ bReader.Close()
+
+ if !finalized {
+ return wire.NewErrBlobNotCommitted(ctx)
+ }
+ clientStream.Send(wire.BlobFetchStatus{State: wire.BlobFetchStateDone})
+ return nil
+ }
+
+ // Wait for this blob's turn.
+ // TODO(hpucha): Implement a blob queue.
+ clientStream.Send(wire.BlobFetchStatus{State: wire.BlobFetchStatePending})
+
+ return sd.fetchBlobRemote(ctx, br, call, nil, 0)
+}
+
+func (sd *syncDatabase) PinBlob(ctx *context.T, call rpc.ServerCall, br wire.BlobRef) error {
+ return verror.NewErrNotImplemented(ctx)
+}
+
+func (sd *syncDatabase) UnpinBlob(ctx *context.T, call rpc.ServerCall, br wire.BlobRef) error {
+ return verror.NewErrNotImplemented(ctx)
+}
+
+func (sd *syncDatabase) KeepBlob(ctx *context.T, call rpc.ServerCall, br wire.BlobRef, rank uint64) error {
+ return verror.NewErrNotImplemented(ctx)
+}
+
+////////////////////////////////////////////////////////////
+// RPC for blob fetch between Syncbases.
+
+func (s *syncService) FetchBlob(ctx *context.T, call interfaces.SyncFetchBlobServerCall, br wire.BlobRef) error {
+ vlog.VI(2).Infof("sync: FetchBlob: sb-sb begin br %v", br)
+ defer vlog.VI(2).Infof("sync: FetchBlob: sb-sb end br %v", br)
+ return getLocalBlob(ctx, call.SendStream(), s.bst, br, 0)
+}
+
+func (s *syncService) HaveBlob(ctx *context.T, call rpc.ServerCall, br wire.BlobRef) (int64, error) {
+ vlog.VI(2).Infof("sync: HaveBlob: begin br %v", br)
+ defer vlog.VI(2).Infof("sync: HaveBlob: end br %v", br)
+
+ bReader, err := s.bst.NewBlobReader(ctx, string(br))
+ if err != nil {
+ return 0, err
+ }
+ defer bReader.Close()
+ if !bReader.IsFinalized() {
+ return 0, wire.NewErrBlobNotCommitted(ctx)
+ }
+ return bReader.Size(), nil
+}
+
+func (s *syncService) FetchBlobRecipe(ctx *context.T, call interfaces.SyncFetchBlobRecipeServerCall, br wire.BlobRef) error {
+ return verror.NewErrNotImplemented(ctx)
+}
+
+func (s *syncService) FetchChunks(ctx *context.T, call interfaces.SyncFetchChunksServerCall) error {
+ return verror.NewErrNotImplemented(ctx)
+}
+
+////////////////////////////////////////////////////////////
+// Helpers.
+
+type byteStream interface {
+ Send(item []byte) error
+}
+
+// getLocalBlob looks for a blob in the local store and, if found, reads it
+// starting at the given offset and sends its bytes into the client stream.
+func getLocalBlob(ctx *context.T, stream byteStream, bst blob.BlobStore, br wire.BlobRef, offset int64) error {
+ vlog.VI(4).Infof("sync: getLocalBlob: begin br %v, offset %v", br, offset)
+ defer vlog.VI(4).Infof("sync: getLocalBlob: end br %v, offset %v", br, offset)
+
+ reader, err := bst.NewBlobReader(ctx, string(br))
+ if err != nil {
+ return err
+ }
+ defer reader.Close()
+
+ if !reader.IsFinalized() {
+ return wire.NewErrBlobNotCommitted(ctx)
+ }
+
+ buf := make([]byte, chunkSize)
+ for {
+ nbytes, err := reader.ReadAt(buf, offset)
+ if err != nil && err != io.EOF {
+ return err
+ }
+ if nbytes <= 0 {
+ break
+ }
+ offset += int64(nbytes)
+ stream.Send(buf[:nbytes])
+ if err == io.EOF {
+ break
+ }
+ }
+
+ return nil
+}
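+
+// Illustrative sketch, not part of the sync service: a minimal in-memory
+// byteStream that could be used, for example in a test, to capture the bytes
+// emitted by getLocalBlob. The bufferStream name is hypothetical. Each chunk
+// is copied because getLocalBlob reuses its read buffer across iterations.
+type bufferStream struct {
+ data []byte
+}
+
+func (b *bufferStream) Send(item []byte) error {
+ b.data = append(b.data, item...)
+ return nil
+}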
+
+func (sd *syncDatabase) fetchBlobRemote(ctx *context.T, br wire.BlobRef, statusCall wire.BlobManagerFetchBlobServerCall, dataCall wire.BlobManagerGetBlobServerCall, offset int64) error {
+ vlog.VI(4).Infof("sync: fetchBlobRemote: begin br %v, offset %v", br, offset)
+ defer vlog.VI(4).Infof("sync: fetchBlobRemote: end br %v, offset %v", br, offset)
+
+ var sendStatus, sendData bool
+ var statusStream interface {
+ Send(item wire.BlobFetchStatus) error
+ }
+ var dataStream interface {
+ Send(item []byte) error
+ }
+
+ if statusCall != nil {
+ sendStatus = true
+ statusStream = statusCall.SendStream()
+ }
+ if dataCall != nil {
+ sendData = true
+ dataStream = dataCall.SendStream()
+ }
+
+ if sendStatus {
+ // Start blob source discovery.
+ statusStream.Send(wire.BlobFetchStatus{State: wire.BlobFetchStateLocating})
+ }
+
+ // Locate blob.
+ peer, size, err := sd.locateBlob(ctx, br)
+ if err != nil {
+ return err
+ }
+
+ // Start blob fetching.
+ status := wire.BlobFetchStatus{State: wire.BlobFetchStateFetching, Total: size}
+ if sendStatus {
+ statusStream.Send(status)
+ }
+
+ ss := sd.sync.(*syncService)
+ bst := ss.bst
+
+ bWriter, err := bst.NewBlobWriter(ctx, string(br))
+ if err != nil {
+ return err
+ }
+
+ c := interfaces.SyncClient(peer)
+ ctxPeer, cancel := context.WithRootCancel(ctx)
+ stream, err := c.FetchBlob(ctxPeer, br)
+ if err == nil {
+ peerStream := stream.RecvStream()
+ for peerStream.Advance() {
+ item := blob.BlockOrFile{Block: peerStream.Value()}
+ if err = bWriter.AppendFragment(item); err != nil {
+ break
+ }
+ curSize := int64(len(item.Block))
+ status.Received += curSize
+ if sendStatus {
+ statusStream.Send(status)
+ }
+ if sendData {
+ if curSize <= offset {
+ offset -= curSize
+ } else if offset != 0 {
+ dataStream.Send(item.Block[offset:])
+ offset = 0
+ } else {
+ dataStream.Send(item.Block)
+ }
+ }
+ }
+
+ if err != nil {
+ cancel()
+ stream.Finish()
+ } else {
+ err = peerStream.Err()
+ if terr := stream.Finish(); err == nil {
+ err = terr
+ }
+ cancel()
+ }
+ }
+
+ bWriter.Close()
+ if err != nil {
+ // Clean up the partially downloaded blob so that it can be
+ // downloaded again. Ignore any error from deletion.
+ bst.DeleteBlob(ctx, string(br))
+ } else {
+ status := wire.BlobFetchStatus{State: wire.BlobFetchStateDone}
+ if sendStatus {
+ statusStream.Send(status)
+ }
+ }
+ return err
+}
+
+// TODO(hpucha): Add syncgroup driven blob discovery.
+func (sd *syncDatabase) locateBlob(ctx *context.T, br wire.BlobRef) (string, int64, error) {
+ vlog.VI(4).Infof("sync: locateBlob: begin br %v", br)
+ defer vlog.VI(4).Infof("sync: locateBlob: end br %v", br)
+
+ ss := sd.sync.(*syncService)
+ loc, err := ss.getBlobLocInfo(ctx, br)
+ if err != nil {
+ return "", 0, err
+ }
+
+ // Search for the blob at its source peer and at the peer it was learned from.
+ var peers = []string{loc.source, loc.peer}
+ for _, p := range peers {
+ vlog.VI(4).Infof("sync: locateBlob: attempting %s", p)
+ // Get the mounttables for this peer.
+ mtTables, err := sd.getMountTables(ctx, p)
+ if err != nil {
+ continue
+ }
+
+ for mt := range mtTables {
+ absName := naming.Join(mt, p, util.SyncbaseSuffix)
+ c := interfaces.SyncClient(absName)
+ size, err := c.HaveBlob(ctx, br)
+ if err == nil {
+ vlog.VI(4).Infof("sync: locateBlob: found blob on %s", absName)
+ return absName, size, nil
+ }
+ }
+ }
+
+ return "", 0, verror.New(verror.ErrInternal, ctx, "blob not found")
+
+}
+
+func (sd *syncDatabase) getMountTables(ctx *context.T, peer string) (map[string]struct{}, error) {
+ ss := sd.sync.(*syncService)
+ mInfo := ss.copyMemberInfo(ctx, peer)
+
+ mtTables := make(map[string]struct{})
+ for gdbName, sgInfo := range mInfo.db2sg {
+ appName, dbName, err := splitAppDbName(ctx, gdbName)
+ if err != nil {
+ return nil, err
+ }
+ st, err := ss.getDbStore(ctx, nil, appName, dbName)
+ if err != nil {
+ return nil, err
+ }
+
+ for id := range sgInfo {
+ sg, err := getSyncGroupById(ctx, st, id)
+ if err != nil {
+ continue
+ }
+ if _, ok := sg.Joiners[peer]; !ok {
+ // Peer is no longer part of the SyncGroup.
+ continue
+ }
+ for _, mt := range sg.Spec.MountTables {
+ mtTables[mt] = struct{}{}
+ }
+ }
+ }
+ return mtTables, nil
+}
+
+// TODO(hpucha): Persist the blob directory periodically.
+func (s *syncService) addBlobLocInfo(ctx *context.T, br wire.BlobRef, info *blobLocInfo) error {
+ s.blobDirLock.Lock()
+ defer s.blobDirLock.Unlock()
+
+ s.blobDirectory[br] = info
+ return nil
+}
+
+func (s *syncService) getBlobLocInfo(ctx *context.T, br wire.BlobRef) (*blobLocInfo, error) {
+ s.blobDirLock.Lock()
+ defer s.blobDirLock.Unlock()
+
+ if info, ok := s.blobDirectory[br]; ok {
+ return info, nil
+ }
+ return nil, verror.New(verror.ErrInternal, ctx, "blob state not found", br)
+}
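+
+// Illustrative sketch, not part of this file: a hypothetical helper showing
+// how a caller that learns about a BlobRef from a peer could record a location
+// hint for locateBlob to use later. It assumes blobLocInfo has the source and
+// peer fields referenced by locateBlob above; any other fields are left at
+// their zero values. The noteBlobSeen name is an assumption for this example.
+func (s *syncService) noteBlobSeen(ctx *context.T, br wire.BlobRef, source, peer string) error {
+ return s.addBlobLocInfo(ctx, br, &blobLocInfo{source: source, peer: peer})
+}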
diff --git a/services/syncbase/vsync/conflict_resolution.go b/services/syncbase/vsync/conflict_resolution.go
new file mode 100644
index 0000000..a8e41f6
--- /dev/null
+++ b/services/syncbase/vsync/conflict_resolution.go
@@ -0,0 +1,144 @@
+// Copyright 2015 The Vanadium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package vsync
+
+import (
+ "v.io/v23/context"
+ "v.io/v23/verror"
+ "v.io/x/lib/vlog"
+)
+
+// Policies for conflict resolution.
+// TODO(hpucha): Move relevant parts to client-facing vdl.
+const (
+ // Resolves conflicts by picking the update with the most recent timestamp.
+ useTime = iota
+
+ // TODO(hpucha): implement other policies.
+ // Resolves conflicts by using the app conflict resolver callbacks via store.
+ useCallback
+)
+
+var (
+ // conflictResolutionPolicy is the policy used to resolve conflicts.
+ conflictResolutionPolicy = useTime
+)
+
+// resolutionType represents how a conflict is resolved.
+type resolutionType byte
+
+const (
+ pickLocal resolutionType = iota // local update was chosen as the resolution.
+ pickRemote // remote update was chosen as the resolution.
+ createNew // new update was created as the resolution.
+)
+
+// conflictResolution represents the state of a conflict resolution.
+type conflictResolution struct {
+ ty resolutionType
+ rec *localLogRec // Valid only if ty == createNew.
+ val []byte // Valid only if ty == createNew.
+}
+
+// resolveConflicts resolves conflicts for updated objects. Conflicts may be
+// resolved by adding new versions or picking either the local or the remote
+// version.
+func (iSt *initiationState) resolveConflicts(ctx *context.T) error {
+ for obj, st := range iSt.updObjects {
+ if !st.isConflict {
+ continue
+ }
+
+ // TODO(hpucha): Look up policy from the schema. Currently,
+ // hardcoded to time.
+ var err error
+ st.res, err = iSt.resolveObjConflict(ctx, obj, st.oldHead, st.newHead, st.ancestor)
+ if err != nil {
+ return err
+ }
+ }
+
+ return nil
+}
+
+// resolveObjConflict resolves a conflict for an object given its ID and the 3
+// versions that express the conflict: the object's local version, its remote
+// version (from the device contacted), and the closest common ancestor (see
+// dag.go on how the ancestor is chosen). The function returns the new object
+// value according to the conflict resolution policy.
+func (iSt *initiationState) resolveObjConflict(ctx *context.T, oid, local, remote, ancestor string) (*conflictResolution, error) {
+ // Fetch the log records of the 3 object versions.
+ versions := []string{local, remote, ancestor}
+ lrecs, err := iSt.getLogRecsBatch(ctx, oid, versions)
+ if err != nil {
+ return nil, err
+ }
+
+ // The local and remote records must exist; however, it is valid for the
+ // common ancestor to not exist. This happens when two Syncbases separately
+ // create their first versions for the same object (key).
+ locRec, remRec, ancRec := lrecs[0], lrecs[1], lrecs[2]
+ if locRec == nil || remRec == nil {
+ vlog.Fatalf("sync: resolveObjConflict: oid %s: invalid local (%s: %v) or remote recs (%s: %v)",
+ oid, local, locRec, remote, remRec)
+ }
+
+ // Resolve the conflict according to the resolution policy.
+ switch conflictResolutionPolicy {
+ case useTime:
+ return iSt.resolveObjConflictByTime(ctx, oid, locRec, remRec, ancRec)
+ default:
+ return nil, verror.New(verror.ErrInternal, ctx, "unknown conflict resolution policy", conflictResolutionPolicy)
+ }
+}
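+
+// Illustrative sketch only: one way the useCallback policy mentioned in the
+// TODO above could plug into the switch in resolveObjConflict. The appResolver
+// interface and its Resolve method are assumptions for this example, not an
+// existing API; an app-supplied resolver would receive the same three log
+// records and return a conflictResolution, typically of type createNew.
+type appResolver interface {
+ Resolve(ctx *context.T, oid string, local, remote, ancestor *localLogRec) (*conflictResolution, error)
+}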
+
+// resolveObjConflictByTime resolves conflicts using the timestamps of the
+// conflicting mutations. It picks a mutation with the larger timestamp,
+// i.e. the most recent update. If the timestamps are equal, it uses the
+// mutation version numbers as a tie-breaker, picking the mutation with the
+// larger version. Instead of creating a new version that resolves the
+// conflict, we pick an existing version as the conflict resolution.
+func (iSt *initiationState) resolveObjConflictByTime(ctx *context.T, oid string, local, remote, ancestor *localLogRec) (*conflictResolution, error) {
+ var res conflictResolution
+ switch {
+ case local.Metadata.UpdTime.After(remote.Metadata.UpdTime):
+ res.ty = pickLocal
+ case local.Metadata.UpdTime.Before(remote.Metadata.UpdTime):
+ res.ty = pickRemote
+ case local.Metadata.CurVers > remote.Metadata.CurVers:
+ res.ty = pickLocal
+ case local.Metadata.CurVers < remote.Metadata.CurVers:
+ res.ty = pickRemote
+ default:
+ vlog.Fatalf("sync: resolveObjConflictByTime: local and remote update times and versions are the same, local %v remote %v", local, remote)
+ }
+
+ return &res, nil
+}
+
+// getLogRecsBatch gets the log records for an array of versions for a given object.
+func (iSt *initiationState) getLogRecsBatch(ctx *context.T, obj string, versions []string) ([]*localLogRec, error) {
+ lrecs := make([]*localLogRec, len(versions))
+ for p, v := range versions {
+ if v == NoVersion {
+ lrecs[p] = nil
+ continue
+ }
+
+ logKey, err := getLogRecKey(ctx, iSt.tx, obj, v)
+ if err != nil {
+ return nil, err
+ }
+ dev, gen, err := splitLogRecKey(ctx, logKey)
+ if err != nil {
+ return nil, err
+ }
+ lrecs[p], err = getLogRec(ctx, iSt.tx, dev, gen)
+ if err != nil {
+ return nil, err
+ }
+ }
+ return lrecs, nil
+}
diff --git a/services/syncbase/vsync/dag.go b/services/syncbase/vsync/dag.go
new file mode 100644
index 0000000..121f594
--- /dev/null
+++ b/services/syncbase/vsync/dag.go
@@ -0,0 +1,855 @@
+// Copyright 2015 The Vanadium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package vsync
+
+// Syncbase DAG (directed acyclic graph) utility functions.
+//
+// The DAG is used to track the version history of synced objects in order
+// to detect and resolve conflicts (concurrent changes on different devices).
+//
+// Note: the sync code uses the words "object" and "object ID" (oid) as a
+// generic way to refer to syncable entities, whether they are actual user data
+// (table row and its row key), prefix-ACLs (permission entry and its prefix),
+// or other metadata such as SyncGroups (SyncGroup value and its internal key
+// based on the SyncGroup ID).
+//
+// * Object IDs are globally unique across all devices.
+// * Syncable objects have version numbers associated with their mutations.
+// * For a given object ID, the version number is globally unique across all
+// devices, i.e. the (oid, version) tuple is globally unique.
+// * Each (oid, version) tuple is represented by a node in the DAG.
+// * The previous version of an object is its parent in the DAG, i.e. the
+// new version is derived from that parent.
+// * DAG nodes have child-to-parent pointers.
+// * When there are no conflicts, the parent node has a single child node
+// that points to it.
+// * When a parent node has more than one child, this indicates concurrent
+// mutations which are treated as a conflict to be resolved.
+// * When a conflict is resolved, the new version has pointers back to each of
+// the two parents to indicate that it is derived from both nodes.
+// * During a sync operation from a source device to a target device, the
+// target receives a DAG fragment from the source. That fragment has to
+// be incorporated (grafted) into the target device's DAG. It may be a
+// continuation of the DAG of an object, with the attachment (graft) point
+// being the current head of the DAG, in which case there are no conflicts.
+// Or the graft point(s) may be older nodes, which means the new fragment
+// is a divergence in the graph causing a conflict that must be resolved
+// in order to re-converge the two DAG fragments.
+//
+// In the diagrams below:
+// (h) represents the head node in the local device.
+// (nh) represents the new head node received from the remote device.
+// (g) represents a graft node, where new nodes attach to the existing DAG.
+// <- represents a derived-from mutation, i.e. a child-to-parent pointer
+//
+// a- No-conflict example: the new nodes (v4, v5) attach to the head node (v3).
+// In this case the new head becomes the head node, the new DAG fragment
+// being a continuation of the existing DAG.
+//
+// Before:
+// v1 <- v2 <- v3(h)
+//
+// Sync updates applied, no conflict detected:
+// v1 <- v2 <- v3(h,g) <- v4 <- v5 (nh)
+//
+// After:
+// v1 <- v2 <- v3 <- v4 <- v5 (h)
+//
+// b- Conflict example: the new nodes (v4, v5) attach to an old node (v2).
+// The current head node (v3) and the new head node (v5) are divergent
+// (concurrent) mutations that need to be resolved. The conflict
+// resolution function is passed the old head (v3), new head (v5), and
+// the common ancestor (v2). It resolves the conflict with (v6) which
+// is represented in the DAG as derived from both v3 and v5 (2 parents).
+//
+// Before:
+// v1 <- v2 <- v3(h)
+//
+// Sync updates applied, conflict detected (v3 not a graft node):
+// v1 <- v2(g) <- v3(h)
+// <- v4 <- v5 (nh)
+//
+// After: conflict resolver creates v6 having 2 parents (v3, v5):
+// v1 <- v2(g) <- v3 <------- v6(h)
+// <- v4 <- v5 <-
+//
+// The DAG does not grow indefinitely. During a sync operation each device
+// learns what the other device already knows -- where it's at in the version
+// history for the objects. When a device determines that all devices that
+// sync an object (members of matching SyncGroups) have moved past some version
+// for that object, the DAG for that object can be pruned up to that common
+// version, deleting all prior (ancestor) nodes.
+//
+// The DAG contains three tables persisted to disk (nodes, heads, batches):
+//
+// * nodes: one entry per (oid, version) with references to parent node(s)
+// it is derived from, a reference to the log record identifying
+// that mutation, a reference to its write batch (if any), and a
+// boolean to indicate whether this was an object deletion.
+//
+// * heads: one entry per object pointing to its most recent version.
+//
+// * batches: one entry per batch ID containing the set of objects in the
+// write batch and their versions.
+
+import (
+ "container/list"
+ "fmt"
+
+ "v.io/syncbase/x/ref/services/syncbase/server/util"
+ "v.io/syncbase/x/ref/services/syncbase/store"
+ "v.io/v23/context"
+ "v.io/v23/verror"
+ "v.io/x/lib/vlog"
+)
+
+const (
+ NoVersion = ""
+ NoBatchId = uint64(0)
+)
+
+// dagNode holds the information on an object mutation in the DAG.
+// Note: the batch ID and deleted flag are copies of information in the log
+// record. They are also stored in the DAG node to improve DAG traversal for
+// conflict resolution and pruning without having to fetch the full log record
+// every time.
+type dagNode struct {
+ Level uint64 // node distance from root
+ Parents []string // references to parent versions
+ Logrec string // reference to log record
+ BatchId uint64 // ID of a write batch
+ Deleted bool // true if the change was a delete
+}
+
+// batchSet holds information on a set of write batches.
+type batchSet map[uint64]*batchInfo
+
+// batchInfo holds the information on a write batch:
+// - The map of syncable (versioned) objects: {oid: version}
+// - The total count of batch objects, including non-syncable ones.
+// TODO(rdaoud): add support to track the read and scan sets.
+type batchInfo struct {
+ Objects map[string]string
+ Count uint64
+}
+
+// graftMap holds the state of DAG node grafting (attaching) per object.
+type graftMap map[string]*graftInfo
+
+// graftInfo holds the state of an object's node grafting in the DAG.
+// It is ephemeral (in-memory), used during a single sync operation to track
+// where the new DAG fragments are attached to the existing DAG for the object:
+// - newNodes: the set of newly added nodes; used to detect the type of edges
+// between nodes (new-node to old-node or vice versa).
+// - newHeads: the set of new candidate head nodes; used to detect conflicts.
+// - graftNodes: the set of old nodes on which new nodes were added, and their
+// level in the DAG; used to find common ancestors for conflicts.
+// - oldHeadSnap: snapshot of the current local head known by sync, used in
+// conflict detection, particularly when conflict detection needs
+// to be retried because the sync DAG state is stale compared
+// to the local store.
+//
+// After the received mutations are applied, if there are two heads in the
+// newHeads set, there is a conflict to be resolved for the object. Otherwise,
+// if there is one head, no conflict was triggered and the new head becomes the
+// current object version. In case of conflict, the graftNodes set is used to
+// select a common ancestor.
+// TODO(rdaoud): support open DAGs to handle delayed conflict resolution by
+// tracking multiple dangling remote heads in addition to the local head node.
+type graftInfo struct {
+ newNodes map[string]bool
+ newHeads map[string]bool
+ graftNodes map[string]uint64
+ oldHeadSnap string
+}
+
+// newBatchInfo allocates and initializes a batch info entry.
+func newBatchInfo() *batchInfo {
+ return &batchInfo{Objects: make(map[string]string), Count: 0}
+}
+
+// startBatch marks the start of a batch. If no batch ID is given, it generates
+// a new one and returns it to the caller. The batch ID is used to track DAG
+// nodes that are part of the same batch and is stored in the log records.
+// If a batch ID is given by the caller, its in-memory entry is looked up,
+// refetching it from the store if needed.
+func (s *syncService) startBatch(ctx *context.T, st store.StoreReader, btid uint64) uint64 {
+ s.batchesLock.Lock()
+ defer s.batchesLock.Unlock()
+
+ // If no batch ID is given, generate a new unused one.
+ if btid == NoBatchId {
+ for (btid == NoBatchId) || (s.batches[btid] != nil) {
+ btid = rand64()
+ }
+
+ s.batches[btid] = newBatchInfo()
+ return btid
+ }
+
+ // Use the given batch ID and, if needed, refetch its in-memory entry
+ // from the store. It is OK not to find it in the store; it means sync
+ // is learning about this batch ID for the first time from another Syncbase.
+ if s.batches[btid] == nil {
+ info, err := getBatch(ctx, st, btid)
+ if err != nil {
+ info = newBatchInfo()
+ }
+ s.batches[btid] = info
+ }
+
+ return btid
+}
+
+// addNodeToBatch adds a node (oid, version) to a batch under construction.
+func (s *syncService) addNodeToBatch(ctx *context.T, btid uint64, oid, version string) error {
+ s.batchesLock.Lock()
+ defer s.batchesLock.Unlock()
+
+ if btid == NoBatchId {
+ return verror.New(verror.ErrInternal, ctx, "invalid batch id", btid)
+ }
+
+ info := s.batches[btid]
+ if info == nil {
+ return verror.New(verror.ErrInternal, ctx, "unknown batch id", btid)
+ }
+
+ info.Objects[oid] = version
+ return nil
+}
+
+// endBatch marks the end of a given batch. The batch information is persisted
+// to the store and removed from the temporary in-memory entry.
+func (s *syncService) endBatch(ctx *context.T, tx store.Transaction, btid, count uint64) error {
+ s.batchesLock.Lock()
+ defer s.batchesLock.Unlock()
+
+ if btid == NoBatchId || count == 0 {
+ return verror.New(verror.ErrInternal, ctx, "invalid batch info", btid, count)
+ }
+
+ info := s.batches[btid]
+ if info == nil {
+ return verror.New(verror.ErrInternal, ctx, "unknown batch id", btid)
+ }
+
+ // The first time a batch is ended, info.Count is zero. Subsequently,
+ // if this batch ID is started and ended again, info.Count should be
+ // the same as the "count" value given.
+ if info.Count != 0 && info.Count != count {
+ return verror.New(verror.ErrInternal, ctx, "wrong counts for batch", btid, info.Count, count)
+ }
+
+ // Only save non-empty batches.
+ if len(info.Objects) > 0 {
+ info.Count = count
+ if err := setBatch(ctx, tx, btid, info); err != nil {
+ return err
+ }
+ }
+
+ delete(s.batches, btid)
+ return nil
+}
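+
+// Illustrative sketch, assuming a hypothetical caller: the expected batch
+// lifecycle when a group of object mutations committed together is added to
+// the DAG. startBatch hands out (or restores) the batch ID, each addNode call
+// ties its node to that batch, and endBatch persists the batch once the total
+// object count of the original write batch is known. The exampleAddBatch name
+// and the muts/logrecs parameters are assumptions for this example.
+func (s *syncService) exampleAddBatch(ctx *context.T, tx store.Transaction, muts, logrecs map[string]string, totalCount uint64) error {
+ btid := s.startBatch(ctx, tx, NoBatchId)
+ for oid, version := range muts {
+ // nil parents means each object's first version; real callers pass
+ // the actual parent versions of the mutation.
+ if err := s.addNode(ctx, tx, oid, version, logrecs[oid], false, nil, btid, nil); err != nil {
+ return err
+ }
+ }
+ return s.endBatch(ctx, tx, btid, totalCount)
+}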
+
+// addNode adds a new node for a DAG object, linking it to its parent nodes.
+// It verifies that the node does not exist and its parent nodes are valid.
+// If a batch ID is given, track the node membership in the batch.
+//
+// Note: an in-memory grafting structure is passed to track DAG attachments
+// during a sync operation. This is needed when nodes are being added due to
+// remote changes fetched by the sync protocol. The Initiator allocates a
+// grafting structure at the start of a sync operation and passes it across
+// calls to addNode() to update the DAG grafting state:
+// - If a parent node is not new, mark it as a DAG graft point.
+// - Mark this version as a new node.
+// - Update the new head node pointer of the grafted DAG.
+//
+// The grafting structure is not needed when nodes are added locally by the
+// Watcher, which passes a nil grafting structure.
+func (s *syncService) addNode(ctx *context.T, tx store.Transaction, oid, version, logrec string, deleted bool, parents []string, btid uint64, graft graftMap) error {
+ if parents != nil {
+ if len(parents) > 2 {
+ return verror.New(verror.ErrInternal, ctx, "cannot have more than 2 parents")
+ }
+ if len(parents) == 0 {
+ parents = nil // replace an empty array with a nil
+ }
+ }
+
+ // The new node must not exist.
+ if ok, err := hasNode(ctx, tx, oid, version); err != nil {
+ return err
+ } else if ok {
+ return verror.New(verror.ErrInternal, ctx, "DAG node already exists", oid, version)
+ }
+
+ // Verify the parents and determine the node level. Also save the parent
+ // node levels for the graft updates later in this function.
+ parentLevels := make(map[string]uint64)
+ var level uint64
+ for _, parent := range parents {
+ pnode, err := getNode(ctx, tx, oid, parent)
+ if err != nil {
+ return err
+ }
+ parentLevels[parent] = pnode.Level
+ if level <= pnode.Level {
+ level = pnode.Level + 1
+ }
+ }
+
+ // If a batch ID is given, add the node to that batch.
+ if btid != NoBatchId {
+ if err := s.addNodeToBatch(ctx, btid, oid, version); err != nil {
+ return err
+ }
+ }
+
+ // Add the node entry to the DAG.
+ node := &dagNode{
+ Level: level,
+ Parents: parents,
+ Logrec: logrec,
+ BatchId: btid,
+ Deleted: deleted,
+ }
+ if err := setNode(ctx, tx, oid, version, node); err != nil {
+ return err
+ }
+
+ // We are done if grafting is not being tracked (a local node add).
+ if graft == nil {
+ return nil
+ }
+
+ // Get the object's graft info entry in order to update it. It happens
+ // when addNode() is called by the sync Initiator and the DAG is updated
+ // with new nodes fetched from other devices.
+ //
+ // During a sync operation, each mutated object gets new nodes added in
+ // its DAG. These new nodes are either derived from nodes that were
+ // previously known on this device (i.e. their parent nodes are pre-
+ // existing, or they have no parents (new root nodes)), or they are
+ // derived from other new DAG nodes being discovered during this sync
+ // (i.e. their parent nodes were also just added to the DAG).
+ //
+ // To detect a conflict and find the most recent common ancestor to
+ // pass to the conflict resolver, the DAG graft info keeps track of the
+ // new nodes that have old parent nodes. These old-to-new edges are
+ // points where new DAG fragments are attached (grafted) onto the
+ // existing DAG. The old nodes are the graft nodes forming the set of
+ // common ancestors to use in conflict resolution:
+ //
+ // 1- A conflict happens when the current "head node" for an object is
+ // not in the set of graft nodes. It means the object mutations
+ // were not derived from what the device knows, but are divergent
+ // changes at a prior point.
+ //
+ // 2- The most recent common ancestor to use in resolving the conflict
+ // is the object graft node with the deepest level (furthest from
+ // the root node), representing the most up-to-date common knowledge
+ // between the devices.
+ info := getObjectGraftInfo(ctx, tx, graft, oid)
+
+ for _, parent := range parents {
+ // If this parent is an old node, it's a graft point.
+ if !info.newNodes[parent] {
+ info.graftNodes[parent] = parentLevels[parent]
+ }
+
+ // A parent cannot be a candidate for a new head.
+ delete(info.newHeads, parent)
+ }
+
+ // This new node is a candidate for new head version.
+ info.newNodes[version] = true
+ info.newHeads[version] = true
+ return nil
+}
+
+// addParent adds to the DAG node (oid, version) linkage to this parent node.
+//
+// Note: as with the addNode() call, an in-memory grafting structure is passed
+// to track DAG attachments during a sync operation. It is not needed if the
+// parent linkage is due to a local change (from conflict resolution selecting
+// an existing version).
+func (s *syncService) addParent(ctx *context.T, tx store.Transaction, oid, version, parent string, graft graftMap) error {
+ if version == parent {
+ return verror.New(verror.ErrInternal, ctx, "object", oid, version, "cannot be its own parent")
+ }
+
+ node, err := getNode(ctx, tx, oid, version)
+ if err != nil {
+ return err
+ }
+ pnode, err := getNode(ctx, tx, oid, parent)
+ if err != nil {
+ return err
+ }
+
+ // Check if the parent is already linked to this node.
+ found := false
+ for _, p := range node.Parents {
+ if p == parent {
+ found = true
+ break
+ }
+ }
+
+ // Add the parent if it is not yet linked.
+ if !found {
+ // Make sure that adding the link does not create a DAG cycle.
+ // Verify that the node is not an ancestor of the parent that
+ // it is being linked to.
+ err = forEachAncestor(ctx, tx, oid, pnode.Parents, func(v string, nd *dagNode) error {
+ if v == version {
+ return verror.New(verror.ErrInternal, ctx, "cycle on object",
+ oid, ": node", version, "is ancestor of parent", parent)
+ }
+ return nil
+ })
+ if err != nil {
+ return err
+ }
+ node.Parents = append(node.Parents, parent)
+ if err = setNode(ctx, tx, oid, version, node); err != nil {
+ return err
+ }
+ }
+
+ // If no grafting structure is given (i.e. local changes), we are done.
+ if graft == nil {
+ return nil
+ }
+
+ // Update graft: if the node and its parent are new/old or old/new then
+ // add the parent as a graft point (a potential common ancestor).
+ info := getObjectGraftInfo(ctx, tx, graft, oid)
+
+ _, nodeNew := info.newNodes[version]
+ _, parentNew := info.newNodes[parent]
+ if (nodeNew && !parentNew) || (!nodeNew && parentNew) {
+ info.graftNodes[parent] = pnode.Level
+ }
+
+ // The parent node can no longer be a candidate for a new head version.
+ delete(info.newHeads, parent)
+ return nil
+}
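+
+// Illustrative sketch, assuming a hypothetical helper name: the local use of
+// addParent described above, where conflict resolution picks an existing
+// version as the winner. The losing head becomes an extra parent of the winner
+// so both branches converge, and the winner then becomes the new head. No
+// graft map is passed because this is a local change.
+func (s *syncService) examplePickExisting(ctx *context.T, tx store.Transaction, oid, winner, loser string) error {
+ if err := s.addParent(ctx, tx, oid, winner, loser, nil); err != nil {
+ return err
+ }
+ return moveHead(ctx, tx, oid, winner)
+}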
+
+// moveHead moves the object head node in the DAG.
+func moveHead(ctx *context.T, tx store.Transaction, oid, head string) error {
+ // Verify that the node exists.
+ if ok, err := hasNode(ctx, tx, oid, head); err != nil {
+ return err
+ } else if !ok {
+ return verror.New(verror.ErrInternal, ctx, "node", oid, head, "does not exist")
+ }
+
+ return setHead(ctx, tx, oid, head)
+}
+
+// hasConflict determines if an object has a conflict between its new and old
+// head nodes.
+// - Yes: return (true, newHead, oldHead, ancestor) -- from a common past
+// - Yes: return (true, newHead, oldHead, NoVersion) -- from disjoint pasts
+// - No: return (false, newHead, oldHead, NoVersion) -- no conflict
+//
+// A conflict exists when there are two new-head nodes in the graft structure.
+// It means the newly added object versions are not derived in part from this
+// device's current knowledge. A conflict also exists if the snapshotted local
+// head is different from the current local head. If there is a single new head
+// and the snapshot head is the same as the current local head, the object
+// changes were applied without triggering a conflict.
+func hasConflict(ctx *context.T, st store.StoreReader, oid string, graft graftMap) (isConflict bool, newHead, oldHead, ancestor string, err error) {
+ isConflict = false
+ oldHead = NoVersion
+ newHead = NoVersion
+ ancestor = NoVersion
+ err = nil
+
+ if graft == nil {
+ err = verror.New(verror.ErrInternal, ctx, "no DAG graft map given")
+ return
+ }
+
+ info := graft[oid]
+ if info == nil {
+ err = verror.New(verror.ErrInternal, ctx, "node", oid, "has no DAG graft info")
+ return
+ }
+
+ numHeads := len(info.newHeads)
+ if numHeads < 1 || numHeads > 2 {
+ err = verror.New(verror.ErrInternal, ctx, "node", oid, "invalid count of new heads", numHeads)
+ return
+ }
+
+ // Fetch the current head for this object if it exists. The error from
+ // getHead() is ignored because a newly received object is not yet known
+ // on this device and will not trigger a conflict.
+ oldHead, _ = getHead(ctx, st, oid)
+
+ // If there is only one new head node and the snapshotted old head is
+ // still unchanged, there is no conflict. The new head is that single
+ // one, even if it might also be the same old node.
+ if numHeads == 1 {
+ for head := range info.newHeads {
+ newHead = head
+ }
+ if newHead == info.oldHeadSnap {
+ // Only link log records could've been received.
+ newHead = oldHead
+ return
+ } else if oldHead == info.oldHeadSnap {
+ return
+ }
+ }
+
+ // The new head is the non-old one.
+ for head := range info.newHeads {
+ if head != info.oldHeadSnap {
+ newHead = head
+ break
+ }
+ }
+
+ // There wasn't a conflict at the old snapshot, but now there is. The
+ // snapshotted head is the common ancestor.
+ isConflict = true
+ if numHeads == 1 {
+ vlog.VI(4).Infof("sync: hasConflict: old graft snapshot %v, head %s", graft, oldHead)
+ ancestor = info.oldHeadSnap
+ return
+ }
+
+ // There is a conflict: the best choice ancestor is the graft node with
+ // the largest level (farthest from the root). It is possible in some
+ // corner cases to have multiple graft nodes at the same level. This
+ // would still be a single conflict, but the multiple same-level graft
+// nodes represent equivalent conflict resolutions on different
+ // devices that are now merging their resolutions. In such a case it
+ // does not matter which node is chosen as the ancestor because the
+ // conflict resolver function is assumed to be convergent. However it
+ // is nicer to make that selection deterministic so all devices see the
+ // same choice: the version number is used as a tie-breaker.
+ // Note: for the case of a conflict from disjoint pasts, there are no
+ // graft nodes (empty set) and thus no common ancestor because the two
+ // DAG fragments were created from distinct root nodes. The "NoVersion"
+ // value is returned as the ancestor.
+ var maxLevel uint64
+ for node, level := range info.graftNodes {
+ if maxLevel < level || (maxLevel == level && ancestor < node) {
+ maxLevel = level
+ ancestor = node
+ }
+ }
+ return
+}
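+
+// Illustrative sketch, assuming hypothetical names (remoteDagNode,
+// exampleApplyFragment): how the helpers above combine when a received DAG
+// fragment for one object is applied. New nodes are added against a graft map,
+// hasConflict then classifies the outcome, and if there is no conflict the
+// head simply moves forward; real initiator code also resolves conflicts and
+// updates the store data, which is elided here.
+type remoteDagNode struct {
+ version, logrec string
+ parents []string
+ deleted bool
+}
+
+func (s *syncService) exampleApplyFragment(ctx *context.T, tx store.Transaction, oid string, frag []remoteDagNode) error {
+ graft := newGraft()
+ for _, n := range frag {
+ if err := s.addNode(ctx, tx, oid, n.version, n.logrec, n.deleted, n.parents, NoBatchId, graft); err != nil {
+ return err
+ }
+ }
+ isConflict, newHead, _, _, err := hasConflict(ctx, tx, oid, graft)
+ if err != nil {
+ return err
+ }
+ if isConflict {
+ // Conflict resolution (see conflict_resolution.go) picks or creates the new head.
+ return nil
+ }
+ return moveHead(ctx, tx, oid, newHead)
+}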
+
+// newGraft allocates a graftMap to track DAG node grafting during sync.
+func newGraft() graftMap {
+ return make(graftMap)
+}
+
+// getObjectGraftInfo returns the graftInfo for an object ID. If the graftMap is
+// nil, a nil graftInfo is returned because grafting is not being tracked.
+func getObjectGraftInfo(ctx *context.T, sntx store.SnapshotOrTransaction, graft graftMap, oid string) *graftInfo {
+ if graft == nil {
+ return nil
+ }
+ if info := graft[oid]; info != nil {
+ return info
+ }
+
+ info := &graftInfo{
+ newNodes: make(map[string]bool),
+ newHeads: make(map[string]bool),
+ graftNodes: make(map[string]uint64),
+ }
+
+ // If the object has a head node, include it in the set of new heads.
+ if head, err := getHead(ctx, sntx, oid); err == nil {
+ info.newHeads[head] = true
+ info.oldHeadSnap = head
+ }
+
+ graft[oid] = info
+ return info
+}
+
+// forEachAncestor loops over the DAG ancestor nodes of an object in a breadth-
+// first traversal starting from the given version nodes. It calls the given
+// callback function once for each ancestor node.
+func forEachAncestor(ctx *context.T, st store.StoreReader, oid string, startVersions []string, callback func(version string, node *dagNode) error) error {
+ visited := make(map[string]bool)
+ queue := list.New()
+ for _, version := range startVersions {
+ queue.PushBack(version)
+ visited[version] = true
+ }
+
+ for queue.Len() > 0 {
+ version := queue.Remove(queue.Front()).(string)
+ node, err := getNode(ctx, st, oid, version)
+ if err != nil {
+ // Ignore it, the parent was previously pruned.
+ continue
+ }
+ for _, parent := range node.Parents {
+ if !visited[parent] {
+ queue.PushBack(parent)
+ visited[parent] = true
+ }
+ }
+ if err = callback(version, node); err != nil {
+ return err
+ }
+ }
+ return nil
+}
+
+// newBatchPruning allocates an in-memory structure to track batches affected
+// by a DAG pruning operation across objects.
+func newBatchPruning() batchSet {
+ return make(batchSet)
+}
+
+// prune trims the DAG of an object at a given version (node) by deleting all
+// its ancestor nodes, making it the new root node. For each deleted node it
+// calls the given callback function to delete its log record.
+//
+// Note: this function should only be used when sync determines that all devices
+// that know about this object have gotten past this version.
+//
+// The batch set passed is used to track batches affected by the deletion of DAG
+// objects across multiple calls to prune(). It is later given to pruneDone()
+// to do GC on these batches.
+func prune(ctx *context.T, tx store.Transaction, oid, version string, batches batchSet, delLogRec func(ctx *context.T, tx store.Transaction, logrec string) error) error {
+ if batches == nil {
+ return verror.New(verror.ErrInternal, ctx, "missing batch set")
+ }
+
+ // Get the node at the pruning point and set its parents to nil.
+ // It will become the oldest DAG node (root) for the object.
+ node, err := getNode(ctx, tx, oid, version)
+ if err != nil {
+ return err
+ }
+ if node.Parents == nil {
+ // Nothing to do, this node is already the root.
+ return nil
+ }
+
+ parents := node.Parents
+ node.Parents = nil
+ if err = setNode(ctx, tx, oid, version, node); err != nil {
+ return err
+ }
+
+ // Delete all ancestor nodes and their log records. Delete as many as
+ // possible and track the error counts. Update the batch set to track
+ // their pruning.
+ nodeErrs, logErrs := 0, 0
+ forEachAncestor(ctx, tx, oid, parents, func(v string, nd *dagNode) error {
+ if btid := nd.BatchId; btid != NoBatchId {
+ if batches[btid] == nil {
+ batches[btid] = newBatchInfo()
+ }
+ batches[btid].Objects[oid] = v
+ }
+
+ if err := delLogRec(ctx, tx, nd.Logrec); err != nil {
+ logErrs++
+ }
+ if err := delNode(ctx, tx, oid, v); err != nil {
+ nodeErrs++
+ }
+ return nil
+ })
+ if nodeErrs != 0 || logErrs != 0 {
+ return verror.New(verror.ErrInternal, ctx,
+ "prune failed to delete nodes and logs:", nodeErrs, logErrs)
+ }
+ return nil
+}
+
+// pruneDone is called when object pruning is finished within a single pass of
+// the sync garbage collector. It updates the batch sets affected by objects
+// deleted by prune().
+func pruneDone(ctx *context.T, tx store.Transaction, batches batchSet) error {
+ // Update batch sets by removing the pruned objects from them.
+ for btid, pruneInfo := range batches {
+ info, err := getBatch(ctx, tx, btid)
+ if err != nil {
+ return err
+ }
+
+ for oid := range pruneInfo.Objects {
+ delete(info.Objects, oid)
+ }
+
+ if len(info.Objects) > 0 {
+ err = setBatch(ctx, tx, btid, info)
+ } else {
+ err = delBatch(ctx, tx, btid)
+ }
+ if err != nil {
+ return err
+ }
+ }
+ return nil
+}
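+
+// Illustrative sketch, assuming a hypothetical exampleGCPass helper: how prune
+// and pruneDone cooperate within one garbage collection pass. Each object is
+// pruned at the version all devices are known to have passed, the affected
+// batches accumulate in a shared batchSet, and pruneDone then reconciles or
+// deletes those batch entries. The delLogRec callback is a placeholder; the
+// real collector deletes the log record named by logrec.
+func exampleGCPass(ctx *context.T, tx store.Transaction, pruneAt map[string]string) error {
+ batches := newBatchPruning()
+ for oid, version := range pruneAt {
+ err := prune(ctx, tx, oid, version, batches, func(ctx *context.T, tx store.Transaction, logrec string) error {
+ return nil // placeholder for deleting the log record
+ })
+ if err != nil {
+ return err
+ }
+ }
+ return pruneDone(ctx, tx, batches)
+}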
+
+// getLogRecKey returns the key of the log record for a given object version.
+func getLogRecKey(ctx *context.T, st store.StoreReader, oid, version string) (string, error) {
+ node, err := getNode(ctx, st, oid, version)
+ if err != nil {
+ return "", err
+ }
+ return node.Logrec, nil
+}
+
+// Low-level utility functions to access DB entries without tracking their
+// relationships. Use the functions above to manipulate the DAG.
+
+// nodeKey returns the key used to access a DAG node (oid, version).
+func nodeKey(oid, version string) string {
+ return util.JoinKeyParts(util.SyncPrefix, dagPrefix, "n", oid, version)
+}
+
+// setNode stores the DAG node entry.
+func setNode(ctx *context.T, tx store.Transaction, oid, version string, node *dagNode) error {
+ if version == NoVersion {
+ return verror.New(verror.ErrInternal, ctx, "invalid version", version)
+ }
+
+ return util.Put(ctx, tx, nodeKey(oid, version), node)
+}
+
+// getNode retrieves the DAG node entry for the given (oid, version).
+func getNode(ctx *context.T, st store.StoreReader, oid, version string) (*dagNode, error) {
+ if version == NoVersion {
+ return nil, verror.New(verror.ErrInternal, ctx, "invalid version", version)
+ }
+
+ var node dagNode
+ key := nodeKey(oid, version)
+ if err := util.Get(ctx, st, key, &node); err != nil {
+ return nil, err
+ }
+ return &node, nil
+}
+
+// delNode deletes the DAG node entry.
+func delNode(ctx *context.T, tx store.Transaction, oid, version string) error {
+ if version == NoVersion {
+ return verror.New(verror.ErrInternal, ctx, "invalid version", version)
+ }
+
+ return util.Delete(ctx, tx, nodeKey(oid, version))
+}
+
+// hasNode returns true if the node (oid, version) exists in the DAG.
+func hasNode(ctx *context.T, st store.StoreReader, oid, version string) (bool, error) {
+ // TODO(rdaoud): optimize to avoid the unneeded fetch/decode of the data.
+ if _, err := getNode(ctx, st, oid, version); err != nil {
+ if verror.ErrorID(err) == verror.ErrNoExist.ID {
+ err = nil
+ }
+ return false, err
+ }
+ return true, nil
+}
+
+// headKey returns the key used to access the DAG object head.
+func headKey(oid string) string {
+ return util.JoinKeyParts(util.SyncPrefix, dagPrefix, "h", oid)
+}
+
+// setHead stores version as the DAG object head.
+func setHead(ctx *context.T, tx store.Transaction, oid, version string) error {
+ if version == NoVersion {
+ return verror.New(verror.ErrInternal, ctx, fmt.Errorf("invalid version: %s", version))
+ }
+
+ return util.Put(ctx, tx, headKey(oid), version)
+}
+
+// getHead retrieves the DAG object head.
+func getHead(ctx *context.T, st store.StoreReader, oid string) (string, error) {
+ var version string
+ key := headKey(oid)
+ if err := util.Get(ctx, st, key, &version); err != nil {
+ return NoVersion, err
+ }
+ return version, nil
+}
+
+// delHead deletes the DAG object head.
+func delHead(ctx *context.T, tx store.Transaction, oid string) error {
+ return util.Delete(ctx, tx, headKey(oid))
+}
+
+// batchKey returns the key used to access the DAG batch info.
+func batchKey(btid uint64) string {
+ return util.JoinKeyParts(util.SyncPrefix, dagPrefix, "b", fmt.Sprintf("%d", btid))
+}
+
+// setBatch stores the DAG batch entry.
+func setBatch(ctx *context.T, tx store.Transaction, btid uint64, info *batchInfo) error {
+ if btid == NoBatchId {
+ return verror.New(verror.ErrInternal, ctx, "invalid batch id", btid)
+ }
+
+ return util.Put(ctx, tx, batchKey(btid), info)
+}
+
+// getBatch retrieves the DAG batch entry.
+func getBatch(ctx *context.T, st store.StoreReader, btid uint64) (*batchInfo, error) {
+ if btid == NoBatchId {
+ return nil, verror.New(verror.ErrInternal, ctx, "invalid batch id", btid)
+ }
+
+ var info batchInfo
+ key := batchKey(btid)
+ if err := util.Get(ctx, st, key, &info); err != nil {
+ return nil, err
+ }
+ return &info, nil
+}
+
+// delBatch deletes the DAG batch entry.
+func delBatch(ctx *context.T, tx store.Transaction, btid uint64) error {
+ if btid == NoBatchId {
+ return verror.New(verror.ErrInternal, ctx, "invalid batch id", btid)
+ }
+
+ return util.Delete(ctx, tx, batchKey(btid))
+}
+
+// getParentMap is a testing and debug helper function that returns a map of an
+// object's DAG (node-to-parents relations). If a graft structure
+// is given, include its fragments in the map.
+func getParentMap(ctx *context.T, st store.StoreReader, oid string, graft graftMap) map[string][]string {
+ parentMap := make(map[string][]string)
+ var start []string
+
+ if head, err := getHead(ctx, st, oid); err == nil {
+ start = append(start, head)
+ }
+ if graft != nil && graft[oid] != nil {
+ for v := range graft[oid].newHeads {
+ start = append(start, v)
+ }
+ }
+
+ forEachAncestor(ctx, st, oid, start, func(v string, nd *dagNode) error {
+ parentMap[v] = nd.Parents
+ return nil
+ })
+ return parentMap
+}
diff --git a/services/syncbase/vsync/dag_test.go b/services/syncbase/vsync/dag_test.go
new file mode 100644
index 0000000..9ef43f7
--- /dev/null
+++ b/services/syncbase/vsync/dag_test.go
@@ -0,0 +1,1632 @@
+// Copyright 2015 The Vanadium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package vsync
+
+// Tests for the Syncbase DAG.
+
+import (
+ "errors"
+ "fmt"
+ "reflect"
+ "strconv"
+ "testing"
+
+ "v.io/syncbase/x/ref/services/syncbase/store"
+ "v.io/v23/context"
+ _ "v.io/x/ref/runtime/factories/generic"
+)
+
+// TestSetNode tests setting and getting a DAG node.
+func TestSetNode(t *testing.T) {
+ svc := createService(t)
+ defer destroyService(t, svc)
+ st := svc.St()
+
+ oid, version := "1111", "1"
+
+ node, err := getNode(nil, st, oid, version)
+ if err == nil || node != nil {
+ t.Errorf("found non-existent object %s:%s: %v", oid, version, node)
+ }
+
+ if ok, err := hasNode(nil, st, oid, version); err != nil || ok {
+ t.Errorf("hasNode() found non-existent object %s:%s", oid, version)
+ }
+
+ if logrec, err := getLogRecKey(nil, st, oid, version); err == nil || logrec != "" {
+ t.Errorf("non-existent object %s:%s has a logrec: %v", oid, version, logrec)
+ }
+
+ node = &dagNode{Level: 15, Parents: []string{"444", "555"}, Logrec: "logrec-23"}
+
+ tx := st.NewTransaction()
+ if err = setNode(nil, tx, oid, version, node); err != nil {
+ t.Fatalf("cannot set object %s:%s (%v): %v", oid, version, node, err)
+ }
+ tx.Commit()
+
+ node2, err := getNode(nil, st, oid, version)
+ if err != nil || node2 == nil {
+ t.Errorf("cannot find stored object %s:%s: %v", oid, version, err)
+ }
+
+ if ok, err := hasNode(nil, st, oid, version); err != nil || !ok {
+ t.Errorf("hasNode() did not find object %s:%s", oid, version)
+ }
+
+ if !reflect.DeepEqual(node, node2) {
+ t.Errorf("object %s:%s has wrong data: %v instead of %v", oid, version, node2, node)
+ }
+
+ if logrec, err := getLogRecKey(nil, st, oid, version); err != nil || logrec != "logrec-23" {
+ t.Errorf("object %s:%s has wrong logrec: %s", oid, version, logrec)
+ }
+}
+
+// TestDelNode tests deleting a DAG node.
+func TestDelNode(t *testing.T) {
+ svc := createService(t)
+ defer destroyService(t, svc)
+ st := svc.St()
+
+ oid, version := "2222", "2"
+
+ node := &dagNode{Level: 123, Parents: []string{"333"}, Logrec: "logrec-789"}
+
+ tx := st.NewTransaction()
+ if err := setNode(nil, tx, oid, version, node); err != nil {
+ t.Fatalf("cannot set object %s:%s (%v): %v", oid, version, node, err)
+ }
+ tx.Commit()
+
+ tx = st.NewTransaction()
+ if err := delNode(nil, tx, oid, version); err != nil {
+ t.Fatalf("cannot delete object %s:%s: %v", oid, version, err)
+ }
+ tx.Commit()
+
+ node2, err := getNode(nil, st, oid, version)
+ if err == nil || node2 != nil {
+ t.Errorf("found deleted object %s:%s (%v)", oid, version, node2)
+ }
+
+ if ok, err := hasNode(nil, st, oid, version); err != nil || ok {
+ t.Errorf("hasNode() found deleted object %s:%s", oid, version)
+ }
+
+ if logrec, err := getLogRecKey(nil, st, oid, version); err == nil || logrec != "" {
+ t.Errorf("deleted object %s:%s has logrec: %s", oid, version, logrec)
+ }
+}
+
+// TestAddParent tests adding parents to a DAG node.
+func TestAddParent(t *testing.T) {
+ svc := createService(t)
+ defer destroyService(t, svc)
+ st := svc.St()
+ s := svc.sync
+
+ oid, version := "foo1", "7"
+
+ tx := st.NewTransaction()
+ if err := s.addParent(nil, tx, oid, version, "haha", nil); err == nil {
+ t.Errorf("addParent() did not fail for an unknown object %s:%s", oid, version)
+ }
+ tx.Abort()
+
+ if _, err := s.dagReplayCommands(nil, "local-init-00.log.sync"); err != nil {
+ t.Fatal(err)
+ }
+
+ node := &dagNode{Level: 15, Logrec: "logrec-22"}
+
+ tx = st.NewTransaction()
+ if err := setNode(nil, tx, oid, version, node); err != nil {
+ t.Fatalf("cannot set object %s:%s (%v): %v", oid, version, node, err)
+ }
+ tx.Commit()
+
+ graft := newGraft()
+ tx = st.NewTransaction()
+ if err := s.addParent(nil, tx, oid, version, version, graft); err == nil {
+ t.Errorf("addParent() did not fail on a self-parent for object %s:%s", oid, version)
+ }
+ tx.Abort()
+
+ remote := true
+ expParents := []string{"4", "5", "6"}
+
+ for _, parent := range expParents {
+ tx = st.NewTransaction()
+ if err := s.addParent(nil, tx, oid, version, parent, graft); err == nil {
+ t.Errorf("addParent() did not reject invalid parent %s for object %s:%s",
+ parent, oid, version)
+ }
+ tx.Abort()
+
+ pnode := &dagNode{Level: 11, Logrec: fmt.Sprintf("logrec-%s", parent), Parents: []string{"3"}}
+
+ tx = st.NewTransaction()
+ if err := setNode(nil, tx, oid, parent, pnode); err != nil {
+ t.Fatalf("cannot set parent object %s:%s (%v): %v", oid, parent, pnode, err)
+ }
+ tx.Commit()
+
+ var g graftMap
+ if remote {
+ g = graft
+ }
+
+ // addParent() twice to verify it is idempotent.
+ for i := 0; i < 2; i++ {
+ tx = st.NewTransaction()
+ if err := s.addParent(nil, tx, oid, version, parent, g); err != nil {
+ t.Errorf("addParent() failed on parent %s, remote %t (i=%d) for %s:%s: %v",
+ parent, remote, i, oid, version, err)
+ }
+ tx.Commit()
+ }
+
+ remote = !remote
+ }
+
+ node2, err := getNode(nil, st, oid, version)
+ if err != nil || node2 == nil {
+ t.Errorf("cannot find object %s:%s: %v", oid, version, err)
+ }
+
+ if !reflect.DeepEqual(node2.Parents, expParents) {
+ t.Errorf("invalid parents for object %s:%s: %v instead of %v",
+ oid, version, node2.Parents, expParents)
+ }
+
+ // Creating cycles should fail.
+ for v := 1; v < 7; v++ {
+ ver := fmt.Sprintf("%d", v)
+ tx = st.NewTransaction()
+ if err = s.addParent(nil, tx, oid, ver, version, nil); err == nil {
+ t.Errorf("addParent() failed to reject a cycle for %s: from ancestor %s to node %s",
+ oid, ver, version)
+ }
+ tx.Abort()
+ }
+}
+
+// TestSetHead tests setting and getting a DAG head node.
+func TestSetHead(t *testing.T) {
+ svc := createService(t)
+ defer destroyService(t, svc)
+ st := svc.St()
+
+ oid := "3333"
+
+ version, err := getHead(nil, st, oid)
+ if err == nil {
+ t.Errorf("found non-existent object head %s:%s", oid, version)
+ }
+
+ for i := 0; i < 2; i++ {
+ version = fmt.Sprintf("v%d", 555+i)
+
+ tx := st.NewTransaction()
+ if err = setHead(nil, tx, oid, version); err != nil {
+ t.Fatalf("cannot set object head %s:%s (i=%d)", oid, version, i)
+ }
+ tx.Commit()
+
+ version2, err := getHead(nil, st, oid)
+ if err != nil {
+ t.Errorf("cannot find stored object head %s (i=%d)", oid, i)
+ }
+ if version != version2 {
+ t.Errorf("object %s has wrong head data (i=%d): %s instead of %s",
+ oid, i, version2, version)
+ }
+ }
+}
+
+// TestLocalUpdates tests the sync handling of initial local updates: an object
+// is created (v1) and updated twice (v2, v3) on this device. The DAG should
+// show: v1 -> v2 -> v3 and the head should point to v3.
+func TestLocalUpdates(t *testing.T) {
+ svc := createService(t)
+ defer destroyService(t, svc)
+ st := svc.St()
+ s := svc.sync
+
+ oid := "foo1"
+
+ if _, err := s.dagReplayCommands(nil, "local-init-00.log.sync"); err != nil {
+ t.Fatal(err)
+ }
+
+ // The head must have moved to v3 and the parent map shows the updated DAG.
+ if head, err := getHead(nil, st, oid); err != nil || head != "3" {
+ t.Errorf("invalid object %s head: %s", oid, head)
+ }
+
+ pmap := getParentMap(nil, st, oid, nil)
+
+ exp := map[string][]string{"1": nil, "2": {"1"}, "3": {"2"}}
+
+ if !reflect.DeepEqual(pmap, exp) {
+ t.Errorf("invalid object %s parent map: (%v) instead of (%v)", oid, pmap, exp)
+ }
+
+ // Make sure an existing node cannot be added again.
+ tx := st.NewTransaction()
+ if err := s.addNode(nil, tx, oid, "2", "foo", false, []string{"1", "3"}, NoBatchId, nil); err == nil {
+ t.Errorf("addNode() did not fail when given an existing node")
+ }
+
+ // Make sure a new node cannot have more than 2 parents.
+ if err := s.addNode(nil, tx, oid, "4", "foo", false, []string{"1", "2", "3"}, NoBatchId, nil); err == nil {
+ t.Errorf("addNode() did not fail when given 3 parents")
+ }
+
+ // Make sure a new node cannot have an invalid parent.
+ if err := s.addNode(nil, tx, oid, "4", "foo", false, []string{"1", "555"}, NoBatchId, nil); err == nil {
+ t.Errorf("addNode() did not fail when using an invalid parent")
+ }
+
+ // Make sure a new root node (no parents) can be added once a root exists.
+ // For the parents array, check both the "nil" and the empty array as input.
+ if err := s.addNode(nil, tx, oid, "6789", "foo", false, nil, NoBatchId, nil); err != nil {
+ t.Errorf("cannot add another root node (nil parents) for object %s: %v", oid, err)
+ }
+ if err := s.addNode(nil, tx, oid, "9999", "foo", false, []string{}, NoBatchId, nil); err != nil {
+ t.Errorf("cannot add another root node (empty parents) for object %s: %v", oid, err)
+ }
+
+ tx.Abort()
+}
+
+// TestRemoteUpdates tests the sync handling of initial remote updates:
+// an object is created (v1) and updated twice (v2, v3) on another device and
+// we learn about it during sync. The updated DAG should show: v1 -> v2 -> v3
+// and report no conflicts with the new head pointing at v3.
+func TestRemoteUpdates(t *testing.T) {
+ svc := createService(t)
+ defer destroyService(t, svc)
+ st := svc.St()
+ s := svc.sync
+
+ oid := "foo1"
+
+ graft, err := s.dagReplayCommands(nil, "remote-init-00.log.sync")
+ if err != nil {
+ t.Fatal(err)
+ }
+
+ // The head must not have moved (i.e. still undefined) and the parent
+ // map shows the newly grafted DAG fragment.
+ if head, err := getHead(nil, st, oid); err == nil {
+ t.Errorf("object %s head found: %s", oid, head)
+ }
+
+ pmap := getParentMap(nil, st, oid, graft)
+
+ exp := map[string][]string{"1": nil, "2": {"1"}, "3": {"2"}}
+
+ if !reflect.DeepEqual(pmap, exp) {
+ t.Errorf("invalid object %s parent map: (%v) instead of (%v)", oid, pmap, exp)
+ }
+
+ // Verify the grafting of remote nodes.
+ g := graft[oid]
+
+ expNewHeads := map[string]bool{"3": true}
+
+ if !reflect.DeepEqual(g.newHeads, expNewHeads) {
+ t.Errorf("object %s has invalid newHeads: (%v) instead of (%v)", oid, g.newHeads, expNewHeads)
+ }
+
+ expGrafts := map[string]uint64{}
+ if !reflect.DeepEqual(g.graftNodes, expGrafts) {
+ t.Errorf("invalid object %s graft: (%v) instead of (%v)", oid, g.graftNodes, expGrafts)
+ }
+
+ // There should be no conflict.
+ isConflict, newHead, oldHead, ancestor, errConflict := hasConflict(nil, st, oid, graft)
+ if !(!isConflict && newHead == "3" && oldHead == NoVersion && ancestor == NoVersion && errConflict == nil) {
+ t.Errorf("object %s: wrong conflict info: flag %t, newHead %s, oldHead %s, ancestor %s, err %v",
+ oid, isConflict, newHead, oldHead, ancestor, errConflict)
+ }
+
+ if logrec, err := getLogRecKey(nil, st, oid, newHead); err != nil || logrec != "$sync:log:11:3" {
+ t.Errorf("invalid logrec for newhead object %s:%s: %v", oid, newHead, logrec)
+ }
+
+ // Make sure an unknown node cannot become the new head.
+ tx := st.NewTransaction()
+ if err := moveHead(nil, tx, oid, "55"); err == nil {
+ t.Errorf("moveHead() did not fail on an invalid node")
+ }
+ tx.Abort()
+
+ // Then move the head.
+ tx = st.NewTransaction()
+ if err := moveHead(nil, tx, oid, newHead); err != nil {
+ t.Errorf("object %s cannot move head to %s: %v", oid, newHead, err)
+ }
+ tx.Commit()
+}
+
+// TestRemoteNoConflict tests sync of remote updates on top of a local initial
+// state without conflict. An object is created locally and updated twice
+// (v1 -> v2 -> v3). Another device, having gotten this info, makes 3 updates
+// on top of that (v3 -> v4 -> v5 -> v6) and sends this info in a later sync.
+// The updated DAG should show (v1 -> v2 -> v3 -> v4 -> v5 -> v6) and report
+// no conflicts with the new head pointing at v6. It should also report v3 as
+// the graft point on which the new fragment (v4 -> v5 -> v6) gets attached.
+func TestRemoteNoConflict(t *testing.T) {
+ svc := createService(t)
+ defer destroyService(t, svc)
+ st := svc.St()
+ s := svc.sync
+
+ oid := "foo1"
+
+ if _, err := s.dagReplayCommands(nil, "local-init-00.log.sync"); err != nil {
+ t.Fatal(err)
+ }
+ graft, err := s.dagReplayCommands(nil, "remote-noconf-00.log.sync")
+ if err != nil {
+ t.Fatal(err)
+ }
+
+ // The head must not have moved (i.e. still at v3) and the parent map
+ // shows the newly grafted DAG fragment on top of the prior DAG.
+ if head, err := getHead(nil, st, oid); err != nil || head != "3" {
+ t.Errorf("object %s has wrong head: %s", oid, head)
+ }
+
+ pmap := getParentMap(nil, st, oid, graft)
+
+ exp := map[string][]string{"1": nil, "2": {"1"}, "3": {"2"}, "4": {"3"}, "5": {"4"}, "6": {"5"}}
+
+ if !reflect.DeepEqual(pmap, exp) {
+ t.Errorf("invalid object %s parent map: (%v) instead of (%v)", oid, pmap, exp)
+ }
+
+ // Verify the grafting of remote nodes.
+ g := graft[oid]
+
+ expNewHeads := map[string]bool{"6": true}
+ if !reflect.DeepEqual(g.newHeads, expNewHeads) {
+ t.Errorf("object %s has invalid newHeads: (%v) instead of (%v)", oid, g.newHeads, expNewHeads)
+ }
+
+ expGrafts := map[string]uint64{"3": 2}
+ if !reflect.DeepEqual(g.graftNodes, expGrafts) {
+ t.Errorf("invalid object %s graft: (%v) instead of (%v)", oid, g.graftNodes, expGrafts)
+ }
+
+ // There should be no conflict.
+ isConflict, newHead, oldHead, ancestor, errConflict := hasConflict(nil, st, oid, graft)
+ if !(!isConflict && newHead == "6" && oldHead == "3" && ancestor == NoVersion && errConflict == nil) {
+ t.Errorf("object %s: wrong conflict info: flag %t, newHead %s, oldHead %s, ancestor %s, err %v",
+ oid, isConflict, newHead, oldHead, ancestor, errConflict)
+ }
+
+ if logrec, err := getLogRecKey(nil, st, oid, oldHead); err != nil || logrec != "$sync:log:10:3" {
+ t.Errorf("invalid logrec for oldhead object %s:%s: %v", oid, oldHead, logrec)
+ }
+ if logrec, err := getLogRecKey(nil, st, oid, newHead); err != nil || logrec != "$sync:log:11:3" {
+ t.Errorf("invalid logrec for newhead object %s:%s: %v", oid, newHead, logrec)
+ }
+
+ // Then move the head.
+ tx := st.NewTransaction()
+ if err := moveHead(nil, tx, oid, newHead); err != nil {
+ t.Errorf("object %s cannot move head to %s: %v", oid, newHead, err)
+ }
+ tx.Commit()
+
+ // Verify that hasConflict() fails without graft data.
+ isConflict, newHead, oldHead, ancestor, errConflict = hasConflict(nil, st, oid, nil)
+ if errConflict == nil {
+ t.Errorf("hasConflict() on %s did not fail w/o graft data: flag %t, newHead %s, oldHead %s, ancestor %s, err %v",
+ oid, isConflict, newHead, oldHead, ancestor, errConflict)
+ }
+}
+
+// TestRemoteConflict tests sync handling of remote updates that build on the
+// local initial state and trigger a conflict. An object is created locally
+// and updated twice (v1 -> v2 -> v3). Another device, having only gotten
+// the v1 -> v2 history, makes 3 updates on top of v2 (v2 -> v4 -> v5 -> v6)
+// and sends this info during a later sync. Separately, the local device
+// makes a conflicting (concurrent) update v2 -> v3. The updated DAG should
+// show the branches: (v1 -> v2 -> v3) and (v1 -> v2 -> v4 -> v5 -> v6) and
+// report the conflict between v3 and v6 (current and new heads). It should
+// also report v2 as the graft point and the common ancestor in the conflict.
+// The conflict is resolved locally by creating v7 that is derived from both
+// v3 and v6 and it becomes the new head.
+func TestRemoteConflict(t *testing.T) {
+ svc := createService(t)
+ defer destroyService(t, svc)
+ st := svc.St()
+ s := svc.sync
+
+ oid := "foo1"
+
+ if _, err := s.dagReplayCommands(nil, "local-init-00.log.sync"); err != nil {
+ t.Fatal(err)
+ }
+ graft, err := s.dagReplayCommands(nil, "remote-conf-00.log.sync")
+ if err != nil {
+ t.Fatal(err)
+ }
+
+ // The head must not have moved (i.e. still at v3) and the parent map
+ // shows the newly grafted DAG fragment on top of the prior DAG.
+ if head, err := getHead(nil, st, oid); err != nil || head != "3" {
+ t.Errorf("object %s has wrong head: %s", oid, head)
+ }
+
+ pmap := getParentMap(nil, st, oid, graft)
+
+ exp := map[string][]string{"1": nil, "2": {"1"}, "3": {"2"}, "4": {"2"}, "5": {"4"}, "6": {"5"}}
+
+ if !reflect.DeepEqual(pmap, exp) {
+ t.Errorf("invalid object %s parent map: (%v) instead of (%v)", oid, pmap, exp)
+ }
+
+ // Verify the grafting of remote nodes.
+ g := graft[oid]
+
+ expNewHeads := map[string]bool{"3": true, "6": true}
+ if !reflect.DeepEqual(g.newHeads, expNewHeads) {
+ t.Errorf("object %s has invalid newHeads: (%v) instead of (%v)", oid, g.newHeads, expNewHeads)
+ }
+
+ expGrafts := map[string]uint64{"2": 1}
+ if !reflect.DeepEqual(g.graftNodes, expGrafts) {
+ t.Errorf("invalid object %s graft: (%v) instead of (%v)", oid, g.graftNodes, expGrafts)
+ }
+
+ // There should be a conflict between v3 and v6 with v2 as ancestor.
+ isConflict, newHead, oldHead, ancestor, errConflict := hasConflict(nil, st, oid, graft)
+ if !(isConflict && newHead == "6" && oldHead == "3" && ancestor == "2" && errConflict == nil) {
+ t.Errorf("object %s: wrong conflict info: flag %t, newHead %s, oldHead %s, ancestor %s, err %v",
+ oid, isConflict, newHead, oldHead, ancestor, errConflict)
+ }
+
+ if logrec, err := getLogRecKey(nil, st, oid, oldHead); err != nil || logrec != "$sync:log:10:3" {
+ t.Errorf("invalid logrec for oldhead object %s:%s: %s", oid, oldHead, logrec)
+ }
+ if logrec, err := getLogRecKey(nil, st, oid, newHead); err != nil || logrec != "$sync:log:11:3" {
+ t.Errorf("invalid logrec for newhead object %s:%s: %s", oid, newHead, logrec)
+ }
+ if logrec, err := getLogRecKey(nil, st, oid, ancestor); err != nil || logrec != "$sync:log:10:2" {
+ t.Errorf("invalid logrec for ancestor object %s:%s: %s", oid, ancestor, logrec)
+ }
+
+ // Resolve the conflict by adding a new local v7 derived from v3 and v6 (this replay moves the head).
+ if _, err := s.dagReplayCommands(nil, "local-resolve-00.sync"); err != nil {
+ t.Fatal(err)
+ }
+
+ // Verify that the head moved to v7 and the parent map shows the resolution.
+ if head, err := getHead(nil, st, oid); err != nil || head != "7" {
+ t.Errorf("object %s has wrong head after conflict resolution: %s", oid, head)
+ }
+
+ exp["7"] = []string{"3", "6"}
+ pmap = getParentMap(nil, st, oid, nil)
+ if !reflect.DeepEqual(pmap, exp) {
+ t.Errorf("invalid object %s parent map after conflict resolution: (%v) instead of (%v)",
+ oid, pmap, exp)
+ }
+}
+
+// TestRemoteConflictTwoGrafts tests sync handling of remote updates that build
+// on the local initial state and trigger a conflict with 2 graft points.
+// An object is created locally and updated twice (v1 -> v2 -> v3). Another
+// device first learns about v1 and makes its own conflicting update v1 -> v4.
+// That remote device later learns about v2 and resolves the v2/v4 conflict by
+// creating v5. Then it makes a last v5 -> v6 update -- which will conflict
+// with v3 but it doesn't know that.
+// Now the sync order is reversed and the local device learns all of what
+// happened on the remote device. The local DAG should be augmented by
+// a subtree with 2 graft points: v1 and v2. It receives this new branch:
+// v1 -> v4 -> v5 -> v6. Note that v5 is also derived from v2 as a remote
+// conflict resolution. This should report a conflict between v3 and v6
+// (current and new heads), with v1 and v2 as graft points, and v2 as the
+// most-recent common ancestor for that conflict. The conflict is resolved
+// locally by creating v7, derived from both v3 and v6, becoming the new head.
+func TestRemoteConflictTwoGrafts(t *testing.T) {
+ svc := createService(t)
+ defer destroyService(t, svc)
+ st := svc.St()
+ s := svc.sync
+
+ oid := "foo1"
+
+ if _, err := s.dagReplayCommands(nil, "local-init-00.log.sync"); err != nil {
+ t.Fatal(err)
+ }
+ graft, err := s.dagReplayCommands(nil, "remote-conf-01.log.sync")
+ if err != nil {
+ t.Fatal(err)
+ }
+
+ // The head must not have moved (i.e. still at v3) and the parent map
+ // shows the newly grafted DAG fragment on top of the prior DAG.
+ if head, err := getHead(nil, st, oid); err != nil || head != "3" {
+ t.Errorf("object %s has wrong head: %s", oid, head)
+ }
+
+ pmap := getParentMap(nil, st, oid, graft)
+
+ exp := map[string][]string{"1": nil, "2": {"1"}, "3": {"2"}, "4": {"1"}, "5": {"2", "4"}, "6": {"5"}}
+
+ if !reflect.DeepEqual(pmap, exp) {
+ t.Errorf("invalid object %s parent map: (%v) instead of (%v)", oid, pmap, exp)
+ }
+
+ // Verify the grafting of remote nodes.
+ g := graft[oid]
+
+ expNewHeads := map[string]bool{"3": true, "6": true}
+ if !reflect.DeepEqual(g.newHeads, expNewHeads) {
+ t.Errorf("object %s has invalid newHeads: (%v) instead of (%v)", oid, g.newHeads, expNewHeads)
+ }
+
+ expGrafts := map[string]uint64{"1": 0, "2": 1}
+ if !reflect.DeepEqual(g.graftNodes, expGrafts) {
+ t.Errorf("invalid object %s graft: (%v) instead of (%v)", oid, g.graftNodes, expGrafts)
+ }
+
+ // There should be a conflict between v3 and v6 with v2 as ancestor.
+ isConflict, newHead, oldHead, ancestor, errConflict := hasConflict(nil, st, oid, graft)
+ if !(isConflict && newHead == "6" && oldHead == "3" && ancestor == "2" && errConflict == nil) {
+ t.Errorf("object %s: wrong conflict info: flag %t, newHead %s, oldHead %s, ancestor %s, err %v",
+ oid, isConflict, newHead, oldHead, ancestor, errConflict)
+ }
+
+ if logrec, err := getLogRecKey(nil, st, oid, oldHead); err != nil || logrec != "$sync:log:10:3" {
+ t.Errorf("invalid logrec for oldhead object %s:%s: %s", oid, oldHead, logrec)
+ }
+ if logrec, err := getLogRecKey(nil, st, oid, newHead); err != nil || logrec != "$sync:log:11:2" {
+ t.Errorf("invalid logrec for newhead object %s:%s: %s", oid, newHead, logrec)
+ }
+ if logrec, err := getLogRecKey(nil, st, oid, ancestor); err != nil || logrec != "$sync:log:10:2" {
+ t.Errorf("invalid logrec for ancestor object %s:%s: %s", oid, ancestor, logrec)
+ }
+
+ // Resolve the conflict by adding a new local v7 derived from v3 and v6 (this replay moves the head).
+ if _, err := s.dagReplayCommands(nil, "local-resolve-00.sync"); err != nil {
+ t.Fatal(err)
+ }
+
+ // Verify that the head moved to v7 and the parent map shows the resolution.
+ if head, err := getHead(nil, st, oid); err != nil || head != "7" {
+ t.Errorf("object %s has wrong head after conflict resolution: %s", oid, head)
+ }
+
+ exp["7"] = []string{"3", "6"}
+ pmap = getParentMap(nil, st, oid, nil)
+ if !reflect.DeepEqual(pmap, exp) {
+ t.Errorf("invalid object %s parent map after conflict resolution: (%v) instead of (%v)",
+ oid, pmap, exp)
+ }
+}
+
+// TestRemoteConflictNoAncestor tests sync handling of remote updates that create
+// the same object independently of the local initial state (no common past) and
+// trigger a conflict with no common ancestors (no graft points). An object is
+// created locally and updated twice (v1 -> v2 -> v3). Another device creates
+// the same object from scratch and updates it twice (v4 -> v5 -> v6). When
+// the local device learns of what happened on the remote device, it should
+// detect a conflict between v3 and v6 with no common ancestor. The conflict
+// is resolved locally by creating v7, derived from both v3 and v6, becoming
+// the new head.
+func TestRemoteConflictNoAncestor(t *testing.T) {
+ svc := createService(t)
+ defer destroyService(t, svc)
+ st := svc.St()
+ s := svc.sync
+
+ oid := "foo1"
+
+ if _, err := s.dagReplayCommands(nil, "local-init-00.log.sync"); err != nil {
+ t.Fatal(err)
+ }
+ graft, err := s.dagReplayCommands(nil, "remote-conf-03.log.sync")
+ if err != nil {
+ t.Fatal(err)
+ }
+
+ // The head must not have moved (i.e. still at v3) and the parent map
+ // shows the newly grafted DAG fragment on top of the prior DAG.
+ if head, err := getHead(nil, st, oid); err != nil || head != "3" {
+ t.Errorf("object %s has wrong head: %s", oid, head)
+ }
+
+ pmap := getParentMap(nil, st, oid, graft)
+
+ exp := map[string][]string{"1": nil, "2": {"1"}, "3": {"2"}, "4": nil, "5": {"4"}, "6": {"5"}}
+
+ if !reflect.DeepEqual(pmap, exp) {
+ t.Errorf("invalid object %s parent map: (%v) instead of (%v)", oid, pmap, exp)
+ }
+
+ // Verify the grafting of remote nodes.
+ g := graft[oid]
+
+ expNewHeads := map[string]bool{"3": true, "6": true}
+ if !reflect.DeepEqual(g.newHeads, expNewHeads) {
+ t.Errorf("object %s has invalid newHeads: (%v) instead of (%v)", oid, g.newHeads, expNewHeads)
+ }
+
+ expGrafts := map[string]uint64{}
+ if !reflect.DeepEqual(g.graftNodes, expGrafts) {
+ t.Errorf("invalid object %s graft: (%v) instead of (%v)", oid, g.graftNodes, expGrafts)
+ }
+
+ // There should be a conflict between v3 and v6 with no ancestor.
+ isConflict, newHead, oldHead, ancestor, errConflict := hasConflict(nil, st, oid, graft)
+ if !(isConflict && newHead == "6" && oldHead == "3" && ancestor == NoVersion && errConflict == nil) {
+ t.Errorf("object %s: wrong conflict info: flag %t, newHead %s, oldHead %s, ancestor %s, err %v",
+ oid, isConflict, newHead, oldHead, ancestor, errConflict)
+ }
+
+ if logrec, err := getLogRecKey(nil, st, oid, oldHead); err != nil || logrec != "$sync:log:10:3" {
+ t.Errorf("invalid logrec for oldhead object %s:%s: %s", oid, oldHead, logrec)
+ }
+ if logrec, err := getLogRecKey(nil, st, oid, newHead); err != nil || logrec != "$sync:log:11:3" {
+ t.Errorf("invalid logrec for newhead object %s:%s: %s", oid, newHead, logrec)
+ }
+
+ // Resolve the conflict by adding a new local v7 derived from v3 and v6 (this replay moves the head).
+ if _, err := s.dagReplayCommands(nil, "local-resolve-00.sync"); err != nil {
+ t.Fatal(err)
+ }
+
+ // Verify that the head moved to v7 and the parent map shows the resolution.
+ if head, err := getHead(nil, st, oid); err != nil || head != "7" {
+ t.Errorf("object %s has wrong head after conflict resolution: %s", oid, head)
+ }
+
+ exp["7"] = []string{"3", "6"}
+ pmap = getParentMap(nil, st, oid, nil)
+ if !reflect.DeepEqual(pmap, exp) {
+ t.Errorf("invalid object %s parent map after conflict resolution: (%v) instead of (%v)",
+ oid, pmap, exp)
+ }
+}
+
+// TestAncestorIterator checks that the iterator goes over the correct set
+// of ancestor nodes for an object given a starting node. It should traverse
+// reconvergent DAG branches only visiting each ancestor once:
+// v1 -> v2 -> v3 -> v5 -> v6 -> v8 -> v9
+//        |--> v4 ---|           |
+//        +--> v7 ---------------+
+// - Starting at v1 it should only cover v1.
+// - Starting at v3 it should only cover v1-v3.
+// - Starting at v6 it should only cover v1-v6.
+// - Starting at v9 it should cover all nodes (v1-v9).
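+//
+// The callback contract exercised below, as a minimal sketch (nil context
+// arguments mirror the rest of this file):
+//
+//    err := forEachAncestor(nil, st, oid, []string{"9"}, func(v string, nd *dagNode) error {
+//        // Each ancestor version v is visited exactly once; returning a
+//        // non-nil error aborts the traversal and is propagated to the caller.
+//        return nil
+//    })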
+func TestAncestorIterator(t *testing.T) {
+ svc := createService(t)
+ defer destroyService(t, svc)
+ st := svc.St()
+ s := svc.sync
+
+ oid := "1234"
+
+ if _, err := s.dagReplayCommands(nil, "local-init-01.sync"); err != nil {
+ t.Fatal(err)
+ }
+
+ // Loop checking the iteration behavior for different starting nodes.
+ for _, start := range []int{1, 3, 6, 9} {
+ visitCount := make(map[string]int)
+ vstart := fmt.Sprintf("%d", start)
+ forEachAncestor(nil, st, oid, []string{vstart}, func(v string, nd *dagNode) error {
+ visitCount[v]++
+ return nil
+ })
+
+ // Check that all prior nodes are visited only once.
+ for i := 1; i <= start; i++ {
+ vv := fmt.Sprintf("%d", i)
+ if visitCount[vv] != 1 {
+ t.Errorf("wrong visit count on object %s:%s starting from %s: %d instead of 1",
+ oid, vv, vstart, visitCount[vv])
+ }
+ }
+ }
+
+ // Make sure an error in the callback is returned.
+ cbErr := errors.New("callback error")
+ err := forEachAncestor(nil, st, oid, []string{"9"}, func(v string, nd *dagNode) error {
+ if v == "1" {
+ return cbErr
+ }
+ return nil
+ })
+ if err != cbErr {
+ t.Errorf("wrong error returned from callback: %v instead of %v", err, cbErr)
+ }
+}
+
+// TestPruning tests sync pruning of the DAG for an object with 3 concurrent
+// updates (i.e. 2 conflict resolution convergent points). The pruning must
+// get rid of the DAG branches across the reconvergence points:
+// v1 -> v2 -> v3 -> v5 -> v6 -> v8 -> v9
+//        |--> v4 ---|           |
+//        +--> v7 ---------------+
+// By pruning at v1, nothing is deleted.
+// Then by pruning at v2, only v1 is deleted.
+// Then by pruning at v6, v2-v5 are deleted leaving v6 and "v7 -> v8 -> v9".
+// Then by pruning at v8, v6-v7 are deleted leaving "v8 -> v9".
+// Then by pruning at v9, v8 is deleted leaving v9 as the head.
+// Then by pruning again at v9 nothing changes.
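+//
+// The delete callback passed to prune() receives the log record key of each
+// pruned node. The tests below only count invocations; a real caller would
+// presumably delete that record in the same transaction, e.g. (sketch, assuming
+// the log record key is usable directly as a store key):
+//
+//    err := prune(nil, tx, oid, version, batches,
+//        func(ctx *context.T, tx store.Transaction, logrec string) error {
+//            return tx.Delete([]byte(logrec))
+//        })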
+func TestPruning(t *testing.T) {
+ svc := createService(t)
+ defer destroyService(t, svc)
+ st := svc.St()
+ s := svc.sync
+
+ oid := "1234"
+
+ if _, err := s.dagReplayCommands(nil, "local-init-01.sync"); err != nil {
+ t.Fatal(err)
+ }
+
+ exp := map[string][]string{"1": nil, "2": {"1"}, "3": {"2"}, "4": {"2"}, "5": {"3", "4"}, "6": {"5"}, "7": {"2"}, "8": {"6", "7"}, "9": {"8"}}
+
+ // Loop pruning at an invalid version (333) then at different valid versions.
+ testVersions := []string{"333", "1", "2", "6", "8", "9", "9"}
+ delCounts := []int{0, 0, 1, 4, 2, 1, 0}
+ which := "prune-snip-"
+ remain := 9
+
+ for i, version := range testVersions {
+ batches := newBatchPruning()
+ tx := st.NewTransaction()
+ del := 0
+ err := prune(nil, tx, oid, version, batches,
+ func(ctx *context.T, tx store.Transaction, lr string) error {
+ del++
+ return nil
+ })
+ tx.Commit()
+
+ if i == 0 && err == nil {
+ t.Errorf("pruning non-existent object %s:%s did not fail", oid, version)
+ } else if i > 0 && err != nil {
+ t.Errorf("pruning object %s:%s failed: %v", oid, version, err)
+ }
+
+ if del != delCounts[i] {
+ t.Errorf("pruning object %s:%s deleted %d log records instead of %d",
+ oid, version, del, delCounts[i])
+ }
+
+ which += "*"
+ remain -= del
+
+ if head, err := getHead(nil, st, oid); err != nil || head != "9" {
+ t.Errorf("object %s has wrong head: %s", oid, head)
+ }
+
+ tx = st.NewTransaction()
+ err = pruneDone(nil, tx, batches)
+ if err != nil {
+ t.Errorf("pruneDone() failed: %v", err)
+ }
+ tx.Commit()
+
+ // Remove the pruned nodes from the expected parent map used for validation,
+ // and set the parents of the prune point to nil.
+ intVersion, err := strconv.ParseInt(version, 10, 32)
+ if err != nil {
+ t.Errorf("invalid version: %s", version)
+ }
+
+ if intVersion < 10 {
+ for j := int64(0); j < intVersion; j++ {
+ delete(exp, fmt.Sprintf("%d", j))
+ }
+ exp[version] = nil
+ }
+
+ pmap := getParentMap(nil, st, oid, nil)
+ if !reflect.DeepEqual(pmap, exp) {
+ t.Errorf("invalid object %s parent map: (%v) instead of (%v)", oid, pmap, exp)
+ }
+ }
+}
+
+// TestPruningCallbackError tests sync pruning of the DAG when the callback
+// function returns an error. The pruning must try to delete as many nodes
+// and log records as possible and properly adjust the parent pointers of the
+// node at which the pruning occurs. The object DAG is:
+// v1 -> v2 -> v3 -> v5 -> v6 -> v8 -> v9
+//        |--> v4 ---|           |
+//        +--> v7 ---------------+
+// By pruning at v9 and having the callback function fail for v4, all other
+// nodes must be deleted and only v9 remains as the head.
+func TestPruningCallbackError(t *testing.T) {
+ svc := createService(t)
+ defer destroyService(t, svc)
+ st := svc.St()
+ s := svc.sync
+
+ oid := "1234"
+
+ if _, err := s.dagReplayCommands(nil, "local-init-01.sync"); err != nil {
+ t.Fatal(err)
+ }
+
+ exp := map[string][]string{"9": nil}
+
+ // Prune at v9 with a callback function that fails for v4.
+ del, expDel := 0, 8
+ version := "9"
+
+ batches := newBatchPruning()
+ tx := st.NewTransaction()
+ err := prune(nil, tx, oid, version, batches,
+ func(ctx *context.T, tx store.Transaction, lr string) error {
+ del++
+ if lr == "logrec-03" {
+ return fmt.Errorf("refuse to delete %s", lr)
+ }
+ return nil
+ })
+ tx.Commit()
+
+ if err == nil {
+ t.Errorf("pruning object %s:%s did not fail", oid, version)
+ }
+ if del != expDel {
+ t.Errorf("pruning object %s:%s deleted %d log records instead of %d", oid, version, del, expDel)
+ }
+
+ tx = st.NewTransaction()
+ err = pruneDone(nil, tx, batches)
+ if err != nil {
+ t.Errorf("pruneDone() failed: %v", err)
+ }
+ tx.Commit()
+
+ if head, err := getHead(nil, st, oid); err != nil || head != version {
+ t.Errorf("object %s has wrong head: %s", oid, head)
+ }
+
+ pmap := getParentMap(nil, st, oid, nil)
+ if !reflect.DeepEqual(pmap, exp) {
+ t.Errorf("invalid object %s parent map: (%v) instead of (%v)", oid, pmap, exp)
+ }
+}
+
+// TestRemoteLinkedNoConflictSameHead tests sync of remote updates that contain
+// linked nodes (conflict resolution by selecting an existing version) on top of
+// a local initial state without conflict. An object is created locally and
+// updated twice (v1 -> v2 -> v3). Another device learns about v1, then creates
+// (v1 -> v4), then learns about (v1 -> v2) and resolves the (v2/v4) conflict by
+// selecting v2 over v4. It sends that new info (v4 and the v2/v4 link) back to
+// the original (local) device. Instead of a v3/v4 conflict, the device sees
+// that v2 was chosen over v4 and resolves it as a no-conflict case.
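+//
+// In DAG terms such a "link" is just an extra parent edge: the remote
+// resolution gives v2 the parent set {v1, v4}, which the expected parent map
+// below encodes as "2": {"1", "4"}.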
+func TestRemoteLinkedNoConflictSameHead(t *testing.T) {
+ svc := createService(t)
+ defer destroyService(t, svc)
+ st := svc.St()
+ s := svc.sync
+
+ oid := "foo1"
+
+ if _, err := s.dagReplayCommands(nil, "local-init-00.log.sync"); err != nil {
+ t.Fatal(err)
+ }
+ graft, err := s.dagReplayCommands(nil, "remote-noconf-link-00.log.sync")
+ if err != nil {
+ t.Fatal(err)
+ }
+
+ // The head must not have moved (i.e. still at v3) and the parent map
+ // shows the newly grafted DAG fragment on top of the prior DAG.
+ if head, err := getHead(nil, st, oid); err != nil || head != "3" {
+ t.Errorf("object %s has wrong head: %s", oid, head)
+ }
+
+ pmap := getParentMap(nil, st, oid, graft)
+
+ exp := map[string][]string{"1": nil, "2": {"1", "4"}, "3": {"2"}, "4": {"1"}}
+
+ if !reflect.DeepEqual(pmap, exp) {
+ t.Errorf("invalid object %s parent map: (%v) instead of (%v)", oid, pmap, exp)
+ }
+
+ // Verify the grafting of remote nodes.
+ g := graft[oid]
+
+ expNewHeads := map[string]bool{"3": true}
+ if !reflect.DeepEqual(g.newHeads, expNewHeads) {
+ t.Errorf("object %s has invalid newHeads: (%v) instead of (%v)", oid, g.newHeads, expNewHeads)
+ }
+
+ expGrafts := map[string]uint64{"1": 0, "4": 1}
+ if !reflect.DeepEqual(g.graftNodes, expGrafts) {
+ t.Errorf("invalid object %s graft: (%v) instead of (%v)", oid, g.graftNodes, expGrafts)
+ }
+
+ // There should be no conflict.
+ isConflict, newHead, oldHead, ancestor, errConflict := hasConflict(nil, st, oid, graft)
+ if !(!isConflict && newHead == "3" && oldHead == "3" && ancestor == NoVersion && errConflict == nil) {
+ t.Errorf("object %s: wrong conflict info: flag %t, newHead %s, oldHead %s, ancestor %s, err %v",
+ oid, isConflict, newHead, oldHead, ancestor, errConflict)
+ }
+
+ // Verify that hasConflict() fails with a nil or empty graft map.
+ isConflict, newHead, oldHead, ancestor, errConflict = hasConflict(nil, st, oid, nil)
+ if errConflict == nil {
+ t.Errorf("hasConflict() on %v did not fail with a nil graft map: flag %t, newHead %s, oldHead %s, ancestor %s, err %v",
+ oid, isConflict, newHead, oldHead, ancestor, errConflict)
+ }
+ isConflict, newHead, oldHead, ancestor, errConflict = hasConflict(nil, st, oid, newGraft())
+ if errConflict == nil {
+ t.Errorf("hasConflict() on %v did not fail with an empty graft map: flag %t, newHead %s, oldHead %s, ancestor %s, err %v",
+ oid, isConflict, newHead, oldHead, ancestor, errConflict)
+ }
+}
+
+// TestRemoteLinkedConflict tests sync of remote updates that contain linked
+// nodes (conflict resolution by selecting an existing version) on top of a local
+// initial state triggering a local conflict. An object is created locally and
+// updated twice (v1 -> v2 -> v3). Another device learned about v1 along the
+// way, created (v1 -> v4), then learned about (v1 -> v2) and resolved that
+// conflict by selecting v4 over v2. Now it sends that new info (v4 and the
+// v4/v2 link) back to the original (local) device which sees a v3/v4 conflict.
+func TestRemoteLinkedConflict(t *testing.T) {
+ svc := createService(t)
+ defer destroyService(t, svc)
+ st := svc.St()
+ s := svc.sync
+
+ oid := "foo1"
+
+ if _, err := s.dagReplayCommands(nil, "local-init-00.log.sync"); err != nil {
+ t.Fatal(err)
+ }
+ graft, err := s.dagReplayCommands(nil, "remote-conf-link.log.sync")
+ if err != nil {
+ t.Fatal(err)
+ }
+
+ // The head must not have moved (i.e. still at v3) and the parent map
+ // shows the newly grafted DAG fragment on top of the prior DAG.
+ if head, err := getHead(nil, st, oid); err != nil || head != "3" {
+ t.Errorf("object %s has wrong head: %s", oid, head)
+ }
+
+ pmap := getParentMap(nil, st, oid, graft)
+
+ exp := map[string][]string{"1": nil, "2": {"1"}, "3": {"2"}, "4": {"1", "2"}}
+
+ if !reflect.DeepEqual(pmap, exp) {
+ t.Errorf("invalid object %s parent map: (%v) instead of (%v)", oid, pmap, exp)
+ }
+
+ // Verify the grafting of remote nodes.
+ g := graft[oid]
+
+ expNewHeads := map[string]bool{"3": true, "4": true}
+ if !reflect.DeepEqual(g.newHeads, expNewHeads) {
+ t.Errorf("object %s has invalid newHeads: (%v) instead of (%v)", oid, g.newHeads, expNewHeads)
+ }
+
+ expGrafts := map[string]uint64{"1": 0, "2": 1}
+ if !reflect.DeepEqual(g.graftNodes, expGrafts) {
+ t.Errorf("invalid object %s graft: (%v) instead of (%v)", oid, g.graftNodes, expGrafts)
+ }
+
+ // There should be a conflict.
+ isConflict, newHead, oldHead, ancestor, errConflict := hasConflict(nil, st, oid, graft)
+ if !(isConflict && newHead == "4" && oldHead == "3" && ancestor == "2" && errConflict == nil) {
+ t.Errorf("object %s: wrong conflict info: flag %t, newHead %s, oldHead %s, ancestor %s, err %v",
+ oid, isConflict, newHead, oldHead, ancestor, errConflict)
+ }
+}
+
+// TestRemoteLinkedNoConflictNewHead tests sync of remote updates that contain
+// linked nodes (conflict resolution by selecting an existing version) on top of
+// a local initial state without conflict, but move the head node to a new one.
+// An object is created locally and updated twice (v1 -> v2 -> v3). Another
+// device learned about v1 along the way, created (v1 -> v4), then learned
+// about (v1 -> v2 -> v3) and resolved that conflict by selecting v4 over v3.
+// Now it sends that new info (v4 and the v4/v3 link) back to the original
+// (local) device. The device sees that the new head v4 is "derived" from v3
+// thus no conflict.
+func TestRemoteLinkedNoConflictNewHead(t *testing.T) {
+ svc := createService(t)
+ defer destroyService(t, svc)
+ st := svc.St()
+ s := svc.sync
+
+ oid := "foo1"
+
+ if _, err := s.dagReplayCommands(nil, "local-init-00.log.sync"); err != nil {
+ t.Fatal(err)
+ }
+ graft, err := s.dagReplayCommands(nil, "remote-noconf-link-01.log.sync")
+ if err != nil {
+ t.Fatal(err)
+ }
+
+ // The head must not have moved (i.e. still at v3) and the parent map
+ // shows the newly grafted DAG fragment on top of the prior DAG.
+ if head, err := getHead(nil, st, oid); err != nil || head != "3" {
+ t.Errorf("object %s has wrong head: %s", oid, head)
+ }
+
+ pmap := getParentMap(nil, st, oid, graft)
+
+ exp := map[string][]string{"1": nil, "2": {"1"}, "3": {"2"}, "4": {"1", "3"}}
+
+ if !reflect.DeepEqual(pmap, exp) {
+ t.Errorf("invalid object %s parent map: (%v) instead of (%v)", oid, pmap, exp)
+ }
+
+ // Verify the grafting of remote nodes.
+ g := graft[oid]
+
+ expNewHeads := map[string]bool{"4": true}
+ if !reflect.DeepEqual(g.newHeads, expNewHeads) {
+ t.Errorf("object %s has invalid newHeads: (%v) instead of (%v)", oid, g.newHeads, expNewHeads)
+ }
+
+ expGrafts := map[string]uint64{"1": 0, "3": 2}
+ if !reflect.DeepEqual(g.graftNodes, expGrafts) {
+ t.Errorf("invalid object %s graft: (%v) instead of (%v)", oid, g.graftNodes, expGrafts)
+ }
+
+ // There should be no conflict.
+ isConflict, newHead, oldHead, ancestor, errConflict := hasConflict(nil, st, oid, graft)
+ if !(!isConflict && newHead == "4" && oldHead == "3" && ancestor == NoVersion && errConflict == nil) {
+ t.Errorf("object %s: wrong conflict info: flag %t, newHead %s, oldHead %s, ancestor %s, err %v",
+ oid, isConflict, newHead, oldHead, ancestor, errConflict)
+ }
+}
+
+// TestRemoteLinkedNoConflictNewHeadOvertake tests sync of remote updates that
+// contain linked nodes (conflict resolution by selecting an existing version)
+// on top of a local initial state without conflict, but move the head node
+// to a new one that overtook the linked node.
+//
+// An object is created locally and updated twice (v1 -> v2 -> v3). Another
+// device learned about v1 along the way, created (v1 -> v4), then learned
+// about (v1 -> v2 -> v3) and resolved that conflict by selecting v3 over v4.
+// Then it creates a new update v5 from v3 (v3 -> v5). Now it sends that new
+// info (v4, the v3/v4 link, and v5) back to the original (local) device.
+// The device sees that the new head v5 is "derived" from v3 thus no conflict.
+func TestRemoteLinkedNoConflictNewHeadOvertake(t *testing.T) {
+ svc := createService(t)
+ defer destroyService(t, svc)
+ st := svc.St()
+ s := svc.sync
+
+ oid := "foo1"
+
+ if _, err := s.dagReplayCommands(nil, "local-init-00.log.sync"); err != nil {
+ t.Fatal(err)
+ }
+ graft, err := s.dagReplayCommands(nil, "remote-noconf-link-02.log.sync")
+ if err != nil {
+ t.Fatal(err)
+ }
+
+ // The head must not have moved (i.e. still at v3) and the parent map
+ // shows the newly grafted DAG fragment on top of the prior DAG.
+ if head, err := getHead(nil, st, oid); err != nil || head != "3" {
+ t.Errorf("object %s has wrong head: %s", oid, head)
+ }
+
+ pmap := getParentMap(nil, st, oid, graft)
+
+ exp := map[string][]string{"1": nil, "2": {"1"}, "3": {"2", "4"}, "4": {"1"}, "5": {"3"}}
+
+ if !reflect.DeepEqual(pmap, exp) {
+ t.Errorf("invalid object %s parent map: (%v) instead of (%v)", oid, pmap, exp)
+ }
+
+ // Verify the grafting of remote nodes.
+ g := graft[oid]
+
+ expNewHeads := map[string]bool{"5": true}
+ if !reflect.DeepEqual(g.newHeads, expNewHeads) {
+ t.Errorf("object %s has invalid newHeads: (%v) instead of (%v)", oid, g.newHeads, expNewHeads)
+ }
+
+ expGrafts := map[string]uint64{"1": 0, "3": 2, "4": 1}
+ if !reflect.DeepEqual(g.graftNodes, expGrafts) {
+ t.Errorf("invalid object %s graft: (%v) instead of (%v)", oid, g.graftNodes, expGrafts)
+ }
+
+ // There should be no conflict.
+ isConflict, newHead, oldHead, ancestor, errConflict := hasConflict(nil, st, oid, graft)
+ if !(!isConflict && newHead == "5" && oldHead == "3" && ancestor == NoVersion && errConflict == nil) {
+ t.Errorf("object %s: wrong conflict info: flag %t, newHead %s, oldHead %s, ancestor %s, err %v",
+ oid, isConflict, newHead, oldHead, ancestor, errConflict)
+ }
+
+ // Move the head.
+ tx := st.NewTransaction()
+ if err = moveHead(nil, tx, oid, newHead); err != nil {
+ t.Errorf("object %s cannot move head to %s: %v", oid, newHead, err)
+ }
+ tx.Commit()
+
+ // Now new info comes from another device repeating the v3/v4 link.
+ // Verify that it is a NOP (no changes).
+ graft, err = s.dagReplayCommands(nil, "remote-noconf-link-repeat.log.sync")
+ if err != nil {
+ t.Fatal(err)
+ }
+
+ if head, err := getHead(nil, st, oid); err != nil || head != "5" {
+ t.Errorf("object %s has wrong head: %s", oid, head)
+ }
+
+ g = graft[oid]
+
+ if !reflect.DeepEqual(g.newHeads, expNewHeads) {
+ t.Errorf("object %s has invalid newHeads: (%v) instead of (%v)", oid, g.newHeads, expNewHeads)
+ }
+
+ expGrafts = map[string]uint64{}
+ if !reflect.DeepEqual(g.graftNodes, expGrafts) {
+ t.Errorf("invalid object %s graft: (%v) instead of (%v)", oid, g.graftNodes, expGrafts)
+ }
+
+ isConflict, newHead, oldHead, ancestor, errConflict = hasConflict(nil, st, oid, graft)
+ if !(!isConflict && newHead == "5" && oldHead == "5" && ancestor == NoVersion && errConflict == nil) {
+ t.Errorf("object %s: wrong conflict info: flag %t, newHead %s, oldHead %s, ancestor %s, err %v",
+ oid, isConflict, newHead, oldHead, ancestor, errConflict)
+ }
+}
+
+// TestAddNodeBatch tests adding multiple DAG nodes grouped within a batch.
+func TestAddNodeBatch(t *testing.T) {
+ svc := createService(t)
+ defer destroyService(t, svc)
+ st := svc.St()
+ s := svc.sync
+
+ if _, err := s.dagReplayCommands(nil, "local-init-02.sync"); err != nil {
+ t.Fatal(err)
+ }
+
+ oid_a, oid_b, oid_c := "1234", "6789", "2222"
+
+ tx := st.NewTransaction()
+
+ // Verify NoBatchId is reported as an error.
+ if err := s.endBatch(nil, tx, NoBatchId, 0); err == nil {
+ t.Errorf("endBatch() did not fail for invalid 'NoBatchId' value")
+ }
+ if _, err := getBatch(nil, st, NoBatchId); err == nil {
+ t.Errorf("getBatch() did not fail for invalid 'NoBatchId' value")
+ }
+ if err := setBatch(nil, tx, NoBatchId, nil); err == nil {
+ t.Errorf("setBatch() did not fail for invalid 'NoBatchId' value")
+ }
+ if err := delBatch(nil, tx, NoBatchId); err == nil {
+ t.Errorf("delBatch() did not fail for invalid 'NoBatchId' value")
+ }
+
+ // Mutate 2 objects within a batch.
+ btid_1 := s.startBatch(nil, st, NoBatchId)
+ if btid_1 == NoBatchId {
+ t.Fatal("cannot start 1st DAG batch")
+ }
+ if err := s.endBatch(nil, tx, btid_1, 0); err == nil {
+ t.Errorf("endBatch() did not fail for a zero-count batch")
+ }
+
+ info := s.batches[btid_1]
+ if info == nil {
+ t.Errorf("batches state for ID %v not found", btid_1)
+ }
+ if n := len(info.Objects); n != 0 {
+ t.Errorf("batch info map for ID %v has length %d instead of 0", btid_1, n)
+ }
+
+ if err := s.addNode(nil, tx, oid_a, "3", "logrec-a-03", false, []string{"2"}, btid_1, nil); err != nil {
+ t.Errorf("cannot addNode() on object %s and batch ID %v: %v", oid_a, btid_1, err)
+ }
+
+ if id := s.startBatch(nil, st, btid_1); id != btid_1 {
+ t.Fatalf("restarting batch failed: got %v instead of %v", id, btid_1)
+ }
+
+ if err := s.addNode(nil, tx, oid_b, "3", "logrec-b-03", false, []string{"2"}, btid_1, nil); err != nil {
+ t.Errorf("cannot addNode() on object %s and batch ID %v: %v", oid_b, btid_1, err)
+ }
+
+ // At the same time mutate the 3rd object in another batch.
+ btid_2 := s.startBatch(nil, st, NoBatchId)
+ if btid_2 == NoBatchId {
+ t.Fatal("cannot start 2nd DAG batch")
+ }
+
+ info = s.batches[btid_2]
+ if info == nil {
+ t.Errorf("batches state for ID %v not found", btid_2)
+ }
+ if n := len(info.Objects); n != 0 {
+ t.Errorf("batch info map for ID %v has length %d instead of 0", btid_2, n)
+ }
+
+ if err := s.addNode(nil, tx, oid_c, "2", "logrec-c-02", false, []string{"1"}, btid_2, nil); err != nil {
+ t.Errorf("cannot addNode() on object %s and batch ID %v: %v", oid_c, btid_2, err)
+ }
+
+ // Verify the in-memory batch sets constructed.
+ info = s.batches[btid_1]
+ if info == nil {
+ t.Errorf("batches state for ID %v not found", btid_1)
+ }
+
+ expInfo := &batchInfo{map[string]string{oid_a: "3", oid_b: "3"}, 0}
+ if !reflect.DeepEqual(info, expInfo) {
+ t.Errorf("invalid batch info for ID %v: %v instead of %v", btid_1, info, expInfo)
+ }
+
+ info = s.batches[btid_2]
+ if info == nil {
+ t.Errorf("batches state for ID %v not found", btid_2)
+ }
+
+ expInfo = &batchInfo{map[string]string{oid_c: "2"}, 0}
+ if !reflect.DeepEqual(info, expInfo) {
+ t.Errorf("invalid batch info for ID %v: %v instead of %v", btid_2, info, expInfo)
+ }
+
+ // Verify failing to use a batch ID not returned by startBatch().
+ bad_btid := btid_1 + 1
+ for bad_btid == NoBatchId || bad_btid == btid_2 {
+ bad_btid++
+ }
+
+ if err := s.addNode(nil, tx, oid_c, "3", "logrec-c-03", false, []string{"2"}, bad_btid, nil); err == nil {
+ t.Errorf("addNode() did not fail on object %s for a bad batch ID %v", oid_c, bad_btid)
+ }
+ if err := s.endBatch(nil, tx, bad_btid, 1); err == nil {
+ t.Errorf("endBatch() did not fail for a bad batch ID %v", bad_btid)
+ }
+
+ // End the 1st batch and verify the in-memory and in-store data.
+ if err := s.endBatch(nil, tx, btid_1, 2); err != nil {
+ t.Errorf("cannot endBatch() for ID %v: %v", btid_1, err)
+ }
+ tx.Commit()
+
+ if info = s.batches[btid_1]; info != nil {
+ t.Errorf("batch info for ID %v still exists", btid_1)
+ }
+
+ info, err := getBatch(nil, st, btid_1)
+ if err != nil {
+ t.Errorf("cannot getBatch() for ID %v: %v", btid_1, err)
+ }
+
+ expInfo = &batchInfo{map[string]string{oid_a: "3", oid_b: "3"}, 2}
+ if !reflect.DeepEqual(info, expInfo) {
+ t.Errorf("invalid batch info from DAG storage for ID %v: %v instead of %v",
+ btid_1, info, expInfo)
+ }
+
+ info = s.batches[btid_2]
+ if info == nil {
+ t.Errorf("batches state for ID %v not found", btid_2)
+ }
+
+ expInfo = &batchInfo{map[string]string{oid_c: "2"}, 0}
+ if !reflect.DeepEqual(info, expInfo) {
+ t.Errorf("invalid batch info for ID %v: %v instead of %v", btid_2, info, expInfo)
+ }
+
+ // End the 2nd batch and re-verify the in-memory and in-store data.
+ tx = st.NewTransaction()
+ if err := s.endBatch(nil, tx, btid_2, 1); err != nil {
+ t.Errorf("cannot endBatch() for ID %v: %v", btid_2, err)
+ }
+ tx.Commit()
+
+ if info = s.batches[btid_2]; info != nil {
+ t.Errorf("batch info for ID %v still exists", btid_2)
+ }
+
+ info, err = getBatch(nil, st, btid_2)
+ if err != nil {
+ t.Errorf("cannot getBatch() for ID %v: %v", btid_2, err)
+ }
+
+ expInfo = &batchInfo{map[string]string{oid_c: "2"}, 1}
+ if !reflect.DeepEqual(info, expInfo) {
+ t.Errorf("invalid batch info from DAG storage for ID %v: %v instead of %v", btid_2, info, expInfo)
+ }
+
+ if n := len(s.batches); n != 0 {
+ t.Errorf("batches set in-memory: %d entries found, should be empty", n)
+ }
+
+ // Test incrementally filling up a batch.
+ btid_3 := uint64(100)
+ if s.batches[btid_3] != nil {
+ t.Errorf("batch info for ID %v found", btid_3)
+ }
+
+ if id := s.startBatch(nil, st, btid_3); id != btid_3 {
+ t.Fatalf("cannot start batch %v", btid_3)
+ }
+
+ info = s.batches[btid_3]
+ if info == nil {
+ t.Errorf("batches state for ID %v not found", btid_3)
+ }
+ if n := len(info.Objects); n != 0 {
+ t.Errorf("batch info map for ID %v has length %d instead of 0", btid_3, n)
+ }
+
+ tx = st.NewTransaction()
+ if err := s.addNode(nil, tx, oid_a, "4", "logrec-a-04", false, []string{"3"}, btid_3, nil); err != nil {
+ t.Errorf("cannot addNode() on object %s and batch ID %v: %v", oid_a, btid_3, err)
+ }
+
+ if err := s.endBatch(nil, tx, btid_3, 2); err != nil {
+ t.Errorf("cannot endBatch() for ID %v: %v", btid_3, err)
+ }
+ tx.Commit()
+
+ if s.batches[btid_3] != nil {
+ t.Errorf("batch info for ID %v still exists", btid_3)
+ }
+
+ info, err = getBatch(nil, st, btid_3)
+ if err != nil {
+ t.Errorf("cannot getBatch() for ID %v: %v", btid_3, err)
+ }
+
+ expInfo = &batchInfo{map[string]string{oid_a: "4"}, 2}
+ if !reflect.DeepEqual(info, expInfo) {
+ t.Errorf("invalid batch info from DAG storage for ID %v: %v instead of %v",
+ btid_3, info, expInfo)
+ }
+
+ if id := s.startBatch(nil, st, btid_3); id != btid_3 {
+ t.Fatalf("cannot start batch %v", btid_3)
+ }
+
+ info = s.batches[btid_3]
+ if info == nil {
+ t.Errorf("batch state for ID %v not found", btid_3)
+ }
+
+ if !reflect.DeepEqual(info, expInfo) {
+ t.Errorf("invalid batch info from DAG storage for ID %v: %v instead of %v",
+ btid_3, info, expInfo)
+ }
+
+ tx = st.NewTransaction()
+ if err := s.addNode(nil, tx, oid_b, "4", "logrec-b-04", false, []string{"3"}, btid_3, nil); err != nil {
+ t.Errorf("cannot addNode() on object %s and batch ID %v: %v", oid_b, btid_3, err)
+ }
+
+ if err := s.endBatch(nil, tx, btid_3, 3); err == nil {
+ t.Errorf("endBatch() didn't fail for ID %v: %v", btid_3, err)
+ }
+
+ if err := s.endBatch(nil, tx, btid_3, 2); err != nil {
+ t.Errorf("cannot endBatch() for ID %v: %v", btid_3, err)
+ }
+ tx.Commit()
+
+ info, err = getBatch(nil, st, btid_3)
+ if err != nil {
+ t.Errorf("cannot getBatch() for ID %v: %v", btid_3, err)
+ }
+
+ expInfo = &batchInfo{map[string]string{oid_a: "4", oid_b: "4"}, 2}
+ if !reflect.DeepEqual(info, expInfo) {
+ t.Errorf("invalid batch state from DAG storage for ID %v: %v instead of %v",
+ btid_3, info, expInfo)
+ }
+
+ // Get the 3 new nodes from the DAG and verify their batch IDs.
+ type nodeTest struct {
+ oid string
+ version string
+ btid uint64
+ }
+ tests := []nodeTest{
+ {oid_a, "3", btid_1},
+ {oid_a, "4", btid_3},
+ {oid_b, "3", btid_1},
+ {oid_b, "4", btid_3},
+ {oid_c, "2", btid_2},
+ }
+
+ for _, test := range tests {
+ node, err := getNode(nil, st, test.oid, test.version)
+ if err != nil {
+ t.Errorf("cannot find object %s:%s: %v", test.oid, test.version, err)
+ }
+ if node.BatchId != test.btid {
+ t.Errorf("invalid batch ID for object %s:%s: %v instead of %v",
+ test.oid, test.version, node.BatchId, test.btid)
+ }
+ }
+}
+
+// TestPruningBatches tests pruning DAG nodes grouped within batches.
+func TestPruningBatches(t *testing.T) {
+ svc := createService(t)
+ defer destroyService(t, svc)
+ st := svc.St()
+ s := svc.sync
+
+ if _, err := s.dagReplayCommands(nil, "local-init-02.sync"); err != nil {
+ t.Fatal(err)
+ }
+
+ oid_a, oid_b, oid_c := "1234", "6789", "2222"
+
+ // Mutate objects in 2 batches then add non-batch mutations to act as
+ // the pruning points. Before pruning the DAG is:
+ // a1 -- a2 -- (a3) --- a4
+ // b1 -- b2 -- (b3) -- (b4) -- b5
+ // c1 ---------------- (c2)
+ // Now by pruning at (a4, b5, c2), the new DAG should be:
+ // a4
+ // b5
+ // (c2)
+ // Batch 1 (a3, b3) gets deleted, but batch 2 (b4, c2) still has (c2)
+ // dangling waiting for a future pruning.
+ btid_1 := s.startBatch(nil, st, NoBatchId)
+ if btid_1 == NoBatchId {
+ t.Fatal("cannot start 1st DAG addNode() batch")
+ }
+
+ tx := st.NewTransaction()
+ if err := s.addNode(nil, tx, oid_a, "3", "logrec-a-03", false, []string{"2"}, btid_1, nil); err != nil {
+ t.Errorf("cannot addNode() on object %s and batch ID %v: %v", oid_a, btid_1, err)
+ }
+ if err := s.addNode(nil, tx, oid_b, "3", "logrec-b-03", false, []string{"2"}, btid_1, nil); err != nil {
+ t.Errorf("cannot addNode() on object %s and batch ID %v: %v", oid_b, btid_1, err)
+ }
+ if err := s.endBatch(nil, tx, btid_1, 2); err != nil {
+ t.Errorf("cannot endBatch() for ID %v: %v", btid_1, err)
+ }
+ tx.Commit()
+
+ btid_2 := s.startBatch(nil, st, NoBatchId)
+ if btid_2 == NoBatchId {
+ t.Fatal("cannot start 2nd DAG addNode() batch")
+ }
+
+ tx = st.NewTransaction()
+ if err := s.addNode(nil, tx, oid_b, "4", "logrec-b-04", false, []string{"3"}, btid_2, nil); err != nil {
+ t.Errorf("cannot addNode() on object %s and batch ID %v: %v", oid_b, btid_2, err)
+ }
+ if err := s.addNode(nil, tx, oid_c, "2", "logrec-c-02", false, []string{"1"}, btid_2, nil); err != nil {
+ t.Errorf("cannot addNode() on object %s and batch ID %v: %v", oid_c, btid_2, err)
+ }
+ if err := s.endBatch(nil, tx, btid_2, 2); err != nil {
+ t.Errorf("cannot endBatch() for ID %v: %v", btid_2, err)
+ }
+ tx.Commit()
+
+ tx = st.NewTransaction()
+ if err := s.addNode(nil, tx, oid_a, "4", "logrec-a-04", false, []string{"3"}, NoBatchId, nil); err != nil {
+ t.Errorf("cannot addNode() on object %s: %v", oid_a, err)
+ }
+ if err := s.addNode(nil, tx, oid_b, "5", "logrec-b-05", false, []string{"4"}, NoBatchId, nil); err != nil {
+ t.Errorf("cannot addNode() on object %s: %v", oid_b, err)
+ }
+
+ if err := moveHead(nil, tx, oid_a, "4"); err != nil {
+ t.Errorf("object %s cannot move head: %v", oid_a, err)
+ }
+ if err := moveHead(nil, tx, oid_b, "5"); err != nil {
+ t.Errorf("object %s cannot move head: %v", oid_b, err)
+ }
+ if err := moveHead(nil, tx, oid_c, "2"); err != nil {
+ t.Errorf("object %s cannot move head: %v", oid_c, err)
+ }
+ tx.Commit()
+
+ // Verify the batch sets.
+ info, err := getBatch(nil, st, btid_1)
+ if err != nil {
+ t.Errorf("cannot getBatch() for ID %v: %v", btid_1, err)
+ }
+
+ expInfo := &batchInfo{map[string]string{oid_a: "3", oid_b: "3"}, 2}
+ if !reflect.DeepEqual(info, expInfo) {
+ t.Errorf("invalid batch info from DAG storage for ID %v: %v instead of %v",
+ btid_1, info, expInfo)
+ }
+
+ info, err = getBatch(nil, st, btid_2)
+ if err != nil {
+ t.Errorf("cannot getBatch() for ID %v: %v", btid_2, err)
+ }
+
+ expInfo = &batchInfo{map[string]string{oid_b: "4", oid_c: "2"}, 2}
+ if !reflect.DeepEqual(info, expInfo) {
+ t.Errorf("invalid batch info from DAG storage for ID %v: %v instead of %v",
+ btid_2, info, expInfo)
+ }
+
+ // Prune the 3 objects at their head nodes.
+ batches := newBatchPruning()
+ tx = st.NewTransaction()
+ for _, oid := range []string{oid_a, oid_b, oid_c} {
+ head, err := getHead(nil, st, oid)
+ if err != nil {
+ t.Errorf("cannot getHead() on object %s: %v", oid, err)
+ }
+ err = prune(nil, tx, oid, head, batches,
+ func(ctx *context.T, itx store.Transaction, lr string) error {
+ return nil
+ })
+ if err != nil {
+ t.Errorf("cannot prune() on object %s: %v", oid, err)
+ }
+ }
+
+ if err = pruneDone(nil, tx, batches); err != nil {
+ t.Errorf("pruneDone() failed: %v", err)
+ }
+ tx.Commit()
+
+ // Verify that batch-1 was deleted and batch-2 still has c2 in it.
+ info, err = getBatch(nil, st, btid_1)
+ if err == nil {
+ t.Errorf("getBatch() did not fail for ID %v: %v", btid_1, info)
+ }
+
+ info, err = getBatch(nil, st, btid_2)
+ if err != nil {
+ t.Errorf("cannot getBatch() for ID %v: %v", btid_2, err)
+ }
+
+ expInfo = &batchInfo{map[string]string{oid_c: "2"}, 2}
+ if !reflect.DeepEqual(info, expInfo) {
+ t.Errorf("invalid batch info for ID %v: %v instead of %v", btid_2, info, expInfo)
+ }
+
+ // Add c3 as a new head and prune at that point. This should GC batch-2.
+ tx = st.NewTransaction()
+ if err := s.addNode(nil, tx, oid_c, "3", "logrec-c-03", false, []string{"2"}, NoBatchId, nil); err != nil {
+ t.Errorf("cannot addNode() on object %s: %v", oid_c, err)
+ }
+ if err = moveHead(nil, tx, oid_c, "3"); err != nil {
+ t.Errorf("object %s cannot move head: %v", oid_c, err)
+ }
+
+ batches = newBatchPruning()
+ err = prune(nil, tx, oid_c, "3", batches,
+ func(ctx *context.T, itx store.Transaction, lr string) error {
+ return nil
+ })
+ if err != nil {
+ t.Errorf("cannot prune() on object %s: %v", oid_c, err)
+ }
+ if err = pruneDone(nil, tx, batches); err != nil {
+ t.Errorf("pruneDone() #2 failed: %v", err)
+ }
+ tx.Commit()
+
+ info, err = getBatch(nil, st, btid_2)
+ if err == nil {
+ t.Errorf("getBatch() did not fail for ID %v: %v", btid_2, info)
+ }
+}
diff --git a/services/syncbase/vsync/initiator.go b/services/syncbase/vsync/initiator.go
new file mode 100644
index 0000000..cccf217
--- /dev/null
+++ b/services/syncbase/vsync/initiator.go
@@ -0,0 +1,968 @@
+// Copyright 2015 The Vanadium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package vsync
+
+// Initiator is a goroutine that periodically picks a peer from all the known
+// remote peers, and requests deltas from that peer for all the SyncGroups in
+// common across all apps/databases. It then modifies the sync metadata (DAG and
+// local log records) based on the deltas, detects and resolves conflicts if
+// any, and suitably updates the local Databases.
+
+import (
+ "sort"
+ "strings"
+ "time"
+
+ "v.io/syncbase/v23/services/syncbase/nosql"
+ "v.io/syncbase/x/ref/services/syncbase/server/interfaces"
+ "v.io/syncbase/x/ref/services/syncbase/server/util"
+ "v.io/syncbase/x/ref/services/syncbase/server/watchable"
+ "v.io/syncbase/x/ref/services/syncbase/store"
+ "v.io/v23/context"
+ "v.io/v23/naming"
+ "v.io/v23/vdl"
+ "v.io/v23/verror"
+ "v.io/v23/vom"
+ "v.io/x/lib/vlog"
+)
+
+// Policies to pick a peer to sync with.
+const (
+ // Picks a peer at random from the available set.
+ selectRandom = iota
+
+ // TODO(hpucha): implement other policies.
+ // Picks a peer with most differing generations.
+ selectMostDiff
+
+ // Picks a peer that was synced with the furthest in the past.
+ selectOldest
+)
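+
+// As an illustration of the selectRandom policy, a picker could be sketched as
+// follows (hypothetical helper; the actual pickPeer implementation lives
+// elsewhere and consults the member view, and "math/rand" is assumed):
+//
+//    func pickRandomPeer(ctx *context.T, peers []string) (string, error) {
+//        if len(peers) == 0 {
+//            return "", verror.New(verror.ErrInternal, ctx, "no known peers")
+//        }
+//        return peers[rand.Intn(len(peers))], nil
+//    }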
+
+var (
+ // peerSyncInterval is the duration between two consecutive peer
+ // contacts. During every peer contact, the initiator obtains any
+ // pending updates from that peer.
+ peerSyncInterval = 50 * time.Millisecond
+
+ // peerSelectionPolicy is the policy used to select a peer when
+ // the initiator gets a chance to sync.
+ peerSelectionPolicy = selectRandom
+)
+
+// syncer wakes up every peerSyncInterval to do work: (1) Act as an initiator
+// for SyncGroup metadata by selecting a SyncGroup Admin, and syncing SyncGroup
+// metadata with it (getting updates from the remote peer, detecting and
+// resolving conflicts); (2) Refresh memberView if needed and act as an
+// initiator for data by selecting a peer, and syncing data corresponding to all
+// common SyncGroups across all Databases; (3) Act as a SyncGroup publisher to
+// publish pending SyncGroups; (4) Garbage collect older generations.
+//
+// TODO(hpucha): Currently only does initiation. Add rest.
+func (s *syncService) syncer(ctx *context.T) {
+ defer s.pending.Done()
+
+ ticker := time.NewTicker(peerSyncInterval)
+ defer ticker.Stop()
+
+ for {
+ select {
+ case <-s.closed:
+ vlog.VI(1).Info("sync: syncer: channel closed, stop work and exit")
+ return
+
+ case <-ticker.C:
+ }
+
+ // TODO(hpucha): Cut a gen for the responder even if there is no
+ // one to initiate to?
+
+ // Do work.
+ peer, err := s.pickPeer(ctx)
+ if err != nil {
+ continue
+ }
+ s.getDeltasFromPeer(ctx, peer)
+ }
+}
+
+// getDeltasFromPeer performs an initiation round to the specified
+// peer. An initiation round consists of:
+// * Contacting the peer to receive all the deltas based on the local genvector.
+// * Processing those deltas to discover objects which have been updated.
+// * Processing updated objects to detect and resolve any conflicts if needed.
+// * Communicating relevant object updates to the Database, and updating local
+// genvector to catch up to the received remote genvector.
+//
+// The processing of the deltas is done one Database at a time. If a local error
+// is encountered during the processing of a Database, that Database is skipped
+// and the initiator continues on to the next one. If the connection to the peer
+// encounters an error, this initiation round is aborted. Note that until the
+// local genvector is updated based on the received deltas (the last step in an
+// initiation round), the work done by the initiator is idempotent.
+//
+// TODO(hpucha): Check the idempotence, esp in addNode in DAG.
+func (s *syncService) getDeltasFromPeer(ctxIn *context.T, peer string) {
+ vlog.VI(2).Infof("sync: getDeltasFromPeer: begin: contacting peer %s", peer)
+ defer vlog.VI(2).Infof("sync: getDeltasFromPeer: end: contacting peer %s", peer)
+
+ ctx, cancel := context.WithRootCancel(ctxIn)
+
+ info := s.copyMemberInfo(ctx, peer)
+ if info == nil {
+ vlog.Fatalf("sync: getDeltasFromPeer: missing information in member view for %q", peer)
+ }
+ connected := false
+ var stream interfaces.SyncGetDeltasClientCall
+
+ // Sync each Database that may have SyncGroups common with this peer,
+ // one at a time.
+ for gdbName, sgInfo := range info.db2sg {
+
+ // Initialize initiation state for syncing this Database.
+ iSt, err := newInitiationState(ctx, s, peer, gdbName, sgInfo)
+ if err != nil {
+ vlog.Errorf("sync: getDeltasFromPeer: couldn't initialize initiator state for peer %s, gdb %s, err %v", peer, gdbName, err)
+ continue
+ }
+
+ if len(iSt.sgIds) == 0 || len(iSt.sgPfxs) == 0 {
+ vlog.Errorf("sync: getDeltasFromPeer: didn't find any SyncGroups for peer %s, gdb %s, err %v", peer, gdbName, err)
+ continue
+ }
+
+ // Make contact with the peer once.
+ if !connected {
+ stream, connected = iSt.connectToPeer(ctx)
+ if !connected {
+ // Try a different Database. Perhaps there are
+ // different mount tables.
+ continue
+ }
+ }
+
+ // Create local genvec so that it contains knowledge only about common prefixes.
+ if err := iSt.createLocalGenVec(ctx); err != nil {
+ vlog.Errorf("sync: getDeltasFromPeer: error creating local genvec for gdb %s, err %v", gdbName, err)
+ continue
+ }
+
+ iSt.stream = stream
+ req := interfaces.DeltaReq{
+ AppName: iSt.appName,
+ DbName: iSt.dbName,
+ SgIds: iSt.sgIds,
+ InitVec: iSt.local,
+ }
+
+ vlog.VI(3).Infof("sync: getDeltasFromPeer: send request: %v", req)
+ sender := iSt.stream.SendStream()
+ sender.Send(req)
+
+ // Obtain deltas from the peer over the network.
+ if err := iSt.recvAndProcessDeltas(ctx); err != nil {
+ vlog.Errorf("sync: getDeltasFromPeer: error receiving deltas for gdb %s, err %v", gdbName, err)
+ // Returning here since something could be wrong with
+ // the connection, and no point in attempting the next
+ // Database.
+ cancel()
+ stream.Finish()
+ return
+ }
+ vlog.VI(3).Infof("sync: getDeltasFromPeer: got reply: %v", iSt.remote)
+
+ if err := iSt.processUpdatedObjects(ctx); err != nil {
+ vlog.Errorf("sync: getDeltasFromPeer: error processing objects for gdb %s, err %v", gdbName, err)
+ // Move to the next Database even if processing updates
+ // failed.
+ continue
+ }
+ }
+
+ if connected {
+ stream.Finish()
+ }
+ cancel()
+}
+
+type sgSet map[interfaces.GroupId]struct{}
+
+// initiationState is accumulated for each Database during an initiation round.
+type initiationState struct {
+ // Relative name of the peer to sync with.
+ peer string
+
+ // Collection of mount tables that this peer may have registered with.
+ mtTables map[string]struct{}
+
+ // SyncGroups being requested in the initiation round.
+ sgIds sgSet
+
+ // SyncGroup prefixes being requested in the initiation round, and their
+ // corresponding SyncGroup ids.
+ sgPfxs map[string]sgSet
+
+ // Local generation vector.
+ local interfaces.GenVector
+
+ // Generation vector from the remote peer.
+ remote interfaces.GenVector
+
+ // Updated local generation vector at the end of the initiation round.
+ updLocal interfaces.GenVector
+
+ // State to track updated objects during a log replay.
+ updObjects map[string]*objConflictState
+
+ // DAG state that tracks conflicts and common ancestors.
+ dagGraft graftMap
+
+ sync *syncService
+ appName string
+ dbName string
+ st store.Store // Store handle to the Database.
+ stream interfaces.SyncGetDeltasClientCall // Stream handle for the GetDeltas RPC.
+
+ // Transaction handle for the initiation round. Used during the update
+ // of objects in the Database.
+ tx store.Transaction
+}
+
+// objConflictState contains the conflict state for an object that is updated
+// during an initiator round.
+type objConflictState struct {
+ isConflict bool
+ newHead string
+ oldHead string
+ ancestor string
+ res *conflictResolution
+}
+
+// newInitiationState creates new initiation state.
+func newInitiationState(ctx *context.T, s *syncService, peer string, name string, sgInfo sgMemberInfo) (*initiationState, error) {
+ iSt := &initiationState{}
+ iSt.peer = peer
+ iSt.updObjects = make(map[string]*objConflictState)
+ iSt.dagGraft = newGraft()
+ iSt.sync = s
+
+ // TODO(hpucha): Would be nice to standardize on the combined "app:db"
+// name across sync (not syncbase) so we only split/join them at
+ // the boundary with the store part.
+ var err error
+ iSt.appName, iSt.dbName, err = splitAppDbName(ctx, name)
+ if err != nil {
+ return nil, err
+ }
+
+ // TODO(hpucha): nil rpc.ServerCall ok?
+ iSt.st, err = s.getDbStore(ctx, nil, iSt.appName, iSt.dbName)
+ if err != nil {
+ return nil, err
+ }
+
+ iSt.peerMtTblsAndSgInfo(ctx, peer, sgInfo)
+
+ return iSt, nil
+}
+
+// peerMtTblsAndSgInfo computes the possible mount tables, the SyncGroup Ids and
+// prefixes common with a remote peer in a particular Database by consulting the
+// SyncGroups in the specified Database.
+func (iSt *initiationState) peerMtTblsAndSgInfo(ctx *context.T, peer string, info sgMemberInfo) {
+ iSt.mtTables = make(map[string]struct{})
+ iSt.sgIds = make(sgSet)
+ iSt.sgPfxs = make(map[string]sgSet)
+
+ for id := range info {
+ sg, err := getSyncGroupById(ctx, iSt.st, id)
+ if err != nil {
+ continue
+ }
+ if _, ok := sg.Joiners[peer]; !ok {
+ // Peer is no longer part of the SyncGroup.
+ continue
+ }
+ for _, mt := range sg.Spec.MountTables {
+ iSt.mtTables[mt] = struct{}{}
+ }
+ iSt.sgIds[id] = struct{}{}
+
+ for _, p := range sg.Spec.Prefixes {
+ sgs, ok := iSt.sgPfxs[p]
+ if !ok {
+ sgs = make(sgSet)
+ iSt.sgPfxs[p] = sgs
+ }
+ sgs[id] = struct{}{}
+ }
+ }
+}
+
+// connectToPeer attempts to connect to the remote peer using the mount tables
+// obtained from the SyncGroups being synced in the current Database.
+func (iSt *initiationState) connectToPeer(ctx *context.T) (interfaces.SyncGetDeltasClientCall, bool) {
+ if len(iSt.mtTables) < 1 {
+ vlog.Errorf("sync: connectToPeer: no mount tables found to connect to peer %s, app %s db %s", iSt.peer, iSt.appName, iSt.dbName)
+ return nil, false
+ }
+ for mt := range iSt.mtTables {
+ absName := naming.Join(mt, iSt.peer, util.SyncbaseSuffix)
+ c := interfaces.SyncClient(absName)
+ stream, err := c.GetDeltas(ctx, iSt.sync.name)
+ if err == nil {
+ vlog.VI(3).Infof("sync: connectToPeer: established on %s", absName)
+ return stream, true
+ }
+ }
+ return nil, false
+}
+
+// createLocalGenVec creates the generation vector with local knowledge for the
+// initiator to send to the responder.
+//
+// TODO(hpucha): Refactor this code with computeDelta code in sync_state.go.
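+//
+// A small worked example (illustrative values, not from a real run): with
+// SyncGroup prefixes {"foo", "foobar"} and local genvec entries for
+// {"f", "foo", "foobarbaz"}, only "foo" starts a prefix group ("foobar" nests
+// under it). The result keeps local["foo"] under "foo" and copies
+// local["foobarbaz"] unchanged; "f" only serves as the closest enclosing local
+// prefix until the exact "foo" entry is found.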
+func (iSt *initiationState) createLocalGenVec(ctx *context.T) error {
+ iSt.sync.thLock.Lock()
+ defer iSt.sync.thLock.Unlock()
+
+ // Freeze the most recent batch of local changes before fetching
+ // remote changes from a peer. This frozen state is used by the
+ // responder when responding to GetDeltas RPC.
+ //
+ // We only allow an initiator to freeze local generations (not
+ // responders/watcher) in order to maintain a static baseline
+ // for the duration of a sync. This addresses the following race
+ // condition: If we allow responders to use newer local
+ // generations while the initiator is in progress, they may beat
+ // the initiator and send these new generations to remote
+ // devices. These remote devices in turn can send these
+ // generations back to the initiator in progress which was
+ // started with older generation information.
+ if err := iSt.sync.checkptLocalGen(ctx, iSt.appName, iSt.dbName); err != nil {
+ return err
+ }
+
+ local, lgen, err := iSt.sync.copyDbGenInfo(ctx, iSt.appName, iSt.dbName)
+ if err != nil {
+ return err
+ }
+ localPfxs := extractAndSortPrefixes(local)
+
+ sgPfxs := make([]string, len(iSt.sgPfxs))
+ i := 0
+ for p := range iSt.sgPfxs {
+ sgPfxs[i] = p
+ i++
+ }
+ sort.Strings(sgPfxs)
+
+ iSt.local = make(interfaces.GenVector)
+
+ if len(sgPfxs) == 0 {
+ return verror.New(verror.ErrInternal, ctx, "no syncgroups for syncing")
+ }
+
+ pfx := sgPfxs[0]
+ for _, p := range sgPfxs {
+ if strings.HasPrefix(p, pfx) && p != pfx {
+ continue
+ }
+
+ // Process this prefix as this is the start of a new set of
+ // nested prefixes.
+ pfx = p
+ var lpStart string
+ for _, lp := range localPfxs {
+ if !strings.HasPrefix(lp, pfx) && !strings.HasPrefix(pfx, lp) {
+ // No relationship with pfx.
+ continue
+ }
+ if strings.HasPrefix(pfx, lp) {
+ lpStart = lp
+ } else {
+ iSt.local[lp] = local[lp]
+ }
+ }
+ // Deal with the starting point.
+ if lpStart == "" {
+ // No matching prefixes for pfx were found.
+ iSt.local[pfx] = make(interfaces.PrefixGenVector)
+ iSt.local[pfx][iSt.sync.id] = lgen
+ } else {
+ iSt.local[pfx] = local[lpStart]
+ }
+ }
+ return nil
+}
+
+// recvAndProcessDeltas first receives the log records and generation vector
+// from the GetDeltas RPC and puts them in the Database. It also replays the
+// entire log stream as the log records arrive. These records span multiple
+// generations from different devices. It does not perform any conflict
+// resolution during replay. This avoids resolving conflicts that have already
+// been resolved by other devices.
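+//
+// The expected shape of the response stream, as enforced below, is roughly:
+//
+//    DeltaRespStart, (DeltaRespRec | DeltaRespRespVec)*, DeltaRespFinish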
+func (iSt *initiationState) recvAndProcessDeltas(ctx *context.T) error {
+ iSt.sync.thLock.Lock()
+ defer iSt.sync.thLock.Unlock()
+
+ // TODO(hpucha): This works for now, but figure out a long term solution
+ // as this may be implementation dependent. It currently works because
+ // the RecvStream call is stateless, and grabbing a handle to it
+ // repeatedly doesn't affect what data is seen next.
+ rcvr := iSt.stream.RecvStream()
+ start, finish := false, false
+
+ // TODO(hpucha): See if we can avoid committing the entire delta stream
+ // as one batch. Currently the dependency is between the log records and
+ // the batch info.
+ tx := iSt.st.NewTransaction()
+ committed := false
+
+ defer func() {
+ if !committed {
+ tx.Abort()
+ }
+ }()
+
+ // Track received batches (BatchId --> BatchCount mapping).
+ batchMap := make(map[uint64]uint64)
+
+ for rcvr.Advance() {
+ resp := rcvr.Value()
+ switch v := resp.(type) {
+ case interfaces.DeltaRespStart:
+ if start {
+ return verror.New(verror.ErrInternal, ctx, "received start followed by start in delta response stream")
+ }
+ start = true
+
+ case interfaces.DeltaRespFinish:
+ if finish {
+ return verror.New(verror.ErrInternal, ctx, "received finish followed by finish in delta response stream")
+ }
+ finish = true
+
+ case interfaces.DeltaRespRespVec:
+ iSt.remote = v.Value
+
+ case interfaces.DeltaRespRec:
+ // Insert log record in Database.
+ // TODO(hpucha): Should we reserve more positions in a batch?
+ // TODO(hpucha): Handle if SyncGroup is left/destroyed while sync is in progress.
+ pos := iSt.sync.reservePosInDbLog(ctx, iSt.appName, iSt.dbName, 1)
+ rec := &localLogRec{Metadata: v.Value.Metadata, Pos: pos}
+ batchId := rec.Metadata.BatchId
+ if batchId != NoBatchId {
+ if cnt, ok := batchMap[batchId]; !ok {
+ if iSt.sync.startBatch(ctx, tx, batchId) != batchId {
+ return verror.New(verror.ErrInternal, ctx, "failed to create batch info")
+ }
+ batchMap[batchId] = rec.Metadata.BatchCount
+ } else if cnt != rec.Metadata.BatchCount {
+ return verror.New(verror.ErrInternal, ctx, "inconsistent counts for tid", batchId, cnt, rec.Metadata.BatchCount)
+ }
+ }
+
+ vlog.VI(4).Infof("sync: recvAndProcessDeltas: processing rec %v", rec)
+ if err := iSt.insertRecInLogDagAndDb(ctx, rec, batchId, v.Value.Value, tx); err != nil {
+ return err
+ }
+
+ // Check for BlobRefs, and process them.
+ if err := iSt.processBlobRefs(ctx, &rec.Metadata, v.Value.Value); err != nil {
+ return err
+ }
+
+ // Mark object dirty.
+ iSt.updObjects[rec.Metadata.ObjId] = &objConflictState{}
+ }
+
+ // Break out of the stream.
+ if finish {
+ break
+ }
+ }
+
+ if !(start && finish) {
+ return verror.New(verror.ErrInternal, ctx, "didn't receive start/finish delimiters in delta response stream")
+ }
+
+ if err := rcvr.Err(); err != nil {
+ return err
+ }
+
+ // End the started batches if any.
+ for bid, cnt := range batchMap {
+ if err := iSt.sync.endBatch(ctx, tx, bid, cnt); err != nil {
+ return err
+ }
+ }
+
+ // Commit this transaction. We do not retry this transaction since it
+ // should not conflict with any other keys. So if it fails, it is a
+ // non-retriable error.
+ err := tx.Commit()
+ if verror.ErrorID(err) == store.ErrConcurrentTransaction.ID {
+ // Note: This might be triggered with memstore until it handles
+ // transactions in a more fine-grained fashion.
+ vlog.Fatalf("sync: recvAndProcessDeltas: encountered concurrent transaction")
+ }
+ if err == nil {
+ committed = true
+ }
+ return err
+}
+
+func (iSt *initiationState) processBlobRefs(ctx *context.T, m *interfaces.LogRecMetadata, valbuf []byte) error {
+ objid := m.ObjId
+ srcPeer := syncbaseIdToName(m.Id)
+
+ vlog.VI(4).Infof("sync: processBlobRefs: begin processing blob refs for objid %s", objid)
+ defer vlog.VI(4).Infof("sync: processBlobRefs: end processing blob refs for objid %s", objid)
+
+ if valbuf == nil {
+ return nil
+ }
+
+ var val *vdl.Value
+ if err := vom.Decode(valbuf, &val); err != nil {
+ return err
+ }
+
+ brs := make(map[nosql.BlobRef]struct{})
+ if err := extractBlobRefs(val, brs); err != nil {
+ return err
+ }
+ sgIds := make(sgSet)
+ for br := range brs {
+ for p, sgs := range iSt.sgPfxs {
+ if strings.HasPrefix(extractAppKey(objid), p) {
+ for sg := range sgs {
+ sgIds[sg] = struct{}{}
+ }
+ }
+ }
+ vlog.VI(4).Infof("sync: processBlobRefs: Found blobref %v peer %v, source %v, sgs %v", br, iSt.peer, srcPeer, sgIds)
+ info := &blobLocInfo{peer: iSt.peer, source: srcPeer, sgIds: sgIds}
+ if err := iSt.sync.addBlobLocInfo(ctx, br, info); err != nil {
+ return err
+ }
+ }
+
+ return nil
+}
+
+// TODO(hpucha): Handle blobrefs part of list, map, any.
+func extractBlobRefs(val *vdl.Value, brs map[nosql.BlobRef]struct{}) error {
+ if val == nil {
+ return nil
+ }
+ switch val.Kind() {
+ case vdl.String:
+ // Could be a BlobRef.
+ var br nosql.BlobRef
+ if val.Type() == vdl.TypeOf(br) {
+ brs[nosql.BlobRef(val.RawString())] = struct{}{}
+ }
+ case vdl.Struct:
+ for i := 0; i < val.Type().NumField(); i++ {
+ v := val.StructField(i)
+ if err := extractBlobRefs(v, brs); err != nil {
+ return err
+ }
+ }
+ }
+ return nil
+}
+
+// insertRecInLogDagAndDb adds a new log record to log and dag data structures,
+// and inserts the versioned value in the Database.
+func (iSt *initiationState) insertRecInLogDagAndDb(ctx *context.T, rec *localLogRec, batchId uint64, valbuf []byte, tx store.Transaction) error {
+ if err := putLogRec(ctx, tx, rec); err != nil {
+ return err
+ }
+
+ m := rec.Metadata
+ logKey := logRecKey(m.Id, m.Gen)
+
+ var err error
+ switch m.RecType {
+ case interfaces.NodeRec:
+ err = iSt.sync.addNode(ctx, tx, m.ObjId, m.CurVers, logKey, m.Delete, m.Parents, m.BatchId, iSt.dagGraft)
+ case interfaces.LinkRec:
+ err = iSt.sync.addParent(ctx, tx, m.ObjId, m.CurVers, m.Parents[0], iSt.dagGraft)
+ default:
+ err = verror.New(verror.ErrInternal, ctx, "unknown log record type")
+ }
+
+ if err != nil {
+ return err
+ }
+ // TODO(hpucha): Hack right now. Need to change Database's handling of
+ // deleted objects. Currently, the initiator needs to treat deletions
+ // specially since deletions do not get a version number or a special
+ // value in the Database.
+ if !rec.Metadata.Delete && rec.Metadata.RecType == interfaces.NodeRec {
+ return watchable.PutAtVersion(ctx, tx, []byte(m.ObjId), valbuf, []byte(m.CurVers))
+ }
+ return nil
+}
+
+// processUpdatedObjects processes all the updates received by the initiator,
+// one object at a time. Conflict detection and resolution is carried out after
+// the entire delta of log records is replayed, instead of incrementally after
+// each record/batch is replayed, to avoid repeating conflict resolution already
+// performed by other peers.
+//
+// For each updated object, we first check if the object has any conflicts,
+// resulting in three possibilities:
+//
+// * There is no conflict, and no updates are needed to the Database
+// (isConflict=false, newHead == oldHead). All changes received convey
+// information that still keeps the local head as the most recent version. This
+// occurs when conflicts are resolved by picking the existing local version.
+//
+// * There is no conflict, but a remote version is discovered that builds on the
+// local head (isConflict=false, newHead != oldHead). In this case, we generate
+// a Database update to simply update the Database to the latest value.
+//
+// * There is a conflict and we call into the app or use a well-known policy to
+// resolve the conflict, resulting in three possibilities: (a) conflict was
+// resolved by picking the local version. In this case, Database need not be
+// updated, but a link is added to record the choice. (b) conflict was resolved
+// by picking the remote version. In this case, Database is updated with the
+// remote version and a link is added as well. (c) conflict was resolved by
+// generating a new Database update. In this case, Database is updated with the
+// new version.
+//
+// We collect all the updates to the Database in a transaction. In addition, as
+// part of the same transaction, we update the log and dag state suitably (move
+// the head ptr of the object in the dag to the latest version, and create a new
+// log record reflecting conflict resolution if any). Finally, we update the
+// on-storage sync state. This transaction's commit can fail since preconditions
+// on the objects may have been violated. In that case, we fetch the latest
+// versions of the objects from the Database, recheck for conflicts, and repeat
+// the above steps until the transaction commits successfully. Upon commit, we
+// also update the in-memory sync state of the Database.
+func (iSt *initiationState) processUpdatedObjects(ctx *context.T) error {
+ // Note that the tx handle in the initiation state is cached only for the
+ // scope of this function, since different stages in the pipeline add to
+ // the same transaction.
+ committed := false
+ defer func() {
+ if !committed {
+ iSt.tx.Abort()
+ }
+ }()
+
+ for {
+ vlog.VI(3).Infof("sync: processUpdatedObjects: begin: %d objects updated", len(iSt.updObjects))
+
+ iSt.tx = iSt.st.NewTransaction()
+ watchable.SetTransactionFromSync(iSt.tx) // for echo-suppression
+
+ if count, err := iSt.detectConflicts(ctx); err != nil {
+ return err
+ } else {
+ vlog.VI(3).Infof("sync: processUpdatedObjects: %d conflicts detected", count)
+ }
+
+ if err := iSt.resolveConflicts(ctx); err != nil {
+ return err
+ }
+
+ err := iSt.updateDbAndSyncSt(ctx)
+ if err == nil {
+ err = iSt.tx.Commit()
+ }
+ if err == nil {
+ committed = true
+ // Update in-memory genvector since commit is successful.
+ if err := iSt.sync.putDbGenInfoRemote(ctx, iSt.appName, iSt.dbName, iSt.updLocal); err != nil {
+ vlog.Fatalf("sync: processUpdatedObjects: putting geninfo in memory failed for app %s db %s, err %v", iSt.appName, iSt.dbName, err)
+ }
+ vlog.VI(3).Info("sync: processUpdatedObjects: end: changes committed")
+ return nil
+ }
+ if verror.ErrorID(err) != store.ErrConcurrentTransaction.ID {
+ return err
+ }
+
+ // Either updateDbAndSyncSt() or tx.Commit() detected a
+ // concurrent transaction. Retry processing the remote updates.
+ //
+ // TODO(hpucha): Sleeping and retrying is a temporary
+ // solution. Next iteration will have coordination with watch
+ // thread to intelligently retry. Hence this value is not a
+ // config param.
+ vlog.VI(3).Info("sync: processUpdatedObjects: retry due to local mutations")
+ iSt.tx.Abort()
+ time.Sleep(1 * time.Second)
+ }
+}
+
+// detectConflicts iterates through all the updated objects to detect conflicts.
+func (iSt *initiationState) detectConflicts(ctx *context.T) (int, error) {
+ count := 0
+ for objid, confSt := range iSt.updObjects {
+ // Check if object has a conflict.
+ var err error
+ confSt.isConflict, confSt.newHead, confSt.oldHead, confSt.ancestor, err = hasConflict(ctx, iSt.tx, objid, iSt.dagGraft)
+ if err != nil {
+ return 0, err
+ }
+
+ if !confSt.isConflict {
+ if confSt.newHead == confSt.oldHead {
+ confSt.res = &conflictResolution{ty: pickLocal}
+ } else {
+ confSt.res = &conflictResolution{ty: pickRemote}
+ }
+ } else {
+ count++
+ }
+ }
+ return count, nil
+}
+
+// updateDbAndSyncSt updates the Database and, if that is successful, updates
+// the log, dag, and genvector data structures as needed.
+func (iSt *initiationState) updateDbAndSyncSt(ctx *context.T) error {
+ for objid, confSt := range iSt.updObjects {
+ // If the local version is picked, no further updates to the
+ // Database are needed. If the remote version is picked or if a
+ // new version is created, we put it in the Database.
+ if confSt.res.ty != pickLocal {
+
+ // TODO(hpucha): Hack right now. Need to change Database's
+ // handling of deleted objects.
+ oldVersDeleted := true
+ if confSt.oldHead != NoVersion {
+ oldDagNode, err := getNode(ctx, iSt.tx, objid, confSt.oldHead)
+ if err != nil {
+ return err
+ }
+ oldVersDeleted = oldDagNode.Deleted
+ }
+
+ var newVersion string
+ var newVersDeleted bool
+ switch confSt.res.ty {
+ case pickRemote:
+ newVersion = confSt.newHead
+ newDagNode, err := getNode(ctx, iSt.tx, objid, newVersion)
+ if err != nil {
+ return err
+ }
+ newVersDeleted = newDagNode.Deleted
+ case createNew:
+ newVersion = confSt.res.rec.Metadata.CurVers
+ newVersDeleted = confSt.res.rec.Metadata.Delete
+ }
+
+ // Skip delete followed by a delete.
+ if oldVersDeleted && newVersDeleted {
+ continue
+ }
+
+ if !oldVersDeleted {
+ // Read current version to enter it in the readset of the transaction.
+ version, err := watchable.GetVersion(ctx, iSt.tx, []byte(objid))
+ if err != nil {
+ return err
+ }
+ if string(version) != confSt.oldHead {
+ vlog.VI(4).Infof("sync: updateDbAndSyncSt: concurrent updates %s %s", version, confSt.oldHead)
+ return store.NewErrConcurrentTransaction(ctx)
+ }
+ } else {
+ // Ensure key doesn't exist.
+ if _, err := watchable.GetVersion(ctx, iSt.tx, []byte(objid)); verror.ErrorID(err) != store.ErrUnknownKey.ID {
+ return store.NewErrConcurrentTransaction(ctx)
+ }
+ }
+
+ if !newVersDeleted {
+ if confSt.res.ty == createNew {
+ vlog.VI(4).Infof("sync: updateDbAndSyncSt: PutAtVersion %s %s", objid, newVersion)
+ if err := watchable.PutAtVersion(ctx, iSt.tx, []byte(objid), confSt.res.val, []byte(newVersion)); err != nil {
+ return err
+ }
+ }
+ vlog.VI(4).Infof("sync: updateDbAndSyncSt: PutVersion %s %s", objid, newVersion)
+ if err := watchable.PutVersion(ctx, iSt.tx, []byte(objid), []byte(newVersion)); err != nil {
+ return err
+ }
+ } else {
+ vlog.VI(4).Infof("sync: updateDbAndSyncSt: Deleting obj %s", objid)
+ if err := iSt.tx.Delete([]byte(objid)); err != nil {
+ return err
+ }
+ }
+ }
+ // Always update sync state irrespective of local/remote/new
+ // versions being picked.
+ if err := iSt.updateLogAndDag(ctx, objid); err != nil {
+ return err
+ }
+ }
+
+ return iSt.updateSyncSt(ctx)
+}
+
+// updateLogAndDag updates the log and dag data structures.
+func (iSt *initiationState) updateLogAndDag(ctx *context.T, obj string) error {
+ confSt, ok := iSt.updObjects[obj]
+ if !ok {
+ return verror.New(verror.ErrInternal, ctx, "object state not found", obj)
+ }
+ var newVersion string
+
+ if !confSt.isConflict {
+ newVersion = confSt.newHead
+ } else {
+ // Object had a conflict. Create a log record to reflect resolution.
+ var rec *localLogRec
+
+ switch {
+ case confSt.res.ty == pickLocal:
+ // Local version was picked as the conflict resolution.
+ rec = iSt.createLocalLinkLogRec(ctx, obj, confSt.oldHead, confSt.newHead)
+ newVersion = confSt.oldHead
+ case confSt.res.ty == pickRemote:
+ // Remote version was picked as the conflict resolution.
+ rec = iSt.createLocalLinkLogRec(ctx, obj, confSt.newHead, confSt.oldHead)
+ newVersion = confSt.newHead
+ default:
+ // New version was created to resolve the conflict.
+ rec = confSt.res.rec
+ newVersion = confSt.res.rec.Metadata.CurVers
+ }
+
+ if err := putLogRec(ctx, iSt.tx, rec); err != nil {
+ return err
+ }
+
+ // Add a new DAG node.
+ var err error
+ m := rec.Metadata
+ switch m.RecType {
+ case interfaces.NodeRec:
+ err = iSt.sync.addNode(ctx, iSt.tx, obj, m.CurVers, logRecKey(m.Id, m.Gen), m.Delete, m.Parents, NoBatchId, nil)
+ case interfaces.LinkRec:
+ err = iSt.sync.addParent(ctx, iSt.tx, obj, m.CurVers, m.Parents[0], nil)
+ default:
+ return verror.New(verror.ErrInternal, ctx, "unknown log record type")
+ }
+ if err != nil {
+ return err
+ }
+ }
+
+ // Move the head. This should be idempotent. We may move head to the
+ // local head in some cases.
+ return moveHead(ctx, iSt.tx, obj, newVersion)
+}
+
+func (iSt *initiationState) createLocalLinkLogRec(ctx *context.T, obj, vers, par string) *localLogRec {
+ gen, pos := iSt.sync.reserveGenAndPosInDbLog(ctx, iSt.appName, iSt.dbName, 1)
+
+ vlog.VI(4).Infof("sync: createLocalLinkLogRec: obj %s vers %s par %s", obj, vers, par)
+
+ rec := &localLogRec{
+ Metadata: interfaces.LogRecMetadata{
+ Id: iSt.sync.id,
+ Gen: gen,
+ RecType: interfaces.LinkRec,
+
+ ObjId: obj,
+ CurVers: vers,
+ Parents: []string{par},
+ UpdTime: time.Now().UTC(),
+ BatchId: NoBatchId,
+ BatchCount: 1,
+ // TODO(hpucha): What is its batchid and count?
+ },
+ Pos: pos,
+ }
+ return rec
+}
+
+// updateSyncSt updates local sync state at the end of an initiator cycle.
+func (iSt *initiationState) updateSyncSt(ctx *context.T) error {
+ // Get the current local sync state.
+ dsInMem, err := iSt.sync.copyDbSyncStateInMem(ctx, iSt.appName, iSt.dbName)
+ if err != nil {
+ return err
+ }
+ ds := &dbSyncState{
+ Gen: dsInMem.gen,
+ CheckptGen: dsInMem.checkptGen,
+ GenVec: dsInMem.genvec,
+ }
+
+ // remote can be a subset of local.
+ for rpfx, respgv := range iSt.remote {
+ for lpfx, lpgv := range ds.GenVec {
+ if strings.HasPrefix(lpfx, rpfx) {
+ mergePrefixGenVectors(lpgv, respgv)
+ }
+ }
+ if _, ok := ds.GenVec[rpfx]; !ok {
+ ds.GenVec[rpfx] = respgv
+ }
+ }
+
+ iSt.updLocal = ds.GenVec
+ // Clean the genvector of any local state. Note that local state is held
+ // in gen/checkptGen in the sync state struct.
+ for _, pgv := range iSt.updLocal {
+ delete(pgv, iSt.sync.id)
+ }
+
+ // TODO(hpucha): Add knowledge compaction.
+
+ return putDbSyncState(ctx, iSt.tx, ds)
+}
+
+// mergePrefixGenVectors merges responder prefix genvector into local genvector.
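+// For illustration (hypothetical values): merging a local prefix genvector
+// {10:5, 11:2} with a responder prefix genvector {10:3, 11:7, 12:1} yields
+// {10:5, 11:7, 12:1}, i.e. the element-wise maximum per device id.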
+func mergePrefixGenVectors(lpgv, respgv interfaces.PrefixGenVector) {
+ for devid, rgen := range respgv {
+ gen, ok := lpgv[devid]
+ if !ok || gen < rgen {
+ lpgv[devid] = rgen
+ }
+ }
+}
+
+////////////////////////////////////////
+// Peer selection policies.
+
+// pickPeer picks a Syncbase to sync with.
+func (s *syncService) pickPeer(ctx *context.T) (string, error) {
+ switch peerSelectionPolicy {
+ case selectRandom:
+ members := s.getMembers(ctx)
+ // Remove myself from the set.
+ delete(members, s.name)
+ if len(members) == 0 {
+ return "", verror.New(verror.ErrInternal, ctx, "no useful peer")
+ }
+
+ // Pick a peer at random.
+ ind := randIntn(len(members))
+ for m := range members {
+ if ind == 0 {
+ return m, nil
+ }
+ ind--
+ }
+ return "", verror.New(verror.ErrInternal, ctx, "random selection didn't succeed")
+ default:
+ return "", verror.New(verror.ErrInternal, ctx, "unknown peer selection policy")
+ }
+}
diff --git a/services/syncbase/vsync/initiator_test.go b/services/syncbase/vsync/initiator_test.go
new file mode 100644
index 0000000..af09ce1
--- /dev/null
+++ b/services/syncbase/vsync/initiator_test.go
@@ -0,0 +1,477 @@
+// Copyright 2015 The Vanadium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// The initiator tests below are driven by replaying the state from the log
+// files (in testdata directory). These log files may mimic watching the
+// Database locally (addl commands in the log file) or obtaining log records and
+// generation vector from a remote peer (addr, genvec commands). The log files
+// contain the metadata of log records. The log files are only used to set up
+// the state. The tests verify that given a particular local state and a stream
+// of remote deltas, the initiator behaves as expected.
+
+package vsync
+
+import (
+ "fmt"
+ "reflect"
+ "testing"
+ "time"
+
+ "v.io/syncbase/v23/services/syncbase/nosql"
+ "v.io/syncbase/x/ref/services/syncbase/server/interfaces"
+ "v.io/syncbase/x/ref/services/syncbase/server/util"
+ "v.io/syncbase/x/ref/services/syncbase/server/watchable"
+ "v.io/v23/vdl"
+ "v.io/v23/vom"
+ _ "v.io/x/ref/runtime/factories/generic"
+)
+
+func TestExtractBlobRefs(t *testing.T) {
+ var tests [][]byte
+ br := nosql.BlobRef("123")
+
+ // BlobRef is the value.
+ buf0, err := vom.Encode(br)
+ if err != nil {
+ t.Fatalf("Encode(BlobRef) failed, err %v", err)
+ }
+ tests = append(tests, buf0)
+
+ // Struct contains BlobRef.
+ type test1Struct struct {
+ A int64
+ B string
+ C nosql.BlobRef
+ }
+ v1 := test1Struct{A: 10, B: "foo", C: br}
+ buf1, err := vom.Encode(v1)
+ if err != nil {
+ t.Fatalf("Encode(test1Struct) failed, err %v", err)
+ }
+ tests = append(tests, buf1)
+
+ // Nested struct contains BlobRef.
+ type test2Struct struct {
+ A int64
+ B string
+ C test1Struct
+ }
+ v2 := test2Struct{A: 10, B: "foo", C: v1}
+ buf2, err := vom.Encode(v2)
+ if err != nil {
+ t.Fatalf("Encode(test2Struct) failed, err %v", err)
+ }
+ tests = append(tests, buf2)
+
+ for i, buf := range tests {
+ var val *vdl.Value
+ if err := vom.Decode(buf, &val); err != nil {
+ t.Fatalf("Decode failed (test %d), err %v", i, err)
+ }
+
+ gotbrs := make(map[nosql.BlobRef]struct{})
+ if err := extractBlobRefs(val, gotbrs); err != nil {
+ t.Fatalf("extractBlobRefs failed (test %d), err %v", i, err)
+ }
+ wantbrs := map[nosql.BlobRef]struct{}{br: struct{}{}}
+ if !reflect.DeepEqual(gotbrs, wantbrs) {
+ t.Fatalf("Data mismatch in blobrefs (test %d), got %v, want %v", i, gotbrs, wantbrs)
+ }
+ }
+}
+
+// TestLogStreamRemoteOnly tests processing of a remote log stream. Commands are
+// in file testdata/remote-init-00.log.sync.
+func TestLogStreamRemoteOnly(t *testing.T) {
+ svc, iSt, cleanup := testInit(t, "", "remote-init-00.log.sync")
+ defer cleanup(t, svc)
+
+ // Check all log records.
+ objid := util.JoinKeyParts(util.RowPrefix, "foo1")
+ var gen uint64
+ var parents []string
+ for gen = 1; gen < 4; gen++ {
+ gotRec, err := getLogRec(nil, svc.St(), 11, gen)
+ if err != nil || gotRec == nil {
+ t.Fatalf("getLogRec can not find object 11 %d, err %v", gen, err)
+ }
+ vers := fmt.Sprintf("%d", gen)
+ wantRec := &localLogRec{
+ Metadata: interfaces.LogRecMetadata{
+ Id: 11,
+ Gen: gen,
+ RecType: interfaces.NodeRec,
+ ObjId: objid,
+ CurVers: vers,
+ Parents: parents,
+ UpdTime: constTime,
+ BatchCount: 1,
+ },
+ Pos: gen - 1,
+ }
+
+ if !reflect.DeepEqual(gotRec, wantRec) {
+ t.Fatalf("Data mismatch in log record got %v, want %v", gotRec, wantRec)
+ }
+ // Verify DAG state.
+ if _, err := getNode(nil, svc.St(), objid, vers); err != nil {
+ t.Fatalf("getNode can not find object %s vers %s in DAG, err %v", objid, vers, err)
+ }
+ // Verify Database state.
+ tx := svc.St().NewTransaction()
+ if _, err := watchable.GetAtVersion(nil, tx, []byte(objid), nil, []byte(vers)); err != nil {
+ t.Fatalf("GetAtVersion can not find object %s vers %s in Database, err %v", objid, vers, err)
+ }
+ tx.Abort()
+ parents = []string{vers}
+ }
+
+ // Verify conflict state.
+ if len(iSt.updObjects) != 1 {
+ t.Fatalf("Unexpected number of updated objects %d", len(iSt.updObjects))
+ }
+ st := iSt.updObjects[objid]
+ if st.isConflict {
+ t.Fatalf("Detected a conflict %v", st)
+ }
+ if st.newHead != "3" || st.oldHead != NoVersion {
+ t.Fatalf("Conflict detection didn't succeed %v", st)
+ }
+
+ // Verify genvec state.
+ wantVec := interfaces.GenVector{
+ "foo1": interfaces.PrefixGenVector{11: 3},
+ "bar": interfaces.PrefixGenVector{11: 0},
+ }
+ if !reflect.DeepEqual(iSt.updLocal, wantVec) {
+ t.Fatalf("Final local gen vec mismatch got %v, want %v", iSt.updLocal, wantVec)
+ }
+
+ // Verify DAG state.
+ if head, err := getHead(nil, svc.St(), objid); err != nil || head != "3" {
+ t.Fatalf("Invalid object %s head in DAG %v, err %v", objid, head, err)
+ }
+
+ // Verify Database state.
+ valbuf, err := svc.St().Get([]byte(objid), nil)
+ var val string
+ if err := vom.Decode(valbuf, &val); err != nil {
+ t.Fatalf("Value decode failed, err %v", err)
+ }
+ if err != nil || val != "abc" {
+ t.Fatalf("Invalid object %s in Database %v, err %v", objid, val, err)
+ }
+ tx := svc.St().NewTransaction()
+ version, err := watchable.GetVersion(nil, tx, []byte(objid))
+ if err != nil || string(version) != "3" {
+ t.Fatalf("Invalid object %s head in Database %v, err %v", objid, string(version), err)
+ }
+ tx.Abort()
+}
+
+// TestLogStreamNoConflict tests that a local and a remote log stream can be
+// correctly applied (when there are no conflicts). Commands are in files
+// testdata/<local-init-00.log.sync,remote-noconf-00.log.sync>.
+func TestLogStreamNoConflict(t *testing.T) {
+ svc, iSt, cleanup := testInit(t, "local-init-00.log.sync", "remote-noconf-00.log.sync")
+ defer cleanup(t, svc)
+
+ objid := util.JoinKeyParts(util.RowPrefix, "foo1")
+
+ // Check all log records.
+ var version uint64 = 1
+ var parents []string
+ for _, devid := range []uint64{10, 11} {
+ var gen uint64
+ for gen = 1; gen < 4; gen++ {
+ gotRec, err := getLogRec(nil, svc.St(), devid, gen)
+ if err != nil || gotRec == nil {
+ t.Fatalf("getLogRec can not find object %d:%d, err %v",
+ devid, gen, err)
+ }
+ vers := fmt.Sprintf("%d", version)
+ wantRec := &localLogRec{
+ Metadata: interfaces.LogRecMetadata{
+ Id: devid,
+ Gen: gen,
+ RecType: interfaces.NodeRec,
+ ObjId: objid,
+ CurVers: vers,
+ Parents: parents,
+ UpdTime: constTime,
+ BatchCount: 1,
+ },
+ Pos: gen - 1,
+ }
+
+ if !reflect.DeepEqual(gotRec, wantRec) {
+ t.Fatalf("Data mismatch in log record got %v, want %v", gotRec, wantRec)
+ }
+
+ // Verify DAG state.
+ if _, err := getNode(nil, svc.St(), objid, vers); err != nil {
+ t.Fatalf("getNode can not find object %s vers %s in DAG, err %v", objid, vers, err)
+ }
+ // Verify Database state.
+ tx := svc.St().NewTransaction()
+ if _, err := watchable.GetAtVersion(nil, tx, []byte(objid), nil, []byte(vers)); err != nil {
+ t.Fatalf("GetAtVersion can not find object %s vers %s in Database, err %v", objid, vers, err)
+ }
+ tx.Abort()
+ parents = []string{vers}
+ version++
+ }
+ }
+
+ // Verify conflict state.
+ if len(iSt.updObjects) != 1 {
+ t.Fatalf("Unexpected number of updated objects %d", len(iSt.updObjects))
+ }
+ st := iSt.updObjects[objid]
+ if st.isConflict {
+ t.Fatalf("Detected a conflict %v", st)
+ }
+ if st.newHead != "6" || st.oldHead != "3" {
+ t.Fatalf("Conflict detection didn't succeed %v", st)
+ }
+
+ // Verify genvec state.
+ wantVec := interfaces.GenVector{
+ "foo1": interfaces.PrefixGenVector{11: 3},
+ "bar": interfaces.PrefixGenVector{11: 0},
+ }
+ if !reflect.DeepEqual(iSt.updLocal, wantVec) {
+ t.Fatalf("Final local gen vec failed got %v, want %v", iSt.updLocal, wantVec)
+ }
+
+ // Verify DAG state.
+ if head, err := getHead(nil, svc.St(), objid); err != nil || head != "6" {
+ t.Fatalf("Invalid object %s head in DAG %v, err %v", objid, head, err)
+ }
+
+ // Verify Database state.
+ valbuf, err := svc.St().Get([]byte(objid), nil)
+ var val string
+ if err := vom.Decode(valbuf, &val); err != nil {
+ t.Fatalf("Value decode failed, err %v", err)
+ }
+ if err != nil || val != "abc" {
+ t.Fatalf("Invalid object %s in Database %v, err %v", objid, val, err)
+ }
+ tx := svc.St().NewTransaction()
+ versbuf, err := watchable.GetVersion(nil, tx, []byte(objid))
+ if err != nil || string(versbuf) != "6" {
+ t.Fatalf("Invalid object %s head in Database %v, err %v", objid, string(versbuf), err)
+ }
+ tx.Abort()
+}
+
+// TestLogStreamConflict tests that a local and a remote log stream can be
+// correctly applied when there are conflicts. Commands are in files
+// testdata/<local-init-00.log.sync,remote-conf-00.log.sync>.
+func TestLogStreamConflict(t *testing.T) {
+ svc, iSt, cleanup := testInit(t, "local-init-00.log.sync", "remote-conf-00.log.sync")
+ defer cleanup(t, svc)
+
+ objid := util.JoinKeyParts(util.RowPrefix, "foo1")
+
+ // Verify conflict state.
+ if len(iSt.updObjects) != 1 {
+ t.Fatalf("Unexpected number of updated objects %d", len(iSt.updObjects))
+ }
+ st := iSt.updObjects[objid]
+ if !st.isConflict {
+ t.Fatalf("Didn't detect a conflict %v", st)
+ }
+ if st.newHead != "6" || st.oldHead != "3" || st.ancestor != "2" {
+ t.Fatalf("Conflict detection didn't succeed %v", st)
+ }
+ if st.res.ty != pickRemote {
+ t.Fatalf("Conflict resolution did not pick remote: %v", st.res.ty)
+ }
+
+ // Verify DAG state.
+ if head, err := getHead(nil, svc.St(), objid); err != nil || head != "6" {
+ t.Fatalf("Invalid object %s head in DAG %v, err %v", objid, head, err)
+ }
+
+ // Verify Database state.
+ valbuf, err := svc.St().Get([]byte(objid), nil)
+ var val string
+ if err := vom.Decode(valbuf, &val); err != nil {
+ t.Fatalf("Value decode failed, err %v", err)
+ }
+ if err != nil || val != "abc" {
+ t.Fatalf("Invalid object %s in Database %v, err %v", objid, string(valbuf), err)
+ }
+ tx := svc.St().NewTransaction()
+ versbuf, err := watchable.GetVersion(nil, tx, []byte(objid))
+ if err != nil || string(versbuf) != "6" {
+ t.Fatalf("Invalid object %s head in Database %v, err %v", objid, string(versbuf), err)
+ }
+ tx.Abort()
+}
+
+// TestLogStreamConflictNoAncestor tests that a local and a remote log stream
+// can be correctly applied when there are conflicts from the start where the
+// two versions of an object have no common ancestor. Commands are in files
+// testdata/<local-init-00.log.sync,remote-conf-03.log.sync>.
+func TestLogStreamConflictNoAncestor(t *testing.T) {
+ svc, iSt, cleanup := testInit(t, "local-init-00.log.sync", "remote-conf-03.log.sync")
+ defer cleanup(t, svc)
+
+ objid := util.JoinKeyParts(util.RowPrefix, "foo1")
+
+ // Verify conflict state.
+ if len(iSt.updObjects) != 1 {
+ t.Fatalf("Unexpected number of updated objects %d", len(iSt.updObjects))
+ }
+ st := iSt.updObjects[objid]
+ if !st.isConflict {
+ t.Fatalf("Didn't detect a conflict %v", st)
+ }
+ if st.newHead != "6" || st.oldHead != "3" || st.ancestor != "" {
+ t.Fatalf("Conflict detection didn't succeed %v", st)
+ }
+ if st.res.ty != pickRemote {
+ t.Fatalf("Conflict resolution did not pick remote: %v", st.res.ty)
+ }
+
+ // Verify DAG state.
+ if head, err := getHead(nil, svc.St(), objid); err != nil || head != "6" {
+ t.Fatalf("Invalid object %s head in DAG %v, err %v", objid, head, err)
+ }
+
+ // Verify Database state.
+ valbuf, err := svc.St().Get([]byte(objid), nil)
+ var val string
+ if err := vom.Decode(valbuf, &val); err != nil {
+ t.Fatalf("Value decode failed, err %v", err)
+ }
+ if err != nil || val != "abc" {
+ t.Fatalf("Invalid object %s in Database %v, err %v", objid, string(valbuf), err)
+ }
+ tx := svc.St().NewTransaction()
+ versbuf, err := watchable.GetVersion(nil, tx, []byte(objid))
+ if err != nil || string(versbuf) != "6" {
+ t.Fatalf("Invalid object %s head in Database %v, err %v", objid, string(versbuf), err)
+ }
+ tx.Abort()
+}
+
+//////////////////////////////
+// Helpers.
+
+func testInit(t *testing.T, lfile, rfile string) (*mockService, *initiationState, func(*testing.T, *mockService)) {
+ // Set a large sync interval to prevent the initiator from running.
+ peerSyncInterval = 1 * time.Hour
+ conflictResolutionPolicy = useTime
+ svc := createService(t)
+ cleanup := destroyService
+ s := svc.sync
+ s.id = 10 // initiator
+
+ sgId1 := interfaces.GroupId(1234)
+ nullInfo := nosql.SyncGroupMemberInfo{}
+ sgInfo := sgMemberInfo{
+ sgId1: nullInfo,
+ }
+
+ sg1 := &interfaces.SyncGroup{
+ Name: "sg1",
+ Id: sgId1,
+ AppName: "mockapp",
+ DbName: "mockdb",
+ Creator: "mockCreator",
+ SpecVersion: "etag-0",
+ Spec: nosql.SyncGroupSpec{
+ Prefixes: []string{"foo", "bar"},
+ MountTables: []string{"1/2/3/4", "5/6/7/8"},
+ },
+ Joiners: map[string]nosql.SyncGroupMemberInfo{
+ "a": nullInfo,
+ "b": nullInfo,
+ },
+ }
+
+ tx := svc.St().NewTransaction()
+ if err := addSyncGroup(nil, tx, sg1); err != nil {
+ t.Fatalf("cannot add SyncGroup ID %d, err %v", sg1.Id, err)
+ }
+ if err := tx.Commit(); err != nil {
+ t.Fatalf("cannot commit adding SyncGroup ID %d, err %v", sg1.Id, err)
+ }
+
+ if lfile != "" {
+ replayLocalCommands(t, svc, lfile)
+ }
+
+ if rfile == "" {
+ return svc, nil, cleanup
+ }
+
+ gdb := appDbName("mockapp", "mockdb")
+ iSt, err := newInitiationState(nil, s, "b", gdb, sgInfo)
+ if err != nil {
+ t.Fatalf("newInitiationState failed with err %v", err)
+ }
+
+ testIfSgPfxsEqual(t, iSt.sgPfxs, sg1.Spec.Prefixes)
+ testIfMapArrEqual(t, iSt.mtTables, sg1.Spec.MountTables)
+
+ s.initDbSyncStateInMem(nil, "mockapp", "mockdb")
+
+ // Create local genvec so that it contains knowledge only about common prefixes.
+ if err := iSt.createLocalGenVec(nil); err != nil {
+ t.Fatalf("createLocalGenVec failed with err %v", err)
+ }
+
+ wantVec := interfaces.GenVector{
+ "foo": interfaces.PrefixGenVector{10: 0},
+ "bar": interfaces.PrefixGenVector{10: 0},
+ }
+ if !reflect.DeepEqual(iSt.local, wantVec) {
+ t.Fatalf("createLocalGenVec failed got %v, want %v", iSt.local, wantVec)
+ }
+
+ iSt.stream = createReplayStream(t, rfile)
+
+ if err := iSt.recvAndProcessDeltas(nil); err != nil {
+ t.Fatalf("recvAndProcessDeltas failed with err %v", err)
+ }
+
+ if err := iSt.processUpdatedObjects(nil); err != nil {
+ t.Fatalf("processUpdatedObjects failed with err %v", err)
+ }
+ return svc, iSt, cleanup
+}
+
+func testIfSgPfxsEqual(t *testing.T, m map[string]sgSet, a []string) {
+ aMap := arrToMap(a)
+
+ if len(aMap) != len(m) {
+ t.Fatalf("testIfSgPfxsEqual diff lengths, got %v want %v", aMap, m)
+ }
+
+ for p := range aMap {
+ if _, ok := m[p]; !ok {
+ t.Fatalf("testIfSgPfxsEqual want %v", p)
+ }
+ }
+}
+
+func testIfMapArrEqual(t *testing.T, m map[string]struct{}, a []string) {
+ aMap := arrToMap(a)
+ if !reflect.DeepEqual(m, aMap) {
+ t.Fatalf("testIfMapArrEqual failed map %v, arr %v", m, aMap)
+ }
+}
+
+func arrToMap(a []string) map[string]struct{} {
+ m := make(map[string]struct{})
+ for _, s := range a {
+ m[s] = struct{}{}
+ }
+ return m
+}
diff --git a/services/syncbase/vsync/replay_test.go b/services/syncbase/vsync/replay_test.go
new file mode 100644
index 0000000..03d6dc6
--- /dev/null
+++ b/services/syncbase/vsync/replay_test.go
@@ -0,0 +1,399 @@
+// Copyright 2015 The Vanadium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package vsync
+
+// This file eases the setup of sync test scenarios: parseSyncCommands parses a
+// sync command file and returns the commands to execute, and
+// dagReplayCommands() replays the parsed commands at the DAG API level.
+
+import (
+ "bufio"
+ "container/list"
+ "fmt"
+ "os"
+ "strconv"
+ "strings"
+ "testing"
+ "time"
+
+ "v.io/syncbase/x/ref/services/syncbase/server/interfaces"
+ "v.io/syncbase/x/ref/services/syncbase/server/util"
+ "v.io/syncbase/x/ref/services/syncbase/server/watchable"
+ "v.io/v23/context"
+ "v.io/v23/vom"
+)
+
+const (
+ addLocal = iota
+ addRemote
+ linkLocal
+ linkRemote
+ genvec
+)
+
+var (
+ constTime = time.Date(2009, time.November, 10, 23, 0, 0, 0, time.UTC)
+)
+
+type syncCommand struct {
+ cmd int
+ oid string
+ version string
+ parents []string
+ logrec string
+ deleted bool
+ batchId uint64
+ batchCount uint64
+ genVec interfaces.GenVector
+}
+
+// parseSyncCommands parses a sync test file and returns its commands.
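+//
+// For illustration (hypothetical lines; the actual state used by the tests
+// lives in the files under testdata/):
+//   addr|foo1|2|1||<logrec-key>|0|1|false
+//     i.e. cmd|oid|version|parent1|parent2|logrec|batchId|batchCount|deleted
+//   genvec|foo1|10:1,11:3|bar|11:0
+//     i.e. genvec followed by alternating prefix and "dev:gen" pairs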
+func parseSyncCommands(file string) ([]syncCommand, error) {
+ cmds := []syncCommand{}
+
+ sf, err := os.Open("testdata/" + file)
+ if err != nil {
+ return nil, err
+ }
+ defer sf.Close()
+
+ scanner := bufio.NewScanner(sf)
+ lineno := 0
+ for scanner.Scan() {
+ lineno++
+ line := strings.TrimSpace(scanner.Text())
+ if line == "" || line[0] == '#' {
+ continue
+ }
+
+ args := strings.Split(line, "|")
+ nargs := len(args)
+
+ switch args[0] {
+ case "addl", "addr":
+ expNargs := 9
+ if nargs != expNargs {
+ return nil, fmt.Errorf("%s:%d: need %d args instead of %d",
+ file, lineno, expNargs, nargs)
+ }
+ var parents []string
+ for i := 3; i <= 4; i++ {
+ if args[i] != "" {
+ parents = append(parents, args[i])
+ }
+ }
+
+ batchId, err := strconv.ParseUint(args[6], 10, 64)
+ if err != nil {
+ return nil, fmt.Errorf("%s:%d: invalid batchId: %s", file, lineno, args[6])
+ }
+ batchCount, err := strconv.ParseUint(args[7], 10, 64)
+ if err != nil {
+ return nil, fmt.Errorf("%s:%d: invalid batch count: %s", file, lineno, args[7])
+ }
+ del, err := strconv.ParseBool(args[8])
+ if err != nil {
+ return nil, fmt.Errorf("%s:%d: invalid deleted bit: %s", file, lineno, args[8])
+ }
+ cmd := syncCommand{
+ oid: args[1],
+ version: args[2],
+ parents: parents,
+ logrec: args[5],
+ batchId: batchId,
+ batchCount: batchCount,
+ deleted: del,
+ }
+ if args[0] == "addl" {
+ cmd.cmd = addLocal
+ } else {
+ cmd.cmd = addRemote
+ }
+ cmds = append(cmds, cmd)
+
+ case "linkl", "linkr":
+ expNargs := 6
+ if nargs != expNargs {
+ return nil, fmt.Errorf("%s:%d: need %d args instead of %d",
+ file, lineno, expNargs, nargs)
+ }
+
+ if args[3] == "" {
+ return nil, fmt.Errorf("%s:%d: parent version not specified", file, lineno)
+ }
+ if args[4] != "" {
+ return nil, fmt.Errorf("%s:%d: cannot specify a 2nd parent: %s",
+ file, lineno, args[4])
+ }
+
+ cmd := syncCommand{
+ oid: args[1],
+ version: args[2],
+ parents: []string{args[3]},
+ logrec: args[5],
+ }
+ if args[0] == "linkl" {
+ cmd.cmd = linkLocal
+ } else {
+ cmd.cmd = linkRemote
+ }
+ cmds = append(cmds, cmd)
+
+ case "genvec":
+ cmd := syncCommand{
+ cmd: genvec,
+ genVec: make(interfaces.GenVector),
+ }
+ for i := 1; i < len(args); i = i + 2 {
+ pfx := args[i]
+ genVec := make(interfaces.PrefixGenVector)
+ for _, elem := range strings.Split(args[i+1], ",") {
+ kv := strings.Split(elem, ":")
+ if len(kv) != 2 {
+ return nil, fmt.Errorf("%s:%d: invalid gen vector key/val: %s", file, lineno, elem)
+ }
+ dev, err := strconv.ParseUint(kv[0], 10, 64)
+ if err != nil {
+ return nil, fmt.Errorf("%s:%d: invalid devid: %s", file, lineno, args[i+1])
+ }
+ gen, err := strconv.ParseUint(kv[1], 10, 64)
+ if err != nil {
+ return nil, fmt.Errorf("%s:%d: invalid gen: %s", file, lineno, args[i+1])
+ }
+ genVec[dev] = gen
+ }
+ cmd.genVec[pfx] = genVec
+ }
+ cmds = append(cmds, cmd)
+
+ default:
+ return nil, fmt.Errorf("%s:%d: invalid operation: %s", file, lineno, args[0])
+ }
+ }
+
+ err = scanner.Err()
+ return cmds, err
+}
+
+// dagReplayCommands parses a sync test file and replays its commands, updating
+// the DAG structures associated with the sync service.
+func (s *syncService) dagReplayCommands(ctx *context.T, syncfile string) (graftMap, error) {
+ cmds, err := parseSyncCommands(syncfile)
+ if err != nil {
+ return nil, err
+ }
+
+ st := s.sv.St()
+ graft := newGraft()
+
+ for _, cmd := range cmds {
+ tx := st.NewTransaction()
+
+ switch cmd.cmd {
+ case addLocal:
+ err = s.addNode(ctx, tx, cmd.oid, cmd.version, cmd.logrec,
+ cmd.deleted, cmd.parents, NoBatchId, nil)
+ if err != nil {
+ return nil, fmt.Errorf("cannot add local node %s:%s: %v",
+ cmd.oid, cmd.version, err)
+ }
+
+ if err = moveHead(ctx, tx, cmd.oid, cmd.version); err != nil {
+ return nil, fmt.Errorf("cannot move head to %s:%s: %v",
+ cmd.oid, cmd.version, err)
+ }
+
+ case addRemote:
+ err = s.addNode(ctx, tx, cmd.oid, cmd.version, cmd.logrec,
+ cmd.deleted, cmd.parents, NoBatchId, graft)
+ if err != nil {
+ return nil, fmt.Errorf("cannot add remote node %s:%s: %v",
+ cmd.oid, cmd.version, err)
+ }
+
+ case linkLocal:
+ if err = s.addParent(ctx, tx, cmd.oid, cmd.version, cmd.parents[0], nil); err != nil {
+ return nil, fmt.Errorf("cannot add local parent %s to node %s:%s: %v",
+ cmd.parents[0], cmd.oid, cmd.version, err)
+ }
+
+ case linkRemote:
+ if err = s.addParent(ctx, tx, cmd.oid, cmd.version, cmd.parents[0], graft); err != nil {
+ return nil, fmt.Errorf("cannot add remote parent %s to node %s:%s: %v",
+ cmd.parents[0], cmd.oid, cmd.version, err)
+ }
+ }
+
+ tx.Commit()
+ }
+
+ return graft, nil
+}
+
+// dummyStream emulates a stream of log records received over RPC.
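+// It implements both the receive and the send halves of the GetDeltas call so
+// that tests can substitute it for the real RPC stream (see createReplayStream
+// and testInit).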
+type dummyStream struct {
+ l *list.List
+ entry interfaces.DeltaResp
+}
+
+func newStream() *dummyStream {
+ ds := &dummyStream{
+ l: list.New(),
+ }
+ return ds
+}
+
+func (ds *dummyStream) add(entry interfaces.DeltaResp) {
+ ds.l.PushBack(entry)
+}
+
+func (ds *dummyStream) Advance() bool {
+ if ds.l.Len() > 0 {
+ ds.entry = ds.l.Remove(ds.l.Front()).(interfaces.DeltaResp)
+ return true
+ }
+ return false
+}
+
+func (ds *dummyStream) Value() interfaces.DeltaResp {
+ return ds.entry
+}
+
+func (ds *dummyStream) RecvStream() interface {
+ Advance() bool
+ Value() interfaces.DeltaResp
+ Err() error
+} {
+ return ds
+}
+
+func (*dummyStream) Err() error { return nil }
+
+func (ds *dummyStream) Finish() error {
+ return nil
+}
+
+func (ds *dummyStream) Cancel() {
+}
+
+func (ds *dummyStream) SendStream() interface {
+ Send(item interfaces.DeltaReq) error
+ Close() error
+} {
+ return ds
+}
+
+func (ds *dummyStream) Send(item interfaces.DeltaReq) error {
+ return nil
+}
+
+func (ds *dummyStream) Close() error {
+ return nil
+}
+
+// replayLocalCommands replays local log records parsed from the input file.
+func replayLocalCommands(t *testing.T, s *mockService, syncfile string) {
+ cmds, err := parseSyncCommands(syncfile)
+ if err != nil {
+ t.Fatalf("parseSyncCommands failed with err %v", err)
+ }
+
+ tx := s.St().NewTransaction()
+ var pos uint64
+ for _, cmd := range cmds {
+ switch cmd.cmd {
+ case addLocal:
+ rec := &localLogRec{
+ Metadata: createMetadata(t, interfaces.NodeRec, cmd),
+ Pos: pos,
+ }
+ err = s.sync.processLocalLogRec(nil, tx, rec)
+ if err != nil {
+ t.Fatalf("processLocalLogRec failed with err %v", err)
+ }
+
+ // Add to Store.
+ err = watchable.PutVersion(nil, tx, []byte(rec.Metadata.ObjId), []byte(rec.Metadata.CurVers))
+ if err != nil {
+ t.Fatalf("PutVersion failed with err %v", err)
+ }
+ err = watchable.PutAtVersion(nil, tx, []byte(rec.Metadata.ObjId), []byte("abc"), []byte(rec.Metadata.CurVers))
+ if err != nil {
+ t.Fatalf("PutAtVersion failed with err %v", err)
+ }
+
+ default:
+ t.Fatalf("replayLocalCommands failed with unknown command %v", cmd)
+ }
+ pos++
+ }
+ if err := tx.Commit(); err != nil {
+ t.Fatalf("cannot commit local log records %s, err %v", syncfile, err)
+ }
+}
+
+// createReplayStream creates a dummy stream of log records parsed from the input file.
+func createReplayStream(t *testing.T, syncfile string) *dummyStream {
+ cmds, err := parseSyncCommands(syncfile)
+ if err != nil {
+ t.Fatalf("parseSyncCommands failed with err %v", err)
+ }
+
+ stream := newStream()
+ start := interfaces.DeltaRespStart{true}
+ stream.add(start)
+
+ for _, cmd := range cmds {
+ var ty byte
+ switch cmd.cmd {
+ case genvec:
+ gv := interfaces.DeltaRespRespVec{cmd.genVec}
+ stream.add(gv)
+ continue
+ case addRemote:
+ ty = interfaces.NodeRec
+ case linkRemote:
+ ty = interfaces.LinkRec
+ default:
+ t.Fatalf("createReplayStream unknown command %v", cmd)
+ }
+
+ var val string = "abc"
+ valbuf, err := vom.Encode(val)
+ if err != nil {
+ t.Fatalf("createReplayStream encode failed, err %v", err)
+ }
+
+ rec := interfaces.DeltaRespRec{interfaces.LogRec{
+ Metadata: createMetadata(t, ty, cmd),
+ Value: valbuf,
+ }}
+
+ stream.add(rec)
+ }
+ fin := interfaces.DeltaRespFinish{true}
+ stream.add(fin)
+ return stream
+}
+
+func createMetadata(t *testing.T, ty byte, cmd syncCommand) interfaces.LogRecMetadata {
+ id, gen, err := splitLogRecKey(nil, cmd.logrec)
+ if err != nil {
+ t.Fatalf("createReplayStream splitLogRecKey failed, key %s, err %v", cmd.logrec, gen)
+ }
+ m := interfaces.LogRecMetadata{
+ Id: id,
+ Gen: gen,
+ RecType: ty,
+ ObjId: util.JoinKeyParts(util.RowPrefix, cmd.oid),
+ CurVers: cmd.version,
+ Parents: cmd.parents,
+ UpdTime: constTime,
+ Delete: cmd.deleted,
+ BatchId: cmd.batchId,
+ BatchCount: cmd.batchCount,
+ }
+ return m
+}
diff --git a/services/syncbase/vsync/responder.go b/services/syncbase/vsync/responder.go
new file mode 100644
index 0000000..c417eca
--- /dev/null
+++ b/services/syncbase/vsync/responder.go
@@ -0,0 +1,516 @@
+// Copyright 2015 The Vanadium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package vsync
+
+import (
+ "container/heap"
+ "sort"
+ "strings"
+
+ wire "v.io/syncbase/v23/services/syncbase/nosql"
+ "v.io/syncbase/x/ref/services/syncbase/server/interfaces"
+ "v.io/syncbase/x/ref/services/syncbase/server/watchable"
+ "v.io/syncbase/x/ref/services/syncbase/store"
+ "v.io/v23/context"
+ "v.io/v23/verror"
+ "v.io/x/lib/vlog"
+)
+
+// GetDeltas implements the responder side of the GetDeltas RPC.
+func (s *syncService) GetDeltas(ctx *context.T, call interfaces.SyncGetDeltasServerCall, initiator string) error {
+ vlog.VI(2).Infof("sync: GetDeltas: begin: from initiator %s", initiator)
+ defer vlog.VI(2).Infof("sync: GetDeltas: end: from initiator %s", initiator)
+
+ recvr := call.RecvStream()
+ for recvr.Advance() {
+ req := recvr.Value()
+ // Errors are ignored here: if one Database fails for any reason, it is
+ // fine to continue to the next one. In fact, the failure may be
+ // legitimate; for example, the responder is no longer part of the
+ // requested SyncGroups, the app/db has been locally deleted, or a
+ // permission change has denied access.
+ rSt := newResponderState(ctx, call, s, req, initiator)
+ rSt.sendDeltasPerDatabase(ctx)
+ }
+
+ // TODO(hpucha): Is there a need to call finish or some such?
+ return recvr.Err()
+}
+
+// responderState is state accumulated per Database by the responder during an
+// initiation round.
+type responderState struct {
+ req interfaces.DeltaReq
+ call interfaces.SyncGetDeltasServerCall // Stream handle for the GetDeltas RPC.
+ initiator string
+ errState error // Captures the error from the first two phases of the responder.
+ sync *syncService
+ st store.Store // Store handle to the Database.
+ diff genRangeVector
+ outVec interfaces.GenVector
+}
+
+func newResponderState(ctx *context.T, call interfaces.SyncGetDeltasServerCall, sync *syncService, req interfaces.DeltaReq, initiator string) *responderState {
+ rSt := &responderState{call: call, sync: sync, req: req, initiator: initiator}
+ return rSt
+}
+
+// sendDeltasPerDatabase sends to an initiator all the missing generations
+// corresponding to the prefixes requested for this Database, and a genvector
+// summarizing the knowledge transferred from the responder to the
+// initiator. This happens in three phases:
+//
+// In the first phase, the initiator is checked against the SyncGroup ACLs of
+// all the SyncGroups it is requesting, and only those prefixes that belong to
+// allowed SyncGroups are carried forward.
+//
+// In the second phase, for a given set of nested prefixes from the initiator,
+// the shortest prefix in that set is extracted. The initiator's prefix
+// genvector for this shortest prefix represents the lower bound on its
+// knowledge for the entire set of nested prefixes. This prefix genvector
+// (representing the lower bound) is diffed with all the responder prefix
+// genvectors corresponding to same or deeper prefixes compared to the initiator
+// prefix. This diff produces a bound on the missing knowledge. For example, say
+// the initiator is interested in prefixes {foo, foobar}, where each prefix is
+// associated with a prefix genvector. Since the initiator strictly has as much
+// or more knowledge for prefix "foobar" as it has for prefix "foo", "foo"'s
+// prefix genvector is chosen as the lower bound for the initiator's
+// knowledge. Similarly, say the responder has knowledge on prefixes {f,
+// foobarX, foobarY, bar}. The responder diffs the prefix genvectors for
+// prefixes f, foobarX and foobarY with the initiator's prefix genvector to
+// compute a bound on the missing generations (all the responder's prefixes that
+// match "foo"; note that since the responder doesn't have a prefix genvector at
+// "foo", its knowledge at "f" applies to "foo").
+//
+// Since the second phase outputs an aggressive calculation of missing
+// generations containing more generation entries than strictly needed by the
+// initiator, in the third phase, each missing generation is sent to the
+// initiator only if the initiator is eligible for it and is not aware of
+// it. The generations are sent to the initiator in the same order as the
+// responder learned them so that the initiator can reconstruct the DAG for the
+// objects by learning older nodes first.
+func (rSt *responderState) sendDeltasPerDatabase(ctx *context.T) error {
+ // TODO(rdaoud): for such vlog.VI() calls where the function name is
+ // embedded, consider using a helper function to auto-fill it instead
+ // (see http://goo.gl/mEa4L0) but only incur that overhead when the
+ // logging level specified is enabled.
+ vlog.VI(3).Infof("sync: sendDeltasPerDatabase: %s, %s: sgids %v, genvec %v",
+ rSt.req.AppName, rSt.req.DbName, rSt.req.SgIds, rSt.req.InitVec)
+
+ // Phase 1 of sendDeltas: Authorize the initiator and respond to the
+ // caller only for the SyncGroups that allow access.
+ rSt.authorizeAndFilterSyncGroups(ctx)
+
+ // Phase 2 of sendDeltas: diff contains the bound on the
+ // generations missing from the initiator per device.
+ rSt.computeDeltaBound(ctx)
+
+ // Phase 3 of sendDeltas: Process the diff, filtering out records that
+ // are not needed, and send the remainder on the wire ordered.
+ return rSt.filterAndSendDeltas(ctx)
+}
+
+// authorizeAndFilterSyncGroups authorizes the initiator against the requested
+// SyncGroups and filters the initiator's prefixes to only include those from
+// allowed SyncGroups (phase 1 of sendDeltas).
+func (rSt *responderState) authorizeAndFilterSyncGroups(ctx *context.T) {
+ rSt.st, rSt.errState = rSt.sync.getDbStore(ctx, nil, rSt.req.AppName, rSt.req.DbName)
+ if rSt.errState != nil {
+ return
+ }
+
+ allowedPfxs := make(map[string]struct{})
+ for sgid := range rSt.req.SgIds {
+ // Check permissions for the SyncGroup.
+ var sg *interfaces.SyncGroup
+ sg, rSt.errState = getSyncGroupById(ctx, rSt.st, sgid)
+ if rSt.errState != nil {
+ return
+ }
+ rSt.errState = authorize(ctx, rSt.call.Security(), sg)
+ if verror.ErrorID(rSt.errState) == verror.ErrNoAccess.ID {
+ continue
+ } else if rSt.errState != nil {
+ return
+ }
+
+ for _, p := range sg.Spec.Prefixes {
+ allowedPfxs[p] = struct{}{}
+ }
+
+ // Add the initiator to the SyncGroup membership if not already
+ // in it. It is a temporary solution until SyncGroup metadata
+ // is synchronized peer to peer.
+ // TODO(rdaoud): remove this when SyncGroups are synced.
+ rSt.addInitiatorToSyncGroup(ctx, sgid)
+ }
+
+ // Filter the initiator's prefixes to what is allowed.
+ for pfx := range rSt.req.InitVec {
+ if _, ok := allowedPfxs[pfx]; ok {
+ continue
+ }
+ allowed := false
+ for p := range allowedPfxs {
+ if strings.HasPrefix(pfx, p) {
+ allowed = true
+ }
+ }
+
+ if !allowed {
+ delete(rSt.req.InitVec, pfx)
+ }
+ }
+ return
+}
+
+// addInitiatorToSyncGroup adds the request initiator to the membership of the
+// given SyncGroup if the initiator is not already a member. It is a temporary
+// solution until SyncGroup metadata starts being synchronized, at which time
+// peers will learn of new members through mutations of the SyncGroup metadata
+// by the SyncGroup administrators.
+// Note: the joiner metadata is fake because the responder does not have it.
+func (rSt *responderState) addInitiatorToSyncGroup(ctx *context.T, gid interfaces.GroupId) {
+ if rSt.initiator == "" {
+ return
+ }
+
+ err := store.RunInTransaction(rSt.st, func(tx store.Transaction) error {
+ sg, err := getSyncGroupById(ctx, tx, gid)
+ if err != nil {
+ return err
+ }
+
+ // If the initiator is already a member of the SyncGroup, abort
+ // the transaction with a special error code.
+ if _, ok := sg.Joiners[rSt.initiator]; ok {
+ return verror.New(verror.ErrExist, ctx, "member already in SyncGroup")
+ }
+
+ vlog.VI(4).Infof("sync: addInitiatorToSyncGroup: add %s to sgid %d", rSt.initiator, gid)
+ sg.Joiners[rSt.initiator] = wire.SyncGroupMemberInfo{SyncPriority: 1}
+ return setSGDataEntry(ctx, tx, gid, sg)
+ })
+
+ if err != nil && verror.ErrorID(err) != verror.ErrExist.ID {
+ vlog.Errorf("sync: addInitiatorToSyncGroup: initiator %s, sgid %d: %v", rSt.initiator, gid, err)
+ }
+}
+
+// computeDeltaBound computes the bound on missing generations across all
+// requested prefixes (phase 2 of sendDeltas).
+func (rSt *responderState) computeDeltaBound(ctx *context.T) {
+ // Check error from phase 1.
+ if rSt.errState != nil {
+ return
+ }
+
+ if len(rSt.req.InitVec) == 0 {
+ rSt.errState = verror.New(verror.ErrInternal, ctx, "empty initiator generation vector")
+ return
+ }
+
+ var respVec interfaces.GenVector
+ var respGen uint64
+ respVec, respGen, rSt.errState = rSt.sync.copyDbGenInfo(ctx, rSt.req.AppName, rSt.req.DbName)
+ if rSt.errState != nil {
+ return
+ }
+ respPfxs := extractAndSortPrefixes(respVec)
+ initPfxs := extractAndSortPrefixes(rSt.req.InitVec)
+
+ rSt.outVec = make(interfaces.GenVector)
+ rSt.diff = make(genRangeVector)
+ pfx := initPfxs[0]
+
+ for _, p := range initPfxs {
+ if strings.HasPrefix(p, pfx) && p != pfx {
+ continue
+ }
+
+ // Process this prefix as this is the start of a new set of
+ // nested prefixes.
+ pfx = p
+
+ // Lower bound on initiator's knowledge for this prefix set.
+ initpgv := rSt.req.InitVec[pfx]
+
+ // Find the relevant responder prefixes and add the corresponding knowledge.
+ var respgv interfaces.PrefixGenVector
+ var rpStart string
+ for _, rp := range respPfxs {
+ if !strings.HasPrefix(rp, pfx) && !strings.HasPrefix(pfx, rp) {
+ // No relationship with pfx.
+ continue
+ }
+
+ if strings.HasPrefix(pfx, rp) {
+ // If rp is a prefix of pfx, remember it because
+ // it may be a potential starting point for the
+ // responder's knowledge. The actual starting
+ // point is the deepest prefix where rp is a
+ // prefix of pfx.
+ //
+ // Say the initiator is looking for "foo", and
+ // the responder has knowledge for "f" and "fo",
+ // the responder's starting point will be the
+ // prefix genvector for "fo". Similarly, if the
+ // responder has knowledge for "foo", the
+ // starting point will be the prefix genvector
+ // for "foo".
+ rpStart = rp
+ } else {
+ // If pfx is a prefix of rp, this knowledge must
+ // be definitely sent to the initiator. Diff the
+ // prefix genvectors to adjust the delta bound and
+ // include in outVec.
+ respgv = respVec[rp]
+ rSt.diffPrefixGenVectors(respgv, initpgv)
+ rSt.outVec[rp] = respgv
+ }
+ }
+
+ // Deal with the starting point.
+ if rpStart == "" {
+ // No matching prefixes for pfx were found.
+ respgv = make(interfaces.PrefixGenVector)
+ respgv[rSt.sync.id] = respGen
+ } else {
+ respgv = respVec[rpStart]
+ }
+ rSt.diffPrefixGenVectors(respgv, initpgv)
+ rSt.outVec[pfx] = respgv
+ }
+
+ vlog.VI(3).Infof("sync: computeDeltaBound: %s, %s: diff %v, outvec %v",
+ rSt.req.AppName, rSt.req.DbName, rSt.diff, rSt.outVec)
+ return
+}
+
+// filterAndSendDeltas filters the computed delta to remove records already
+// known by the initiator, and sends the resulting records to the initiator
+// (phase 3 of sendDeltas).
+func (rSt *responderState) filterAndSendDeltas(ctx *context.T) error {
+ // Always send a start and finish response so that the initiator can
+ // move on to the next Database.
+ //
+ // TODO(hpucha): Although ok for now to call SendStream once per
+ // Database, would like to make this implementation agnostic.
+ sender := rSt.call.SendStream()
+ sender.Send(interfaces.DeltaRespStart{true})
+ defer sender.Send(interfaces.DeltaRespFinish{true})
+
+ // Check error from phase 2.
+ if rSt.errState != nil {
+ return rSt.errState
+ }
+
+ // First two phases were successful. So now on to phase 3. We now visit
+ // every log record in the generation range as obtained from phase 1 in
+ // their log order. We use a heap to incrementally sort the log records
+ // as per their position in the log.
+ //
+ // Init the min heap, one entry per device in the diff.
+ mh := make(minHeap, 0, len(rSt.diff))
+ for dev, r := range rSt.diff {
+ r.cur = r.min
+ rec, err := getNextLogRec(ctx, rSt.st, dev, r)
+ if err != nil {
+ return err
+ }
+ if rec != nil {
+ mh = append(mh, rec)
+ } else {
+ delete(rSt.diff, dev)
+ }
+ }
+ heap.Init(&mh)
+
+ // Process the log records in order.
+ initPfxs := extractAndSortPrefixes(rSt.req.InitVec)
+ for mh.Len() > 0 {
+ rec := heap.Pop(&mh).(*localLogRec)
+
+ if !filterLogRec(rec, rSt.req.InitVec, initPfxs) {
+ // Send on the wire.
+ wireRec, err := makeWireLogRec(ctx, rSt.st, rec)
+ if err != nil {
+ return err
+ }
+ sender.Send(interfaces.DeltaRespRec{*wireRec})
+ }
+
+ // Add a new record from the same device if not done.
+ dev := rec.Metadata.Id
+ rec, err := getNextLogRec(ctx, rSt.st, dev, rSt.diff[dev])
+ if err != nil {
+ return err
+ }
+ if rec != nil {
+ heap.Push(&mh, rec)
+ } else {
+ delete(rSt.diff, dev)
+ }
+ }
+
+ sender.Send(interfaces.DeltaRespRespVec{rSt.outVec})
+ return nil
+}
+
+// genRange represents a range of generations (min and max inclusive).
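+// cur tracks the next generation to fetch while iterating over the range (see
+// getNextLogRec).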
+type genRange struct {
+ min uint64
+ max uint64
+ cur uint64
+}
+
+type genRangeVector map[uint64]*genRange
+
+// diffPrefixGenVectors diffs two generation vectors, belonging to the responder
+// and the initiator, and updates the range of generations per device known to
+// the responder but not known to the initiator. "gens" (generation range) is
+// passed in as an input argument so that it can be incrementally updated as the
+// range of missing generations grows when different responder prefix genvectors
+// are used to compute the diff.
+//
+// For example: Generation vector for responder is say RVec = {A:10, B:5, C:1},
+// Generation vector for initiator is say IVec = {A:5, B:10, D:2}. Diffing these
+// two vectors returns: {A:[6-10], C:[1-1]}.
+//
+// TODO(hpucha): Add reclaimVec for GCing.
+func (rSt *responderState) diffPrefixGenVectors(respPVec, initPVec interfaces.PrefixGenVector) {
+ // Compute missing generations for devices that are in both initiator's and responder's vectors.
+ for devid, gen := range initPVec {
+ rgen, ok := respPVec[devid]
+ if ok {
+ updateDevRange(devid, rgen, gen, rSt.diff)
+ }
+ }
+
+ // Compute missing generations for devices not in initiator's vector but in responder's vector.
+ for devid, rgen := range respPVec {
+ if _, ok := initPVec[devid]; !ok {
+ updateDevRange(devid, rgen, 0, rSt.diff)
+ }
+ }
+}
+
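+// updateDevRange extends the range of generations missing from the initiator
+// for the given device. For illustration (hypothetical values): if the
+// initiator knows up to gen 5 and the responder up to gen 10 for a device, the
+// range becomes [6, 10]; a later diff showing responder gen 12 widens it to
+// [6, 12].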
+func updateDevRange(devid, rgen, gen uint64, gens genRangeVector) {
+ if gen < rgen {
+ // Need to include all generations in the interval [gen+1,rgen], gen+1 and rgen inclusive.
+ if r, ok := gens[devid]; !ok {
+ gens[devid] = &genRange{min: gen + 1, max: rgen}
+ } else {
+ if gen+1 < r.min {
+ r.min = gen + 1
+ }
+ if rgen > r.max {
+ r.max = rgen
+ }
+ }
+ }
+}
+
+func extractAndSortPrefixes(vec interfaces.GenVector) []string {
+ pfxs := make([]string, len(vec))
+ i := 0
+ for p := range vec {
+ pfxs[i] = p
+ i++
+ }
+ sort.Strings(pfxs)
+ return pfxs
+}
+
+// TODO(hpucha): This can be optimized using a scan instead of "gets" in a for
+// loop.
+func getNextLogRec(ctx *context.T, st store.Store, dev uint64, r *genRange) (*localLogRec, error) {
+ for i := r.cur; i <= r.max; i++ {
+ rec, err := getLogRec(ctx, st, dev, i)
+ if err == nil {
+ r.cur = i + 1
+ return rec, nil
+ }
+ if verror.ErrorID(err) != verror.ErrNoExist.ID {
+ return nil, err
+ }
+ }
+ return nil, nil
+}
+
+// Note: initPfxs is sorted.
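+// For illustration (hypothetical values): with initVec {"foo": {11: 5}}, a
+// record from device 11 for row key "foo1" is filtered if its gen is <= 5 and
+// sent otherwise; a record whose key matches no initiator prefix is always
+// filtered.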
+func filterLogRec(rec *localLogRec, initVec interfaces.GenVector, initPfxs []string) bool {
+ // The key starts with one of the store's reserved prefixes for managed
+ // namespaces (e.g. $row, $perms). Remove that prefix before comparing
+ // it with the SyncGroup prefixes, which are defined by the application.
+ key := extractAppKey(rec.Metadata.ObjId)
+
+ filter := true
+ var maxGen uint64
+ for _, p := range initPfxs {
+ if strings.HasPrefix(key, p) {
+ // Do not filter. Initiator is interested in this
+ // prefix.
+ filter = false
+
+ // Track if the initiator knows of this record.
+ gen := initVec[p][rec.Metadata.Id]
+ if maxGen < gen {
+ maxGen = gen
+ }
+ }
+ }
+
+ // Filter this record if the initiator already has it.
+ if maxGen >= rec.Metadata.Gen {
+ filter = true
+ }
+
+ return filter
+}
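+
+// As a small sketch (not lifted from the tests verbatim): with initPfxs =
+// ["foo"], a record from device 11 whose app key starts with "foo" is kept
+// when its Gen exceeds the initiator's known generation (5 here) and filtered
+// otherwise:
+//
+//    initVec := interfaces.GenVector{"foo": interfaces.PrefixGenVector{11: 5}}
+//    rec := &localLogRec{Metadata: interfaces.LogRecMetadata{
+//        Id: 11, Gen: 7, ObjId: makeRowKey("foobar~k1")}}
+//    keep := !filterLogRec(rec, initVec, []string{"foo"}) // true; false if Gen were 4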
+
+// makeWireLogRec builds the wire-format log record corresponding to a given
+// local sync log record.
+func makeWireLogRec(ctx *context.T, st store.Store, rec *localLogRec) (*interfaces.LogRec, error) {
+ // Get the object value at the required version.
+ key, version := rec.Metadata.ObjId, rec.Metadata.CurVers
+ var value []byte
+ if !rec.Metadata.Delete {
+ var err error
+ value, err = watchable.GetAtVersion(ctx, st, []byte(key), nil, []byte(version))
+ if err != nil {
+ return nil, err
+ }
+ }
+
+ wireRec := &interfaces.LogRec{Metadata: rec.Metadata, Value: value}
+ return wireRec, nil
+}
+
+// minHeap implements heap.Interface and holds local log records, ordered by
+// their local log position (Pos).
+type minHeap []*localLogRec
+
+func (mh minHeap) Len() int { return len(mh) }
+
+func (mh minHeap) Less(i, j int) bool {
+ return mh[i].Pos < mh[j].Pos
+}
+
+func (mh minHeap) Swap(i, j int) {
+ mh[i], mh[j] = mh[j], mh[i]
+}
+
+func (mh *minHeap) Push(x interface{}) {
+ item := x.(*localLogRec)
+ *mh = append(*mh, item)
+}
+
+func (mh *minHeap) Pop() interface{} {
+ old := *mh
+ n := len(old)
+ item := old[n-1]
+ *mh = old[0 : n-1]
+ return item
+}
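+
+// A minimal usage sketch: records pushed in any order pop back in increasing
+// Pos order, which is how filterAndSendDeltas interleaves the per-device log
+// streams above.
+//
+//    mh := minHeap{}
+//    heap.Push(&mh, &localLogRec{Pos: 7})
+//    heap.Push(&mh, &localLogRec{Pos: 3})
+//    first := heap.Pop(&mh).(*localLogRec) // first.Pos == 3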
diff --git a/services/syncbase/vsync/responder_test.go b/services/syncbase/vsync/responder_test.go
new file mode 100644
index 0000000..986f9b3
--- /dev/null
+++ b/services/syncbase/vsync/responder_test.go
@@ -0,0 +1,520 @@
+// Copyright 2015 The Vanadium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package vsync
+
+import (
+ "fmt"
+ "math/rand"
+ "reflect"
+ "testing"
+ "time"
+
+ "v.io/syncbase/x/ref/services/syncbase/server/interfaces"
+ "v.io/syncbase/x/ref/services/syncbase/server/watchable"
+ "v.io/v23/naming"
+ "v.io/v23/rpc"
+ "v.io/v23/security"
+ _ "v.io/x/ref/runtime/factories/generic"
+)
+
+// TestDiffPrefixGenVectors tests diffing prefix gen vectors.
+func TestDiffPrefixGenVectors(t *testing.T) {
+ svc := createService(t)
+ defer destroyService(t, svc)
+ s := svc.sync
+ s.id = 10 // Responder. Initiator is id 11.
+
+ tests := []struct {
+ respPVec, initPVec interfaces.PrefixGenVector
+ genDiffIn genRangeVector
+ genDiffWant genRangeVector
+ }{
+ { // responder and initiator are at identical vectors.
+ respPVec: interfaces.PrefixGenVector{10: 1, 11: 10, 12: 20, 13: 2},
+ initPVec: interfaces.PrefixGenVector{10: 1, 11: 10, 12: 20, 13: 2},
+ genDiffIn: make(genRangeVector),
+ },
+ { // responder and initiator are at identical vectors.
+ respPVec: interfaces.PrefixGenVector{10: 0},
+ initPVec: interfaces.PrefixGenVector{10: 0},
+ genDiffIn: make(genRangeVector),
+ },
+ { // responder has no updates.
+ respPVec: interfaces.PrefixGenVector{10: 0},
+ initPVec: interfaces.PrefixGenVector{10: 5, 11: 10, 12: 20, 13: 8},
+ genDiffIn: make(genRangeVector),
+ },
+ { // responder and initiator have no updates.
+ respPVec: interfaces.PrefixGenVector{10: 0},
+ initPVec: interfaces.PrefixGenVector{11: 0},
+ genDiffIn: make(genRangeVector),
+ },
+ { // responder is staler than initiator.
+ respPVec: interfaces.PrefixGenVector{10: 1, 11: 10, 12: 20, 13: 2},
+ initPVec: interfaces.PrefixGenVector{10: 1, 11: 10, 12: 20, 13: 8, 14: 5},
+ genDiffIn: make(genRangeVector),
+ },
+ { // responder is more up-to-date than initiator for local updates.
+ respPVec: interfaces.PrefixGenVector{10: 5, 11: 10, 12: 20, 13: 2},
+ initPVec: interfaces.PrefixGenVector{10: 1, 11: 10, 12: 20, 13: 2},
+ genDiffIn: make(genRangeVector),
+ genDiffWant: genRangeVector{10: &genRange{min: 2, max: 5}},
+ },
+ { // responder is fresher than initiator for local updates and one device.
+ respPVec: interfaces.PrefixGenVector{10: 5, 11: 10, 12: 22, 13: 2},
+ initPVec: interfaces.PrefixGenVector{10: 1, 11: 10, 12: 20, 13: 2, 14: 40},
+ genDiffIn: make(genRangeVector),
+ genDiffWant: genRangeVector{
+ 10: &genRange{min: 2, max: 5},
+ 12: &genRange{min: 21, max: 22},
+ },
+ },
+ { // responder is fresher than initiator in all but one device.
+ respPVec: interfaces.PrefixGenVector{10: 1, 11: 2, 12: 3, 13: 4},
+ initPVec: interfaces.PrefixGenVector{10: 0, 11: 2, 12: 0},
+ genDiffIn: make(genRangeVector),
+ genDiffWant: genRangeVector{
+ 10: &genRange{min: 1, max: 1},
+ 12: &genRange{min: 1, max: 3},
+ 13: &genRange{min: 1, max: 4},
+ },
+ },
+ { // initiator has no updates.
+ respPVec: interfaces.PrefixGenVector{10: 1, 11: 2, 12: 3, 13: 4},
+ initPVec: interfaces.PrefixGenVector{},
+ genDiffIn: make(genRangeVector),
+ genDiffWant: genRangeVector{
+ 10: &genRange{min: 1, max: 1},
+ 11: &genRange{min: 1, max: 2},
+ 12: &genRange{min: 1, max: 3},
+ 13: &genRange{min: 1, max: 4},
+ },
+ },
+ { // initiator has no updates, pre-existing diff.
+ respPVec: interfaces.PrefixGenVector{10: 1, 11: 2, 12: 3, 13: 4},
+ initPVec: interfaces.PrefixGenVector{13: 1},
+ genDiffIn: genRangeVector{
+ 10: &genRange{min: 5, max: 20},
+ 13: &genRange{min: 1, max: 3},
+ },
+ genDiffWant: genRangeVector{
+ 10: &genRange{min: 1, max: 20},
+ 11: &genRange{min: 1, max: 2},
+ 12: &genRange{min: 1, max: 3},
+ 13: &genRange{min: 1, max: 4},
+ },
+ },
+ }
+
+ for _, test := range tests {
+ want := test.genDiffWant
+ got := test.genDiffIn
+ rSt := newResponderState(nil, nil, s, interfaces.DeltaReq{}, "fakeInitiator")
+ rSt.diff = got
+ rSt.diffPrefixGenVectors(test.respPVec, test.initPVec)
+ checkEqualDevRanges(t, got, want)
+ }
+}
+
+// TestSendDeltas tests the computation of the delta bound (computeDeltaBound)
+// and whether the log records on the wire are correctly ordered (phases 2 and 3 of
+// SendDeltas).
+func TestSendDeltas(t *testing.T) {
+ appName := "mockapp"
+ dbName := "mockdb"
+
+ tests := []struct {
+ respVec, initVec, outVec interfaces.GenVector
+ respGen uint64
+ genDiff genRangeVector
+ keyPfxs []string
+ }{
+ { // Identical prefixes, local and remote updates.
+ respVec: interfaces.GenVector{
+ "foo": interfaces.PrefixGenVector{12: 8},
+ "foobar": interfaces.PrefixGenVector{12: 10},
+ },
+ initVec: interfaces.GenVector{
+ "foo": interfaces.PrefixGenVector{11: 5},
+ "foobar": interfaces.PrefixGenVector{11: 5},
+ },
+ respGen: 5,
+ outVec: interfaces.GenVector{
+ "foo": interfaces.PrefixGenVector{10: 5, 12: 8},
+ "foobar": interfaces.PrefixGenVector{10: 5, 12: 10},
+ },
+ genDiff: genRangeVector{
+ 10: &genRange{min: 1, max: 5},
+ 12: &genRange{min: 1, max: 10},
+ },
+ keyPfxs: []string{"baz", "wombat", "f", "foo", "foobar", ""},
+ },
+ { // Identical prefixes, local and remote updates.
+ respVec: interfaces.GenVector{
+ "bar": interfaces.PrefixGenVector{12: 20},
+ "foo": interfaces.PrefixGenVector{12: 8},
+ "foobar": interfaces.PrefixGenVector{12: 10},
+ },
+ initVec: interfaces.GenVector{
+ "foo": interfaces.PrefixGenVector{11: 5},
+ "foobar": interfaces.PrefixGenVector{11: 5, 12: 10},
+ "bar": interfaces.PrefixGenVector{10: 5, 11: 5, 12: 5},
+ },
+ respGen: 5,
+ outVec: interfaces.GenVector{
+ "foo": interfaces.PrefixGenVector{10: 5, 12: 8},
+ "foobar": interfaces.PrefixGenVector{10: 5, 12: 10},
+ "bar": interfaces.PrefixGenVector{10: 5, 12: 20},
+ },
+ genDiff: genRangeVector{
+ 10: &genRange{min: 1, max: 5},
+ 12: &genRange{min: 1, max: 20},
+ },
+ keyPfxs: []string{"baz", "wombat", "f", "foo", "foobar", "bar", "barbaz", ""},
+ },
+ { // Non-identical prefixes, local only updates.
+ initVec: interfaces.GenVector{
+ "foo": interfaces.PrefixGenVector{11: 5},
+ "foobar": interfaces.PrefixGenVector{11: 5},
+ },
+ respGen: 5,
+ outVec: interfaces.GenVector{
+ "foo": interfaces.PrefixGenVector{10: 5},
+ },
+ genDiff: genRangeVector{
+ 10: &genRange{min: 1, max: 5},
+ },
+ keyPfxs: []string{"baz", "wombat", "f", "foo", "foobar", "", "fo", "fooxyz"},
+ },
+ { // Non-identical prefixes, local and remote updates.
+ respVec: interfaces.GenVector{
+ "f": interfaces.PrefixGenVector{12: 5, 13: 5},
+ "foo": interfaces.PrefixGenVector{12: 10, 13: 10},
+ "foobar": interfaces.PrefixGenVector{12: 20, 13: 20},
+ },
+ initVec: interfaces.GenVector{
+ "foo": interfaces.PrefixGenVector{11: 5, 12: 1},
+ },
+ respGen: 5,
+ outVec: interfaces.GenVector{
+ "foo": interfaces.PrefixGenVector{10: 5, 12: 10, 13: 10},
+ "foobar": interfaces.PrefixGenVector{10: 5, 12: 20, 13: 20},
+ },
+ genDiff: genRangeVector{
+ 10: &genRange{min: 1, max: 5},
+ 12: &genRange{min: 2, max: 20},
+ 13: &genRange{min: 1, max: 20},
+ },
+ keyPfxs: []string{"baz", "wombat", "f", "foo", "foobar", "", "fo", "fooxyz"},
+ },
+ { // Non-identical prefixes, local and remote updates.
+ respVec: interfaces.GenVector{
+ "foobar": interfaces.PrefixGenVector{12: 20, 13: 20},
+ },
+ initVec: interfaces.GenVector{
+ "foo": interfaces.PrefixGenVector{11: 5, 12: 1},
+ },
+ respGen: 5,
+ outVec: interfaces.GenVector{
+ "foo": interfaces.PrefixGenVector{10: 5},
+ "foobar": interfaces.PrefixGenVector{10: 5, 12: 20, 13: 20},
+ },
+ genDiff: genRangeVector{
+ 10: &genRange{min: 1, max: 5},
+ 12: &genRange{min: 2, max: 20},
+ 13: &genRange{min: 1, max: 20},
+ },
+ keyPfxs: []string{"baz", "wombat", "f", "foo", "foobar", "", "fo", "fooxyz"},
+ },
+ { // Non-identical prefixes, local and remote updates.
+ respVec: interfaces.GenVector{
+ "f": interfaces.PrefixGenVector{12: 20, 13: 20},
+ },
+ initVec: interfaces.GenVector{
+ "foo": interfaces.PrefixGenVector{11: 5, 12: 1},
+ },
+ respGen: 5,
+ outVec: interfaces.GenVector{
+ "foo": interfaces.PrefixGenVector{10: 5, 12: 20, 13: 20},
+ },
+ genDiff: genRangeVector{
+ 10: &genRange{min: 1, max: 5},
+ 12: &genRange{min: 2, max: 20},
+ 13: &genRange{min: 1, max: 20},
+ },
+ keyPfxs: []string{"baz", "wombat", "f", "foo", "foobar", "", "fo", "fooxyz"},
+ },
+ { // Non-identical interleaving prefixes.
+ respVec: interfaces.GenVector{
+ "f": interfaces.PrefixGenVector{12: 20, 13: 10},
+ "foo": interfaces.PrefixGenVector{12: 30, 13: 20},
+ "foobar": interfaces.PrefixGenVector{12: 40, 13: 30},
+ },
+ initVec: interfaces.GenVector{
+ "fo": interfaces.PrefixGenVector{11: 5, 12: 1},
+ "foob": interfaces.PrefixGenVector{11: 5, 12: 10},
+ "foobarxyz": interfaces.PrefixGenVector{11: 5, 12: 20},
+ },
+ respGen: 5,
+ outVec: interfaces.GenVector{
+ "fo": interfaces.PrefixGenVector{10: 5, 12: 20, 13: 10},
+ "foo": interfaces.PrefixGenVector{10: 5, 12: 30, 13: 20},
+ "foobar": interfaces.PrefixGenVector{10: 5, 12: 40, 13: 30},
+ },
+ genDiff: genRangeVector{
+ 10: &genRange{min: 1, max: 5},
+ 12: &genRange{min: 2, max: 40},
+ 13: &genRange{min: 1, max: 30},
+ },
+ keyPfxs: []string{"baz", "wombat", "f", "foo", "foobar", "", "fo", "foob", "foobarxyz", "fooxyz"},
+ },
+ { // Non-identical interleaving prefixes.
+ respVec: interfaces.GenVector{
+ "fo": interfaces.PrefixGenVector{12: 20, 13: 10},
+ "foob": interfaces.PrefixGenVector{12: 30, 13: 20},
+ "foobarxyz": interfaces.PrefixGenVector{12: 40, 13: 30},
+ },
+ initVec: interfaces.GenVector{
+ "f": interfaces.PrefixGenVector{11: 5, 12: 1},
+ "foo": interfaces.PrefixGenVector{11: 5, 12: 10},
+ "foobar": interfaces.PrefixGenVector{11: 5, 12: 20},
+ },
+ respGen: 5,
+ outVec: interfaces.GenVector{
+ "f": interfaces.PrefixGenVector{10: 5},
+ "fo": interfaces.PrefixGenVector{10: 5, 12: 20, 13: 10},
+ "foob": interfaces.PrefixGenVector{10: 5, 12: 30, 13: 20},
+ "foobarxyz": interfaces.PrefixGenVector{10: 5, 12: 40, 13: 30},
+ },
+ genDiff: genRangeVector{
+ 10: &genRange{min: 1, max: 5},
+ 12: &genRange{min: 2, max: 40},
+ 13: &genRange{min: 1, max: 30},
+ },
+ keyPfxs: []string{"baz", "wombat", "f", "foo", "foobar", "", "fo", "foob", "foobarxyz", "fooxyz"},
+ },
+ { // Non-identical sibling prefixes.
+ respVec: interfaces.GenVector{
+ "foo": interfaces.PrefixGenVector{12: 20, 13: 10},
+ "foobarabc": interfaces.PrefixGenVector{12: 40, 13: 30},
+ "foobarxyz": interfaces.PrefixGenVector{12: 30, 13: 20},
+ },
+ initVec: interfaces.GenVector{
+ "foo": interfaces.PrefixGenVector{11: 5, 12: 1},
+ },
+ respGen: 5,
+ outVec: interfaces.GenVector{
+ "foo": interfaces.PrefixGenVector{10: 5, 12: 20, 13: 10},
+ "foobarabc": interfaces.PrefixGenVector{10: 5, 12: 40, 13: 30},
+ "foobarxyz": interfaces.PrefixGenVector{10: 5, 12: 30, 13: 20},
+ },
+ genDiff: genRangeVector{
+ 10: &genRange{min: 1, max: 5},
+ 12: &genRange{min: 2, max: 40},
+ 13: &genRange{min: 1, max: 30},
+ },
+ keyPfxs: []string{"baz", "wombat", "f", "foo", "foobar", "", "foobarabc", "foobarxyz", "foobar123", "fooxyz"},
+ },
+ { // Non-identical prefixes, local and remote updates.
+ respVec: interfaces.GenVector{
+ "barbaz": interfaces.PrefixGenVector{12: 18},
+ "f": interfaces.PrefixGenVector{12: 30, 13: 5},
+ "foobar": interfaces.PrefixGenVector{12: 30, 13: 8},
+ },
+ initVec: interfaces.GenVector{
+ "foo": interfaces.PrefixGenVector{11: 5, 12: 5},
+ "foobar": interfaces.PrefixGenVector{11: 5, 12: 5},
+ "bar": interfaces.PrefixGenVector{10: 5, 11: 5, 12: 5},
+ },
+ respGen: 5,
+ outVec: interfaces.GenVector{
+ "foo": interfaces.PrefixGenVector{10: 5, 12: 30, 13: 5},
+ "foobar": interfaces.PrefixGenVector{10: 5, 12: 30, 13: 8},
+ "bar": interfaces.PrefixGenVector{10: 5},
+ "barbaz": interfaces.PrefixGenVector{10: 5, 12: 18},
+ },
+ genDiff: genRangeVector{
+ 10: &genRange{min: 1, max: 5},
+ 12: &genRange{min: 6, max: 30},
+ 13: &genRange{min: 1, max: 8},
+ },
+ keyPfxs: []string{"baz", "wombat", "f", "foo", "foobar", "bar", "barbaz", ""},
+ },
+ }
+
+ for i, test := range tests {
+ svc := createService(t)
+ s := svc.sync
+ s.id = 10 // Responder.
+
+ wantDiff, wantVec := test.genDiff, test.outVec
+ s.syncState[appDbName(appName, dbName)] = &dbSyncStateInMem{gen: test.respGen, checkptGen: test.respGen, genvec: test.respVec}
+
+ req := interfaces.DeltaReq{AppName: appName, DbName: dbName, InitVec: test.initVec}
+ rSt := newResponderState(nil, nil, s, req, "fakeInitiator")
+
+ rSt.computeDeltaBound(nil)
+ if rSt.errState != nil || !reflect.DeepEqual(rSt.outVec, wantVec) {
+ t.Fatalf("computeDeltaBound failed (I: %v), (R: %v, %v), got %v, want %v err %v", test.initVec, test.respGen, test.respVec, rSt.outVec, wantVec, rSt.errState)
+ }
+ checkEqualDevRanges(t, rSt.diff, wantDiff)
+
+ ////////////////////////////////////////
+ // Test sending deltas.
+
+ // Insert some log records to bootstrap testing below.
+ tRng := rand.New(rand.NewSource(int64(i)))
+ var wantRecs []*localLogRec
+ st := svc.St()
+ tx := st.NewTransaction()
+ objKeyPfxs := test.keyPfxs
+ j := 0
+ for id, r := range wantDiff {
+ pos := uint64(tRng.Intn(50) + 100*j)
+ for k := r.min; k <= r.max; k++ {
+ opfx := objKeyPfxs[tRng.Intn(len(objKeyPfxs))]
+ // Create holes in the log records.
+ if opfx == "" {
+ continue
+ }
+ okey := makeRowKey(fmt.Sprintf("%s~%x", opfx, tRng.Int()))
+ vers := fmt.Sprintf("%x", tRng.Int())
+ rec := &localLogRec{
+ Metadata: interfaces.LogRecMetadata{Id: id, Gen: k, ObjId: okey, CurVers: vers, UpdTime: time.Now().UTC()},
+ Pos: pos + k,
+ }
+ if err := putLogRec(nil, tx, rec); err != nil {
+ t.Fatalf("putLogRec(%d:%d) failed rec %v err %v", id, k, rec, err)
+ }
+ value := fmt.Sprintf("value_%s", okey)
+ if err := watchable.PutAtVersion(nil, tx, []byte(okey), []byte(value), []byte(vers)); err != nil {
+ t.Fatalf("PutAtVersion(%d:%d) failed rec %v value %s: err %v", id, k, rec, value, err)
+ }
+
+ initPfxs := extractAndSortPrefixes(test.initVec)
+ if !filterLogRec(rec, test.initVec, initPfxs) {
+ wantRecs = append(wantRecs, rec)
+ }
+ }
+ j++
+ }
+ if err := tx.Commit(); err != nil {
+ t.Fatalf("cannot commit putting log rec, err %v", err)
+ }
+
+ d := &dummyResponder{}
+ rSt.call = d
+ rSt.st, rSt.errState = rSt.sync.getDbStore(nil, nil, rSt.req.AppName, rSt.req.DbName)
+ if rSt.errState != nil {
+ t.Fatalf("filterAndSendDeltas failed to get store handle for app/db %v %v", rSt.req.AppName, rSt.req.DbName)
+ }
+ err := rSt.filterAndSendDeltas(nil)
+ if err != nil {
+ t.Fatalf("filterAndSendDeltas failed (I: %v), (R: %v, %v) err %v", test.initVec, test.respGen, test.respVec, err)
+ }
+ d.diffLogRecs(t, wantRecs, wantVec)
+
+ destroyService(t, svc)
+ }
+}
+
+//////////////////////////////
+// Helpers
+
+type dummyResponder struct {
+ start, finish int
+ gotRecs []*localLogRec
+ outVec interfaces.GenVector
+}
+
+func (d *dummyResponder) RecvStream() interface {
+ Advance() bool
+ Value() interfaces.DeltaReq
+ Err() error
+} {
+ return d
+}
+
+func (d *dummyResponder) Advance() bool {
+ return false
+}
+
+func (d *dummyResponder) Value() interfaces.DeltaReq {
+ return interfaces.DeltaReq{}
+}
+
+func (d *dummyResponder) Err() error { return nil }
+
+func (d *dummyResponder) SendStream() interface {
+ Send(item interfaces.DeltaResp) error
+} {
+ return d
+}
+
+func (d *dummyResponder) Send(item interfaces.DeltaResp) error {
+ switch v := item.(type) {
+ case interfaces.DeltaRespStart:
+ d.start++
+ case interfaces.DeltaRespFinish:
+ d.finish++
+ case interfaces.DeltaRespRespVec:
+ d.outVec = v.Value
+ case interfaces.DeltaRespRec:
+ d.gotRecs = append(d.gotRecs, &localLogRec{Metadata: v.Value.Metadata})
+ }
+ return nil
+}
+
+func (d *dummyResponder) Security() security.Call {
+ return nil
+}
+
+func (d *dummyResponder) Suffix() string {
+ return ""
+}
+
+func (d *dummyResponder) LocalEndpoint() naming.Endpoint {
+ return nil
+}
+
+func (d *dummyResponder) RemoteEndpoint() naming.Endpoint {
+ return nil
+}
+
+func (d *dummyResponder) GrantedBlessings() security.Blessings {
+ return security.Blessings{}
+}
+
+func (d *dummyResponder) Server() rpc.Server {
+ return nil
+}
+
+func (d *dummyResponder) diffLogRecs(t *testing.T, wantRecs []*localLogRec, wantVec interfaces.GenVector) {
+ if d.start != 1 || d.finish != 1 {
+ t.Fatalf("diffLogRecs incorrect start/finish records (%v, %v)", d.start, d.finish)
+ }
+ if len(d.gotRecs) != len(wantRecs) {
+ t.Fatalf("diffLogRecs failed, gotLen %v, wantLen %v\n", len(d.gotRecs), len(wantRecs))
+ }
+ for i, rec := range d.gotRecs {
+ if !reflect.DeepEqual(rec.Metadata, wantRecs[i].Metadata) {
+ t.Fatalf("diffLogRecs failed, i %v, got %v, want %v\n", i, rec.Metadata, wantRecs[i].Metadata)
+ }
+ }
+ if !reflect.DeepEqual(d.outVec, wantVec) {
+ t.Fatalf("diffLogRecs failed genvector, got %v, want %v\n", d.outVec, wantVec)
+ }
+}
+
+func checkEqualDevRanges(t *testing.T, s1, s2 genRangeVector) {
+ if len(s1) != len(s2) {
+ t.Fatalf("len(s1): %v != len(s2): %v", len(s1), len(s2))
+ }
+ for d1, r1 := range s1 {
+ if r2, ok := s2[d1]; !ok || !reflect.DeepEqual(r1, r2) {
+ t.Fatalf("Dev %v: r1 %v != r2 %v", d1, r1, r2)
+ }
+ }
+}
diff --git a/services/syncbase/vsync/sync.go b/services/syncbase/vsync/sync.go
new file mode 100644
index 0000000..fd65c66
--- /dev/null
+++ b/services/syncbase/vsync/sync.go
@@ -0,0 +1,197 @@
+// Copyright 2015 The Vanadium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package vsync
+
+// Package vsync provides sync functionality for Syncbase. The sync service
+// serves incoming GetDeltas requests and contacts other peers to get deltas
+// from them. When it receives a GetDeltas request, it diffs the incoming
+// generation vector against the local generation vector and sends back the
+// missing generations. When it receives log records in response to a GetDeltas
+// request, it replays those log records to catch up with the sender.
+import (
+ "fmt"
+ "math/rand"
+ "path"
+ "sync"
+ "time"
+
+ "v.io/syncbase/v23/services/syncbase/nosql"
+ blob "v.io/syncbase/x/ref/services/syncbase/localblobstore"
+ fsblob "v.io/syncbase/x/ref/services/syncbase/localblobstore/fs_cablobstore"
+ "v.io/syncbase/x/ref/services/syncbase/server/interfaces"
+ "v.io/syncbase/x/ref/services/syncbase/server/util"
+ "v.io/syncbase/x/ref/services/syncbase/store"
+ "v.io/v23/context"
+ "v.io/v23/rpc"
+ "v.io/v23/verror"
+)
+
+// syncService contains the metadata for the sync module.
+type syncService struct {
+ // TODO(hpucha): see if "v.io/v23/uniqueid" is a better fit. It is 128 bits.
+ id uint64 // globally unique id for this instance of Syncbase.
+ name string // name derived from the global id.
+ sv interfaces.Service
+ server rpc.Server
+
+ // High-level lock to serialize the watcher and the initiator. This lock is
+ // needed to handle the following cases: (a) When the initiator is
+ // cutting a local generation, it waits for the watcher to commit the
+ // latest local changes before including them in the checkpoint. (b)
+ // When the initiator is receiving updates, it reads the latest head of
+ // an object as per the DAG state in order to construct the in-memory
+ // graft map used for conflict detection. At the same time, if a watcher
+ // is processing local updates, it may move the object head. Hence the
+ // initiator and the watcher contend on the DAG head of an object. Instead
+ // of retrying a transaction, which would cause the entire delta to be
+ // replayed, we use pessimistic locking to serialize the initiator and
+ // the watcher.
+ //
+ // TODO(hpucha): This is a temporary hack.
+ thLock sync.RWMutex
+
+ // State to coordinate shutdown of spawned goroutines.
+ pending sync.WaitGroup
+ closed chan struct{}
+
+ // TODO(hpucha): Other global names to advertise to enable Syncbase
+ // discovery. For example, every Syncbase must be reachable under
+ // <mttable>/<syncbaseid> for p2p sync. This is the name advertised
+ // during SyncGroup join. In addition, a Syncbase might also be
+ // accepting "publish SyncGroup requests", and might use a more
+ // human-readable name such as <mttable>/<idp>/<sgserver>. All these
+ // names must be advertised in the appropriate mount tables.
+
+ // In-memory sync membership info aggregated across databases.
+ allMembers *memberView
+ allMembersLock sync.RWMutex
+
+ // In-memory sync state per Database. This state is populated at
+ // startup, and periodically persisted by the initiator.
+ syncState map[string]*dbSyncStateInMem
+ syncStateLock sync.Mutex // lock to protect access to the sync state.
+
+ // In-memory tracking of batches during their construction.
+ // The sync Initiator and Watcher build batches incrementally here
+ // and then persist them in DAG batch entries. The mutex guards
+ // access to the batch set.
+ batchesLock sync.Mutex
+ batches batchSet
+
+ // Metadata related to blob handling.
+ bst blob.BlobStore // local blob store associated with this Syncbase.
+ blobDirectory map[nosql.BlobRef]*blobLocInfo // directory structure containing blob location information.
+ blobDirLock sync.RWMutex // lock to synchronize access to the blob directory information.
+}
+
+// syncDatabase contains the metadata for syncing a database. This struct is
+// used as a receiver to hand off the app-initiated SyncGroup calls that arrive
+// against a nosql.Database to the sync module.
+type syncDatabase struct {
+ db interfaces.Database
+ sync interfaces.SyncServerMethods
+}
+
+var (
+ rng = rand.New(rand.NewSource(time.Now().UTC().UnixNano()))
+ rngLock sync.Mutex
+ _ interfaces.SyncServerMethods = (*syncService)(nil)
+)
+
+// rand64 generates an unsigned 64-bit pseudo-random number.
+func rand64() uint64 {
+ rngLock.Lock()
+ defer rngLock.Unlock()
+ return (uint64(rng.Int63()) << 1) | uint64(rng.Int63n(2))
+}
+
+// randIntn mimics rand.Intn (generates a non-negative pseudo-random number in [0,n)).
+func randIntn(n int) int {
+ rngLock.Lock()
+ defer rngLock.Unlock()
+ return rng.Intn(n)
+}
+
+// New creates a new sync module.
+//
+// Concurrency: sync initializes two goroutines at startup: a "watcher" and an
+// "initiator". The "watcher" thread is responsible for watching the store for
+// changes to its objects. The "initiator" thread is responsible for
+// periodically contacting peers to fetch changes from them. In addition, the
+// sync module responds to incoming RPCs from remote sync modules.
+func New(ctx *context.T, call rpc.ServerCall, sv interfaces.Service, server rpc.Server, rootDir string) (*syncService, error) {
+ s := &syncService{
+ sv: sv,
+ server: server,
+ batches: make(batchSet),
+ }
+
+ data := &syncData{}
+ if err := store.RunInTransaction(sv.St(), func(tx store.Transaction) error {
+ if err := util.Get(ctx, sv.St(), s.stKey(), data); err != nil {
+ if verror.ErrorID(err) != verror.ErrNoExist.ID {
+ return err
+ }
+ // First invocation of vsync.New().
+ // TODO(sadovsky): Maybe move guid generation and storage to serviceData.
+ data.Id = rand64()
+ return util.Put(ctx, tx, s.stKey(), data)
+ }
+ return nil
+ }); err != nil {
+ return nil, err
+ }
+
+ // data.Id is now guaranteed to be initialized.
+ s.id = data.Id
+ s.name = syncbaseIdToName(s.id)
+
+ // Initialize in-memory state for the sync module before starting any threads.
+ if err := s.initSync(ctx); err != nil {
+ return nil, verror.New(verror.ErrInternal, ctx, err)
+ }
+
+ // Open a blob store.
+ var err error
+ s.bst, err = fsblob.Create(ctx, path.Join(rootDir, "blobs"))
+ if err != nil {
+ return nil, err
+ }
+ s.blobDirectory = make(map[nosql.BlobRef]*blobLocInfo)
+
+ // Channel to propagate close event to all threads.
+ s.closed = make(chan struct{})
+ s.pending.Add(2)
+
+ // Start watcher thread to watch for updates to local store.
+ go s.watchStore(ctx)
+
+ // Start initiator thread to periodically get deltas from peers.
+ go s.syncer(ctx)
+
+ return s, nil
+}
+
+// Close cleans up sync state.
+// TODO(hpucha): Hook it up to server shutdown of syncbased.
+func (s *syncService) Close() {
+ s.bst.Close()
+ close(s.closed)
+ s.pending.Wait()
+}
+
+func syncbaseIdToName(id uint64) string {
+ return fmt.Sprintf("%x", id)
+}
+
+func NewSyncDatabase(db interfaces.Database) *syncDatabase {
+ return &syncDatabase{db: db, sync: db.App().Service().Sync()}
+}
+
+func (s *syncService) stKey() string {
+ return util.SyncPrefix
+}
diff --git a/services/syncbase/vsync/sync_state.go b/services/syncbase/vsync/sync_state.go
new file mode 100644
index 0000000..8195559
--- /dev/null
+++ b/services/syncbase/vsync/sync_state.go
@@ -0,0 +1,349 @@
+// Copyright 2015 The Vanadium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package vsync
+
+// New log records are created when objects in the local store are created,
+// updated or deleted. Local log records are also replayed to keep the
+// per-object dags consistent with the local store state. Sync module assigns
+// each log record created within a Database a unique sequence number, called
+// the generation number. Locally on each device, the position of each log
+// record is also recorded relative to other local and remote log records.
+//
+// When a device receives a request to send log records, it first computes the
+// missing generations between itself and the incoming request on a per-prefix
+// basis. It then sends all the log records belonging to the missing generations
+// in the order they occur locally (using the local log position). A device that
+// receives log records over the network replays all the records received from
+// another device in a single batch. Each replayed log record adds a new version
+// to the dag of the object contained in the log record. At the end of replaying
+// all the log records, conflict detection and resolution is carried out for all
+// the objects learned during this iteration. Conflict detection and resolution
+// is carried out after a batch of log records are replayed, instead of
+// incrementally after each record is replayed, to avoid repeating conflict
+// resolution already performed by other devices.
+//
+// The sync module tracks the current generation number and the current local
+// log position for each Database. It also tracks the current
+// generation vector for a Database. Log records are indexed such that they can
+// be selectively retrieved from the store for any missing generation from any
+// device.
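+//
+// As a concrete illustration of the tracked state (shapes taken from the
+// tests in sync_state_test.go), a Database's generation vector maps a synced
+// prefix to a per-device map of the latest known generation:
+//
+//    gv := interfaces.GenVector{
+//        "mocktbl/foo": interfaces.PrefixGenVector{
+//            1: 2, 3: 4, 5: 6, // device id -> generation
+//        },
+//    }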
+
+import (
+ "fmt"
+ "strconv"
+
+ "v.io/syncbase/x/ref/services/syncbase/server/interfaces"
+ "v.io/syncbase/x/ref/services/syncbase/server/util"
+ "v.io/syncbase/x/ref/services/syncbase/store"
+ "v.io/v23/context"
+ "v.io/v23/verror"
+)
+
+// dbSyncStateInMem represents the in-memory sync state of a Database.
+type dbSyncStateInMem struct {
+ gen uint64
+ pos uint64
+
+ checkptGen uint64
+ genvec interfaces.GenVector // Note: Generation vector contains state from remote devices only.
+}
+
+// initSync initializes the sync module during startup. It scans all the
+// databases across all apps to initialize the following:
+// a) the in-memory sync state of each Database, consisting of the current
+// generation number, log position, and generation vector.
+// b) the watcher map of prefixes currently being synced.
+// c) the republishing of names in mount tables for all SyncGroups.
+//
+// TODO(hpucha): This is incomplete. Flesh this out further.
+func (s *syncService) initSync(ctx *context.T) error {
+ s.syncStateLock.Lock()
+ defer s.syncStateLock.Unlock()
+
+ var errFinal error
+ s.syncState = make(map[string]*dbSyncStateInMem)
+
+ s.forEachDatabaseStore(ctx, func(appName, dbName string, st store.Store) bool {
+ // Scan the SyncGroups, skipping those not yet being watched.
+ forEachSyncGroup(st, func(sg *interfaces.SyncGroup) bool {
+ // TODO(rdaoud): only use SyncGroups that have been
+ // marked as "watchable" by the sync watcher thread.
+ // This is to handle the case of a SyncGroup being
+ // created but Syncbase restarting before the watcher
+ // processed the SyncGroupOp entry in the watch queue.
+ // It should not sync that SyncGroup's data after a
+ // restart, but should wait until the watcher processes
+ // the entry, as it would have without a restart.
+ for _, prefix := range sg.Spec.Prefixes {
+ incrWatchPrefix(appName, dbName, prefix)
+ }
+ return false
+ })
+
+ if false {
+ // Fetch the sync state.
+ ds, err := getDbSyncState(ctx, st)
+ if err != nil && verror.ErrorID(err) != verror.ErrNoExist.ID {
+ errFinal = err
+ return false
+ }
+ var scanStart, scanLimit []byte
+ // Figure out what to scan among local log records.
+ if verror.ErrorID(err) == verror.ErrNoExist.ID {
+ scanStart, scanLimit = util.ScanPrefixArgs(logRecsPerDeviceScanPrefix(s.id), "")
+ } else {
+ scanStart, scanLimit = util.ScanPrefixArgs(logRecKey(s.id, ds.Gen), "")
+ }
+ var maxpos uint64
+ var dbName string
+ // Scan local log records to find the most recent one.
+ st.Scan(scanStart, scanLimit)
+ // Scan remote log records using the persisted GenVector.
+ s.syncState[dbName] = &dbSyncStateInMem{pos: maxpos + 1}
+ }
+
+ return false
+ })
+
+ return errFinal
+}
+
+// reserveGenAndPosInDbLog reserves a chunk of generation numbers and log
+// positions in a Database's log. Used when local updates result in log
+// entries.
+func (s *syncService) reserveGenAndPosInDbLog(ctx *context.T, appName, dbName string, count uint64) (uint64, uint64) {
+ return s.reserveGenAndPosInternal(appName, dbName, count, count)
+}
+
+// reservePosInDbLog reserves a chunk of log positions in a Database's log. Used
+// when remote log records are received.
+func (s *syncService) reservePosInDbLog(ctx *context.T, appName, dbName string, count uint64) uint64 {
+ _, pos := s.reserveGenAndPosInternal(appName, dbName, 0, count)
+ return pos
+}
+
+func (s *syncService) reserveGenAndPosInternal(appName, dbName string, genCount, posCount uint64) (uint64, uint64) {
+ s.syncStateLock.Lock()
+ defer s.syncStateLock.Unlock()
+
+ name := appDbName(appName, dbName)
+ ds, ok := s.syncState[name]
+ if !ok {
+ ds = &dbSyncStateInMem{gen: 1}
+ s.syncState[name] = ds
+ }
+
+ gen := ds.gen
+ pos := ds.pos
+
+ ds.gen += genCount
+ ds.pos += posCount
+
+ return gen, pos
+}
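+
+// For example (mirroring TestReserveGenAndPos), the first reservation for a
+// fresh Database returns the starting values and advances the in-memory
+// counters by the requested counts:
+//
+//    gen, pos := s.reserveGenAndPosInternal("mockapp", "mockdb", 5, 10)
+//    // gen == 1, pos == 0; a second identical call returns gen == 6, pos == 10.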
+
+// checkptLocalGen freezes the local generation number for the responder's use.
+func (s *syncService) checkptLocalGen(ctx *context.T, appName, dbName string) error {
+ s.syncStateLock.Lock()
+ defer s.syncStateLock.Unlock()
+
+ name := appDbName(appName, dbName)
+ ds, ok := s.syncState[name]
+ if !ok {
+ return verror.New(verror.ErrInternal, ctx, "db state not found", name)
+ }
+
+ // The frozen generation is the last generation number used, i.e. one
+ // below the next available one to use.
+ ds.checkptGen = ds.gen - 1
+ return nil
+}
+
+// initDbSyncStateInMem initializes the in-memory sync state of the Database if needed.
+func (s *syncService) initDbSyncStateInMem(ctx *context.T, appName, dbName string) {
+ s.syncStateLock.Lock()
+ defer s.syncStateLock.Unlock()
+
+ name := appDbName(appName, dbName)
+ if s.syncState[name] == nil {
+ s.syncState[name] = &dbSyncStateInMem{gen: 1}
+ }
+}
+
+// copyDbSyncStateInMem returns a copy of the current in-memory sync state of the Database.
+func (s *syncService) copyDbSyncStateInMem(ctx *context.T, appName, dbName string) (*dbSyncStateInMem, error) {
+ s.syncStateLock.Lock()
+ defer s.syncStateLock.Unlock()
+
+ name := appDbName(appName, dbName)
+ ds, ok := s.syncState[name]
+ if !ok {
+ return nil, verror.New(verror.ErrInternal, ctx, "db state not found", name)
+ }
+
+ dsCopy := &dbSyncStateInMem{
+ gen: ds.gen,
+ pos: ds.pos,
+ checkptGen: ds.checkptGen,
+ }
+
+ dsCopy.genvec = copyGenVec(ds.genvec)
+
+ return dsCopy, nil
+}
+
+// copyDbGenInfo returns a copy of the current generation information of the Database.
+func (s *syncService) copyDbGenInfo(ctx *context.T, appName, dbName string) (interfaces.GenVector, uint64, error) {
+ s.syncStateLock.Lock()
+ defer s.syncStateLock.Unlock()
+
+ name := appDbName(appName, dbName)
+ ds, ok := s.syncState[name]
+ if !ok {
+ return nil, 0, verror.New(verror.ErrInternal, ctx, "db state not found", name)
+ }
+
+ genvec := copyGenVec(ds.genvec)
+
+ // Add local generation information to the genvec.
+ for _, gv := range genvec {
+ gv[s.id] = ds.checkptGen
+ }
+
+ return genvec, ds.checkptGen, nil
+}
+
+// putDbGenInfoRemote puts the current remote generation information of the Database.
+func (s *syncService) putDbGenInfoRemote(ctx *context.T, appName, dbName string, genvec interfaces.GenVector) error {
+ s.syncStateLock.Lock()
+ defer s.syncStateLock.Unlock()
+
+ name := appDbName(appName, dbName)
+ ds, ok := s.syncState[name]
+ if !ok {
+ return verror.New(verror.ErrInternal, ctx, "db state not found", name)
+ }
+
+ ds.genvec = copyGenVec(genvec)
+
+ return nil
+}
+
+// appDbName combines the app and db names to return a globally unique name for
+// a Database. This relies on the fact that the app name is globally unique and
+// the db name is unique within the scope of the app.
+func appDbName(appName, dbName string) string {
+ return util.JoinKeyParts(appName, dbName)
+}
+
+// splitAppDbName is the inverse of appDbName and returns app and db name from a
+// globally unique name for a Database.
+func splitAppDbName(ctx *context.T, name string) (string, string, error) {
+ parts := util.SplitKeyParts(name)
+ if len(parts) != 2 {
+ return "", "", verror.New(verror.ErrInternal, ctx, "invalid appDbName", name)
+ }
+ return parts[0], parts[1], nil
+}
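+
+// For example, assuming the ":" separator implied by the log record keys used
+// in the tests:
+//
+//    name := appDbName("mockapp", "mockdb")  // "mockapp:mockdb"
+//    app, db, _ := splitAppDbName(ctx, name) // app == "mockapp", db == "mockdb"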
+
+func copyGenVec(in interfaces.GenVector) interfaces.GenVector {
+ genvec := make(interfaces.GenVector)
+ for p, inpgv := range in {
+ pgv := make(interfaces.PrefixGenVector)
+ for id, gen := range inpgv {
+ pgv[id] = gen
+ }
+ genvec[p] = pgv
+ }
+ return genvec
+}
+
+////////////////////////////////////////////////////////////
+// Low-level utility functions to access sync state.
+
+// dbSyncStateKey returns the key used to access the sync state of a Database.
+func dbSyncStateKey() string {
+ return util.JoinKeyParts(util.SyncPrefix, dbssPrefix)
+}
+
+// putDbSyncState persists the sync state object for a given Database.
+func putDbSyncState(ctx *context.T, tx store.Transaction, ds *dbSyncState) error {
+ return util.Put(ctx, tx, dbSyncStateKey(), ds)
+}
+
+// getDbSyncState retrieves the sync state object for a given Database.
+func getDbSyncState(ctx *context.T, st store.StoreReader) (*dbSyncState, error) {
+ var ds dbSyncState
+ if err := util.Get(ctx, st, dbSyncStateKey(), &ds); err != nil {
+ return nil, err
+ }
+ return &ds, nil
+}
+
+////////////////////////////////////////////////////////////
+// Low-level utility functions to access log records.
+
+// logRecsPerDeviceScanPrefix returns the prefix used to scan log records for a particular device.
+func logRecsPerDeviceScanPrefix(id uint64) string {
+ // Encode the device id in decimal, matching logRecKey, so that scans with
+ // this prefix match the stored log record keys.
+ return util.JoinKeyParts(util.SyncPrefix, logPrefix, fmt.Sprintf("%d", id))
+}
+
+// logRecKey returns the key used to access a specific log record.
+func logRecKey(id, gen uint64) string {
+ return util.JoinKeyParts(util.SyncPrefix, logPrefix, fmt.Sprintf("%d", id), fmt.Sprintf("%016x", gen))
+}
+
+// splitLogRecKey is the inverse of logRecKey and returns device id and generation number.
+func splitLogRecKey(ctx *context.T, key string) (uint64, uint64, error) {
+ parts := util.SplitKeyParts(key)
+ verr := verror.New(verror.ErrInternal, ctx, "invalid logreckey", key)
+ if len(parts) != 4 {
+ return 0, 0, verr
+ }
+ if parts[0] != util.SyncPrefix || parts[1] != logPrefix {
+ return 0, 0, verr
+ }
+ id, err := strconv.ParseUint(parts[2], 10, 64)
+ if err != nil {
+ return 0, 0, verr
+ }
+ gen, err := strconv.ParseUint(parts[3], 16, 64)
+ if err != nil {
+ return 0, 0, verr
+ }
+ return id, gen, nil
+}
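+
+// For example (matching the round-trip cases in TestLogRecKeyUtils, and
+// assuming the "$sync" and "log" prefix literals used in that test's invalid
+// key fixtures), the device id is encoded in decimal and the generation as
+// 16 hex digits:
+//
+//    k := logRecKey(10, 20)               // "$sync:log:10:0000000000000014"
+//    id, gen, _ := splitLogRecKey(nil, k) // id == 10, gen == 20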
+
+// hasLogRec returns true if the log record for (devid, gen) exists.
+func hasLogRec(st store.StoreReader, id, gen uint64) (bool, error) {
+ // TODO(hpucha): optimize to avoid the unneeded fetch/decode of the data.
+ var rec localLogRec
+ if err := util.Get(nil, st, logRecKey(id, gen), &rec); err != nil {
+ if verror.ErrorID(err) == verror.ErrNoExist.ID {
+ err = nil
+ }
+ return false, err
+ }
+ return true, nil
+}
+
+// putLogRec stores the log record.
+func putLogRec(ctx *context.T, tx store.Transaction, rec *localLogRec) error {
+ return util.Put(ctx, tx, logRecKey(rec.Metadata.Id, rec.Metadata.Gen), rec)
+}
+
+// getLogRec retrieves the log record for a given (devid, gen).
+func getLogRec(ctx *context.T, st store.StoreReader, id, gen uint64) (*localLogRec, error) {
+ var rec localLogRec
+ if err := util.Get(ctx, st, logRecKey(id, gen), &rec); err != nil {
+ return nil, err
+ }
+ return &rec, nil
+}
+
+// delLogRec deletes the log record for a given (devid, gen).
+func delLogRec(ctx *context.T, tx store.Transaction, id, gen uint64) error {
+ return util.Delete(ctx, tx, logRecKey(id, gen))
+}
diff --git a/services/syncbase/vsync/sync_state_test.go b/services/syncbase/vsync/sync_state_test.go
new file mode 100644
index 0000000..7e9302b
--- /dev/null
+++ b/services/syncbase/vsync/sync_state_test.go
@@ -0,0 +1,174 @@
+// Copyright 2015 The Vanadium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package vsync
+
+import (
+ "reflect"
+ "testing"
+ "time"
+
+ "v.io/syncbase/x/ref/services/syncbase/server/interfaces"
+ "v.io/syncbase/x/ref/services/syncbase/store"
+ _ "v.io/x/ref/runtime/factories/generic"
+)
+
+// Tests for sync state management and storage in Syncbase.
+
+// TestReserveGenAndPos tests reserving generation numbers and log positions in a
+// Database log.
+func TestReserveGenAndPos(t *testing.T) {
+ svc := createService(t)
+ defer destroyService(t, svc)
+ s := svc.sync
+
+ var wantGen, wantPos uint64 = 1, 0
+ for i := 0; i < 5; i++ {
+ gotGen, gotPos := s.reserveGenAndPosInternal("mockapp", "mockdb", 5, 10)
+ if gotGen != wantGen || gotPos != wantPos {
+ t.Fatalf("reserveGenAndPosInternal failed, gotGen %v wantGen %v, gotPos %v wantPos %v", gotGen, wantGen, gotPos, wantPos)
+ }
+ wantGen += 5
+ wantPos += 10
+
+ name := appDbName("mockapp", "mockdb")
+ if s.syncState[name].gen != wantGen || s.syncState[name].pos != wantPos {
+ t.Fatalf("reserveGenAndPosInternal failed, gotGen %v wantGen %v, gotPos %v wantPos %v", s.syncState[name].gen, wantGen, s.syncState[name].pos, wantPos)
+ }
+ }
+}
+
+// TestPutGetDbSyncState tests setting and getting sync metadata.
+func TestPutGetDbSyncState(t *testing.T) {
+ svc := createService(t)
+ defer destroyService(t, svc)
+ st := svc.St()
+
+ checkDbSyncState(t, st, false, nil)
+
+ gv := interfaces.GenVector{
+ "mocktbl/foo": interfaces.PrefixGenVector{
+ 1: 2, 3: 4, 5: 6,
+ },
+ }
+
+ tx := st.NewTransaction()
+ wantSt := &dbSyncState{Gen: 40, GenVec: gv}
+ if err := putDbSyncState(nil, tx, wantSt); err != nil {
+ t.Fatalf("putDbSyncState failed, err %v", err)
+ }
+ if err := tx.Commit(); err != nil {
+ t.Fatalf("cannot commit putting db sync state, err %v", err)
+ }
+
+ checkDbSyncState(t, st, true, wantSt)
+}
+
+// TestPutGetDelLogRec tests setting, getting, and deleting a log record.
+func TestPutGetDelLogRec(t *testing.T) {
+ svc := createService(t)
+ defer destroyService(t, svc)
+ st := svc.St()
+
+ var id uint64 = 10
+ var gen uint64 = 100
+
+ checkLogRec(t, st, id, gen, false, nil)
+
+ tx := st.NewTransaction()
+ wantRec := &localLogRec{
+ Metadata: interfaces.LogRecMetadata{
+ Id: id,
+ Gen: gen,
+ RecType: interfaces.NodeRec,
+ ObjId: "foo",
+ CurVers: "3",
+ Parents: []string{"1", "2"},
+ UpdTime: time.Now().UTC(),
+ Delete: false,
+ BatchId: 10000,
+ BatchCount: 1,
+ },
+ Pos: 10,
+ }
+ if err := putLogRec(nil, tx, wantRec); err != nil {
+ t.Fatalf("putLogRec(%d:%d) failed err %v", id, gen, err)
+ }
+ if err := tx.Commit(); err != nil {
+ t.Fatalf("cannot commit putting log rec, err %v", err)
+ }
+
+ checkLogRec(t, st, id, gen, true, wantRec)
+
+ tx = st.NewTransaction()
+ if err := delLogRec(nil, tx, id, gen); err != nil {
+ t.Fatalf("delLogRec(%d:%d) failed err %v", id, gen, err)
+ }
+ if err := tx.Commit(); err != nil {
+ t.Fatalf("cannot commit deleting log rec, err %v", err)
+ }
+
+ checkLogRec(t, st, id, gen, false, nil)
+}
+
+func TestLogRecKeyUtils(t *testing.T) {
+ invalid := []string{"$sync:aa:bb", "log:aa:bb", "$sync:log:aa:xx", "$sync:log:x:bb"}
+
+ for _, k := range invalid {
+ if _, _, err := splitLogRecKey(nil, k); err == nil {
+ t.Fatalf("splitting log rec key didn't fail %q", k)
+ }
+ }
+
+ valid := []struct {
+ id uint64
+ gen uint64
+ }{
+ {10, 20},
+ {190, 540},
+ {9999, 999999},
+ }
+
+ for _, v := range valid {
+ gotId, gotGen, err := splitLogRecKey(nil, logRecKey(v.id, v.gen))
+ if gotId != v.id || gotGen != v.gen || err != nil {
+ t.Fatalf("failed key conversion id got %v want %v, gen got %v want %v, err %v", gotId, v.id, gotGen, v.gen, err)
+ }
+ }
+}
+
+//////////////////////////////
+// Helpers
+
+// TODO(hpucha): Look into using v.io/syncbase/v23/syncbase/testutil.Fatalf()
+// for getting the stack trace. Right now cannot import the package due to a
+// cycle.
+
+func checkDbSyncState(t *testing.T, st store.StoreReader, exists bool, wantSt *dbSyncState) {
+ gotSt, err := getDbSyncState(nil, st)
+
+ if (!exists && err == nil) || (exists && err != nil) {
+ t.Fatalf("getDbSyncState failed, exists %v err %v", exists, err)
+ }
+
+ if !reflect.DeepEqual(gotSt, wantSt) {
+ t.Fatalf("getDbSyncState() failed, got %v, want %v", gotSt, wantSt)
+ }
+}
+
+func checkLogRec(t *testing.T, st store.StoreReader, id, gen uint64, exists bool, wantRec *localLogRec) {
+ gotRec, err := getLogRec(nil, st, id, gen)
+
+ if (!exists && err == nil) || (exists && err != nil) {
+ t.Fatalf("getLogRec(%d:%d) failed, exists %v err %v", id, gen, exists, err)
+ }
+
+ if !reflect.DeepEqual(gotRec, wantRec) {
+ t.Fatalf("getLogRec(%d:%d) failed, got %v, want %v", id, gen, gotRec, wantRec)
+ }
+
+ if ok, err := hasLogRec(st, id, gen); err != nil || ok != exists {
+ t.Fatalf("hasLogRec(%d:%d) failed, want %v", id, gen, exists)
+ }
+}
diff --git a/services/syncbase/vsync/syncgroup.go b/services/syncbase/vsync/syncgroup.go
new file mode 100644
index 0000000..905a319
--- /dev/null
+++ b/services/syncbase/vsync/syncgroup.go
@@ -0,0 +1,895 @@
+// Copyright 2015 The Vanadium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package vsync
+
+// SyncGroup management and storage in Syncbase. Handles the lifecycle
+// of SyncGroups (create, join, leave, etc.) and their persistence as
+// sync metadata in the application databases. Provides helper functions
+// to the higher levels of sync (Initiator, Watcher) to get membership
+// information and map key/value changes to their matching SyncGroups.
+
+// TODO(hpucha): Add high level commentary about the logic behind create/join
+// etc.
+
+import (
+ "fmt"
+ "strings"
+ "time"
+
+ wire "v.io/syncbase/v23/services/syncbase/nosql"
+ "v.io/syncbase/x/ref/services/syncbase/server/interfaces"
+ "v.io/syncbase/x/ref/services/syncbase/server/util"
+ "v.io/syncbase/x/ref/services/syncbase/server/watchable"
+ "v.io/syncbase/x/ref/services/syncbase/store"
+ "v.io/v23/context"
+ "v.io/v23/naming"
+ "v.io/v23/rpc"
+ "v.io/v23/security"
+ "v.io/v23/security/access"
+ "v.io/v23/verror"
+ "v.io/v23/vom"
+ "v.io/x/lib/vlog"
+)
+
+var (
+ // memberViewTTL is the shelf-life of the aggregate view of SyncGroup members.
+ memberViewTTL = 2 * time.Second
+)
+
+////////////////////////////////////////////////////////////
+// SyncGroup management internal to Syncbase.
+
+// memberView holds an aggregated view of all SyncGroup members across
+// databases. The view is not coherent: it is refreshed according to a
+// configured TTL rather than immediately when SyncGroup membership is updated
+// in the various databases. It is needed by the sync Initiator, which must select
+// a peer to contact from a global view of all SyncGroup members gathered from
+// all databases. This is why a slightly stale view is acceptable.
+// The members are identified by their Vanadium names (map keys).
+type memberView struct {
+ expiration time.Time
+ members map[string]*memberInfo
+}
+
+// memberInfo holds the member metadata for each SyncGroup this member belongs
+// to within each App/Database (i.e. global database name). It's a mapping of
+// global DB names to sets of SyncGroup member information.
+type memberInfo struct {
+ db2sg map[string]sgMemberInfo
+}
+
+// sgMemberInfo maps SyncGroups to their member metadata.
+type sgMemberInfo map[interfaces.GroupId]wire.SyncGroupMemberInfo
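+
+// As an illustration of the nesting populated by refreshMembersIfExpired
+// below, a joiner's metadata for one SyncGroup in one database is addressed
+// as:
+//
+//    view.members[joiner].db2sg[appDbName(app, db)][sg.Id] = sg.Joiners[joiner]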
+
+// newSyncGroupVersion generates a random SyncGroup version ("etag").
+func newSyncGroupVersion() string {
+ return fmt.Sprintf("%x", rand64())
+}
+
+// newSyncGroupId generates a random SyncGroup ID.
+func newSyncGroupId() interfaces.GroupId {
+ id := interfaces.NoGroupId
+ for id == interfaces.NoGroupId {
+ id = interfaces.GroupId(rand64())
+ }
+ return id
+}
+
+// verifySyncGroup checks that a SyncGroup struct is well-formed.
+func verifySyncGroup(ctx *context.T, sg *interfaces.SyncGroup) error {
+ if sg == nil {
+ return verror.New(verror.ErrBadArg, ctx, "group information not specified")
+ }
+ if sg.Name == "" {
+ return verror.New(verror.ErrBadArg, ctx, "group name not specified")
+ }
+ if sg.AppName == "" {
+ return verror.New(verror.ErrBadArg, ctx, "app name not specified")
+ }
+ if sg.DbName == "" {
+ return verror.New(verror.ErrBadArg, ctx, "db name not specified")
+ }
+ if sg.Creator == "" {
+ return verror.New(verror.ErrBadArg, ctx, "creator id not specified")
+ }
+ if sg.Id == interfaces.NoGroupId {
+ return verror.New(verror.ErrBadArg, ctx, "group id not specified")
+ }
+ if sg.SpecVersion == "" {
+ return verror.New(verror.ErrBadArg, ctx, "group version not specified")
+ }
+ if len(sg.Joiners) == 0 {
+ return verror.New(verror.ErrBadArg, ctx, "group has no joiners")
+ }
+ if len(sg.Spec.Prefixes) == 0 {
+ return verror.New(verror.ErrBadArg, ctx, "group has no prefixes specified")
+ }
+ return nil
+}
+
+// addSyncGroup adds a new SyncGroup given its information.
+func addSyncGroup(ctx *context.T, tx store.Transaction, sg *interfaces.SyncGroup) error {
+ // Verify SyncGroup before storing it since it may have been received
+ // from a remote peer.
+ if err := verifySyncGroup(ctx, sg); err != nil {
+ return err
+ }
+
+ if ok, err := hasSGDataEntry(tx, sg.Id); err != nil {
+ return err
+ } else if ok {
+ return verror.New(verror.ErrExist, ctx, "group id already exists")
+ }
+ if ok, err := hasSGNameEntry(tx, sg.Name); err != nil {
+ return err
+ } else if ok {
+ return verror.New(verror.ErrExist, ctx, "group name already exists")
+ }
+
+ // Add the group name and data entries.
+ if err := setSGNameEntry(ctx, tx, sg.Name, sg.Id); err != nil {
+ return err
+ }
+ if err := setSGDataEntry(ctx, tx, sg.Id, sg); err != nil {
+ return err
+ }
+
+ return nil
+}
+
+// getSyncGroupId retrieves the SyncGroup ID given its name.
+func getSyncGroupId(ctx *context.T, st store.StoreReader, name string) (interfaces.GroupId, error) {
+ return getSGNameEntry(ctx, st, name)
+}
+
+// getSyncGroupName retrieves the SyncGroup name given its ID.
+func getSyncGroupName(ctx *context.T, st store.StoreReader, gid interfaces.GroupId) (string, error) {
+ sg, err := getSyncGroupById(ctx, st, gid)
+ if err != nil {
+ return "", err
+ }
+ return sg.Name, nil
+}
+
+// getSyncGroupById retrieves the SyncGroup given its ID.
+func getSyncGroupById(ctx *context.T, st store.StoreReader, gid interfaces.GroupId) (*interfaces.SyncGroup, error) {
+ return getSGDataEntry(ctx, st, gid)
+}
+
+// getSyncGroupByName retrieves the SyncGroup given its name.
+func getSyncGroupByName(ctx *context.T, st store.StoreReader, name string) (*interfaces.SyncGroup, error) {
+ gid, err := getSyncGroupId(ctx, st, name)
+ if err != nil {
+ return nil, err
+ }
+ return getSyncGroupById(ctx, st, gid)
+}
+
+// delSyncGroupById deletes the SyncGroup given its ID.
+func delSyncGroupById(ctx *context.T, tx store.Transaction, gid interfaces.GroupId) error {
+ sg, err := getSyncGroupById(ctx, tx, gid)
+ if err != nil {
+ return err
+ }
+ if err = delSGNameEntry(ctx, tx, sg.Name); err != nil {
+ return err
+ }
+ return delSGDataEntry(ctx, tx, sg.Id)
+}
+
+// delSyncGroupByName deletes the SyncGroup given its name.
+func delSyncGroupByName(ctx *context.T, tx store.Transaction, name string) error {
+ gid, err := getSyncGroupId(ctx, tx, name)
+ if err != nil {
+ return err
+ }
+ return delSyncGroupById(ctx, tx, gid)
+}
+
+// refreshMembersIfExpired updates the aggregate view of SyncGroup members
+// across databases if the view has expired.
+// TODO(rdaoud): track dirty apps/dbs since the last refresh and incrementally
+// update the membership view for them instead of always scanning all of them.
+func (s *syncService) refreshMembersIfExpired(ctx *context.T) {
+ view := s.allMembers
+ if view == nil {
+ // The zero value of time.Time is before "now" and is treated as expired
+ // below.
+ view = &memberView{expiration: time.Time{}, members: nil}
+ s.allMembers = view
+ }
+
+ if time.Now().Before(view.expiration) {
+ return
+ }
+
+ // Create a new aggregate view of SyncGroup members across all app databases.
+ newMembers := make(map[string]*memberInfo)
+
+ s.forEachDatabaseStore(ctx, func(appName, dbName string, st store.Store) bool {
+ // For each database, fetch its SyncGroup data entries by scanning their
+ // prefix range. Use a database snapshot for the scan.
+ sn := st.NewSnapshot()
+ defer sn.Abort()
+ name := appDbName(appName, dbName)
+
+ forEachSyncGroup(sn, func(sg *interfaces.SyncGroup) bool {
+ // Add all members of this SyncGroup to the membership view.
+ // A member's info is different across SyncGroups, so gather all of them.
+ for member, info := range sg.Joiners {
+ if _, ok := newMembers[member]; !ok {
+ newMembers[member] = &memberInfo{db2sg: make(map[string]sgMemberInfo)}
+ }
+ if _, ok := newMembers[member].db2sg[name]; !ok {
+ newMembers[member].db2sg[name] = make(sgMemberInfo)
+ }
+ newMembers[member].db2sg[name][sg.Id] = info
+ }
+ return false
+ })
+ return false
+ })
+
+ view.members = newMembers
+ view.expiration = time.Now().Add(memberViewTTL)
+}
+
+// forEachSyncGroup iterates over all SyncGroups in the Database and invokes
+// the callback function on each one. The callback returns a "done" flag to
+// make forEachSyncGroup() stop the iteration early; otherwise the function
+// loops across all SyncGroups in the Database.
+func forEachSyncGroup(st store.StoreReader, callback func(*interfaces.SyncGroup) bool) {
+ scanStart, scanLimit := util.ScanPrefixArgs(sgDataKeyScanPrefix, "")
+ stream := st.Scan(scanStart, scanLimit)
+ for stream.Advance() {
+ var sg interfaces.SyncGroup
+ if vom.Decode(stream.Value(nil), &sg) != nil {
+ vlog.Errorf("sync: forEachSyncGroup: invalid SyncGroup value for key %s", string(stream.Key(nil)))
+ continue
+ }
+
+ if callback(&sg) {
+ break // done, early exit
+ }
+ }
+
+ if err := stream.Err(); err != nil {
+ vlog.Errorf("sync: forEachSyncGroup: scan stream error: %v", err)
+ }
+}
+
+// getMembers returns all SyncGroup members and the count of SyncGroups each one
+// joined.
+func (s *syncService) getMembers(ctx *context.T) map[string]uint32 {
+ s.allMembersLock.Lock()
+ defer s.allMembersLock.Unlock()
+
+ s.refreshMembersIfExpired(ctx)
+
+ members := make(map[string]uint32)
+ for member, info := range s.allMembers.members {
+ count := 0
+ for _, sgmi := range info.db2sg {
+ count += len(sgmi)
+ }
+ members[member] = uint32(count)
+ }
+
+ return members
+}
+
+// copyMemberInfo returns a copy of the info for the requested peer.
+func (s *syncService) copyMemberInfo(ctx *context.T, member string) *memberInfo {
+ s.allMembersLock.RLock()
+ defer s.allMembersLock.RUnlock()
+
+ info, ok := s.allMembers.members[member]
+ if !ok {
+ return nil
+ }
+
+ // Make a copy.
+ infoCopy := &memberInfo{make(map[string]sgMemberInfo)}
+ for gdbName, sgInfo := range info.db2sg {
+ infoCopy.db2sg[gdbName] = make(sgMemberInfo)
+ for gid, mi := range sgInfo {
+ infoCopy.db2sg[gdbName][gid] = mi
+ }
+ }
+
+ return infoCopy
+}
+
+// Low-level utility functions to access DB entries without tracking their
+// relationships.
+// Use the functions above to manipulate SyncGroups.
+
+var (
+ // sgDataKeyScanPrefix is the prefix used to scan SyncGroup data entries.
+ sgDataKeyScanPrefix = util.JoinKeyParts(util.SyncPrefix, sgPrefix, "d")
+
+ // sgNameKeyScanPrefix is the prefix used to scan SyncGroup name entries.
+ sgNameKeyScanPrefix = util.JoinKeyParts(util.SyncPrefix, sgPrefix, "n")
+)
+
+// sgDataKey returns the key used to access the SyncGroup data entry.
+func sgDataKey(gid interfaces.GroupId) string {
+ return util.JoinKeyParts(util.SyncPrefix, sgPrefix, "d", fmt.Sprintf("%d", gid))
+}
+
+// sgNameKey returns the key used to access the SyncGroup name entry.
+func sgNameKey(name string) string {
+ return util.JoinKeyParts(util.SyncPrefix, sgPrefix, "n", name)
+}
+
+// splitSgNameKey is the inverse of sgNameKey and returns the SyncGroup name.
+func splitSgNameKey(ctx *context.T, key string) (string, error) {
+ prefix := util.JoinKeyParts(util.SyncPrefix, sgPrefix, "n", "")
+
+ // Note that the actual SyncGroup name may contain ":" as a separator.
+ if !strings.HasPrefix(key, prefix) {
+ return "", verror.New(verror.ErrInternal, ctx, "invalid sgNamekey", key)
+ }
+ return strings.TrimPrefix(key, prefix), nil
+}
+
+// hasSGDataEntry returns true if the SyncGroup data entry exists.
+func hasSGDataEntry(sntx store.SnapshotOrTransaction, gid interfaces.GroupId) (bool, error) {
+ // TODO(rdaoud): optimize to avoid the unneeded fetch/decode of the data.
+ var sg interfaces.SyncGroup
+ if err := util.Get(nil, sntx, sgDataKey(gid), &sg); err != nil {
+ if verror.ErrorID(err) == verror.ErrNoExist.ID {
+ err = nil
+ }
+ return false, err
+ }
+ return true, nil
+}
+
+// hasSGNameEntry returns true if the SyncGroup name entry exists.
+func hasSGNameEntry(sntx store.SnapshotOrTransaction, name string) (bool, error) {
+ // TODO(rdaoud): optimize to avoid the unneeded fetch/decode of the data.
+ var gid interfaces.GroupId
+ if err := util.Get(nil, sntx, sgNameKey(name), &gid); err != nil {
+ if verror.ErrorID(err) == verror.ErrNoExist.ID {
+ err = nil
+ }
+ return false, err
+ }
+ return true, nil
+}
+
+// setSGDataEntry stores the SyncGroup data entry.
+func setSGDataEntry(ctx *context.T, tx store.Transaction, gid interfaces.GroupId, sg *interfaces.SyncGroup) error {
+ return util.Put(ctx, tx, sgDataKey(gid), sg)
+}
+
+// setSGNameEntry stores the SyncGroup name entry.
+func setSGNameEntry(ctx *context.T, tx store.Transaction, name string, gid interfaces.GroupId) error {
+ return util.Put(ctx, tx, sgNameKey(name), gid)
+}
+
+// getSGDataEntry retrieves the SyncGroup data for a given group ID.
+func getSGDataEntry(ctx *context.T, st store.StoreReader, gid interfaces.GroupId) (*interfaces.SyncGroup, error) {
+ var sg interfaces.SyncGroup
+ if err := util.Get(ctx, st, sgDataKey(gid), &sg); err != nil {
+ return nil, err
+ }
+ return &sg, nil
+}
+
+// getSGNameEntry retrieves the SyncGroup name to ID mapping.
+func getSGNameEntry(ctx *context.T, st store.StoreReader, name string) (interfaces.GroupId, error) {
+ var gid interfaces.GroupId
+ if err := util.Get(ctx, st, sgNameKey(name), &gid); err != nil {
+ return gid, err
+ }
+ return gid, nil
+}
+
+// delSGDataEntry deletes the SyncGroup data entry.
+func delSGDataEntry(ctx *context.T, tx store.Transaction, gid interfaces.GroupId) error {
+ return util.Delete(ctx, tx, sgDataKey(gid))
+}
+
+// delSGNameEntry deletes the SyncGroup name to ID mapping.
+func delSGNameEntry(ctx *context.T, tx store.Transaction, name string) error {
+ return util.Delete(ctx, tx, sgNameKey(name))
+}
+
+////////////////////////////////////////////////////////////
+// SyncGroup methods between Client and Syncbase.
+
+// TODO(hpucha): Pass blessings along.
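+// CreateSyncGroup creates the SyncGroup locally in a single transaction
+// (checking Database permissions, storing the SyncGroup with self as the only
+// joiner, and bootstrapping its prefixes for the sync watcher), then attempts
+// to publish it at the chosen server and in the mount tables.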
+func (sd *syncDatabase) CreateSyncGroup(ctx *context.T, call rpc.ServerCall, sgName string, spec wire.SyncGroupSpec, myInfo wire.SyncGroupMemberInfo) error {
+ vlog.VI(2).Infof("sync: CreateSyncGroup: begin: %s", sgName)
+ defer vlog.VI(2).Infof("sync: CreateSyncGroup: end: %s", sgName)
+
+ err := store.RunInTransaction(sd.db.St(), func(tx store.Transaction) error {
+ // Check permissions on Database.
+ if err := sd.db.CheckPermsInternal(ctx, call, tx); err != nil {
+ return err
+ }
+
+ // TODO(hpucha): Check prefix ACLs on all SG prefixes.
+ // This may need another method on util.Database interface.
+
+ // TODO(hpucha): Do some SG ACL checking. Check creator
+ // has Admin privilege.
+
+ // Get this Syncbase's sync module handle.
+ ss := sd.sync.(*syncService)
+
+ // Instantiate sg. Add self as joiner.
+ sg := &interfaces.SyncGroup{
+ Id: newSyncGroupId(),
+ Name: sgName,
+ SpecVersion: newSyncGroupVersion(),
+ Spec: spec,
+ Creator: ss.name,
+ AppName: sd.db.App().Name(),
+ DbName: sd.db.Name(),
+ Status: interfaces.SyncGroupStatusPublishPending,
+ Joiners: map[string]wire.SyncGroupMemberInfo{ss.name: myInfo},
+ }
+
+ if err := addSyncGroup(ctx, tx, sg); err != nil {
+ return err
+ }
+
+ // TODO(hpucha): Bootstrap DAG/Genvector etc for syncing the SG metadata.
+
+ // Take a snapshot of the data to bootstrap the SyncGroup.
+ return sd.bootstrapSyncGroup(ctx, tx, spec.Prefixes)
+ })
+
+ if err != nil {
+ return err
+ }
+
+ // Local SG create succeeded. Publish the SG at the chosen server.
+ sd.publishSyncGroup(ctx, call, sgName)
+
+ // Publish at the chosen mount table and in the neighborhood.
+ sd.publishInMountTables(ctx, call, spec)
+
+ return nil
+}
+
+// TODO(hpucha): Pass blessings along.
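+// JoinSyncGroup first checks whether the SyncGroup is already known locally,
+// in which case the join is idempotent and the existing spec is returned.
+// Otherwise it contacts a SyncGroup Admin to join, verifies that the
+// SyncGroup belongs to this app/database, stores the SyncGroup locally, and
+// bootstraps its prefixes for the sync watcher.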
+func (sd *syncDatabase) JoinSyncGroup(ctx *context.T, call rpc.ServerCall, sgName string, myInfo wire.SyncGroupMemberInfo) (wire.SyncGroupSpec, error) {
+ vlog.VI(2).Infof("sync: JoinSyncGroup: begin: %s", sgName)
+ defer vlog.VI(2).Infof("sync: JoinSyncGroup: end: %s", sgName)
+
+ var sgErr error
+ var sg *interfaces.SyncGroup
+ nullSpec := wire.SyncGroupSpec{}
+
+ err := store.RunInTransaction(sd.db.St(), func(tx store.Transaction) error {
+ // Check permissions on Database.
+ if err := sd.db.CheckPermsInternal(ctx, call, tx); err != nil {
+ return err
+ }
+
+ // Check if SyncGroup already exists.
+ sg, sgErr = getSyncGroupByName(ctx, tx, sgName)
+ if sgErr != nil {
+ return sgErr
+ }
+
+ // SyncGroup already exists. Possibilities include created
+ // locally, already joined locally or published at the device as
+ // a result of SyncGroup creation on a different device.
+ //
+ // TODO(hpucha): Handle the above cases. If the SG was published
+ // locally, but not joined, we need to bootstrap the DAG and
+ // watcher. If multiple joins are done locally, we may want to
+ // ref count the SG state and track the leaves accordingly. So
+ // we may need to add some local state for each SyncGroup.
+
+ // Check SG ACL.
+ return authorize(ctx, call.Security(), sg)
+ })
+
+ // The presented blessing is allowed to make this Syncbase instance join
+ // the specified SyncGroup, but this Syncbase instance has in fact
+ // already joined the SyncGroup. Join is idempotent, so we simply return
+ // the spec to indicate success.
+ if err == nil {
+ return sg.Spec, nil
+ }
+
+ // Join is not allowed (possibilities include Database permissions check
+ // failed, SG ACL check failed or error during fetching SG information).
+ if verror.ErrorID(sgErr) != verror.ErrNoExist.ID {
+ return nullSpec, err
+ }
+
+ // Brand new join.
+
+ // Get this Syncbase's sync module handle.
+ ss := sd.sync.(*syncService)
+
+ // Contact a SyncGroup Admin to join the SyncGroup.
+ sg = &interfaces.SyncGroup{}
+ *sg, err = sd.joinSyncGroupAtAdmin(ctx, call, sgName, ss.name, myInfo)
+ if err != nil {
+ return nullSpec, err
+ }
+
+ // Verify that the app/db combination is valid for this SyncGroup.
+ if sg.AppName != sd.db.App().Name() || sg.DbName != sd.db.Name() {
+ return nullSpec, verror.New(verror.ErrBadArg, ctx, "bad app/db with syncgroup")
+ }
+
+ err = store.RunInTransaction(sd.db.St(), func(tx store.Transaction) error {
+
+ // TODO(hpucha): Bootstrap DAG/Genvector etc for syncing the SG metadata.
+
+ // TODO(hpucha): Get SG Deltas from Admin device.
+
+ if err := addSyncGroup(ctx, tx, sg); err != nil {
+ return err
+ }
+
+ // Take a snapshot of the data to bootstrap the SyncGroup.
+ return sd.bootstrapSyncGroup(ctx, tx, sg.Spec.Prefixes)
+ })
+
+ if err != nil {
+ return nullSpec, err
+ }
+
+ // Publish at the chosen mount table and in the neighborhood.
+ sd.publishInMountTables(ctx, call, sg.Spec)
+
+ return sg.Spec, nil
+}
+
+func (sd *syncDatabase) GetSyncGroupNames(ctx *context.T, call rpc.ServerCall) ([]string, error) {
+ var sgNames []string
+
+ vlog.VI(2).Infof("sync: GetSyncGroupNames: begin")
+ defer vlog.VI(2).Infof("sync: GetSyncGroupNames: end: %v", sgNames)
+
+ sn := sd.db.St().NewSnapshot()
+ defer sn.Abort()
+
+ // Check permissions on Database.
+ if err := sd.db.CheckPermsInternal(ctx, call, sn); err != nil {
+ return nil, err
+ }
+
+ // Scan all the SyncGroup names found in the Database.
+ scanStart, scanLimit := util.ScanPrefixArgs(sgNameKeyScanPrefix, "")
+ stream := sn.Scan(scanStart, scanLimit)
+ var key []byte
+ for stream.Advance() {
+ sgName, err := splitSgNameKey(ctx, string(stream.Key(key)))
+ if err != nil {
+ return nil, err
+ }
+ sgNames = append(sgNames, sgName)
+ }
+
+ if err := stream.Err(); err != nil {
+ return nil, err
+ }
+
+ return sgNames, nil
+}
+
+func (sd *syncDatabase) GetSyncGroupSpec(ctx *context.T, call rpc.ServerCall, sgName string) (wire.SyncGroupSpec, string, error) {
+ var spec wire.SyncGroupSpec
+
+ vlog.VI(2).Infof("sync: GetSyncGroupSpec: begin %s", sgName)
+ defer vlog.VI(2).Infof("sync: GetSyncGroupSpec: end: %s spec %v", sgName, spec)
+
+ sn := sd.db.St().NewSnapshot()
+ defer sn.Abort()
+
+ // Check permissions on Database.
+ if err := sd.db.CheckPermsInternal(ctx, call, sn); err != nil {
+ return spec, "", err
+ }
+
+ // Get the SyncGroup information.
+ sg, err := getSyncGroupByName(ctx, sn, sgName)
+ if err != nil {
+ return spec, "", err
+ }
+ // TODO(hpucha): Check SyncGroup ACL.
+
+ spec = sg.Spec
+ return spec, sg.SpecVersion, nil
+}
+
+func (sd *syncDatabase) GetSyncGroupMembers(ctx *context.T, call rpc.ServerCall, sgName string) (map[string]wire.SyncGroupMemberInfo, error) {
+ var members map[string]wire.SyncGroupMemberInfo
+
+ vlog.VI(2).Infof("sync: GetSyncGroupMembers: begin %s", sgName)
+ defer vlog.VI(2).Infof("sync: GetSyncGroupMembers: end: %s members %v", sgName, members)
+
+ sn := sd.db.St().NewSnapshot()
+ defer sn.Abort()
+
+ // Check permissions on Database.
+ if err := sd.db.CheckPermsInternal(ctx, call, sn); err != nil {
+ return members, err
+ }
+
+ // Get the SyncGroup information.
+ sg, err := getSyncGroupByName(ctx, sn, sgName)
+ if err != nil {
+ return members, err
+ }
+
+ // TODO(hpucha): Check SyncGroup ACL.
+
+ members = sg.Joiners
+ return members, nil
+}
+
+// TODO(hpucha): Enable syncing syncgroup metadata.
+func (sd *syncDatabase) SetSyncGroupSpec(ctx *context.T, call rpc.ServerCall, sgName string, spec wire.SyncGroupSpec, version string) error {
+ vlog.VI(2).Infof("sync: SetSyncGroupSpec: begin %s %v %s", sgName, spec, version)
+ defer vlog.VI(2).Infof("sync: SetSyncGroupSpec: end: %s", sgName)
+
+ err := store.RunInTransaction(sd.db.St(), func(tx store.Transaction) error {
+ // Check permissions on Database.
+ if err := sd.db.CheckPermsInternal(ctx, call, tx); err != nil {
+ return err
+ }
+
+ sg, err := getSyncGroupByName(ctx, tx, sgName)
+ if err != nil {
+ return err
+ }
+
+ // TODO(hpucha): Check SyncGroup ACL. Perform version checking.
+
+ sg.Spec = spec
+ return setSGDataEntry(ctx, tx, sg.Id, sg)
+ })
+ return err
+}
+
+//////////////////////////////
+// Helper functions
+
+// TODO(hpucha): Call this periodically until we are able to contact the remote peer.
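+// publishSyncGroup publishes the SyncGroup at the remote server named by the
+// SyncGroup name. A nil reply means the publish succeeded; an ErrExist reply
+// means the name is already claimed by a different SyncGroup and the local
+// status is persisted as PublishRejected; any other error is treated as a
+// temporary failure.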
+func (sd *syncDatabase) publishSyncGroup(ctx *context.T, call rpc.ServerCall, sgName string) error {
+ sg, err := getSyncGroupByName(ctx, sd.db.St(), sgName)
+ if err != nil {
+ return err
+ }
+
+ if sg.Status != interfaces.SyncGroupStatusPublishPending {
+ return nil
+ }
+
+ c := interfaces.SyncClient(sgName)
+ err = c.PublishSyncGroup(ctx, *sg)
+
+ // Publish failed temporarily. Retry later.
+ // TODO(hpucha): Is there an RPC error that we can check here?
+ if err != nil && verror.ErrorID(err) != verror.ErrExist.ID {
+ return err
+ }
+
+ // Publish succeeded.
+ if err == nil {
+ // TODO(hpucha): Get SG Deltas from publisher. Obtaining the
+ // new version from the publisher prevents SG conflicts.
+ return err
+ }
+
+ // Publish rejected. Persist that to avoid retrying in the
+ // future and to remember the split universe scenario.
+ err = store.RunInTransaction(sd.db.St(), func(tx store.Transaction) error {
+ // Ensure SG still exists.
+ sg, err := getSyncGroupByName(ctx, tx, sgName)
+ if err != nil {
+ return err
+ }
+
+ sg.Status = interfaces.SyncGroupStatusPublishRejected
+ return setSGDataEntry(ctx, tx, sg.Id, sg)
+ })
+ return err
+}
+
+// bootstrapSyncGroup inserts into the transaction log a SyncGroup operation
+// and a set of Snapshot operations to notify the sync watcher of the SyncGroup
+// prefixes to start accepting, and of the initial state of existing store keys
+// that match these prefixes (both data and permission keys).
+// TODO(rdaoud): this operation scans the managed keys of the database and can
+// be time consuming. Consider doing it asynchronously and letting the server
+// reply to the client earlier. However it must happen within the scope of this
+// transaction (and its snapshot view).
+func (sd *syncDatabase) bootstrapSyncGroup(ctx *context.T, tx store.Transaction, prefixes []string) error {
+ if len(prefixes) == 0 {
+ return verror.New(verror.ErrInternal, ctx, "no prefixes specified")
+ }
+
+ // Get the store options to retrieve the list of managed key prefixes.
+ opts, err := watchable.GetOptions(sd.db.St())
+ if err != nil {
+ return err
+ }
+ if len(opts.ManagedPrefixes) == 0 {
+ return verror.New(verror.ErrInternal, ctx, "store has no managed prefixes")
+ }
+
+ // Notify the watcher of the SyncGroup prefixes to start accepting.
+ if err := watchable.AddSyncGroupOp(ctx, tx, prefixes, false); err != nil {
+ return err
+ }
+
+ // Loop over the store managed key prefixes (e.g. data and permissions).
+ // For each one, scan the ranges of the given SyncGroup prefixes. For
+ // each matching key, insert a snapshot operation in the log. Scanning
+ // is done over the version entries to retrieve the matching keys and
+ // their version numbers (the key values). Remove the version prefix
+ // from the key used in the snapshot operation.
+ // TODO(rdaoud): for SyncGroup prefixes, there should be a separation
+ // between their representation at the client (a list of (db, prefix)
+ // tuples) and internally as strings that match the store's key format.
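+ // For example (assuming ":" separators), a version entry keyed
+ // "<VersionPrefix>:$row:foo1" yields a snapshot operation for the key
+ // "$row:foo1", with the entry's value carrying foo1's current version.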
+ for _, mp := range opts.ManagedPrefixes {
+ for _, p := range prefixes {
+ start, limit := util.ScanPrefixArgs(util.JoinKeyParts(util.VersionPrefix, mp), p)
+ stream := tx.Scan(start, limit)
+ for stream.Advance() {
+ k, v := stream.Key(nil), stream.Value(nil)
+ parts := util.SplitKeyParts(string(k))
+ if len(parts) < 2 {
+ vlog.Fatalf("sync: bootstrapSyncGroup: invalid version key %s", string(k))
+
+ }
+ key := []byte(util.JoinKeyParts(parts[1:]...))
+ if err := watchable.AddSyncSnapshotOp(ctx, tx, key, v); err != nil {
+ return err
+ }
+ }
+ if err := stream.Err(); err != nil {
+ return err
+ }
+ }
+ }
+ return nil
+}
+
+func (sd *syncDatabase) publishInMountTables(ctx *context.T, call rpc.ServerCall, spec wire.SyncGroupSpec) error {
+ // Get this Syncbase's sync module handle.
+ ss := sd.sync.(*syncService)
+
+ for _, mt := range spec.MountTables {
+ name := naming.Join(mt, ss.name)
+ // TODO(hpucha): Is this add idempotent? Appears to be from code.
+ // Confirm that it is ok to use absolute names here.
+ if err := ss.server.AddName(name); err != nil {
+ return err
+ }
+ }
+
+ // TODO(hpucha): Do we have to publish in neighborhood explicitly?
+
+ return nil
+}
+
+func (sd *syncDatabase) joinSyncGroupAtAdmin(ctx *context.T, call rpc.ServerCall, sgName, name string, myInfo wire.SyncGroupMemberInfo) (interfaces.SyncGroup, error) {
+ c := interfaces.SyncClient(sgName)
+ return c.JoinSyncGroupAtAdmin(ctx, sgName, name, myInfo)
+
+ // TODO(hpucha): Try to join using an Admin on neighborhood if the publisher is not reachable.
+}
+
+func authorize(ctx *context.T, call security.Call, sg *interfaces.SyncGroup) error {
+ auth := access.TypicalTagTypePermissionsAuthorizer(sg.Spec.Perms)
+ if err := auth.Authorize(ctx, call); err != nil {
+ return verror.New(verror.ErrNoAccess, ctx, err)
+ }
+ return nil
+}
+
+////////////////////////////////////////////////////////////
+// Methods for SyncGroup create/join between Syncbases.
+
+func (s *syncService) PublishSyncGroup(ctx *context.T, call rpc.ServerCall, sg interfaces.SyncGroup) error {
+ st, err := s.getDbStore(ctx, call, sg.AppName, sg.DbName)
+ if err != nil {
+ return err
+ }
+
+ err = store.RunInTransaction(st, func(tx store.Transaction) error {
+ localSG, err := getSyncGroupByName(ctx, tx, sg.Name)
+
+ if err != nil && verror.ErrorID(err) != verror.ErrNoExist.ID {
+ return err
+ }
+
+ // SG name already claimed.
+ if err == nil && localSG.Id != sg.Id {
+ return verror.New(verror.ErrExist, ctx, sg.Name)
+ }
+
+ // TODO(hpucha): Bootstrap DAG/Genvector etc for syncing the SG
+ // metadata if needed.
+ //
+ // TODO(hpucha): Catch up on SG versions so far.
+
+ // SG already published. Update if needed.
+ if err == nil && localSG.Id == sg.Id {
+ if localSG.Status == interfaces.SyncGroupStatusPublishPending {
+ localSG.Status = interfaces.SyncGroupStatusRunning
+ return setSGDataEntry(ctx, tx, localSG.Id, localSG)
+ }
+ return nil
+ }
+
+ // Publish the SyncGroup.
+
+ // TODO(hpucha): Use some ACL check to allow/deny publishing.
+ // TODO(hpucha): Ensure node is on Admin ACL.
+
+ // TODO(hpucha): Default priority?
+ sg.Joiners[s.name] = wire.SyncGroupMemberInfo{}
+ sg.Status = interfaces.SyncGroupStatusRunning
+ return addSyncGroup(ctx, tx, &sg)
+ })
+
+ return err
+}
+
+func (s *syncService) JoinSyncGroupAtAdmin(ctx *context.T, call rpc.ServerCall, sgName, joinerName string, joinerInfo wire.SyncGroupMemberInfo) (interfaces.SyncGroup, error) {
+ var dbSt store.Store
+ var gid interfaces.GroupId
+ var err error
+
+ // Find the database store for this SyncGroup.
+ //
+ // TODO(hpucha): At a high level, we have yet to decide if the SG name
+ // is stand-alone or is derived from the app/db namespace, based on the
+ // feedback from app developers (see discussion in SyncGroup API
+ // doc). If we decide to keep the SG name as stand-alone, this scan can
+ // be optimized by a lazy cache of sgname to <app, db> info.
+ s.forEachDatabaseStore(ctx, func(appName, dbName string, st store.Store) bool {
+ if gid, err = getSyncGroupId(ctx, st, sgName); err == nil {
+ // Found the SyncGroup being looked for.
+ dbSt = st
+ return true
+ }
+ return false
+ })
+
+ // SyncGroup not found.
+ if err != nil {
+ return interfaces.SyncGroup{}, verror.New(verror.ErrNoExist, ctx, "SyncGroup not found", sgName)
+ }
+
+ var sg *interfaces.SyncGroup
+ err = store.RunInTransaction(dbSt, func(tx store.Transaction) error {
+ var err error
+ sg, err = getSyncGroupById(ctx, tx, gid)
+ if err != nil {
+ return err
+ }
+
+ // Check SG ACL.
+ if err := authorize(ctx, call.Security(), sg); err != nil {
+ return err
+ }
+
+ // Add to joiner list.
+ sg.Joiners[joinerName] = joinerInfo
+ return setSGDataEntry(ctx, tx, sg.Id, sg)
+ })
+
+ if err != nil {
+ return interfaces.SyncGroup{}, err
+ }
+ return *sg, nil
+}
diff --git a/services/syncbase/vsync/syncgroup_test.go b/services/syncbase/vsync/syncgroup_test.go
new file mode 100644
index 0000000..6c87ee7
--- /dev/null
+++ b/services/syncbase/vsync/syncgroup_test.go
@@ -0,0 +1,493 @@
+// Copyright 2015 The Vanadium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package vsync
+
+// Tests for SyncGroup management and storage in Syncbase.
+
+import (
+ "reflect"
+ "testing"
+ "time"
+
+ "v.io/syncbase/v23/services/syncbase/nosql"
+ "v.io/syncbase/x/ref/services/syncbase/server/interfaces"
+ "v.io/syncbase/x/ref/services/syncbase/store"
+ _ "v.io/x/ref/runtime/factories/generic"
+)
+
+// checkSGStats verifies SyncGroup stats.
+func checkSGStats(t *testing.T, svc *mockService, which string, numSG, numMembers int) {
+ memberViewTTL = 0 // Always recompute the SyncGroup membership view.
+ svc.sync.refreshMembersIfExpired(nil)
+
+ view := svc.sync.allMembers
+ if num := len(view.members); num != numMembers {
+ t.Errorf("num-members (%s): got %v instead of %v", which, num, numMembers)
+ }
+
+ sgids := make(map[interfaces.GroupId]bool)
+ for _, info := range view.members {
+ for _, sgmi := range info.db2sg {
+ for gid := range sgmi {
+ sgids[gid] = true
+ }
+ }
+ }
+
+ if num := len(sgids); num != numSG {
+ t.Errorf("num-syncgroups (%s): got %v instead of %v", which, num, numSG)
+ }
+}
+
+// TestAddSyncGroup tests adding SyncGroups.
+func TestAddSyncGroup(t *testing.T) {
+ // Set a large value to prevent the initiator from running. Since this
+ // test adds a fake SyncGroup, if the initiator runs, it will attempt
+ // to initiate using this fake and partial SyncGroup data.
+ peerSyncInterval = 1 * time.Hour
+ svc := createService(t)
+ defer destroyService(t, svc)
+ st := svc.St()
+
+ checkSGStats(t, svc, "add-1", 0, 0)
+
+ // Add a SyncGroup.
+
+ sgName := "foobar"
+ sgId := interfaces.GroupId(1234)
+
+ sg := &interfaces.SyncGroup{
+ Name: sgName,
+ Id: sgId,
+ AppName: "mockApp",
+ DbName: "mockDB",
+ Creator: "mockCreator",
+ SpecVersion: "etag-0",
+ Spec: nosql.SyncGroupSpec{
+ Prefixes: []string{"foo", "bar"},
+ },
+ Joiners: map[string]nosql.SyncGroupMemberInfo{
+ "phone": nosql.SyncGroupMemberInfo{SyncPriority: 10},
+ "tablet": nosql.SyncGroupMemberInfo{SyncPriority: 25},
+ "cloud": nosql.SyncGroupMemberInfo{SyncPriority: 1},
+ },
+ }
+
+ tx := st.NewTransaction()
+ if err := addSyncGroup(nil, tx, sg); err != nil {
+ t.Errorf("cannot add SyncGroup ID %d: %v", sg.Id, err)
+ }
+ if err := tx.Commit(); err != nil {
+ t.Errorf("cannot commit adding SyncGroup ID %d: %v", sg.Id, err)
+ }
+
+ // Verify SyncGroup ID, name, and data.
+
+ if id, err := getSyncGroupId(nil, st, sgName); err != nil || id != sgId {
+ t.Errorf("cannot get ID of SyncGroup %s: got %d instead of %d; err: %v", sgName, id, sgId, err)
+ }
+ if name, err := getSyncGroupName(nil, st, sgId); err != nil || name != sgName {
+ t.Errorf("cannot get name of SyncGroup %d: got %s instead of %s; err: %v",
+ sgId, name, sgName, err)
+ }
+
+ sgOut, err := getSyncGroupById(nil, st, sgId)
+ if err != nil {
+ t.Errorf("cannot get SyncGroup by ID %d: %v", sgId, err)
+ }
+ if !reflect.DeepEqual(sgOut, sg) {
+ t.Errorf("invalid SyncGroup data for group ID %d: got %v instead of %v", sgId, sgOut, sg)
+ }
+
+ sgOut, err = getSyncGroupByName(nil, st, sgName)
+ if err != nil {
+ t.Errorf("cannot get SyncGroup by Name %s: %v", sgName, err)
+ }
+ if !reflect.DeepEqual(sgOut, sg) {
+ t.Errorf("invalid SyncGroup data for group name %s: got %v instead of %v", sgName, sgOut, sg)
+ }
+
+ // Verify membership data.
+
+ expMembers := map[string]uint32{"phone": 1, "tablet": 1, "cloud": 1}
+
+ members := svc.sync.getMembers(nil)
+ if !reflect.DeepEqual(members, expMembers) {
+ t.Errorf("invalid SyncGroup members: got %v instead of %v", members, expMembers)
+ }
+
+ view := svc.sync.allMembers
+ for mm := range members {
+ mi := view.members[mm]
+ if mi == nil {
+ t.Errorf("cannot get info for SyncGroup member %s", mm)
+ }
+ if len(mi.db2sg) != 1 {
+ t.Errorf("invalid info for SyncGroup member %s: %v", mm, mi)
+ }
+ var sgmi sgMemberInfo
+ for _, v := range mi.db2sg {
+ sgmi = v
+ break
+ }
+ if len(sgmi) != 1 {
+ t.Errorf("invalid member info for SyncGroup member %s: %v", mm, sgmi)
+ }
+ expJoinerInfo := sg.Joiners[mm]
+ joinerInfo := sgmi[sgId]
+ if !reflect.DeepEqual(joinerInfo, expJoinerInfo) {
+ t.Errorf("invalid Info for SyncGroup member %s in group ID %d: got %v instead of %v",
+ mm, sgId, joinerInfo, expJoinerInfo)
+ }
+ }
+
+ checkSGStats(t, svc, "add-2", 1, 3)
+
+ // Adding a SyncGroup for a pre-existing group ID or name should fail.
+
+ sg.Name = "another-name"
+
+ tx = st.NewTransaction()
+ if err = addSyncGroup(nil, tx, sg); err == nil {
+ t.Errorf("re-adding SyncGroup %d did not fail", sgId)
+ }
+ tx.Abort()
+
+ sg.Name = sgName
+ sg.Id = interfaces.GroupId(5555)
+
+ tx = st.NewTransaction()
+ if err = addSyncGroup(nil, tx, sg); err == nil {
+ t.Errorf("adding SyncGroup %s with a different ID did not fail", sgName)
+ }
+ tx.Abort()
+
+ checkSGStats(t, svc, "add-3", 1, 3)
+
+ // Fetching a non-existing SyncGroup by ID or name should fail.
+
+ badName := "not-available"
+ badId := interfaces.GroupId(999)
+ if id, err := getSyncGroupId(nil, st, badName); err == nil {
+ t.Errorf("found non-existing SyncGroup %s: got ID %d", badName, id)
+ }
+ if name, err := getSyncGroupName(nil, st, badId); err == nil {
+ t.Errorf("found non-existing SyncGroup %d: got name %s", badId, name)
+ }
+ if sg, err := getSyncGroupByName(nil, st, badName); err == nil {
+ t.Errorf("found non-existing SyncGroup %s: got %v", badName, sg)
+ }
+ if sg, err := getSyncGroupById(nil, st, badId); err == nil {
+ t.Errorf("found non-existing SyncGroup %d: got %v", badId, sg)
+ }
+}
+
+// TestInvalidAddSyncGroup tests that adding invalid SyncGroups fails.
+func TestInvalidAddSyncGroup(t *testing.T) {
+ // Set a large value to prevent the threads from firing.
+ peerSyncInterval = 1 * time.Hour
+ svc := createService(t)
+ defer destroyService(t, svc)
+ st := svc.St()
+
+ checkBadAddSyncGroup := func(t *testing.T, st store.Store, sg *interfaces.SyncGroup, msg string) {
+ tx := st.NewTransaction()
+ if err := addSyncGroup(nil, tx, sg); err == nil {
+ t.Errorf("checkBadAddSyncGroup: adding bad SyncGroup (%s) did not fail", msg)
+ }
+ tx.Abort()
+ }
+
+ checkBadAddSyncGroup(t, st, nil, "nil SG")
+
+ sg := &interfaces.SyncGroup{Id: 1234}
+ checkBadAddSyncGroup(t, st, sg, "SG w/o name")
+
+ sg = &interfaces.SyncGroup{Name: "foobar"}
+ checkBadAddSyncGroup(t, st, sg, "SG w/o Id")
+
+ sg.Id = 1234
+ checkBadAddSyncGroup(t, st, sg, "SG w/o Version")
+
+ sg.SpecVersion = "v1"
+ checkBadAddSyncGroup(t, st, sg, "SG w/o Joiners")
+
+ sg.Joiners = map[string]nosql.SyncGroupMemberInfo{
+ "phone": nosql.SyncGroupMemberInfo{SyncPriority: 10},
+ }
+ checkBadAddSyncGroup(t, st, sg, "SG w/o Prefixes")
+}
+
+// TestDeleteSyncGroup tests deleting a SyncGroup.
+func TestDeleteSyncGroup(t *testing.T) {
+ // Set a large value to prevent the threads from firing.
+ peerSyncInterval = 1 * time.Hour
+ svc := createService(t)
+ defer destroyService(t, svc)
+ st := svc.St()
+
+ sgName := "foobar"
+ sgId := interfaces.GroupId(1234)
+
+ // Delete non-existing SyncGroups.
+
+ tx := st.NewTransaction()
+ if err := delSyncGroupById(nil, tx, sgId); err == nil {
+ t.Errorf("deleting a non-existing SyncGroup ID did not fail")
+ }
+ if err := delSyncGroupByName(nil, tx, sgName); err == nil {
+ t.Errorf("deleting a non-existing SyncGroup name did not fail")
+ }
+ tx.Abort()
+
+ checkSGStats(t, svc, "del-1", 0, 0)
+
+ // Create the SyncGroup to delete later.
+
+ sg := &interfaces.SyncGroup{
+ Name: sgName,
+ Id: sgId,
+ AppName: "mockApp",
+ DbName: "mockDB",
+ Creator: "mockCreator",
+ SpecVersion: "etag-0",
+ Spec: nosql.SyncGroupSpec{
+ Prefixes: []string{"foo", "bar"},
+ },
+ Joiners: map[string]nosql.SyncGroupMemberInfo{
+ "phone": nosql.SyncGroupMemberInfo{SyncPriority: 10},
+ "tablet": nosql.SyncGroupMemberInfo{SyncPriority: 25},
+ "cloud": nosql.SyncGroupMemberInfo{SyncPriority: 1},
+ },
+ }
+
+ tx = st.NewTransaction()
+ if err := addSyncGroup(nil, tx, sg); err != nil {
+ t.Errorf("creating SyncGroup ID %d failed: %v", sgId, err)
+ }
+ if err := tx.Commit(); err != nil {
+ t.Errorf("cannot commit adding SyncGroup ID %d: %v", sgId, err)
+ }
+
+ checkSGStats(t, svc, "del-2", 1, 3)
+
+ // Delete it by ID.
+
+ tx = st.NewTransaction()
+ if err := delSyncGroupById(nil, tx, sgId); err != nil {
+ t.Errorf("deleting SyncGroup ID %d failed: %v", sgId, err)
+ }
+ if err := tx.Commit(); err != nil {
+ t.Errorf("cannot commit deleting SyncGroup ID %d: %v", sgId, err)
+ }
+
+ checkSGStats(t, svc, "del-3", 0, 0)
+
+ // Create it again then delete it by name.
+
+ tx = st.NewTransaction()
+ if err := addSyncGroup(nil, tx, sg); err != nil {
+ t.Errorf("creating SyncGroup ID %d after delete failed: %v", sgId, err)
+ }
+ if err := tx.Commit(); err != nil {
+ t.Errorf("cannot commit adding SyncGroup ID %d after delete: %v", sgId, err)
+ }
+
+ checkSGStats(t, svc, "del-4", 1, 3)
+
+ tx = st.NewTransaction()
+ if err := delSyncGroupByName(nil, tx, sgName); err != nil {
+ t.Errorf("deleting SyncGroup name %s failed: %v", sgName, err)
+ }
+ if err := tx.Commit(); err != nil {
+ t.Errorf("cannot commit deleting SyncGroup name %s: %v", sgName, err)
+ }
+
+ checkSGStats(t, svc, "del-5", 0, 0)
+}
+
+// TestMultiSyncGroups tests creating multiple SyncGroups.
+func TestMultiSyncGroups(t *testing.T) {
+ // Set a large value to prevent the threads from firing.
+ peerSyncInterval = 1 * time.Hour
+ svc := createService(t)
+ defer destroyService(t, svc)
+ st := svc.St()
+
+ sgName1, sgName2 := "foo", "bar"
+ sgId1, sgId2 := interfaces.GroupId(1234), interfaces.GroupId(8888)
+
+ // Add two SyncGroups.
+
+ sg1 := &interfaces.SyncGroup{
+ Name: sgName1,
+ Id: sgId1,
+ AppName: "mockApp",
+ DbName: "mockDB",
+ Creator: "mockCreator",
+ SpecVersion: "etag-1",
+ Spec: nosql.SyncGroupSpec{
+ Prefixes: []string{"foo"},
+ },
+ Joiners: map[string]nosql.SyncGroupMemberInfo{
+ "phone": nosql.SyncGroupMemberInfo{SyncPriority: 10},
+ "tablet": nosql.SyncGroupMemberInfo{SyncPriority: 25},
+ "cloud": nosql.SyncGroupMemberInfo{SyncPriority: 1},
+ },
+ }
+ sg2 := &interfaces.SyncGroup{
+ Name: sgName2,
+ Id: sgId2,
+ AppName: "mockApp",
+ DbName: "mockDB",
+ Creator: "mockCreator",
+ SpecVersion: "etag-2",
+ Spec: nosql.SyncGroupSpec{
+ Prefixes: []string{"bar"},
+ },
+ Joiners: map[string]nosql.SyncGroupMemberInfo{
+ "tablet": nosql.SyncGroupMemberInfo{SyncPriority: 111},
+ "door": nosql.SyncGroupMemberInfo{SyncPriority: 33},
+ "lamp": nosql.SyncGroupMemberInfo{SyncPriority: 9},
+ },
+ }
+
+ tx := st.NewTransaction()
+ if err := addSyncGroup(nil, tx, sg1); err != nil {
+ t.Errorf("creating SyncGroup ID %d failed: %v", sgId1, err)
+ }
+ if err := tx.Commit(); err != nil {
+ t.Errorf("cannot commit adding SyncGroup ID %d: %v", sgId1, err)
+ }
+
+ checkSGStats(t, svc, "multi-1", 1, 3)
+
+ tx = st.NewTransaction()
+ if err := addSyncGroup(nil, tx, sg2); err != nil {
+ t.Errorf("creating SyncGroup ID %d failed: %v", sgId2, err)
+ }
+ if err := tx.Commit(); err != nil {
+ t.Errorf("cannot commit adding SyncGroup ID %d: %v", sgId2, err)
+ }
+
+ checkSGStats(t, svc, "multi-2", 2, 5)
+
+ // Verify membership data.
+
+ expMembers := map[string]uint32{"phone": 1, "tablet": 2, "cloud": 1, "door": 1, "lamp": 1}
+
+ members := svc.sync.getMembers(nil)
+ if !reflect.DeepEqual(members, expMembers) {
+ t.Errorf("invalid SyncGroup members: got %v instead of %v", members, expMembers)
+ }
+
+ expMemberInfo := map[string]*memberInfo{
+ "phone": &memberInfo{
+ db2sg: map[string]sgMemberInfo{
+ "mockapp:mockdb": sgMemberInfo{
+ sgId1: sg1.Joiners["phone"],
+ },
+ },
+ },
+ "tablet": &memberInfo{
+ db2sg: map[string]sgMemberInfo{
+ "mockapp:mockdb": sgMemberInfo{
+ sgId1: sg1.Joiners["tablet"],
+ sgId2: sg2.Joiners["tablet"],
+ },
+ },
+ },
+ "cloud": &memberInfo{
+ db2sg: map[string]sgMemberInfo{
+ "mockapp:mockdb": sgMemberInfo{
+ sgId1: sg1.Joiners["cloud"],
+ },
+ },
+ },
+ "door": &memberInfo{
+ db2sg: map[string]sgMemberInfo{
+ "mockapp:mockdb": sgMemberInfo{
+ sgId2: sg2.Joiners["door"],
+ },
+ },
+ },
+ "lamp": &memberInfo{
+ db2sg: map[string]sgMemberInfo{
+ "mockapp:mockdb": sgMemberInfo{
+ sgId2: sg2.Joiners["lamp"],
+ },
+ },
+ },
+ }
+
+ view := svc.sync.allMembers
+ for mm := range members {
+ mi := view.members[mm]
+ if mi == nil {
+ t.Errorf("cannot get info for SyncGroup member %s", mm)
+ }
+ expInfo := expMemberInfo[mm]
+ if !reflect.DeepEqual(mi, expInfo) {
+ t.Errorf("invalid Info for SyncGroup member %s: got %v instead of %v", mm, mi, expInfo)
+ }
+ }
+
+ // Delete the 1st SyncGroup.
+
+ tx = st.NewTransaction()
+ if err := delSyncGroupById(nil, tx, sgId1); err != nil {
+ t.Errorf("deleting SyncGroup ID %d failed: %v", sgId1, err)
+ }
+ if err := tx.Commit(); err != nil {
+ t.Errorf("cannot commit deleting SyncGroup ID %d: %v", sgId1, err)
+ }
+
+ checkSGStats(t, svc, "multi-3", 1, 3)
+
+ // Verify SyncGroup membership data.
+
+ expMembers = map[string]uint32{"tablet": 1, "door": 1, "lamp": 1}
+
+ members = svc.sync.getMembers(nil)
+ if !reflect.DeepEqual(members, expMembers) {
+ t.Errorf("invalid SyncGroup members: got %v instead of %v", members, expMembers)
+ }
+
+ expMemberInfo = map[string]*memberInfo{
+ "tablet": &memberInfo{
+ db2sg: map[string]sgMemberInfo{
+ "mockapp:mockdb": sgMemberInfo{
+ sgId2: sg2.Joiners["tablet"],
+ },
+ },
+ },
+ "door": &memberInfo{
+ db2sg: map[string]sgMemberInfo{
+ "mockapp:mockdb": sgMemberInfo{
+ sgId2: sg2.Joiners["door"],
+ },
+ },
+ },
+ "lamp": &memberInfo{
+ db2sg: map[string]sgMemberInfo{
+ "mockapp:mockdb": sgMemberInfo{
+ sgId2: sg2.Joiners["lamp"],
+ },
+ },
+ },
+ }
+
+ view = svc.sync.allMembers
+ for mm := range members {
+ mi := view.members[mm]
+ if mi == nil {
+ t.Errorf("cannot get info for SyncGroup member %s", mm)
+ }
+ expInfo := expMemberInfo[mm]
+ if !reflect.DeepEqual(mi, expInfo) {
+ t.Errorf("invalid Info for SyncGroup member %s: got %v instead of %v", mm, mi, expInfo)
+ }
+ }
+}
diff --git a/services/syncbase/vsync/test_util.go b/services/syncbase/vsync/test_util.go
new file mode 100644
index 0000000..8328fda
--- /dev/null
+++ b/services/syncbase/vsync/test_util.go
@@ -0,0 +1,154 @@
+// Copyright 2015 The Vanadium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package vsync
+
+// Utilities for testing sync.
+
+import (
+ "fmt"
+ "os"
+ "path"
+ "testing"
+ "time"
+
+ wire "v.io/syncbase/v23/services/syncbase/nosql"
+ "v.io/syncbase/x/ref/services/syncbase/clock"
+ "v.io/syncbase/x/ref/services/syncbase/server/interfaces"
+ "v.io/syncbase/x/ref/services/syncbase/server/util"
+ "v.io/syncbase/x/ref/services/syncbase/server/watchable"
+ "v.io/syncbase/x/ref/services/syncbase/store"
+ "v.io/v23/context"
+ "v.io/v23/rpc"
+ "v.io/v23/security/access"
+ "v.io/v23/verror"
+ "v.io/x/ref/test"
+)
+
+// mockService emulates a Syncbase service that includes store and sync.
+// It is used to access a mock application.
+type mockService struct {
+ engine string
+ dir string
+ st store.Store
+ sync *syncService
+ shutdown func()
+}
+
+func (s *mockService) St() store.Store {
+ return s.st
+}
+
+func (s *mockService) Sync() interfaces.SyncServerMethods {
+ return s.sync
+}
+
+func (s *mockService) App(ctx *context.T, call rpc.ServerCall, appName string) (interfaces.App, error) {
+ return &mockApp{st: s.st}, nil
+}
+
+func (s *mockService) AppNames(ctx *context.T, call rpc.ServerCall) ([]string, error) {
+ return []string{"mockapp"}, nil
+}
+
+// mockApp emulates a Syncbase App. It is used to access a mock database.
+type mockApp struct {
+ st store.Store
+}
+
+func (a *mockApp) NoSQLDatabase(ctx *context.T, call rpc.ServerCall, dbName string) (interfaces.Database, error) {
+ return &mockDatabase{st: a.st}, nil
+}
+
+func (a *mockApp) NoSQLDatabaseNames(ctx *context.T, call rpc.ServerCall) ([]string, error) {
+ return []string{"mockdb"}, nil
+}
+
+func (a *mockApp) CreateNoSQLDatabase(ctx *context.T, call rpc.ServerCall, dbName string, perms access.Permissions, metadata *wire.SchemaMetadata) error {
+ return verror.NewErrNotImplemented(ctx)
+}
+
+func (a *mockApp) DeleteNoSQLDatabase(ctx *context.T, call rpc.ServerCall, dbName string) error {
+ return verror.NewErrNotImplemented(ctx)
+}
+
+func (a *mockApp) SetDatabasePerms(ctx *context.T, call rpc.ServerCall, dbName string, perms access.Permissions, version string) error {
+ return verror.NewErrNotImplemented(ctx)
+}
+
+func (a *mockApp) Service() interfaces.Service {
+ return nil
+}
+
+func (a *mockApp) Name() string {
+ return "mockapp"
+}
+
+// mockDatabase emulates a Syncbase Database. It is used to test sync functionality.
+type mockDatabase struct {
+ st store.Store
+}
+
+func (d *mockDatabase) St() store.Store {
+ return d.st
+}
+
+func (d *mockDatabase) CheckPermsInternal(ctx *context.T, call rpc.ServerCall, st store.StoreReader) error {
+ return verror.NewErrNotImplemented(ctx)
+}
+
+func (d *mockDatabase) SetPermsInternal(ctx *context.T, call rpc.ServerCall, perms access.Permissions, version string) error {
+ return verror.NewErrNotImplemented(ctx)
+}
+
+func (d *mockDatabase) Name() string {
+ return "mockdb"
+}
+
+func (d *mockDatabase) App() interfaces.App {
+ return nil
+}
+
+// createService creates a mock Syncbase service used for testing sync functionality.
+func createService(t *testing.T) *mockService {
+ ctx, shutdown := test.V23Init()
+ engine := "leveldb"
+ opts := util.OpenOptions{CreateIfMissing: true, ErrorIfExists: false}
+ dir := fmt.Sprintf("%s/vsync_test_%d_%d", os.TempDir(), os.Getpid(), time.Now().UnixNano())
+
+ st, err := util.OpenStore(engine, path.Join(dir, engine), opts)
+ if err != nil {
+ t.Fatalf("cannot create store %s (%s): %v", engine, dir, err)
+ }
+ vclock := clock.NewVClock(st)
+ st, err = watchable.Wrap(st, vclock, &watchable.Options{
+ ManagedPrefixes: []string{util.RowPrefix, util.PermsPrefix},
+ })
+ if err != nil {
+ t.Fatalf("cannot wrap store %s (%s): %v", engine, dir, err)
+ }
+
+ s := &mockService{
+ st: st,
+ engine: engine,
+ dir: dir,
+ shutdown: shutdown,
+ }
+ if s.sync, err = New(ctx, nil, s, nil, dir); err != nil {
+ util.DestroyStore(engine, dir)
+ t.Fatalf("cannot create sync service: %v", err)
+ }
+ return s
+}
+
+// destroyService cleans up the mock Syncbase service.
+func destroyService(t *testing.T, s *mockService) {
+ defer s.shutdown()
+ defer s.sync.Close()
+ if err := util.DestroyStore(s.engine, s.dir); err != nil {
+ t.Fatalf("cannot destroy store %s (%s): %v", s.engine, s.dir, err)
+ }
+}
+
+// makeRowKey returns the database row key for a given application key.
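+// For example, makeRowKey("foo1") would return "$row:foo1", assuming
+// util.RowPrefix is "$row" and ":" is the key-part separator.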
+func makeRowKey(key string) string {
+ return util.JoinKeyParts(util.RowPrefix, key)
+}
diff --git a/services/syncbase/vsync/testdata/local-init-00.log.sync b/services/syncbase/vsync/testdata/local-init-00.log.sync
new file mode 100644
index 0000000..7435348
--- /dev/null
+++ b/services/syncbase/vsync/testdata/local-init-00.log.sync
@@ -0,0 +1,6 @@
+# Create an object locally and update it twice (linked-list).
+# The format is: <cmd>|<objid>|<version>|<parent1>|<parent2>|<logrec>|<txid>|<txcount>|<deleted>
+
+addl|foo1|1|||$sync:log:10:1|0|1|false
+addl|foo1|2|1||$sync:log:10:2|0|1|false
+addl|foo1|3|2||$sync:log:10:3|0|1|false
diff --git a/services/syncbase/vsync/testdata/local-init-01.sync b/services/syncbase/vsync/testdata/local-init-01.sync
new file mode 100644
index 0000000..86e24de
--- /dev/null
+++ b/services/syncbase/vsync/testdata/local-init-01.sync
@@ -0,0 +1,12 @@
+# Create an object DAG locally with branches and resolved conflicts.
+# The format is: <cmd>|<objid>|<version>|<parent1>|<parent2>|<logrec>|<txid>|<txcount>|<deleted>
+
+addl|1234|1|||logrec-00|0|1|false
+addl|1234|2|1||logrec-01|0|1|false
+addl|1234|3|2||logrec-02|0|1|false
+addl|1234|4|2||logrec-03|0|1|false
+addl|1234|5|3|4|logrec-04|0|1|false
+addl|1234|6|5||logrec-05|0|1|false
+addl|1234|7|2||logrec-06|0|1|false
+addl|1234|8|6|7|logrec-07|0|1|false
+addl|1234|9|8||logrec-08|0|1|false
diff --git a/services/syncbase/vsync/testdata/local-init-02.sync b/services/syncbase/vsync/testdata/local-init-02.sync
new file mode 100644
index 0000000..cb60a79
--- /dev/null
+++ b/services/syncbase/vsync/testdata/local-init-02.sync
@@ -0,0 +1,10 @@
+# Create DAGs for 3 objects locally.
+# The format is: <cmd>|<objid>|<version>|<parent1>|<parent2>|<logrec>|<txid>|<txcount>|<deleted>
+
+addl|1234|1|||logrec-a-01|0|1|false
+addl|1234|2|1||logrec-a-02|0|1|false
+
+addl|6789|1|||logrec-b-01|0|1|false
+addl|6789|2|1||logrec-b-02|0|1|false
+
+addl|2222|1|||logrec-c-01|0|1|false
diff --git a/services/syncbase/vsync/testdata/local-init-03.sync b/services/syncbase/vsync/testdata/local-init-03.sync
new file mode 100644
index 0000000..202a752
--- /dev/null
+++ b/services/syncbase/vsync/testdata/local-init-03.sync
@@ -0,0 +1,10 @@
+# The format is: <cmd>|<objid>|<version>|<parent1>|<parent2>|<logrec>|<txid>|<txcount>|<deleted>
+
+addl|1234|1|||logrec-01|0|1|false
+addl|1234|2|1||logrec-02|0|1|false
+addl|1234|3|1||logrec-03|0|1|false
+addl|1234|4|2||logrec-04|0|1|false
+addl|1234|5|2||logrec-05|0|1|true
+addl|1234|6|4|5|logrec-06|0|1|false
+addl|1234|7|3|5|logrec-07|0|1|false
+addl|1234|8|6|7|logrec-08|0|1|false
diff --git a/services/syncbase/vsync/testdata/local-resolve-00.sync b/services/syncbase/vsync/testdata/local-resolve-00.sync
new file mode 100644
index 0000000..1666cf0
--- /dev/null
+++ b/services/syncbase/vsync/testdata/local-resolve-00.sync
@@ -0,0 +1,4 @@
+# Create an object locally and update it twice (linked-list).
+# The format is: <cmd>|<objid>|<version>|<parent1>|<parent2>|<logrec>|<txid>|<txcount>|<deleted>
+
+addl|foo1|7|3|6|logrec-06|0|1|false
diff --git a/services/syncbase/vsync/testdata/remote-conf-00.log.sync b/services/syncbase/vsync/testdata/remote-conf-00.log.sync
new file mode 100644
index 0000000..060cf0c
--- /dev/null
+++ b/services/syncbase/vsync/testdata/remote-conf-00.log.sync
@@ -0,0 +1,8 @@
+# Update an object remotely three times triggering one conflict after
+# it was created locally up to v3 (i.e. assume the remote sync received
+# it from the local sync at v2, then updated separately).
+# The format is: <cmd>|<objid>|<version>|<parent1>|<parent2>|<logrec>|<txid>|<txcount>|<deleted>
+
+addr|foo1|4|2||$sync:log:11:1|0|1|false
+addr|foo1|5|4||$sync:log:11:2|0|1|false
+addr|foo1|6|5||$sync:log:11:3|0|1|false
diff --git a/services/syncbase/vsync/testdata/remote-conf-01.log.sync b/services/syncbase/vsync/testdata/remote-conf-01.log.sync
new file mode 100644
index 0000000..2053157
--- /dev/null
+++ b/services/syncbase/vsync/testdata/remote-conf-01.log.sync
@@ -0,0 +1,10 @@
+# Update an object remotely three times triggering a conflict with
+# 2 graft points: v1 and v4. This assumes that the remote sync got
+# v1, made its own conflicting v4 that it resolved into v5 (against v2)
+# then made a v6 change. When the local sync gets back this info it
+# sees 2 graft points: v1-v4 and v2-v5.
+# The format is: <cmd>|<objid>|<version>|<parent1>|<parent2>|<logrec>|<txid>|<txcount>|<deleted>
+
+addr|foo1|4|1||$sync:log:12:1|0|1|false
+addr|foo1|5|2|4|$sync:log:11:1|0|1|false
+addr|foo1|6|5||$sync:log:11:2|0|1|false
diff --git a/services/syncbase/vsync/testdata/remote-conf-03.log.sync b/services/syncbase/vsync/testdata/remote-conf-03.log.sync
new file mode 100644
index 0000000..673405e
--- /dev/null
+++ b/services/syncbase/vsync/testdata/remote-conf-03.log.sync
@@ -0,0 +1,6 @@
+# Create the same object remotely from scratch and update it twice (linked-list).
+# The format is: <cmd>|<objid>|<version>|<parent1>|<parent2>|<logrec>|<txid>|<txcount>|<deleted>
+
+addr|foo1|4|||$sync:log:11:1|0|1|false
+addr|foo1|5|4||$sync:log:11:2|0|1|false
+addr|foo1|6|5||$sync:log:11:3|0|1|false
diff --git a/services/syncbase/vsync/testdata/remote-conf-link.log.sync b/services/syncbase/vsync/testdata/remote-conf-link.log.sync
new file mode 100644
index 0000000..bdf0331
--- /dev/null
+++ b/services/syncbase/vsync/testdata/remote-conf-link.log.sync
@@ -0,0 +1,5 @@
+# Update an object remotely, detect conflict, and bless the local version.
+# The format is: <cmd>|<objid>|<version>|<parent1>|<parent2>|<logrec>|<txid>|<txcount>|<deleted>
+
+addr|foo1|4|1||$sync:log:11:1|0|1|false
+linkr|foo1|4|2||$sync:log:11:2
diff --git a/services/syncbase/vsync/testdata/remote-init-00.log.sync b/services/syncbase/vsync/testdata/remote-init-00.log.sync
new file mode 100644
index 0000000..2546c47
--- /dev/null
+++ b/services/syncbase/vsync/testdata/remote-init-00.log.sync
@@ -0,0 +1,7 @@
+# Create an object remotely and update it twice (linked-list).
+# The format is: <cmd>|<objid>|<version>|<parent1>|<parent2>|<logrec>|<txid>|<txcount>|<deleted>
+
+addr|foo1|1|||$sync:log:11:1|0|1|false
+addr|foo1|2|1||$sync:log:11:2|0|1|false
+addr|foo1|3|2||$sync:log:11:3|0|1|false
+genvec|foo1|10:0,11:3|bar|11:0
\ No newline at end of file
diff --git a/services/syncbase/vsync/testdata/remote-noconf-00.log.sync b/services/syncbase/vsync/testdata/remote-noconf-00.log.sync
new file mode 100644
index 0000000..6adf5dd
--- /dev/null
+++ b/services/syncbase/vsync/testdata/remote-noconf-00.log.sync
@@ -0,0 +1,9 @@
+# Update an object remotely three times without triggering a conflict
+# after it was created locally up to v3 (i.e. assume the remote sync
+# received it from the local sync first, then updated it).
+# The format is: <cmd>|<objid>|<version>|<parent1>|<parent2>|<logrec>|<txid>|<txcount>|<deleted>
+
+addr|foo1|4|3||$sync:log:11:1|0|1|false
+addr|foo1|5|4||$sync:log:11:2|0|1|false
+addr|foo1|6|5||$sync:log:11:3|0|1|false
+genvec|foo1|10:0,11:3|bar|11:0
\ No newline at end of file
diff --git a/services/syncbase/vsync/testdata/remote-noconf-link-00.log.sync b/services/syncbase/vsync/testdata/remote-noconf-link-00.log.sync
new file mode 100644
index 0000000..a06bec5
--- /dev/null
+++ b/services/syncbase/vsync/testdata/remote-noconf-link-00.log.sync
@@ -0,0 +1,5 @@
+# Update an object remotely, detect conflict, and bless the remote version.
+# The format is: <cmd>|<objid>|<version>|<parent1>|<parent2>|<logrec>|<txid>|<txcount>|<deleted>
+
+addr|foo1|4|1||$sync:log:11:1|0|1|false
+linkr|foo1|2|4||$sync:log:11:2
diff --git a/services/syncbase/vsync/testdata/remote-noconf-link-01.log.sync b/services/syncbase/vsync/testdata/remote-noconf-link-01.log.sync
new file mode 100644
index 0000000..1271e23
--- /dev/null
+++ b/services/syncbase/vsync/testdata/remote-noconf-link-01.log.sync
@@ -0,0 +1,5 @@
+# Update an object remotely, detect conflict, and bless the local version.
+# The format is: <cmd>|<objid>|<version>|<parent1>|<parent2>|<logrec>|<txid>|<txcount>|<deleted>
+
+addr|foo1|4|1||$sync:log:11:1|0|1|false
+linkr|foo1|4|3||$sync:log:11:2
diff --git a/services/syncbase/vsync/testdata/remote-noconf-link-02.log.sync b/services/syncbase/vsync/testdata/remote-noconf-link-02.log.sync
new file mode 100644
index 0000000..890d2bc
--- /dev/null
+++ b/services/syncbase/vsync/testdata/remote-noconf-link-02.log.sync
@@ -0,0 +1,6 @@
+# Update an object remotely, detect conflict, and bless the remote version, and continue updating.
+# The format is: <cmd>|<objid>|<version>|<parent1>|<parent2>|<logrec>|<txid>|<txcount>|<deleted>
+
+addr|foo1|4|1||$sync:log:11:1|0|1|false
+linkr|foo1|3|4||$sync:log:11:2
+addr|foo1|5|3||$sync:log:11:3|0|1|false
diff --git a/services/syncbase/vsync/testdata/remote-noconf-link-repeat.log.sync b/services/syncbase/vsync/testdata/remote-noconf-link-repeat.log.sync
new file mode 100644
index 0000000..31e85a9
--- /dev/null
+++ b/services/syncbase/vsync/testdata/remote-noconf-link-repeat.log.sync
@@ -0,0 +1,4 @@
+# Resolve the same conflict on two different devices.
+# The format is: <cmd>|<objid>|<version>|<parent1>|<parent2>|<logrec>|<txid>|<txcount>|<deleted>
+
+linkr|foo1|3|4||$sync:log:12:1
\ No newline at end of file
diff --git a/services/syncbase/vsync/types.vdl b/services/syncbase/vsync/types.vdl
new file mode 100644
index 0000000..f44468e
--- /dev/null
+++ b/services/syncbase/vsync/types.vdl
@@ -0,0 +1,37 @@
+// Copyright 2015 The Vanadium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package vsync
+
+import (
+ "v.io/syncbase/x/ref/services/syncbase/server/interfaces"
+)
+
+// Key prefixes for sync data structures. All these prefixes are prepended with
+// util.SyncPrefix.
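+// For example, assuming "$sync" as util.SyncPrefix and ":" as the key-part
+// separator, log records live under "$sync:log:..." and SyncGroup entries
+// under "$sync:sg:...".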
+const (
+ logPrefix = "log"
+ dbssPrefix = "dbss"
+ dagPrefix = "dag"
+ sgPrefix = "sg"
+)
+
+// syncData represents the persistent state of the sync module.
+type syncData struct {
+ Id uint64
+}
+
+// dbSyncState represents the persistent sync state of a Database.
+type dbSyncState struct {
+ Gen uint64 // local generation number incremented on every local update.
+ CheckptGen uint64 // local generation number advertised to remote peers (used by the responder).
+ GenVec interfaces.GenVector // generation vector capturing the locally-known generations of remote peers.
+}
+
+// localLogRec represents the persistent local state of a log record. Metadata
+// is synced across peers, while pos is local-only.
+type localLogRec struct {
+ Metadata interfaces.LogRecMetadata
+ Pos uint64 // position in the Database log.
+}
diff --git a/services/syncbase/vsync/types.vdl.go b/services/syncbase/vsync/types.vdl.go
new file mode 100644
index 0000000..e9b1d7d
--- /dev/null
+++ b/services/syncbase/vsync/types.vdl.go
@@ -0,0 +1,64 @@
+// Copyright 2015 The Vanadium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// This file was auto-generated by the vanadium vdl tool.
+// Source: types.vdl
+
+package vsync
+
+import (
+ // VDL system imports
+ "v.io/v23/vdl"
+
+ // VDL user imports
+ "v.io/syncbase/x/ref/services/syncbase/server/interfaces"
+)
+
+// syncData represents the persistent state of the sync module.
+type syncData struct {
+ Id uint64
+}
+
+func (syncData) __VDLReflect(struct {
+ Name string `vdl:"v.io/syncbase/x/ref/services/syncbase/vsync.syncData"`
+}) {
+}
+
+// dbSyncState represents the persistent sync state of a Database.
+type dbSyncState struct {
+ Gen uint64 // local generation number incremented on every local update.
+ CheckptGen uint64 // local generation number advertised to remote peers (used by the responder).
+ GenVec interfaces.GenVector // generation vector capturing the locally-known generations of remote peers.
+}
+
+func (dbSyncState) __VDLReflect(struct {
+ Name string `vdl:"v.io/syncbase/x/ref/services/syncbase/vsync.dbSyncState"`
+}) {
+}
+
+// localLogRec represents the persistent local state of a log record. Metadata
+// is synced across peers, while pos is local-only.
+type localLogRec struct {
+ Metadata interfaces.LogRecMetadata
+ Pos uint64 // position in the Database log.
+}
+
+func (localLogRec) __VDLReflect(struct {
+ Name string `vdl:"v.io/syncbase/x/ref/services/syncbase/vsync.localLogRec"`
+}) {
+}
+
+func init() {
+ vdl.Register((*syncData)(nil))
+ vdl.Register((*dbSyncState)(nil))
+ vdl.Register((*localLogRec)(nil))
+}
+
+const logPrefix = "log"
+
+const dbssPrefix = "dbss"
+
+const dagPrefix = "dag"
+
+const sgPrefix = "sg"
diff --git a/services/syncbase/vsync/util.go b/services/syncbase/vsync/util.go
new file mode 100644
index 0000000..2f15cf8
--- /dev/null
+++ b/services/syncbase/vsync/util.go
@@ -0,0 +1,98 @@
+// Copyright 2015 The Vanadium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package vsync
+
+// Sync utility functions
+
+import (
+ "time"
+
+ "v.io/syncbase/x/ref/services/syncbase/server/util"
+ "v.io/syncbase/x/ref/services/syncbase/store"
+ "v.io/v23/context"
+ "v.io/v23/rpc"
+ "v.io/x/lib/vlog"
+)
+
+const (
+ nanoPerSec = int64(1000000000)
+)
+
+// forEachDatabaseStore iterates over all Databases in all Apps within the
+// service and invokes the callback function on each database. The callback
+// returns a "done" flag to make forEachDatabaseStore() stop the iteration
+// earlier; otherwise the function loops across all databases of all apps.
+func (s *syncService) forEachDatabaseStore(ctx *context.T, callback func(string, string, store.Store) bool) {
+ // Get the apps and iterate over them.
+ // TODO(rdaoud): use a "privileged call" parameter instead of nil (here and
+ // elsewhere).
+ appNames, err := s.sv.AppNames(ctx, nil)
+ if err != nil {
+ vlog.Errorf("sync: forEachDatabaseStore: cannot get all app names: %v", err)
+ return
+ }
+
+ for _, a := range appNames {
+ // For each app, get its databases and iterate over them.
+ app, err := s.sv.App(ctx, nil, a)
+ if err != nil {
+ vlog.Errorf("sync: forEachDatabaseStore: cannot get app %s: %v", a, err)
+ continue
+ }
+ dbNames, err := app.NoSQLDatabaseNames(ctx, nil)
+ if err != nil {
+ vlog.Errorf("sync: forEachDatabaseStore: cannot get all db names for app %s: %v", a, err)
+ continue
+ }
+
+ for _, d := range dbNames {
+ // For each database, get its Store and invoke the callback.
+ db, err := app.NoSQLDatabase(ctx, nil, d)
+ if err != nil {
+ vlog.Errorf("sync: forEachDatabaseStore: cannot get db %s for app %s: %v", d, a, err)
+ continue
+ }
+
+ if callback(a, d, db.St()) {
+ return // done, early exit
+ }
+ }
+ }
+}
+
+// getDbStore gets the store handle to the database.
+func (s *syncService) getDbStore(ctx *context.T, call rpc.ServerCall, appName, dbName string) (store.Store, error) {
+ app, err := s.sv.App(ctx, call, appName)
+ if err != nil {
+ return nil, err
+ }
+ db, err := app.NoSQLDatabase(ctx, call, dbName)
+ if err != nil {
+ return nil, err
+ }
+ return db.St(), nil
+}
+
+// unixNanoToTime converts a Unix timestamp in nanoseconds to a Time object.
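+// For example, unixNanoToTime(1500000000123456789) is equivalent to
+// time.Unix(1500000000, 123456789).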
+func unixNanoToTime(timestamp int64) time.Time {
+ if timestamp < 0 {
+ vlog.Fatalf("sync: unixNanoToTime: invalid timestamp %d", timestamp)
+ }
+ return time.Unix(timestamp/nanoPerSec, timestamp%nanoPerSec)
+}
+
+// extractAppKey extracts the app key from the key sent over the wire between
+// two Syncbases. The on-wire key starts with one of the store's reserved
+// prefixes for managed namespaces (e.g. $row, $perms). This function removes
+// that prefix and returns the application component of the key. This is
+// typically done before comparing keys with the SyncGroup prefixes, which are
+// defined by the application.
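+// For example (assuming ":" separators), the on-wire key "$row:foo1" maps to
+// the application key "foo1".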
+func extractAppKey(key string) string {
+ parts := util.SplitKeyParts(key)
+ if len(parts) < 2 {
+ vlog.Fatalf("sync: extractAppKey: invalid entry key %s", key)
+ }
+ return util.JoinKeyParts(parts[1:]...)
+}
diff --git a/services/syncbase/vsync/watcher.go b/services/syncbase/vsync/watcher.go
new file mode 100644
index 0000000..ae69dc6
--- /dev/null
+++ b/services/syncbase/vsync/watcher.go
@@ -0,0 +1,436 @@
+// Copyright 2015 The Vanadium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package vsync
+
+// Syncbase Watcher is a goroutine that listens to local Database updates from
+// applications and modifies sync metadata (e.g. DAG and local log records).
+// The coupling between Syncbase storage and sync is loose, via asynchronous
+// listening by the Watcher, to unblock the application operations as soon as
+// possible, and offload the sync metadata update to the Watcher. When the
+// application mutates objects in a Database, additional entries are written
+// to a log queue, persisted in the same Database. This queue is read by the
+// sync Watcher to learn of the changes.
+
+import (
+ "strings"
+ "time"
+
+ "v.io/syncbase/x/ref/services/syncbase/server/interfaces"
+ "v.io/syncbase/x/ref/services/syncbase/server/util"
+ "v.io/syncbase/x/ref/services/syncbase/server/watchable"
+ "v.io/syncbase/x/ref/services/syncbase/store"
+ "v.io/v23/context"
+ "v.io/v23/services/watch"
+ "v.io/v23/verror"
+ "v.io/x/lib/vlog"
+)
+
+var (
+ // watchPollInterval is the duration between consecutive watch polling
+ // events across all app databases. Every watch event loops across all
+ // app databases and fetches from each one at most one batch update
+ // (transaction) to process.
+ // TODO(rdaoud): add a channel between store and watch to get change
+ // notifications instead of using a polling solution.
+ watchPollInterval = 100 * time.Millisecond
+
+ // watchPrefixes is an in-memory cache of SyncGroup prefixes for each
+ // app database. It is filled at startup from persisted SyncGroup data
+ // and updated at runtime when SyncGroups are joined or left. It is
+ // not guarded by a mutex because only the watcher goroutine uses it
+ // beyond the startup phase (before any sync goroutines are started).
+ // The map keys are the appdb names (globally unique).
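+ // For example, after joining a SyncGroup with prefixes "foo" and "bar" in
+ // a database whose appdb name is "mockapp:mockdb", the entry would be
+ // watchPrefixes["mockapp:mockdb"] == sgPrefixes{"foo": 1, "bar": 1}.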
+ watchPrefixes = make(map[string]sgPrefixes)
+)
+
+// sgPrefixes tracks SyncGroup prefixes being synced in a database and their
+// counts.
+type sgPrefixes map[string]uint32
+
+// watchStore processes updates obtained by watching the store. This is the
+// sync watcher goroutine that learns about store updates asynchronously by
+// reading log records that track object mutation histories in each database.
+// For each batch mutation, the watcher updates the sync DAG and log records.
+// When an application makes a single non-transactional put, it is represented
+// as a batch of one log record. Thus the watcher only deals with batches.
+func (s *syncService) watchStore(ctx *context.T) {
+ defer s.pending.Done()
+
+ ticker := time.NewTicker(watchPollInterval)
+ defer ticker.Stop()
+
+ ctx, cancel := context.WithCancel(ctx)
+ defer cancel()
+
+ for {
+ select {
+ case <-s.closed:
+ vlog.VI(1).Info("sync: watchStore: channel closed, stop watching and exit")
+ return
+
+ case <-ticker.C:
+ s.processStoreUpdates(ctx)
+ }
+ }
+}
+
+// processStoreUpdates fetches updates from all databases and processes them.
+// To maintain fairness among databases, it processes one batch update from
+// each database, in a round-robin manner, until there are no further updates
+// from any database.
+func (s *syncService) processStoreUpdates(ctx *context.T) {
+ for {
+ total, active := 0, 0
+ s.forEachDatabaseStore(ctx, func(appName, dbName string, st store.Store) bool {
+ if s.processDatabase(ctx, appName, dbName, st) {
+ active++
+ }
+ total++
+ return false
+ })
+
+ vlog.VI(2).Infof("sync: processStoreUpdates: %d/%d databases had updates", active, total)
+ if active == 0 {
+ break
+ }
+ }
+}
+
+// processDatabase fetches from the given database at most one new batch update
+// (transaction) and processes it. The one-batch limit prevents one database
+// from starving others. A batch is stored as a contiguous set of log records
+// ending with one record having the "continued" flag set to false. The call
+// returns true if a new batch update was processed.
+func (s *syncService) processDatabase(ctx *context.T, appName, dbName string, st store.Store) bool {
+ s.thLock.Lock()
+ defer s.thLock.Unlock()
+
+ vlog.VI(2).Infof("sync: processDatabase: begin: %s, %s", appName, dbName)
+ defer vlog.VI(2).Infof("sync: processDatabase: end: %s, %s", appName, dbName)
+
+ resMark, err := getResMark(ctx, st)
+ if err != nil {
+ if verror.ErrorID(err) != verror.ErrNoExist.ID {
+ vlog.Errorf("sync: processDatabase: %s, %s: cannot get resMark: %v", appName, dbName, err)
+ return false
+ }
+ resMark = watchable.MakeResumeMarker(0)
+ }
+
+ // Initialize Database sync state if needed.
+ s.initDbSyncStateInMem(ctx, appName, dbName)
+
+ // Get a batch of watch log entries, if any, after this resume marker.
+ logs, nextResmark, err := watchable.ReadBatchFromLog(st, resMark)
+ if err != nil {
+ vlog.Fatalf("sync: processDatabase: %s, %s: cannot get watch log batch: %v", appName, dbName, verror.DebugString(err))
+ }
+ if logs != nil {
+ s.processWatchLogBatch(ctx, appName, dbName, st, logs, nextResmark)
+ return true
+ }
+ return false
+}
+
+// processWatchLogBatch parses the given batch of watch log records, updates the
+// watchable SyncGroup prefixes, uses the prefixes to filter the batch to the
+// subset of syncable records, and transactionally applies these updates to the
+// sync metadata (DAG & log records) and updates the watch resume marker.
+func (s *syncService) processWatchLogBatch(ctx *context.T, appName, dbName string, st store.Store, logs []*watchable.LogEntry, resMark watch.ResumeMarker) {
+ if len(logs) == 0 {
+ return
+ }
+
+ // If the first log entry is a SyncGroup prefix operation, then this is
+ // a SyncGroup snapshot and not an application batch. In this case,
+ // handle the SyncGroup prefix changes by updating the watch prefixes,
+ // exclude the first entry from the batch, and tell the batch processing
+ // below not to assign it a batch ID in the DAG.
+ appBatch := true
+ if processSyncGroupLogRecord(appName, dbName, logs[0]) {
+ appBatch = false
+ logs = logs[1:]
+ }
+
+ // Filter out the log entries for keys not part of any SyncGroup. Also
+ // ignore log entries made by sync itself (echo suppression).
+ totalCount := uint64(len(logs))
+ appdb := appDbName(appName, dbName)
+
+ i := 0
+ for _, entry := range logs {
+ if !entry.FromSync && syncable(appdb, entry) {
+ logs[i] = entry
+ i++
+ }
+ }
+ logs = logs[:i]
+ vlog.VI(3).Infof("sync: processWatchLogBatch: %s, %s: sg snap %t, syncable %d, total %d",
+ appName, dbName, !appBatch, len(logs), totalCount)
+
+ // Transactional processing of the batch: convert these syncable log
+ // records to a batch of sync log records, filling their parent versions
+ // from the DAG head nodes.
+ err := store.RunInTransaction(st, func(tx store.Transaction) error {
+ batch := make([]*localLogRec, 0, len(logs))
+ for _, entry := range logs {
+ if rec, err := convertLogRecord(ctx, tx, entry); err != nil {
+ return err
+ } else if rec != nil {
+ batch = append(batch, rec)
+ }
+ }
+
+ if err := s.processBatch(ctx, appName, dbName, batch, appBatch, totalCount, tx); err != nil {
+ return err
+ }
+ return setResMark(ctx, tx, resMark)
+ })
+
+ if err != nil {
+ // TODO(rdaoud): don't crash, quarantine this app database.
+ vlog.Fatalf("sync: processWatchLogBatch:: %s, %s: watcher cannot process batch: %v", appName, dbName, err)
+ }
+}
+
+// processBatch applies a single batch of changes (object mutations) received
+// from watching a particular Database.
+func (s *syncService) processBatch(ctx *context.T, appName, dbName string, batch []*localLogRec, appBatch bool, totalCount uint64, tx store.Transaction) error {
+ count := uint64(len(batch))
+ if count == 0 {
+ return nil
+ }
+
+ // If an application batch has more than one mutation, start a batch for it.
+ batchId := NoBatchId
+ if appBatch && totalCount > 1 {
+ batchId = s.startBatch(ctx, tx, batchId)
+ if batchId == NoBatchId {
+ return verror.New(verror.ErrInternal, ctx, "failed to generate batch ID")
+ }
+ }
+
+ gen, pos := s.reserveGenAndPosInDbLog(ctx, appName, dbName, count)
+
+ vlog.VI(3).Infof("sync: processBatch: %s, %s: len %d, total %d, btid %x, gen %d, pos %d",
+ appName, dbName, count, totalCount, batchId, gen, pos)
+
+ for _, rec := range batch {
+ // Update the log record. Portions of the record Metadata must
+ // already be filled.
+ rec.Metadata.Id = s.id
+ rec.Metadata.Gen = gen
+ rec.Metadata.RecType = interfaces.NodeRec
+
+ rec.Metadata.BatchId = batchId
+ rec.Metadata.BatchCount = totalCount
+
+ rec.Pos = pos
+
+ gen++
+ pos++
+
+ if err := s.processLocalLogRec(ctx, tx, rec); err != nil {
+ return verror.New(verror.ErrInternal, ctx, err)
+ }
+ }
+
+ // End the batch if any.
+ if batchId != NoBatchId {
+ if err := s.endBatch(ctx, tx, batchId, totalCount); err != nil {
+ return err
+ }
+ }
+
+ return nil
+}
+
+// processLocalLogRec processes a local log record by adding it to the Database
+// and suitably updating the DAG metadata.
+func (s *syncService) processLocalLogRec(ctx *context.T, tx store.Transaction, rec *localLogRec) error {
+ // Insert the new log record into the log.
+ if err := putLogRec(ctx, tx, rec); err != nil {
+ return err
+ }
+
+ m := rec.Metadata
+ logKey := logRecKey(m.Id, m.Gen)
+
+ // Insert the new log record into dag.
+ if err := s.addNode(ctx, tx, m.ObjId, m.CurVers, logKey, m.Delete, m.Parents, m.BatchId, nil); err != nil {
+ return err
+ }
+
+ // Move the head.
+ return moveHead(ctx, tx, m.ObjId, m.CurVers)
+}
+
+// incrWatchPrefix increments (or sets) a SyncGroup prefix for an app database
+// in the watch prefix cache.
+func incrWatchPrefix(appName, dbName, prefix string) {
+ name := appDbName(appName, dbName)
+ if pfxs := watchPrefixes[name]; pfxs != nil {
+ pfxs[prefix]++ // a missing prefix is auto-initialized to 0, so this sets it to 1
+ } else {
+ watchPrefixes[name] = sgPrefixes{prefix: 1}
+ }
+}
+
+// decrWatchPrefix decrements (or unsets) a SyncGroup prefix for an app database
+// in the watch prefix cache.
+func decrWatchPrefix(appName, dbName, prefix string) {
+ name := appDbName(appName, dbName)
+ if pfxs := watchPrefixes[name]; pfxs != nil {
+ if pfxs[prefix] > 1 {
+ pfxs[prefix]--
+ } else if len(pfxs) > 1 {
+ delete(pfxs, prefix)
+ } else {
+ delete(watchPrefixes, name)
+ }
+ }
+}
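+
+// For illustration of the decrement cases above (hypothetical state): given
+//
+//   watchPrefixes["a1:d1"] = sgPrefixes{"foo": 2, "bar": 1}
+//
+// decrementing "foo" leaves {"foo": 1, "bar": 1}, decrementing "bar" then
+// removes the "bar" entry, and decrementing "foo" once more drops the whole
+// "a1:d1" entry from the cache.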
+
+// convertLogRecord converts a store log entry to a sync log record. It fills
+// the previous object version (parent) by fetching its current DAG head if it
+// has one. For a delete, it generates a new object version because the store
+// does not version a deletion.
+// TODO(rdaoud): change Syncbase to store and version a deleted object to
+// simplify the store-to-sync interaction. A deleted key would still have a
+// version and its value entry would encode the "deleted" flag, either in the
+// key or probably in a value wrapper that would contain other metadata.
+func convertLogRecord(ctx *context.T, tx store.Transaction, logEnt *watchable.LogEntry) (*localLogRec, error) {
+ var rec *localLogRec
+ timestamp := logEnt.CommitTimestamp
+
+ switch op := logEnt.Op.(type) {
+ case watchable.OpGet:
+ // TODO(rdaoud): save read-set in sync.
+
+ case watchable.OpScan:
+ // TODO(rdaoud): save scan-set in sync.
+
+ case watchable.OpPut:
+ rec = newLocalLogRec(ctx, tx, op.Value.Key, op.Value.Version, false, timestamp)
+
+ case watchable.OpSyncSnapshot:
+ // Create records for object versions not already in the DAG.
+ // Duplicates can appear here in cases of nested SyncGroups or
+ // peer SyncGroups.
+ if ok, err := hasNode(ctx, tx, string(op.Value.Key), string(op.Value.Version)); err != nil {
+ return nil, err
+ } else if !ok {
+ rec = newLocalLogRec(ctx, tx, op.Value.Key, op.Value.Version, false, timestamp)
+ }
+
+ case watchable.OpDelete:
+ rec = newLocalLogRec(ctx, tx, op.Value.Key, watchable.NewVersion(), true, timestamp)
+
+ case watchable.OpSyncGroup:
+ vlog.Errorf("sync: convertLogRecord: watch LogEntry for SyncGroup should not be converted: %v", logEnt)
+ return nil, verror.New(verror.ErrInternal, ctx, "cannot convert a watch log OpSyncGroup entry")
+
+ default:
+ vlog.Errorf("sync: convertLogRecord: invalid watch LogEntry: %v", logEnt)
+ return nil, verror.New(verror.ErrInternal, ctx, "cannot convert unknown watch log entry")
+ }
+
+ return rec, nil
+}
+
+// newLocalLogRec creates a local sync log record given its information: key,
+// version, deletion flag, and timestamp. It retrieves the current DAG head
+// for the key (if one exists) to use as its parent (previous) version.
+func newLocalLogRec(ctx *context.T, tx store.Transaction, key, version []byte, deleted bool, timestamp int64) *localLogRec {
+ rec := localLogRec{}
+ oid := string(key)
+
+ rec.Metadata.ObjId = oid
+ rec.Metadata.CurVers = string(version)
+ rec.Metadata.Delete = deleted
+ if head, err := getHead(ctx, tx, oid); err == nil {
+ rec.Metadata.Parents = []string{head}
+ } else if deleted || (verror.ErrorID(err) != verror.ErrNoExist.ID) {
+ vlog.Fatalf("sync: newLocalLogRec: cannot getHead to convert log record for %s: %v", oid, err)
+ }
+ rec.Metadata.UpdTime = unixNanoToTime(timestamp)
+ return &rec
+}
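+
+// For illustration, matching the DAG checks in watcher_test.go: if object
+// "foo" currently has DAG head version "333" and a new put writes version
+// "1", the resulting record has CurVers "1" and Parents ["333"]; a put on an
+// object with no DAG head yet gets an empty parent list.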
+
+// processSyncGroupLogRecord checks if the log entry is a SyncGroup update and,
+// if it is, updates the watch prefixes for the app database and returns true.
+// Otherwise it returns false with no other changes.
+func processSyncGroupLogRecord(appName, dbName string, logEnt *watchable.LogEntry) bool {
+ switch op := logEnt.Op.(type) {
+ case watchable.OpSyncGroup:
+ remove := op.Value.Remove
+ for _, prefix := range op.Value.Prefixes {
+ if remove {
+ decrWatchPrefix(appName, dbName, prefix)
+ } else {
+ incrWatchPrefix(appName, dbName, prefix)
+ }
+ }
+ vlog.VI(3).Infof("sync: processSyncGroupLogRecord: %s, %s: remove %t, prefixes: %q",
+ appName, dbName, remove, op.Value.Prefixes)
+ return true
+
+ default:
+ return false
+ }
+}
+
+// syncable returns true if the given log entry falls within the scope of a
+// SyncGroup prefix for the given app database, and thus should be synced.
+// It is used to pre-filter the batch of log entries before sync processing.
+func syncable(appdb string, logEnt *watchable.LogEntry) bool {
+ var key string
+ switch op := logEnt.Op.(type) {
+ case watchable.OpPut:
+ key = string(op.Value.Key)
+ case watchable.OpDelete:
+ key = string(op.Value.Key)
+ case watchable.OpSyncSnapshot:
+ key = string(op.Value.Key)
+ default:
+ return false
+ }
+
+ // The key starts with one of the store's reserved prefixes for managed
+ // namespaces (e.g. $row or $perm). Remove that prefix before comparing
+ // it with the SyncGroup prefixes, which are defined by the application.
+ parts := util.SplitKeyParts(key)
+ if len(parts) < 2 {
+ vlog.Fatalf("sync: syncable: %s: invalid entry key %s: %v", appdb, key, logEnt)
+ }
+ key = util.JoinKeyParts(parts[1:]...)
+
+ for prefix := range watchPrefixes[appdb] {
+ if strings.HasPrefix(key, prefix) {
+ return true
+ }
+ }
+ return false
+}
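+
+// For illustration (hypothetical key, assuming ":" separates key parts): a
+// store key such as "$row:foo123" is split into ["$row", "foo123"], the
+// reserved "$row" part is dropped, and "foo123" is matched against the
+// SyncGroup prefixes, so a prefix of "foo" makes the entry syncable.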
+
+// resMarkKey returns the key used to access the watcher resume marker.
+func resMarkKey() string {
+ return util.JoinKeyParts(util.SyncPrefix, "w", "rm")
+}
+
+// setResMark stores the watcher resume marker for a database.
+func setResMark(ctx *context.T, tx store.Transaction, resMark watch.ResumeMarker) error {
+ return util.Put(ctx, tx, resMarkKey(), resMark)
+}
+
+// getResMark retrieves the watcher resume marker for a database.
+func getResMark(ctx *context.T, st store.StoreReader) (watch.ResumeMarker, error) {
+ var resMark watch.ResumeMarker
+ key := resMarkKey()
+ if err := util.Get(ctx, st, key, &resMark); err != nil {
+ return nil, err
+ }
+ return resMark, nil
+}
diff --git a/services/syncbase/vsync/watcher_test.go b/services/syncbase/vsync/watcher_test.go
new file mode 100644
index 0000000..be77b94
--- /dev/null
+++ b/services/syncbase/vsync/watcher_test.go
@@ -0,0 +1,317 @@
+// Copyright 2015 The Vanadium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package vsync
+
+// Tests for the sync watcher in Syncbase.
+
+import (
+ "bytes"
+ "reflect"
+ "testing"
+ "time"
+
+ "v.io/syncbase/x/ref/services/syncbase/server/util"
+ "v.io/syncbase/x/ref/services/syncbase/server/watchable"
+ "v.io/v23/vom"
+ _ "v.io/x/ref/runtime/factories/generic"
+)
+
+// TestSetResmark tests setting and getting a resume marker.
+func TestSetResmark(t *testing.T) {
+ svc := createService(t)
+ defer destroyService(t, svc)
+ st := svc.St()
+
+ resmark, err := getResMark(nil, st)
+ if err == nil || resmark != nil {
+ t.Errorf("found non-existent resume marker: %s, %v", resmark, err)
+ }
+
+ wantResmark := watchable.MakeResumeMarker(1234567890)
+ tx := st.NewTransaction()
+ if err := setResMark(nil, tx, wantResmark); err != nil {
+ t.Errorf("cannot set resume marker: %v", err)
+ }
+ if err := tx.Commit(); err != nil {
+ t.Fatalf("cannot commit transaction: %v", err)
+ }
+
+ resmark, err = getResMark(nil, st)
+ if err != nil {
+ t.Errorf("cannot get new resume marker: %v", err)
+ }
+ if !bytes.Equal(resmark, wantResmark) {
+ t.Errorf("invalid new resume: got %s instead of %s", resmark, wantResmark)
+ }
+}
+
+// TestWatchPrefixes tests setting and updating the watch prefixes map.
+func TestWatchPrefixes(t *testing.T) {
+ watchPollInterval = time.Millisecond
+ svc := createService(t)
+ defer destroyService(t, svc)
+
+ if len(watchPrefixes) != 0 {
+ t.Errorf("watch prefixes not empty: %v", watchPrefixes)
+ }
+
+ watchPrefixOps := []struct {
+ appName, dbName, key string
+ incr bool
+ }{
+ {"app1", "db1", "foo", true},
+ {"app1", "db1", "bar", true},
+ {"app2", "db1", "xyz", true},
+ {"app3", "db1", "haha", true},
+ {"app1", "db1", "foo", true},
+ {"app1", "db1", "foo", true},
+ {"app1", "db1", "foo", false},
+ {"app2", "db1", "ttt", true},
+ {"app2", "db1", "ttt", true},
+ {"app2", "db1", "ttt", false},
+ {"app2", "db1", "ttt", false},
+ {"app2", "db2", "qwerty", true},
+ {"app3", "db1", "haha", true},
+ {"app2", "db2", "qwerty", false},
+ {"app3", "db1", "haha", false},
+ }
+
+ for _, op := range watchPrefixOps {
+ if op.incr {
+ incrWatchPrefix(op.appName, op.dbName, op.key)
+ } else {
+ decrWatchPrefix(op.appName, op.dbName, op.key)
+ }
+ }
+
+ expPrefixes := map[string]sgPrefixes{
+ "app1:db1": sgPrefixes{"foo": 2, "bar": 1},
+ "app2:db1": sgPrefixes{"xyz": 1},
+ "app3:db1": sgPrefixes{"haha": 1},
+ }
+ if !reflect.DeepEqual(watchPrefixes, expPrefixes) {
+ t.Errorf("invalid watch prefixes: got %v instead of %v", watchPrefixes, expPrefixes)
+ }
+
+ checkSyncableTests := []struct {
+ appName, dbName, key string
+ result bool
+ }{
+ {"app1", "db1", "foo", true},
+ {"app1", "db1", "foobar", true},
+ {"app1", "db1", "bar", true},
+ {"app1", "db1", "bar123", true},
+ {"app1", "db1", "f", false},
+ {"app1", "db1", "ba", false},
+ {"app1", "db1", "xyz", false},
+ {"app1", "db555", "foo", false},
+ {"app555", "db1", "foo", false},
+ {"app2", "db1", "xyz123", true},
+ {"app2", "db1", "ttt123", false},
+ {"app2", "db2", "qwerty", false},
+ {"app3", "db1", "hahahoho", true},
+ {"app3", "db1", "hoho", false},
+ {"app3", "db1", "h", false},
+ }
+
+ for _, test := range checkSyncableTests {
+ log := &watchable.LogEntry{
+ Op: watchable.OpPut{
+ Value: watchable.PutOp{Key: []byte(makeRowKey(test.key))},
+ },
+ }
+ res := syncable(appDbName(test.appName, test.dbName), log)
+ if res != test.result {
+ t.Errorf("checkSyncable: invalid output: %s, %s, %s: got %t instead of %t",
+ test.appName, test.dbName, test.key, res, test.result)
+ }
+ }
+}
+
+// newLog creates a Put or Delete watch log entry.
+func newLog(key, version string, delete bool) *watchable.LogEntry {
+ k, v := []byte(key), []byte(version)
+ log := &watchable.LogEntry{}
+ if delete {
+ log.Op = watchable.OpDelete{Value: watchable.DeleteOp{Key: k}}
+ } else {
+ log.Op = watchable.OpPut{Value: watchable.PutOp{Key: k, Version: v}}
+ }
+ return log
+}
+
+// newSGLog creates a SyncGroup watch log entry.
+func newSGLog(prefixes []string, remove bool) *watchable.LogEntry {
+ return &watchable.LogEntry{
+ Op: watchable.OpSyncGroup{
+ Value: watchable.SyncGroupOp{Prefixes: prefixes, Remove: remove},
+ },
+ }
+}
+
+// TestProcessWatchLogBatch tests the processing of a batch of log records.
+func TestProcessWatchLogBatch(t *testing.T) {
+ svc := createService(t)
+ defer destroyService(t, svc)
+ st := svc.St()
+ s := svc.sync
+
+ app, db := "mockapp", "mockdb"
+ fooKey := makeRowKey("foo")
+ barKey := makeRowKey("bar")
+ fooxyzKey := makeRowKey("fooxyz")
+
+ // An empty batch of logs does not fail.
+ s.processWatchLogBatch(nil, app, db, st, nil, nil)
+
+ // Non-syncable logs.
+ batch := []*watchable.LogEntry{
+ newLog(fooKey, "123", false),
+ newLog(barKey, "555", false),
+ }
+
+ resmark := watchable.MakeResumeMarker(1234)
+ s.processWatchLogBatch(nil, app, db, st, batch, resmark)
+
+ if res, err := getResMark(nil, st); err != nil || !bytes.Equal(res, resmark) {
+ t.Errorf("invalid resume marker after batch processing: got %s instead of %s (err: %v)", res, resmark, err)
+ }
+ if ok, err := hasNode(nil, st, fooKey, "123"); err != nil || ok {
+ t.Error("hasNode() found DAG entry for non-syncable log on foo")
+ }
+ if ok, err := hasNode(nil, st, barKey, "555"); err != nil || ok {
+ t.Error("hasNode() found DAG entry for non-syncable log on bar")
+ }
+
+ // Partially syncable logs.
+ batch = []*watchable.LogEntry{
+ newSGLog([]string{"f", "x"}, false),
+ newLog(fooKey, "333", false),
+ newLog(fooxyzKey, "444", false),
+ newLog(barKey, "222", false),
+ }
+
+ resmark = watchable.MakeResumeMarker(3456)
+ s.processWatchLogBatch(nil, app, db, st, batch, resmark)
+
+ if res, err := getResMark(nil, st); err != nil || !bytes.Equal(res, resmark) {
+ t.Errorf("invalid resume marker after batch processing: got %s instead of %s (err: %v)", res, resmark, err)
+ }
+ if head, err := getHead(nil, st, fooKey); err != nil || head != "333" {
+ t.Errorf("wrong head for foo: got %s, want 333 (err: %v)", head, err)
+ }
+ node, err := getNode(nil, st, fooKey, "333")
+ if err != nil {
+ t.Errorf("getNode() did not find foo: %v", err)
+ }
+ if node.Level != 0 || node.Parents != nil || node.Logrec == "" || node.BatchId != NoBatchId {
+ t.Errorf("invalid DAG node for foo: %v", node)
+ }
+ node2, err := getNode(nil, st, fooxyzKey, "444")
+ if err != nil {
+ t.Errorf("getNode() did not find fooxyz: %v", err)
+ }
+ if node2.Level != 0 || node2.Parents != nil || node2.Logrec == "" || node2.BatchId != NoBatchId {
+ t.Errorf("invalid DAG node for fooxyz: %v", node2)
+ }
+ if ok, err := hasNode(nil, st, barKey, "222"); err != nil || ok {
+ t.Error("hasNode() found DAG entry for non-syncable log on bar")
+ }
+
+ // More partially syncable logs updating existing ones.
+ batch = []*watchable.LogEntry{
+ newLog(fooKey, "1", false),
+ newLog(fooxyzKey, "", true),
+ newLog(barKey, "7", false),
+ }
+
+ resmark = watchable.MakeResumeMarker(7890)
+ s.processWatchLogBatch(nil, app, db, st, batch, resmark)
+
+ if res, err := getResMark(nil, st); err != nil || !bytes.Equal(res, resmark) {
+ t.Errorf("invalid resume marker after batch processing: got %s instead of %s (err: %v)", res, resmark, err)
+ }
+ if head, err := getHead(nil, st, fooKey); err != nil || head != "1" {
+ t.Errorf("wrong head for foo: got %s, want 1 (err: %v)", head, err)
+ }
+ node, err = getNode(nil, st, fooKey, "1")
+ if err != nil {
+ t.Errorf("getNode() did not find foo: %v", err)
+ }
+ expParents := []string{"333"}
+ if node.Level != 1 || !reflect.DeepEqual(node.Parents, expParents) ||
+ node.Logrec == "" || node.BatchId == NoBatchId {
+ t.Errorf("invalid DAG node for foo: %v", node)
+ }
+ head2, err := getHead(nil, st, fooxyzKey)
+ if err != nil {
+ t.Errorf("getHead() did not find fooxyz: %v", err)
+ }
+ node2, err = getNode(nil, st, fooxyzKey, head2)
+ if err != nil {
+ t.Errorf("getNode() did not find fooxyz: %v", err)
+ }
+ expParents = []string{"444"}
+ if node2.Level != 1 || !reflect.DeepEqual(node2.Parents, expParents) ||
+ node2.Logrec == "" || node2.BatchId == NoBatchId {
+ t.Errorf("invalid DAG node for fooxyz: %v", node2)
+ }
+ if ok, err := hasNode(nil, st, barKey, "7"); err != nil || ok {
+ t.Error("hasNode() found DAG entry for non-syncable log on bar")
+ }
+
+ // Back to non-syncable logs (remove "f" prefix).
+ batch = []*watchable.LogEntry{
+ newSGLog([]string{"f"}, true),
+ newLog(fooKey, "99", false),
+ newLog(fooxyzKey, "888", true),
+ newLog(barKey, "007", false),
+ }
+
+ resmark = watchable.MakeResumeMarker(20212223)
+ s.processWatchLogBatch(nil, app, db, st, batch, resmark)
+
+ if res, err := getResMark(nil, st); err != nil || !bytes.Equal(res, resmark) {
+ t.Errorf("invalid resume marker after batch processing: got %s instead of %s (err: %v)", res, resmark, err)
+ }
+ // No changes to "foo": its head should still be "1" from the previous batch.
+ if head, err := getHead(nil, st, fooKey); err != nil || head != "1" {
+ t.Errorf("wrong head for foo: got %s, want 1 (err: %v)", head, err)
+ }
+ if node, err := getNode(nil, st, fooKey, "99"); err == nil {
+ t.Errorf("getNode() should not have found foo @ 99: %v", node)
+ }
+ if node, err := getNode(nil, st, fooxyzKey, "888"); err == nil {
+ t.Errorf("getNode() should not have found fooxyz @ 888: %v", node)
+ }
+ if ok, err := hasNode(nil, st, barKey, "007"); err != nil || ok {
+ t.Error("hasNode() found DAG entry for non-syncable log on bar")
+ }
+
+ // Scan the batch records and verify that only 1 DAG batch is stored,
+ // with a total count of 3 and a map of 2 syncable entries. Of the four
+ // batches processed above, the 1st has no syncable keys at all, the 2nd,
+ // while containing syncable keys, is a SyncGroup snapshot that does not
+ // get assigned a batch ID, the 3rd is an application batch with 3 keys
+ // of which 2 are syncable (and gets the batch ID), and the 4th is again
+ // a SyncGroup snapshot.
+ count := 0
+ start, limit := util.ScanPrefixArgs(util.JoinKeyParts(util.SyncPrefix, "dag", "b"), "")
+ stream := st.Scan(start, limit)
+ for stream.Advance() {
+ count++
+ key := string(stream.Key(nil))
+ var info batchInfo
+ if err := vom.Decode(stream.Value(nil), &info); err != nil {
+ t.Errorf("cannot decode batch %s: %v", key, err)
+ }
+ if info.Count != 3 {
+ t.Errorf("wrong total count in batch %s: got %d instead of 3", key, info.Count)
+ }
+ if n := len(info.Objects); n != 2 {
+ t.Errorf("wrong object count in batch %s: got %d instead of 2", key, n)
+ }
+ }
+ if count != 1 {
+ t.Errorf("wrong count of batches: got %d instead of 2", count)
+ }
+}