Merge branch 'master' of /tmp/staging2/roadmap.go.syncbase into sb
diff --git a/cmd/sb51/doc.go b/cmd/sb51/doc.go
new file mode 100644
index 0000000..0cd9c36
--- /dev/null
+++ b/cmd/sb51/doc.go
@@ -0,0 +1,55 @@
+// Copyright 2015 The Vanadium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// Antimony (sb51) is a Syncbase general-purpose client and management utility.
+// It currently supports experimenting with the Syncbase query language.
+//
+// The 'sh' command connects to a specified database on a Syncbase instance,
+// creating it if it does not yet exist when -create-missing is specified.
+// The user can then enter the following at the command line:
+// 1. dump - to get a dump of the database
+// 2. a syncbase select statement - which is executed, with results printed to stdout
+// 3. make-demo - to create demo tables in the database to experiment with, equivalent to the -make-demo flag
+// 4. exit (or quit) - to exit the program
+//
+// When the shell is running non-interactively (stdin not connected to a tty),
+// errors cause the shell to exit with a non-zero status.
+//
+// To build client:
+// v23 go install v.io/syncbase/x/ref/syncbase/sb51
+//
+// To run client:
+// $V23_ROOT/roadmap/go/bin/sb51 sh <appname> <dbname>
+//
+// Sample run (assuming a syncbase service is mounted at '/:8101/syncbase',
+// otherwise specify using -service flag):
+// > $V23_ROOT/roadmap/go/bin/sb51 sh -create-missing -make-demo -format=csv demoapp demodb
+// ? select v.Name, v.Address.State from DemoCustomers where Type(v) = "Customer";
+// v.Name,v.Address.State
+// John Smith,CA
+// Bat Masterson,IA
+// ? select v.CustId, v.InvoiceNum, v.ShipTo.Zip, v.Amount from DemoCustomers where Type(v) = "Invoice" and v.Amount > 100;
+// v.CustId,v.InvoiceNum,v.ShipTo.Zip,v.Amount
+// 2,1001,50055,166
+// 2,1002,50055,243
+// 2,1004,50055,787
+// ? select k, v fro DemoCustomers;
+// Error:
+// select k, v fro DemoCustomers
+// ^
+// 13: Expected 'from', found fro.
+// ? select k, v from DemoCustomers;
+// k,v
+// 001,"{Name: ""John Smith"", Id: 1, Active: true, Address: {Street: ""1 Main St."", City: ""Palo Alto"", State: ""CA"", Zip: ""94303""}, Credit: {Agency: Equifax, Report: EquifaxReport: {Rating: 65}}}"
+// 001001,"{CustId: 1, InvoiceNum: 1000, Amount: 42, ShipTo: {Street: ""1 Main St."", City: ""Palo Alto"", State: ""CA"", Zip: ""94303""}}"
+// 001002,"{CustId: 1, InvoiceNum: 1003, Amount: 7, ShipTo: {Street: ""2 Main St."", City: ""Palo Alto"", State: ""CA"", Zip: ""94303""}}"
+// 001003,"{CustId: 1, InvoiceNum: 1005, Amount: 88, ShipTo: {Street: ""3 Main St."", City: ""Palo Alto"", State: ""CA"", Zip: ""94303""}}"
+// 002,"{Name: ""Bat Masterson"", Id: 2, Active: true, Address: {Street: ""777 Any St."", City: ""Collins"", State: ""IA"", Zip: ""50055""}, Credit: {Agency: TransUnion, Report: TransUnionReport: {Rating: 80}}}"
+// 002001,"{CustId: 2, InvoiceNum: 1001, Amount: 166, ShipTo: {Street: ""777 Any St."", City: ""Collins"", State: ""IA"", Zip: ""50055""}}"
+// 002002,"{CustId: 2, InvoiceNum: 1002, Amount: 243, ShipTo: {Street: ""888 Any St."", City: ""Collins"", State: ""IA"", Zip: ""50055""}}"
+// 002003,"{CustId: 2, InvoiceNum: 1004, Amount: 787, ShipTo: {Street: ""999 Any St."", City: ""Collins"", State: ""IA"", Zip: ""50055""}}"
+// 002004,"{CustId: 2, InvoiceNum: 1006, Amount: 88, ShipTo: {Street: ""101010 Any St."", City: ""Collins"", State: ""IA"", Zip: ""50055""}}"
+// ? exit;
+// >
+package main
diff --git a/cmd/sb51/internal/demodb/db.go b/cmd/sb51/internal/demodb/db.go
new file mode 100644
index 0000000..2674361
--- /dev/null
+++ b/cmd/sb51/internal/demodb/db.go
@@ -0,0 +1,138 @@
+// Copyright 2015 The Vanadium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package demodb
+
+import (
+ "fmt"
+ "time"
+
+ wire "v.io/syncbase/v23/services/syncbase/nosql"
+ "v.io/syncbase/v23/syncbase/nosql"
+ "v.io/v23/context"
+ "v.io/v23/vdl"
+)
+
+type kv struct {
+ key string
+ value *vdl.Value
+}
+
+type table struct {
+ name string
+ rows []kv
+}
+
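+// demoPrefix is prepended to each table name below (e.g. "Customers" becomes
+// "DemoCustomers", as referenced in the sb51 documentation examples).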
+const demoPrefix = "Demo"
+
+var demoTables = []table{
+ table{
+ name: "Customers",
+ rows: []kv{
+ kv{
+ "001",
+ vdl.ValueOf(Customer{"John Smith", 1, true, AddressInfo{"1 Main St.", "Palo Alto", "CA", "94303"}, CreditReport{Agency: CreditAgencyEquifax, Report: AgencyReportEquifaxReport{EquifaxCreditReport{'A'}}}}),
+ },
+ kv{
+ "001001",
+ vdl.ValueOf(Invoice{1, 1000, 42, AddressInfo{"1 Main St.", "Palo Alto", "CA", "94303"}}),
+ },
+ kv{
+ "001002",
+ vdl.ValueOf(Invoice{1, 1003, 7, AddressInfo{"2 Main St.", "Palo Alto", "CA", "94303"}}),
+ },
+ kv{
+ "001003",
+ vdl.ValueOf(Invoice{1, 1005, 88, AddressInfo{"3 Main St.", "Palo Alto", "CA", "94303"}}),
+ },
+ kv{
+ "002",
+ vdl.ValueOf(Customer{"Bat Masterson", 2, true, AddressInfo{"777 Any St.", "Collins", "IA", "50055"}, CreditReport{Agency: CreditAgencyTransUnion, Report: AgencyReportTransUnionReport{TransUnionCreditReport{80}}}}),
+ },
+ kv{
+ "002001",
+ vdl.ValueOf(Invoice{2, 1001, 166, AddressInfo{"777 Any St.", "Collins", "IA", "50055"}}),
+ },
+ kv{
+ "002002",
+ vdl.ValueOf(Invoice{2, 1002, 243, AddressInfo{"888 Any St.", "Collins", "IA", "50055"}}),
+ },
+ kv{
+ "002003",
+ vdl.ValueOf(Invoice{2, 1004, 787, AddressInfo{"999 Any St.", "Collins", "IA", "50055"}}),
+ },
+ kv{
+ "002004",
+ vdl.ValueOf(Invoice{2, 1006, 88, AddressInfo{"101010 Any St.", "Collins", "IA", "50055"}}),
+ },
+ },
+ },
+ table{
+ name: "Numbers",
+ rows: []kv{
+ kv{
+ "001",
+ vdl.ValueOf(Numbers{byte(12), uint16(1234), uint32(5678), uint64(999888777666), int16(9876), int32(876543), int64(128), float32(3.14159), float64(2.71828182846), complex64(123.0 + 7.0i), complex128(456.789 + 10.1112i)}),
+ },
+ kv{
+ "002",
+ vdl.ValueOf(Numbers{byte(9), uint16(99), uint32(999), uint64(9999999), int16(9), int32(99), int64(88), float32(1.41421356237), float64(1.73205080757), complex64(9.87 + 7.65i), complex128(4.32 + 1.0i)}),
+ },
+ kv{
+ "003",
+ vdl.ValueOf(Numbers{byte(210), uint16(210), uint32(210), uint64(210), int16(210), int32(210), int64(210), float32(210.0), float64(210.0), complex64(210.0 + 0.0i), complex128(210.0 + 0.0i)}),
+ },
+ },
+ },
+ table{
+ name: "Composites",
+ rows: []kv{
+ kv{
+ "uno",
+ vdl.ValueOf(Composite{Array2String{"foo", "bar"}, []int32{1, 2}, map[int32]struct{}{1: struct{}{}, 2: struct{}{}}, map[string]int32{"foo": 1, "bar": 2}}),
+ },
+ },
+ },
+ table{
+ name: "Recursives",
+ rows: []kv{
+ kv{
+ "alpha",
+ vdl.ValueOf(Recursive{nil, &Times{time.Unix(123456789, 42244224), time.Duration(1337)}, map[Array2String]Recursive{
+ Array2String{"a", "b"}: Recursive{},
+ Array2String{"x", "y"}: Recursive{vdl.ValueOf(CreditReport{Agency: CreditAgencyExperian, Report: AgencyReportExperianReport{ExperianCreditReport{ExperianRatingGood}}}), nil, map[Array2String]Recursive{
+ Array2String{"alpha", "beta"}: Recursive{vdl.ValueOf(FooType{Bar: BarType{Baz: BazType{Name: "hello", TitleOrValue: TitleOrValueTypeValue{Value: 42}}}}), nil, nil},
+ }},
+ Array2String{"u", "v"}: Recursive{vdl.ValueOf(vdl.TypeOf(Recursive{})), nil, nil},
+ }}),
+ },
+ },
+ },
+}
+
+// PopulateDemoDB creates the demo tables in the provided database, deleting
+// and recreating any that already exist.
+func PopulateDemoDB(ctx *context.T, db nosql.Database) error {
+ for i, t := range demoTables {
+ tn := demoPrefix + t.name
+ if err := db.DeleteTable(ctx, tn); err != nil {
+ return fmt.Errorf("failed deleting table %s (%d/%d): %v", tn, i+1, len(demoTables), err)
+ }
+ if err := db.CreateTable(ctx, tn, nil); err != nil {
+ return fmt.Errorf("failed creating table %s (%d/%d): %v", tn, i+1, len(demoTables), err)
+ }
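+ // Write all rows for this table within a single batch.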
+ if err := nosql.RunInBatch(ctx, db, wire.BatchOptions{}, func(db nosql.BatchDatabase) error {
+ dt := db.Table(tn)
+ for _, kv := range t.rows {
+ if err := dt.Put(ctx, kv.key, kv.value); err != nil {
+ return err
+ }
+ }
+ return nil
+ }); err != nil {
+ return fmt.Errorf("failed populating table %s (%d/%d): %v", tn, i+1, len(demoTables), err)
+ }
+ }
+ return nil
+}
diff --git a/cmd/sb51/internal/demodb/db_objects.vdl b/cmd/sb51/internal/demodb/db_objects.vdl
new file mode 100644
index 0000000..cbf119a
--- /dev/null
+++ b/cmd/sb51/internal/demodb/db_objects.vdl
@@ -0,0 +1,115 @@
+// Copyright 2015 The Vanadium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package demodb
+
+import "time"
+
+type AddressInfo struct {
+ Street string
+ City string
+ State string
+ Zip string
+}
+
+type CreditAgency enum {
+ Equifax
+ Experian
+ TransUnion
+}
+
+type ExperianRating enum {
+ Good
+ Bad
+}
+
+type EquifaxCreditReport struct {
+ Rating byte
+}
+
+type ExperianCreditReport struct {
+ Rating ExperianRating
+}
+
+type TransUnionCreditReport struct {
+ Rating int16
+}
+
+type AgencyReport union {
+ EquifaxReport EquifaxCreditReport
+ ExperianReport ExperianCreditReport
+ TransUnionReport TransUnionCreditReport
+}
+
+type CreditReport struct {
+ Agency CreditAgency
+ Report AgencyReport
+}
+
+type Customer struct {
+ Name string
+ Id int64
+ Active bool
+ Address AddressInfo
+ Credit CreditReport
+}
+
+type Invoice struct {
+ CustId int64
+ InvoiceNum int64
+ Amount int64
+ ShipTo AddressInfo
+}
+
+type Numbers struct {
+ B byte
+ Ui16 uint16
+ Ui32 uint32
+ Ui64 uint64
+ I16 int16
+ I32 int32
+ I64 int64
+ F32 float32
+ F64 float64
+ C64 complex64
+ C128 complex128
+}
+
+type FooType struct {
+ Bar BarType
+}
+
+type BarType struct {
+ Baz BazType
+}
+
+type TitleOrValueType union {
+ Title string
+ Value int64
+}
+
+type BazType struct {
+ Name string
+ TitleOrValue TitleOrValueType
+}
+
+type Array2String [2]string
+
+type Composite struct {
+ Arr Array2String
+ ListInt []int32
+ MySet set[int32]
+ Map map[string]int32
+}
+
+type Times struct {
+ Stamp time.Time
+ Interval time.Duration
+}
+
+type Recursive struct {
+ Any any
+ Maybe ?Times
+ Rec map[Array2String]Recursive
+}
diff --git a/cmd/sb51/internal/demodb/db_objects.vdl.go b/cmd/sb51/internal/demodb/db_objects.vdl.go
new file mode 100644
index 0000000..23481f8
--- /dev/null
+++ b/cmd/sb51/internal/demodb/db_objects.vdl.go
@@ -0,0 +1,383 @@
+// Copyright 2015 The Vanadium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// This file was auto-generated by the vanadium vdl tool.
+// Source: db_objects.vdl
+
+package demodb
+
+import (
+ // VDL system imports
+ "fmt"
+ "v.io/v23/vdl"
+
+ // VDL user imports
+ "time"
+ _ "v.io/v23/vdlroot/time"
+)
+
+type AddressInfo struct {
+ Street string
+ City string
+ State string
+ Zip string
+}
+
+func (AddressInfo) __VDLReflect(struct {
+ Name string `vdl:"v.io/syncbase/x/ref/syncbase/sb51/internal/demodb.AddressInfo"`
+}) {
+}
+
+type CreditAgency int
+
+const (
+ CreditAgencyEquifax CreditAgency = iota
+ CreditAgencyExperian
+ CreditAgencyTransUnion
+)
+
+// CreditAgencyAll holds all labels for CreditAgency.
+var CreditAgencyAll = [...]CreditAgency{CreditAgencyEquifax, CreditAgencyExperian, CreditAgencyTransUnion}
+
+// CreditAgencyFromString creates a CreditAgency from a string label.
+func CreditAgencyFromString(label string) (x CreditAgency, err error) {
+ err = x.Set(label)
+ return
+}
+
+// Set assigns label to x.
+func (x *CreditAgency) Set(label string) error {
+ switch label {
+ case "Equifax", "equifax":
+ *x = CreditAgencyEquifax
+ return nil
+ case "Experian", "experian":
+ *x = CreditAgencyExperian
+ return nil
+ case "TransUnion", "transunion":
+ *x = CreditAgencyTransUnion
+ return nil
+ }
+ *x = -1
+ return fmt.Errorf("unknown label %q in demodb.CreditAgency", label)
+}
+
+// String returns the string label of x.
+func (x CreditAgency) String() string {
+ switch x {
+ case CreditAgencyEquifax:
+ return "Equifax"
+ case CreditAgencyExperian:
+ return "Experian"
+ case CreditAgencyTransUnion:
+ return "TransUnion"
+ }
+ return ""
+}
+
+func (CreditAgency) __VDLReflect(struct {
+ Name string `vdl:"v.io/syncbase/x/ref/syncbase/sb51/internal/demodb.CreditAgency"`
+ Enum struct{ Equifax, Experian, TransUnion string }
+}) {
+}
+
+type ExperianRating int
+
+const (
+ ExperianRatingGood ExperianRating = iota
+ ExperianRatingBad
+)
+
+// ExperianRatingAll holds all labels for ExperianRating.
+var ExperianRatingAll = [...]ExperianRating{ExperianRatingGood, ExperianRatingBad}
+
+// ExperianRatingFromString creates a ExperianRating from a string label.
+func ExperianRatingFromString(label string) (x ExperianRating, err error) {
+ err = x.Set(label)
+ return
+}
+
+// Set assigns label to x.
+func (x *ExperianRating) Set(label string) error {
+ switch label {
+ case "Good", "good":
+ *x = ExperianRatingGood
+ return nil
+ case "Bad", "bad":
+ *x = ExperianRatingBad
+ return nil
+ }
+ *x = -1
+ return fmt.Errorf("unknown label %q in demodb.ExperianRating", label)
+}
+
+// String returns the string label of x.
+func (x ExperianRating) String() string {
+ switch x {
+ case ExperianRatingGood:
+ return "Good"
+ case ExperianRatingBad:
+ return "Bad"
+ }
+ return ""
+}
+
+func (ExperianRating) __VDLReflect(struct {
+ Name string `vdl:"v.io/syncbase/x/ref/syncbase/sb51/internal/demodb.ExperianRating"`
+ Enum struct{ Good, Bad string }
+}) {
+}
+
+type EquifaxCreditReport struct {
+ Rating byte
+}
+
+func (EquifaxCreditReport) __VDLReflect(struct {
+ Name string `vdl:"v.io/syncbase/x/ref/syncbase/sb51/internal/demodb.EquifaxCreditReport"`
+}) {
+}
+
+type ExperianCreditReport struct {
+ Rating ExperianRating
+}
+
+func (ExperianCreditReport) __VDLReflect(struct {
+ Name string `vdl:"v.io/syncbase/x/ref/syncbase/sb51/internal/demodb.ExperianCreditReport"`
+}) {
+}
+
+type TransUnionCreditReport struct {
+ Rating int16
+}
+
+func (TransUnionCreditReport) __VDLReflect(struct {
+ Name string `vdl:"v.io/syncbase/x/ref/syncbase/sb51/internal/demodb.TransUnionCreditReport"`
+}) {
+}
+
+type (
+ // AgencyReport represents any single field of the AgencyReport union type.
+ AgencyReport interface {
+ // Index returns the field index.
+ Index() int
+ // Interface returns the field value as an interface.
+ Interface() interface{}
+ // Name returns the field name.
+ Name() string
+ // __VDLReflect describes the AgencyReport union type.
+ __VDLReflect(__AgencyReportReflect)
+ }
+ // AgencyReportEquifaxReport represents field EquifaxReport of the AgencyReport union type.
+ AgencyReportEquifaxReport struct{ Value EquifaxCreditReport }
+ // AgencyReportExperianReport represents field ExperianReport of the AgencyReport union type.
+ AgencyReportExperianReport struct{ Value ExperianCreditReport }
+ // AgencyReportTransUnionReport represents field TransUnionReport of the AgencyReport union type.
+ AgencyReportTransUnionReport struct{ Value TransUnionCreditReport }
+ // __AgencyReportReflect describes the AgencyReport union type.
+ __AgencyReportReflect struct {
+ Name string `vdl:"v.io/syncbase/x/ref/syncbase/sb51/internal/demodb.AgencyReport"`
+ Type AgencyReport
+ Union struct {
+ EquifaxReport AgencyReportEquifaxReport
+ ExperianReport AgencyReportExperianReport
+ TransUnionReport AgencyReportTransUnionReport
+ }
+ }
+)
+
+func (x AgencyReportEquifaxReport) Index() int { return 0 }
+func (x AgencyReportEquifaxReport) Interface() interface{} { return x.Value }
+func (x AgencyReportEquifaxReport) Name() string { return "EquifaxReport" }
+func (x AgencyReportEquifaxReport) __VDLReflect(__AgencyReportReflect) {}
+
+func (x AgencyReportExperianReport) Index() int { return 1 }
+func (x AgencyReportExperianReport) Interface() interface{} { return x.Value }
+func (x AgencyReportExperianReport) Name() string { return "ExperianReport" }
+func (x AgencyReportExperianReport) __VDLReflect(__AgencyReportReflect) {}
+
+func (x AgencyReportTransUnionReport) Index() int { return 2 }
+func (x AgencyReportTransUnionReport) Interface() interface{} { return x.Value }
+func (x AgencyReportTransUnionReport) Name() string { return "TransUnionReport" }
+func (x AgencyReportTransUnionReport) __VDLReflect(__AgencyReportReflect) {}
+
+type CreditReport struct {
+ Agency CreditAgency
+ Report AgencyReport
+}
+
+func (CreditReport) __VDLReflect(struct {
+ Name string `vdl:"v.io/syncbase/x/ref/syncbase/sb51/internal/demodb.CreditReport"`
+}) {
+}
+
+type Customer struct {
+ Name string
+ Id int64
+ Active bool
+ Address AddressInfo
+ Credit CreditReport
+}
+
+func (Customer) __VDLReflect(struct {
+ Name string `vdl:"v.io/syncbase/x/ref/syncbase/sb51/internal/demodb.Customer"`
+}) {
+}
+
+type Invoice struct {
+ CustId int64
+ InvoiceNum int64
+ Amount int64
+ ShipTo AddressInfo
+}
+
+func (Invoice) __VDLReflect(struct {
+ Name string `vdl:"v.io/syncbase/x/ref/syncbase/sb51/internal/demodb.Invoice"`
+}) {
+}
+
+type Numbers struct {
+ B byte
+ Ui16 uint16
+ Ui32 uint32
+ Ui64 uint64
+ I16 int16
+ I32 int32
+ I64 int64
+ F32 float32
+ F64 float64
+ C64 complex64
+ C128 complex128
+}
+
+func (Numbers) __VDLReflect(struct {
+ Name string `vdl:"v.io/syncbase/x/ref/syncbase/sb51/internal/demodb.Numbers"`
+}) {
+}
+
+type FooType struct {
+ Bar BarType
+}
+
+func (FooType) __VDLReflect(struct {
+ Name string `vdl:"v.io/syncbase/x/ref/syncbase/sb51/internal/demodb.FooType"`
+}) {
+}
+
+type BarType struct {
+ Baz BazType
+}
+
+func (BarType) __VDLReflect(struct {
+ Name string `vdl:"v.io/syncbase/x/ref/syncbase/sb51/internal/demodb.BarType"`
+}) {
+}
+
+type (
+ // TitleOrValueType represents any single field of the TitleOrValueType union type.
+ TitleOrValueType interface {
+ // Index returns the field index.
+ Index() int
+ // Interface returns the field value as an interface.
+ Interface() interface{}
+ // Name returns the field name.
+ Name() string
+ // __VDLReflect describes the TitleOrValueType union type.
+ __VDLReflect(__TitleOrValueTypeReflect)
+ }
+ // TitleOrValueTypeTitle represents field Title of the TitleOrValueType union type.
+ TitleOrValueTypeTitle struct{ Value string }
+ // TitleOrValueTypeValue represents field Value of the TitleOrValueType union type.
+ TitleOrValueTypeValue struct{ Value int64 }
+ // __TitleOrValueTypeReflect describes the TitleOrValueType union type.
+ __TitleOrValueTypeReflect struct {
+ Name string `vdl:"v.io/syncbase/x/ref/syncbase/sb51/internal/demodb.TitleOrValueType"`
+ Type TitleOrValueType
+ Union struct {
+ Title TitleOrValueTypeTitle
+ Value TitleOrValueTypeValue
+ }
+ }
+)
+
+func (x TitleOrValueTypeTitle) Index() int { return 0 }
+func (x TitleOrValueTypeTitle) Interface() interface{} { return x.Value }
+func (x TitleOrValueTypeTitle) Name() string { return "Title" }
+func (x TitleOrValueTypeTitle) __VDLReflect(__TitleOrValueTypeReflect) {}
+
+func (x TitleOrValueTypeValue) Index() int { return 1 }
+func (x TitleOrValueTypeValue) Interface() interface{} { return x.Value }
+func (x TitleOrValueTypeValue) Name() string { return "Value" }
+func (x TitleOrValueTypeValue) __VDLReflect(__TitleOrValueTypeReflect) {}
+
+type BazType struct {
+ Name string
+ TitleOrValue TitleOrValueType
+}
+
+func (BazType) __VDLReflect(struct {
+ Name string `vdl:"v.io/syncbase/x/ref/syncbase/sb51/internal/demodb.BazType"`
+}) {
+}
+
+type Array2String [2]string
+
+func (Array2String) __VDLReflect(struct {
+ Name string `vdl:"v.io/syncbase/x/ref/syncbase/sb51/internal/demodb.Array2String"`
+}) {
+}
+
+type Composite struct {
+ Arr Array2String
+ ListInt []int32
+ MySet map[int32]struct{}
+ Map map[string]int32
+}
+
+func (Composite) __VDLReflect(struct {
+ Name string `vdl:"v.io/syncbase/x/ref/syncbase/sb51/internal/demodb.Composite"`
+}) {
+}
+
+type Times struct {
+ Stamp time.Time
+ Interval time.Duration
+}
+
+func (Times) __VDLReflect(struct {
+ Name string `vdl:"v.io/syncbase/x/ref/syncbase/sb51/internal/demodb.Times"`
+}) {
+}
+
+type Recursive struct {
+ Any *vdl.Value
+ Maybe *Times
+ Rec map[Array2String]Recursive
+}
+
+func (Recursive) __VDLReflect(struct {
+ Name string `vdl:"v.io/syncbase/x/ref/syncbase/sb51/internal/demodb.Recursive"`
+}) {
+}
+
+func init() {
+ vdl.Register((*AddressInfo)(nil))
+ vdl.Register((*CreditAgency)(nil))
+ vdl.Register((*ExperianRating)(nil))
+ vdl.Register((*EquifaxCreditReport)(nil))
+ vdl.Register((*ExperianCreditReport)(nil))
+ vdl.Register((*TransUnionCreditReport)(nil))
+ vdl.Register((*AgencyReport)(nil))
+ vdl.Register((*CreditReport)(nil))
+ vdl.Register((*Customer)(nil))
+ vdl.Register((*Invoice)(nil))
+ vdl.Register((*Numbers)(nil))
+ vdl.Register((*FooType)(nil))
+ vdl.Register((*BarType)(nil))
+ vdl.Register((*TitleOrValueType)(nil))
+ vdl.Register((*BazType)(nil))
+ vdl.Register((*Array2String)(nil))
+ vdl.Register((*Composite)(nil))
+ vdl.Register((*Times)(nil))
+ vdl.Register((*Recursive)(nil))
+}
diff --git a/cmd/sb51/internal/demodb/doc.go b/cmd/sb51/internal/demodb/doc.go
new file mode 100644
index 0000000..3daf52f
--- /dev/null
+++ b/cmd/sb51/internal/demodb/doc.go
@@ -0,0 +1,7 @@
+// Copyright 2015 The Vanadium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// Package demodb supports loading an example database into Syncbase for
+// experimentation and testing purposes.
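+//
+// A minimal usage sketch (assuming a valid *context.T and an open
+// nosql.Database handle):
+//
+//   if err := demodb.PopulateDemoDB(ctx, db); err != nil {
+//     // handle error
+//   }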
+package demodb
diff --git a/cmd/sb51/internal/reader/reader.go b/cmd/sb51/internal/reader/reader.go
new file mode 100644
index 0000000..930c140
--- /dev/null
+++ b/cmd/sb51/internal/reader/reader.go
@@ -0,0 +1,130 @@
+// Copyright 2015 The Vanadium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// Package reader provides an object that reads queries from various input
+// sources (e.g. stdin, pipe).
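+//
+// A minimal usage sketch:
+//
+//   r := reader.NewInteractive()
+//   defer r.Close()
+//   q, err := r.GetQuery() // reads input until a ';' (or io.EOF)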
+package reader
+
+import (
+ "bufio"
+ "os"
+ "strings"
+ "text/scanner"
+
+ "github.com/peterh/liner"
+)
+
+type T struct {
+ s scanner.Scanner
+ prompt prompter
+}
+
+func newT(prompt prompter) *T {
+ t := &T{prompt: prompt}
+ t.initScanner("")
+ return t
+}
+
+// Close frees any resources acquired by this reader.
+func (t *T) Close() {
+ t.prompt.Close()
+}
+
+func (t *T) initScanner(input string) {
+ t.s.Init(strings.NewReader(input))
+ // Keep all whitespace.
+ t.s.Whitespace = 0
+}
+
+// GetQuery returns the next query, where queries are delimited by semicolons.
+// It returns the error io.EOF when there is no more input.
+func (t *T) GetQuery() (string, error) {
+ if t.s.Peek() == scanner.EOF {
+ input, err := t.prompt.InitialPrompt()
+ if err != nil {
+ return "", err
+ }
+ t.initScanner(input)
+ }
+ var query string
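+ // Accumulate tokens (preserving whitespace) until a terminating ';',
+ // prompting for continuation lines as needed.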
+WholeQuery:
+ for {
+ for tok := t.s.Scan(); tok != scanner.EOF; tok = t.s.Scan() {
+ if tok == ';' {
+ break WholeQuery
+ }
+ query += t.s.TokenText()
+ }
+ input, err := t.prompt.ContinuePrompt()
+ if err != nil {
+ return "", err
+ }
+ t.initScanner(input)
+ query += "\n" // User started a new line.
+ }
+ t.prompt.AppendHistory(query + ";")
+ return query, nil
+}
+
+type prompter interface {
+ Close()
+ InitialPrompt() (string, error)
+ ContinuePrompt() (string, error)
+ AppendHistory(query string)
+}
+
+// noninteractive prompter just blindly reads from stdin.
+type noninteractive struct {
+ input *bufio.Reader
+}
+
+// NewNonInteractive returns a T that simply reads input from stdin. Useful
+// when input is piped from a file or another program.
+func NewNonInteractive() *T {
+ return newT(&noninteractive{bufio.NewReader(os.Stdin)})
+}
+
+func (i *noninteractive) Close() {
+}
+
+func (i *noninteractive) InitialPrompt() (string, error) {
+ return i.input.ReadString('\n')
+}
+
+func (i *noninteractive) ContinuePrompt() (string, error) {
+ return i.input.ReadString('\n')
+}
+
+func (i *noninteractive) AppendHistory(query string) {
+}
+
+// interactive prompter provides a nice prompt for a user to input queries.
+type interactive struct {
+ line *liner.State
+}
+
+// NewInteractive returns a T that prompts the user for input.
+func NewInteractive() *T {
+ i := &interactive{
+ line: liner.NewLiner(),
+ }
+ i.line.SetCtrlCAborts(true)
+ return newT(i)
+}
+
+func (i *interactive) Close() {
+ i.line.Close()
+}
+
+func (i *interactive) InitialPrompt() (string, error) {
+ return i.line.Prompt("? ")
+}
+
+func (i *interactive) ContinuePrompt() (string, error) {
+ return i.line.Prompt(" > ")
+}
+
+func (i *interactive) AppendHistory(query string) {
+ i.line.AppendHistory(query)
+}
diff --git a/cmd/sb51/internal/reader/reader_test.go b/cmd/sb51/internal/reader/reader_test.go
new file mode 100644
index 0000000..de4332c
--- /dev/null
+++ b/cmd/sb51/internal/reader/reader_test.go
@@ -0,0 +1,87 @@
+// Copyright 2015 The Vanadium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package reader
+
+import (
+ "io"
+ "reflect"
+ "testing"
+)
+
+type stringPrompter struct {
+ lines []string
+ curr int
+}
+
+func (s *stringPrompter) Close() {
+}
+
+func (s *stringPrompter) InitialPrompt() (string, error) {
+ if s.curr >= len(s.lines) {
+ return "", io.EOF
+ }
+ q := s.lines[s.curr]
+ s.curr++
+ return q, nil
+}
+
+func (s *stringPrompter) ContinuePrompt() (string, error) {
+ return s.InitialPrompt()
+}
+
+func (s *stringPrompter) AppendHistory(query string) {
+}
+
+func TestGetQuery(t *testing.T) {
+ type testCase struct {
+ lines []string
+ queries []string
+ }
+
+ tests := []testCase{
+ { // Single query.
+ []string{"select k from C;"},
+ []string{"select k from C"},
+ },
+ { // Multiple queries.
+ []string{"select k from C;", "select bar from C;"},
+ []string{"select k from C", "select bar from C"},
+ },
+ { // Multiple queries on one line.
+ []string{"select k from C; select bar from C;"},
+ []string{"select k from C", " select bar from C"},
+ },
+ { // Multiple queries without a ; are just one query.
+ []string{"select k from C select bar from C;"},
+ []string{"select k from C select bar from C"},
+ },
+ { // A query may span multiple input lines; it ends only at a ';'.
+ []string{"select k from C", "select bar from C;"},
+ []string{"select k from C\nselect bar from C"},
+ },
+ {
+ []string{"select\tfoo.bar from\nC;"},
+ []string{"select\tfoo.bar from\nC"},
+ },
+ }
+ for _, test := range tests {
+ r := newT(&stringPrompter{lines: test.lines})
+ var queries []string
+ for {
+ if q, err := r.GetQuery(); err != nil {
+ if err == io.EOF {
+ break
+ }
+ t.Errorf("test %v: unexpected error: %v", test.lines, err)
+ break
+ } else {
+ queries = append(queries, q)
+ }
+ }
+ if got, want := queries, test.queries; !reflect.DeepEqual(got, want) {
+ t.Errorf("test %#v: got %#v, want %#v", test.lines, got, want)
+ }
+ }
+}
diff --git a/cmd/sb51/internal/writer/doc.go b/cmd/sb51/internal/writer/doc.go
new file mode 100644
index 0000000..315ba57
--- /dev/null
+++ b/cmd/sb51/internal/writer/doc.go
@@ -0,0 +1,8 @@
+// Copyright 2015 The Vanadium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// Package writer provides functions for formatting query results.
+//
+// TODO(ivanpi): Export as VDL formatter library.
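+//
+// A minimal usage sketch (rs being a nosql.ResultStream obtained from a
+// query):
+//
+//   if err := writer.WriteTable(os.Stdout, columnNames, rs); err != nil {
+//     // handle error
+//   }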
+package writer
diff --git a/cmd/sb51/internal/writer/writer.go b/cmd/sb51/internal/writer/writer.go
new file mode 100644
index 0000000..c7900b6
--- /dev/null
+++ b/cmd/sb51/internal/writer/writer.go
@@ -0,0 +1,404 @@
+// Copyright 2015 The Vanadium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package writer
+
+import (
+ "encoding/json"
+ "errors"
+ "fmt"
+ "io"
+ "strconv"
+ "strings"
+ "unicode/utf8"
+
+ "v.io/syncbase/v23/syncbase/nosql"
+ "v.io/v23/vdl"
+ vtime "v.io/v23/vdlroot/time"
+)
+
+type Justification int
+
+const (
+ Unknown Justification = iota
+ Left
+ Right
+)
+
+// WriteTable formats the results as an ASCII table.
+func WriteTable(out io.Writer, columnNames []string, rs nosql.ResultStream) error {
+ // Buffer the results so we can compute the column widths.
+ columnWidths := make([]int, len(columnNames))
+ for i, cName := range columnNames {
+ columnWidths[i] = utf8.RuneCountInString(cName)
+ }
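+ // Each column's justification is decided by the kind of the first value
+ // seen in it: bools and numbers are right-aligned, everything else is
+ // left-aligned.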
+ justification := make([]Justification, len(columnNames))
+ var results [][]string
+ for rs.Advance() {
+ row := make([]string, len(columnNames))
+ for i, column := range rs.Result() {
+ if i >= len(columnNames) {
+ return errors.New("more columns in result than in columnNames")
+ }
+ if justification[i] == Unknown {
+ justification[i] = getJustification(column)
+ }
+ columnStr := toString(column, false)
+ row[i] = columnStr
+ columnLen := utf8.RuneCountInString(columnStr)
+ if columnLen > columnWidths[i] {
+ columnWidths[i] = columnLen
+ }
+ }
+ results = append(results, row)
+ }
+ if rs.Err() != nil {
+ return rs.Err()
+ }
+
+ writeBorder(out, columnWidths)
+ sep := "| "
+ for i, cName := range columnNames {
+ io.WriteString(out, fmt.Sprintf("%s%*s", sep, columnWidths[i], cName))
+ sep = " | "
+ }
+ io.WriteString(out, " |\n")
+ writeBorder(out, columnWidths)
+ for _, result := range results {
+ sep = "| "
+ for i, column := range result {
+ if justification[i] == Right {
+ io.WriteString(out, fmt.Sprintf("%s%*s", sep, columnWidths[i], column))
+ } else {
+ io.WriteString(out, fmt.Sprintf("%s%-*s", sep, columnWidths[i], column))
+ }
+ sep = " | "
+ }
+ io.WriteString(out, " |\n")
+ }
+ writeBorder(out, columnWidths)
+ return nil
+}
+
+func writeBorder(out io.Writer, columnWidths []int) {
+ sep := "+-"
+ for _, width := range columnWidths {
+ io.WriteString(out, fmt.Sprintf("%s%s", sep, strings.Repeat("-", width)))
+ sep = "-+-"
+ }
+ io.WriteString(out, "-+\n")
+}
+
+func getJustification(val *vdl.Value) Justification {
+ switch val.Kind() {
+ // TODO(kash): Floating point numbers should have the decimal point line up.
+ case vdl.Bool, vdl.Byte, vdl.Uint16, vdl.Uint32, vdl.Uint64, vdl.Int16, vdl.Int32, vdl.Int64,
+ vdl.Float32, vdl.Float64, vdl.Complex64, vdl.Complex128:
+ return Right
+ // TODO(kash): Leave nil values as unknown.
+ default:
+ return Left
+ }
+}
+
+// WriteCSV formats the results as CSV as specified by https://tools.ietf.org/html/rfc4180.
+func WriteCSV(out io.Writer, columnNames []string, rs nosql.ResultStream, delimiter string) error {
+ delim := ""
+ for _, cName := range columnNames {
+ str := doubleQuoteForCSV(cName, delimiter)
+ io.WriteString(out, fmt.Sprintf("%s%s", delim, str))
+ delim = delimiter
+ }
+ io.WriteString(out, "\n")
+ for rs.Advance() {
+ delim := ""
+ for _, column := range rs.Result() {
+ str := doubleQuoteForCSV(toString(column, false), delimiter)
+ io.WriteString(out, fmt.Sprintf("%s%s", delim, str))
+ delim = delimiter
+ }
+ io.WriteString(out, "\n")
+ }
+ return rs.Err()
+}
+
+// doubleQuoteForCSV follows the escaping rules from
+// https://tools.ietf.org/html/rfc4180. In particular, any value containing a
+// newline, a double quote, or the delimiter must be enclosed in double
+// quotes.
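+// For example, with a "," delimiter, `a,b` becomes `"a,b"` and `a"b` becomes
+// `"a""b"`.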
+func doubleQuoteForCSV(str, delimiter string) string {
+ doubleQuote := strings.Contains(str, delimiter) || strings.Contains(str, "\n")
+ if strings.Contains(str, "\"") {
+ str = strings.Replace(str, "\"", "\"\"", -1)
+ doubleQuote = true
+ }
+ if doubleQuote {
+ str = "\"" + str + "\""
+ }
+ return str
+}
+
+// WriteJson formats the results as a JSON array of objects (one per row),
+// mapping column names to values.
+func WriteJson(out io.Writer, columnNames []string, rs nosql.ResultStream) error {
+ io.WriteString(out, "[")
+ jsonColNames := make([][]byte, len(columnNames))
+ for i, cName := range columnNames {
+ jsonCName, err := json.Marshal(cName)
+ if err != nil {
+ panic(fmt.Sprintf("JSON marshalling failed for column name: %v", err))
+ }
+ jsonColNames[i] = jsonCName
+ }
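+ // bOpen separates row objects: "{" for the first row, ", {" thereafter.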
+ bOpen := "{"
+ for rs.Advance() {
+ io.WriteString(out, bOpen)
+ linestart := "\n "
+ for i, column := range rs.Result() {
+ str := toJson(column)
+ io.WriteString(out, fmt.Sprintf("%s%s: %s", linestart, jsonColNames[i], str))
+ linestart = ",\n "
+ }
+ io.WriteString(out, "\n}")
+ bOpen = ", {"
+ }
+ io.WriteString(out, "]\n")
+ return rs.Err()
+}
+
+// Converts VDL value to readable yet parseable string representation.
+// If nested is not set, strings outside composites are left unquoted.
+// TODO(ivanpi): Handle cycles and improve non-tree DAG handling.
+func toString(val *vdl.Value, nested bool) string {
+ switch val.Type() {
+ case vdl.TypeOf(vtime.Time{}), vdl.TypeOf(vtime.Duration{}):
+ s, err := toStringNative(val)
+ if err != nil {
+ panic(fmt.Sprintf("toStringNative failed for builtin time type: %v", err))
+ }
+ if nested {
+ s = strconv.Quote(s)
+ }
+ return s
+ default:
+ // fall through to Kind switch
+ }
+ switch val.Kind() {
+ case vdl.Bool:
+ return fmt.Sprint(val.Bool())
+ case vdl.Byte:
+ return fmt.Sprint(val.Byte())
+ case vdl.Uint16, vdl.Uint32, vdl.Uint64:
+ return fmt.Sprint(val.Uint())
+ case vdl.Int16, vdl.Int32, vdl.Int64:
+ return fmt.Sprint(val.Int())
+ case vdl.Float32, vdl.Float64:
+ return fmt.Sprint(val.Float())
+ case vdl.Complex64, vdl.Complex128:
+ c := val.Complex()
+ return fmt.Sprintf("%v+%vi", real(c), imag(c))
+ case vdl.String:
+ s := val.RawString()
+ if nested {
+ s = strconv.Quote(s)
+ }
+ return s
+ case vdl.Enum:
+ return val.EnumLabel()
+ case vdl.Array, vdl.List:
+ return listToString("[", ", ", "]", val.Len(), func(i int) string {
+ return toString(val.Index(i), true)
+ })
+ case vdl.Any, vdl.Optional:
+ if val.IsNil() {
+ if nested {
+ return "nil"
+ }
+ // TODO(ivanpi): Blank is better for CSV, but <nil> might be better for table and TSV.
+ return ""
+ }
+ return toString(val.Elem(), nested)
+ case vdl.Struct:
+ return listToString("{", ", ", "}", val.Type().NumField(), func(i int) string {
+ field := toString(val.StructField(i), true)
+ return fmt.Sprintf("%s: %s", val.Type().Field(i).Name, field)
+ })
+ case vdl.Union:
+ ui, uv := val.UnionField()
+ field := toString(uv, true)
+ return fmt.Sprintf("%s: %s", val.Type().Field(ui).Name, field)
+ case vdl.Set:
+ // TODO(ivanpi): vdl.SortValuesAsString() used for predictable output ordering.
+ // Use a more sensible sort for numbers etc.
+ keys := vdl.SortValuesAsString(val.Keys())
+ return listToString("{", ", ", "}", len(keys), func(i int) string {
+ return toString(keys[i], true)
+ })
+ case vdl.Map:
+ // TODO(ivanpi): vdl.SortValuesAsString() used for predictable output ordering.
+ // Use a more sensible sort for numbers etc.
+ keys := vdl.SortValuesAsString(val.Keys())
+ return listToString("{", ", ", "}", len(keys), func(i int) string {
+ k := toString(keys[i], true)
+ v := toString(val.MapIndex(keys[i]), true)
+ return fmt.Sprintf("%s: %s", k, v)
+ })
+ case vdl.TypeObject:
+ return val.String()
+ default:
+ panic(fmt.Sprintf("unknown Kind %s", val.Kind()))
+ }
+}
+
+// Converts a VDL value to string using the corresponding native type String()
+// method.
+func toStringNative(val *vdl.Value) (string, error) {
+ var natVal interface{}
+ if err := vdl.Convert(&natVal, val); err != nil {
+ return "", fmt.Errorf("failed converting %s to native value: %v", val.Type().String(), err)
+ }
+ if _, ok := natVal.(*vdl.Value); ok {
+ return "", fmt.Errorf("failed converting %s to native value: got vdl.Value", val.Type().String())
+ }
+ if strNatVal, ok := natVal.(fmt.Stringer); !ok {
+ return "", fmt.Errorf("native value of %s doesn't implement String()", val.Type().String())
+ } else {
+ return strNatVal.String(), nil
+ }
+}
+
+// listToString stringifies a sequence of n elements, where the string
+// representation of element i is obtained from elemToString(i).
+func listToString(begin, sep, end string, n int, elemToString func(i int) string) string {
+ elems := make([]string, n)
+ for i := range elems {
+ elems[i] = elemToString(i)
+ }
+ return begin + strings.Join(elems, sep) + end
+}
+
+// Converts VDL value to JSON representation.
+func toJson(val *vdl.Value) string {
+ jf := toJsonFriendly(val)
+ jOut, err := json.Marshal(jf)
+ if err != nil {
+ panic(fmt.Sprintf("JSON marshalling failed: %v", err))
+ }
+ return string(jOut)
+}
+
+// Converts VDL value to Go type compatible with json.Marshal().
+func toJsonFriendly(val *vdl.Value) interface{} {
+ switch val.Type() {
+ case vdl.TypeOf(vtime.Time{}), vdl.TypeOf(vtime.Duration{}):
+ s, err := toStringNative(val)
+ if err != nil {
+ panic(fmt.Sprintf("toStringNative failed for builtin time type: %v", err))
+ }
+ return s
+ default:
+ // fall through to Kind switch
+ }
+ switch val.Kind() {
+ case vdl.Bool:
+ return val.Bool()
+ case vdl.Byte:
+ return val.Byte()
+ case vdl.Uint16, vdl.Uint32, vdl.Uint64:
+ return val.Uint()
+ case vdl.Int16, vdl.Int32, vdl.Int64:
+ return val.Int()
+ case vdl.Float32, vdl.Float64:
+ return val.Float()
+ case vdl.Complex64, vdl.Complex128:
+ // Go doesn't support marshalling complex values, so we stringify them.
+ c := val.Complex()
+ return fmt.Sprintf("%v+%vi", real(c), imag(c))
+ case vdl.String:
+ return val.RawString()
+ case vdl.Enum:
+ return val.EnumLabel()
+ case vdl.Array, vdl.List:
+ arr := make([]interface{}, val.Len())
+ for i := range arr {
+ arr[i] = toJsonFriendly(val.Index(i))
+ }
+ return arr
+ case vdl.Any, vdl.Optional:
+ if val.IsNil() {
+ return nil
+ }
+ return toJsonFriendly(val.Elem())
+ case vdl.Struct:
+ // TODO(ivanpi): Consider lowercasing field names.
+ return toOrderedMap(val.Type().NumField(), func(i int) (string, interface{}) {
+ return val.Type().Field(i).Name, toJsonFriendly(val.StructField(i))
+ })
+ case vdl.Union:
+ // TODO(ivanpi): Consider lowercasing field name.
+ ui, uv := val.UnionField()
+ return toOrderedMap(1, func(_ int) (string, interface{}) {
+ return val.Type().Field(ui).Name, toJsonFriendly(uv)
+ })
+ case vdl.Set:
+ // TODO(ivanpi): vdl.SortValuesAsString() used for predictable output ordering.
+ // Use a more sensible sort for numbers etc.
+ keys := vdl.SortValuesAsString(val.Keys())
+ return toOrderedMap(len(keys), func(i int) (string, interface{}) {
+ return toString(keys[i], false), true
+ })
+ case vdl.Map:
+ // TODO(ivanpi): vdl.SortValuesAsString() used for predictable output ordering.
+ // Use a more sensible sort for numbers etc.
+ keys := vdl.SortValuesAsString(val.Keys())
+ return toOrderedMap(len(keys), func(i int) (string, interface{}) {
+ return toString(keys[i], false), toJsonFriendly(val.MapIndex(keys[i]))
+ })
+ case vdl.TypeObject:
+ return val.String()
+ default:
+ panic(fmt.Sprintf("unknown Kind %s", val.Kind()))
+ }
+}
+
+// orderedMap serializes to a JSON object, preserving key order.
+// A native Go map would serialize to a JSON object with sorted keys, which is
+// unexpected behavior for a struct.
+type orderedMap []orderedMapElem
+
+type orderedMapElem struct {
+ Key string
+ Val interface{}
+}
+
+var _ json.Marshaler = (*orderedMap)(nil)
+
+// Builds an orderedMap with n elements, obtaining the key and value of element
+// i using elemToKeyVal(i).
+func toOrderedMap(n int, elemToKeyVal func(i int) (string, interface{})) orderedMap {
+ om := make(orderedMap, n)
+ for i := range om {
+ om[i].Key, om[i].Val = elemToKeyVal(i)
+ }
+ return om
+}
+
+// Serializes orderedMap to JSON object, preserving key order.
+func (om orderedMap) MarshalJSON() (_ []byte, rerr error) {
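+ // The per-element marshalling callbacks below panic on error; recover
+ // converts such a panic into a returned error.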
+ defer func() {
+ if r := recover(); r != nil {
+ rerr = fmt.Errorf("orderedMap: %v", r)
+ }
+ }()
+ return []byte(listToString("{", ",", "}", len(om), func(i int) string {
+ keyJson, err := json.Marshal(om[i].Key)
+ if err != nil {
+ panic(err)
+ }
+ valJson, err := json.Marshal(om[i].Val)
+ if err != nil {
+ panic(err)
+ }
+ return fmt.Sprintf("%s:%s", keyJson, valJson)
+ })), nil
+}
diff --git a/cmd/sb51/internal/writer/writer_test.go b/cmd/sb51/internal/writer/writer_test.go
new file mode 100644
index 0000000..3d7c1f2
--- /dev/null
+++ b/cmd/sb51/internal/writer/writer_test.go
@@ -0,0 +1,564 @@
+// Copyright 2015 The Vanadium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package writer_test
+
+import (
+ "bytes"
+ "encoding/json"
+ "testing"
+ "time"
+
+ "v.io/syncbase/v23/syncbase/nosql"
+ db "v.io/syncbase/x/ref/syncbase/sb51/internal/demodb"
+ "v.io/syncbase/x/ref/syncbase/sb51/internal/writer"
+ "v.io/v23/vdl"
+)
+
+type fakeResultStream struct {
+ rows [][]*vdl.Value
+ curr int
+}
+
+var (
+ customer = db.Customer{
+ Name: "John Smith",
+ Id: 1,
+ Active: true,
+ Address: db.AddressInfo{
+ Street: "1 Main St.",
+ City: "Palo Alto",
+ State: "CA",
+ Zip: "94303",
+ },
+ Credit: db.CreditReport{
+ Agency: db.CreditAgencyEquifax,
+ Report: db.AgencyReportEquifaxReport{Value: db.EquifaxCreditReport{Rating: 'A'}},
+ },
+ }
+ invoice = db.Invoice{
+ CustId: 1,
+ InvoiceNum: 1000,
+ Amount: 42,
+ ShipTo: db.AddressInfo{
+ Street: "1 Main St.",
+ City: "Palo Alto",
+ State: "CA",
+ Zip: "94303",
+ },
+ }
+)
+
+func array2String(s1, s2 string) db.Array2String {
+ a := [2]string{s1, s2}
+ return db.Array2String(a)
+}
+
+func newResultStream(iRows [][]interface{}) nosql.ResultStream {
+ vRows := make([][]*vdl.Value, len(iRows))
+ for i, iRow := range iRows {
+ vRow := make([]*vdl.Value, len(iRow))
+ for j, iCol := range iRow {
+ vRow[j] = vdl.ValueOf(iCol)
+ }
+ vRows[i] = vRow
+ }
+ return &fakeResultStream{
+ rows: vRows,
+ curr: -1,
+ }
+}
+
+func (f *fakeResultStream) Advance() bool {
+ f.curr++
+ return f.curr < len(f.rows)
+}
+
+func (f *fakeResultStream) Result() []*vdl.Value {
+ if f.curr == -1 {
+ panic("call advance first")
+ }
+ return f.rows[f.curr]
+}
+
+func (f *fakeResultStream) Err() error {
+ return nil
+}
+
+func (f *fakeResultStream) Cancel() {
+ // Nothing to do.
+}
+
+func TestWriteTable(t *testing.T) {
+ type testCase struct {
+ columns []string
+ rows [][]interface{}
+ // To make the test cases easier to read, output should have a leading
+ // newline.
+ output string
+ }
+ tests := []testCase{
+ {
+ []string{"c1", "c2"},
+ [][]interface{}{
+ {5, "foo"},
+ {6, "bar"},
+ },
+ `
++----+-----+
+| c1 | c2 |
++----+-----+
+| 5 | foo |
+| 6 | bar |
++----+-----+
+`,
+ },
+ {
+ []string{"c1", "c2"},
+ [][]interface{}{
+ {500, "foo"},
+ {6, "barbaz"},
+ },
+ `
++-----+--------+
+| c1 | c2 |
++-----+--------+
+| 500 | foo |
+| 6 | barbaz |
++-----+--------+
+`,
+ },
+ {
+ []string{"c1", "reallylongcolumnheader"},
+ [][]interface{}{
+ {5, "foo"},
+ {6, "bar"},
+ },
+ `
++----+------------------------+
+| c1 | reallylongcolumnheader |
++----+------------------------+
+| 5 | foo |
+| 6 | bar |
++----+------------------------+
+`,
+ },
+ { // Numbers.
+ []string{"byte", "uint16", "uint32", "uint64", "int16", "int32", "int64",
+ "float32", "float64", "complex64", "complex128"},
+ [][]interface{}{
+ {
+ byte(12), uint16(1234), uint32(5678), uint64(999888777666), int16(9876), int32(876543), int64(128),
+ float32(3.14159), float64(2.71828182846), complex64(123.0 + 7.0i), complex128(456.789 + 10.1112i),
+ },
+ {
+ byte(9), uint16(99), uint32(999), uint64(9999999), int16(9), int32(99), int64(88),
+ float32(1.41421356237), float64(1.73205080757), complex64(9.87 + 7.65i), complex128(4.32 + 1.0i),
+ },
+ },
+ `
++------+--------+--------+--------------+-------+--------+-------+--------------------+---------------+--------------------------------------+------------------+
+| byte | uint16 | uint32 | uint64 | int16 | int32 | int64 | float32 | float64 | complex64 | complex128 |
++------+--------+--------+--------------+-------+--------+-------+--------------------+---------------+--------------------------------------+------------------+
+| 12 | 1234 | 5678 | 999888777666 | 9876 | 876543 | 128 | 3.141590118408203 | 2.71828182846 | 123+7i | 456.789+10.1112i |
+| 9 | 99 | 999 | 9999999 | 9 | 99 | 88 | 1.4142135381698608 | 1.73205080757 | 9.869999885559082+7.650000095367432i | 4.32+1i |
++------+--------+--------+--------------+-------+--------+-------+--------------------+---------------+--------------------------------------+------------------+
+`,
+ },
+ { // Strings with whitespace should be printed literally.
+ []string{"c1", "c2"},
+ [][]interface{}{
+ {"foo\tbar", "foo\nbar"},
+ },
+ `
++---------+---------+
+| c1 | c2 |
++---------+---------+
+| foo bar | foo
+bar |
++---------+---------+
+`,
+ },
+ { // nil is shown as blank.
+ []string{"c1"},
+ [][]interface{}{
+ {nil},
+ },
+ `
++----+
+| c1 |
++----+
+| |
++----+
+`,
+ },
+ {
+ []string{"c1"},
+ [][]interface{}{{customer}, {invoice}},
+ `
++------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
+| c1 |
++------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
+| {Name: "John Smith", Id: 1, Active: true, Address: {Street: "1 Main St.", City: "Palo Alto", State: "CA", Zip: "94303"}, Credit: {Agency: Equifax, Report: EquifaxReport: {Rating: 65}}} |
+| {CustId: 1, InvoiceNum: 1000, Amount: 42, ShipTo: {Street: "1 Main St.", City: "Palo Alto", State: "CA", Zip: "94303"}} |
++------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
+`,
+ },
+ {
+ []string{"c1"},
+ [][]interface{}{
+ {db.Composite{array2String("foo", "棎鶊鵱"), []int32{1, 2}, map[int32]struct{}{1: struct{}{}, 2: struct{}{}}, map[string]int32{"foo": 1, "bar": 2}}},
+ },
+ `
++----------------------------------------------------------------------------------+
+| c1 |
++----------------------------------------------------------------------------------+
+| {Arr: ["foo", "棎鶊鵱"], ListInt: [1, 2], MySet: {1, 2}, Map: {"bar": 2, "foo": 1}} |
++----------------------------------------------------------------------------------+
+`,
+ },
+ { // Types not built in to Go.
+ []string{"time", "type", "union", "enum", "set"},
+ [][]interface{}{
+ {time.Unix(13377331, 0), vdl.TypeOf(map[float32]struct{ B bool }{}), db.TitleOrValueTypeTitle{"dahar master"}, db.ExperianRatingBad, map[int32]struct{}{47: struct{}{}}},
+ },
+ `
++-------------------------------+----------------------------------------+-----------------------+------+------+
+| time | type | union | enum | set |
++-------------------------------+----------------------------------------+-----------------------+------+------+
+| 1970-06-04 19:55:31 +0000 UTC | typeobject(map[float32]struct{B bool}) | Title: "dahar master" | Bad | {47} |
++-------------------------------+----------------------------------------+-----------------------+------+------+
+`,
+ },
+ {
+ []string{"c1"},
+ [][]interface{}{
+ {
+ db.Recursive{
+ Any: nil,
+ Maybe: &db.Times{
+ Stamp: time.Unix(123456789, 42244224),
+ Interval: time.Duration(13377331),
+ },
+ Rec: map[db.Array2String]db.Recursive{
+ array2String("a", "b"): db.Recursive{},
+ array2String("x\nx", "y\"y"): db.Recursive{
+ Any: vdl.ValueOf(db.AgencyReportExperianReport{Value: db.ExperianCreditReport{Rating: db.ExperianRatingGood}}),
+ Maybe: nil,
+ Rec: nil,
+ },
+ },
+ },
+ },
+ },
+ `
++----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
+| c1 |
++----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
+| {Any: nil, Maybe: {Stamp: "1973-11-29 21:33:09.042244224 +0000 UTC", Interval: "13.377331ms"}, Rec: {["a", "b"]: {Any: nil, Maybe: nil, Rec: {}}, ["x\nx", "y\"y"]: {Any: ExperianReport: {Rating: Good}, Maybe: nil, Rec: {}}}} |
++----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
+`,
+ },
+ }
+ for _, test := range tests {
+ var b bytes.Buffer
+ if err := writer.WriteTable(&b, test.columns, newResultStream(test.rows)); err != nil {
+ t.Errorf("Unexpected error: %v", err)
+ continue
+ }
+ // Add a leading newline to the output to match the leading newline
+ // in our test cases.
+ if got, want := "\n"+b.String(), test.output; got != want {
+ t.Errorf("Wrong output:\nGOT:%s\nWANT:%s", got, want)
+ }
+ }
+}
+
+func TestWriteCSV(t *testing.T) {
+ type testCase struct {
+ columns []string
+ rows [][]interface{}
+ delimiter string
+ // To make the test cases easier to read, output should have a leading
+ // newline.
+ output string
+ }
+ tests := []testCase{
+ { // Basic.
+ []string{"c1", "c2"},
+ [][]interface{}{
+ {5, "foo"},
+ {6, "bar"},
+ },
+ ",",
+ `
+c1,c2
+5,foo
+6,bar
+`,
+ },
+ { // Numbers.
+ []string{"byte", "uint16", "uint32", "uint64", "int16", "int32", "int64",
+ "float32", "float64", "complex64", "complex128"},
+ [][]interface{}{
+ {
+ byte(12), uint16(1234), uint32(5678), uint64(999888777666), int16(9876), int32(876543), int64(128),
+ float32(3.14159), float64(2.71828182846), complex64(123.0 + 7.0i), complex128(456.789 + 10.1112i),
+ },
+ {
+ byte(9), uint16(99), uint32(999), uint64(9999999), int16(9), int32(99), int64(88),
+ float32(1.41421356237), float64(1.73205080757), complex64(9.87 + 7.65i), complex128(4.32 + 1.0i),
+ },
+ },
+ ",",
+ `
+byte,uint16,uint32,uint64,int16,int32,int64,float32,float64,complex64,complex128
+12,1234,5678,999888777666,9876,876543,128,3.141590118408203,2.71828182846,123+7i,456.789+10.1112i
+9,99,999,9999999,9,99,88,1.4142135381698608,1.73205080757,9.869999885559082+7.650000095367432i,4.32+1i
+`,
+ },
+ {
+ // Values containing newlines, double quotes, and the delimiter must be
+ // enclosed in double quotes.
+ []string{"c1", "c2"},
+ [][]interface{}{
+ {"foo\tbar", "foo\nbar"},
+ {"foo\"bar\"", "foo,bar"},
+ },
+ ",",
+ `
+c1,c2
+foo bar,"foo
+bar"
+"foo""bar""","foo,bar"
+`,
+ },
+ { // Delimiters other than comma should be supported.
+ []string{"c1", "c2"},
+ [][]interface{}{
+ {"foo\tbar", "foo\nbar"},
+ {"foo\"bar\"", "foo,bar"},
+ },
+ "\t",
+ `
+c1 c2
+"foo bar" "foo
+bar"
+"foo""bar""" foo,bar
+`,
+ },
+ { // Column names should be escaped properly.
+ []string{"foo\tbar", "foo,bar"},
+ [][]interface{}{},
+ ",",
+ `
+foo bar,"foo,bar"
+`,
+ },
+ { // Same as above but use a non-default delimiter.
+ []string{"foo\tbar", "foo,棎鶊鵱"},
+ [][]interface{}{},
+ "\t",
+ `
+"foo bar" foo,棎鶊鵱
+`,
+ },
+ {
+ []string{"c1"},
+ [][]interface{}{{customer}, {invoice}},
+ ",",
+ `
+c1
+"{Name: ""John Smith"", Id: 1, Active: true, Address: {Street: ""1 Main St."", City: ""Palo Alto"", State: ""CA"", Zip: ""94303""}, Credit: {Agency: Equifax, Report: EquifaxReport: {Rating: 65}}}"
+"{CustId: 1, InvoiceNum: 1000, Amount: 42, ShipTo: {Street: ""1 Main St."", City: ""Palo Alto"", State: ""CA"", Zip: ""94303""}}"
+`,
+ },
+ }
+ for _, test := range tests {
+ var b bytes.Buffer
+ if err := writer.WriteCSV(&b, test.columns, newResultStream(test.rows), test.delimiter); err != nil {
+ t.Errorf("Unexpected error: %v", err)
+ continue
+ }
+ // Add a leading newline to the output to match the leading newline
+ // in our test cases.
+ if got, want := "\n"+b.String(), test.output; got != want {
+ t.Errorf("Wrong output:\nGOT: %q\nWANT:%q", got, want)
+ }
+ }
+}
+
+func TestWriteJson(t *testing.T) {
+ type testCase struct {
+ columns []string
+ rows [][]interface{}
+ // To make the test cases easier to read, output should have a leading
+ // newline.
+ output string
+ }
+ tests := []testCase{
+ { // Basic.
+ []string{"c\n1", "c鶊2"},
+ [][]interface{}{
+ {5, "foo\nbar"},
+ {6, "bar\tfoo"},
+ },
+ `
+[{
+ "c\n1": 5,
+ "c鶊2": "foo\nbar"
+}, {
+ "c\n1": 6,
+ "c鶊2": "bar\tfoo"
+}]
+`,
+ },
+ { // Numbers.
+ []string{"byte", "uint16", "uint32", "uint64", "int16", "int32", "int64",
+ "float32", "float64", "complex64", "complex128"},
+ [][]interface{}{
+ {
+ byte(12), uint16(1234), uint32(5678), uint64(999888777666), int16(9876), int32(876543), int64(128),
+ float32(3.14159), float64(2.71828182846), complex64(123.0 + 7.0i), complex128(456.789 + 10.1112i),
+ },
+ {
+ byte(9), uint16(99), uint32(999), uint64(9999999), int16(9), int32(99), int64(88),
+ float32(1.41421356237), float64(1.73205080757), complex64(9.87 + 7.65i), complex128(4.32 + 1.0i),
+ },
+ },
+ `
+[{
+ "byte": 12,
+ "uint16": 1234,
+ "uint32": 5678,
+ "uint64": 999888777666,
+ "int16": 9876,
+ "int32": 876543,
+ "int64": 128,
+ "float32": 3.141590118408203,
+ "float64": 2.71828182846,
+ "complex64": "123+7i",
+ "complex128": "456.789+10.1112i"
+}, {
+ "byte": 9,
+ "uint16": 99,
+ "uint32": 999,
+ "uint64": 9999999,
+ "int16": 9,
+ "int32": 99,
+ "int64": 88,
+ "float32": 1.4142135381698608,
+ "float64": 1.73205080757,
+ "complex64": "9.869999885559082+7.650000095367432i",
+ "complex128": "4.32+1i"
+}]
+`,
+ },
+ { // Empty result.
+ []string{"nothing", "nada", "zilch"},
+ [][]interface{}{},
+ `
+[]
+`,
+ },
+ { // Empty column set.
+ []string{},
+ [][]interface{}{
+ {},
+ {},
+ },
+ `
+[{
+}, {
+}]
+`,
+ },
+ { // Empty values.
+ []string{"blank", "empty", "nil"},
+ [][]interface{}{
+ {struct{}{}, []string{}, nil},
+ },
+ `
+[{
+ "blank": {},
+ "empty": [],
+ "nil": null
+}]
+`,
+ },
+ {
+ []string{"c1"},
+ [][]interface{}{{customer}, {invoice}},
+ `
+[{
+ "c1": {"Name":"John Smith","Id":1,"Active":true,"Address":{"Street":"1 Main St.","City":"Palo Alto","State":"CA","Zip":"94303"},"Credit":{"Agency":"Equifax","Report":{"EquifaxReport":{"Rating":65}}}}
+}, {
+ "c1": {"CustId":1,"InvoiceNum":1000,"Amount":42,"ShipTo":{"Street":"1 Main St.","City":"Palo Alto","State":"CA","Zip":"94303"}}
+}]
+`,
+ },
+ {
+ []string{"nil", "composite", "typeobj"},
+ [][]interface{}{
+ {
+ nil,
+ db.Composite{array2String("foo", "bar"), []int32{1, 2}, map[int32]struct{}{1: struct{}{}, 2: struct{}{}}, map[string]int32{"foo": 1, "bar": 2}},
+ vdl.TypeOf(map[string]struct{}{}),
+ },
+ },
+ `
+[{
+ "nil": null,
+ "composite": {"Arr":["foo","bar"],"ListInt":[1,2],"MySet":{"1":true,"2":true},"Map":{"bar":2,"foo":1}},
+ "typeobj": "typeobject(set[string])"
+}]
+`,
+ },
+ {
+ []string{"c1"},
+ [][]interface{}{
+ {
+ db.Recursive{
+ Any: nil,
+ Maybe: &db.Times{
+ Stamp: time.Unix(123456789, 42244224),
+ Interval: time.Duration(1337),
+ },
+ Rec: map[db.Array2String]db.Recursive{
+ array2String("a", "棎鶊鵱"): db.Recursive{},
+ array2String("x", "y"): db.Recursive{
+ Any: vdl.ValueOf(db.CreditReport{
+ Agency: db.CreditAgencyExperian,
+ Report: db.AgencyReportExperianReport{Value: db.ExperianCreditReport{Rating: db.ExperianRatingGood}},
+ }),
+ Maybe: nil,
+ Rec: nil,
+ },
+ }},
+ },
+ },
+ `
+[{
+ "c1": {"Any":null,"Maybe":{"Stamp":"1973-11-29 21:33:09.042244224 +0000 UTC","Interval":"1.337µs"},"Rec":{"[\"a\", \"棎鶊鵱\"]":{"Any":null,"Maybe":null,"Rec":{}},"[\"x\", \"y\"]":{"Any":{"Agency":"Experian","Report":{"ExperianReport":{"Rating":"Good"}}},"Maybe":null,"Rec":{}}}}
+}]
+`,
+ },
+ }
+ for _, test := range tests {
+ var b bytes.Buffer
+ if err := writer.WriteJson(&b, test.columns, newResultStream(test.rows)); err != nil {
+ t.Errorf("Unexpected error: %v", err)
+ continue
+ }
+ var decoded interface{}
+ if err := json.Unmarshal(b.Bytes(), &decoded); err != nil {
+ t.Errorf("Got invalid JSON: %v", err)
+ }
+ // Add a leading newline to the output to match the leading newline
+ // in our test cases.
+ if got, want := "\n"+b.String(), test.output; got != want {
+ t.Errorf("Wrong output:\nGOT: %q\nWANT:%q", got, want)
+ }
+ }
+}
diff --git a/cmd/sb51/main.go b/cmd/sb51/main.go
new file mode 100644
index 0000000..9968722
--- /dev/null
+++ b/cmd/sb51/main.go
@@ -0,0 +1,34 @@
+// Copyright 2015 The Vanadium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// Antimony (sb51) - Syncbase general-purpose client and management utility.
+// Currently supports SyncQL select queries.
+
+package main
+
+import (
+ "flag"
+
+ "v.io/x/lib/cmdline"
+ _ "v.io/x/ref/runtime/factories/generic"
+)
+
+func main() {
+ cmdline.Main(cmdSb51)
+}
+
+var cmdSb51 = &cmdline.Command{
+ Name: "sb51",
+ Short: "Antimony - Vanadium Syncbase client and management utility",
+ Long: `
+Syncbase general-purpose client and management utility.
+Currently supports starting a SyncQL shell.
+`,
+ Children: []*cmdline.Command{cmdSbShell},
+}
+
+var (
+ // TODO(ivanpi): Decide on convention for local syncbase service name.
+ flagSbService = flag.String("service", "/:8101/syncbase", "Location of the Syncbase service to connect to. Can be absolute or relative to the namespace root.")
+)
diff --git a/cmd/sb51/shell.go b/cmd/sb51/shell.go
new file mode 100644
index 0000000..b5f18c0
--- /dev/null
+++ b/cmd/sb51/shell.go
@@ -0,0 +1,240 @@
+// Copyright 2015 The Vanadium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// Syncbase client shell. Currently supports SyncQL select queries.
+
+package main
+
+import (
+ "fmt"
+ "io"
+ "os"
+ "strconv"
+ "strings"
+
+ isatty "github.com/mattn/go-isatty"
+
+ "v.io/syncbase/v23/syncbase"
+ "v.io/syncbase/v23/syncbase/nosql"
+ "v.io/syncbase/x/ref/syncbase/sb51/internal/demodb"
+ "v.io/syncbase/x/ref/syncbase/sb51/internal/reader"
+ "v.io/syncbase/x/ref/syncbase/sb51/internal/writer"
+ "v.io/v23/context"
+ "v.io/x/lib/cmdline"
+ "v.io/x/ref/lib/v23cmd"
+)
+
+var cmdSbShell = &cmdline.Command{
+ Runner: v23cmd.RunnerFunc(runSbShell),
+ Name: "sh",
+ Short: "Start a SyncQL shell",
+ Long: `
+Connect to a database on the Syncbase service and start a SyncQL shell.
+`,
+ ArgsName: "<app_name> <db_name>",
+ ArgsLong: `
+<app_name> and <db_name> specify the database to execute queries against.
+The database must exist unless -create-missing is specified.
+`,
+}
+
+var (
+ flagFormat string
+ flagCSVDelimiter string
+ flagCreateIfNotExists bool
+ flagMakeDemoTables bool
+)
+
+func init() {
+ cmdSbShell.Flags.StringVar(&flagFormat, "format", "table", "Output format. 'table': human-readable table; 'csv': comma-separated values, use -csv-delimiter to control the delimiter; 'json': JSON objects.")
+ cmdSbShell.Flags.StringVar(&flagCSVDelimiter, "csv-delimiter", ",", "Delimiter to use when printing data as CSV (e.g. \"\t\", \",\")")
+ cmdSbShell.Flags.BoolVar(&flagCreateIfNotExists, "create-missing", false, "Create the app and/or database if they do not exist yet.")
+ cmdSbShell.Flags.BoolVar(&flagMakeDemoTables, "make-demo", false, "(Re)create demo tables in the database.")
+}
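+
+// Example invocation (flags defined above; app/db names are placeholders):
+//   sb51 sh -create-missing -format=json <app_name> <db_name>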
+
+func validateFlags() error {
+ if flagFormat != "table" && flagFormat != "csv" && flagFormat != "json" {
+ return fmt.Errorf("Unsupported -format %q. Must be one of 'table', 'csv', or 'json'.", flagFormat)
+ }
+ if len(flagCSVDelimiter) == 0 {
+ return fmt.Errorf("-csv-delimiter cannot be empty.")
+ }
+ return nil
+}
+
+// runSbShell starts a SyncQL shell against the specified database.
+// It runs in interactive or batch mode depending on whether stdin is a terminal.
+func runSbShell(ctx *context.T, env *cmdline.Env, args []string) error {
+ // TODO(ivanpi): Add 'use' statement, default to no app/database selected.
+ if len(args) != 2 {
+ return env.UsageErrorf("exactly two arguments expected")
+ }
+ appName, dbName := args[0], args[1]
+ if err := validateFlags(); err != nil {
+ return env.UsageErrorf("%v", err)
+ }
+
+ sbs := syncbase.NewService(*flagSbService)
+ d, err := openAppDB(ctx, sbs, appName, dbName, flagCreateIfNotExists)
+ if err != nil {
+ return err
+ }
+
+ if flagMakeDemoTables {
+ if err := makeDemoDB(ctx, d); err != nil {
+ return err
+ }
+ }
+
+ var input *reader.T
+ // TODO(ivanpi): This is hacky; it would be better for lib/cmdline to support IsTerminal.
+ stdinFile, ok := env.Stdin.(*os.File)
+ isTerminal := ok && isatty.IsTerminal(stdinFile.Fd())
+ if isTerminal {
+ input = reader.NewInteractive()
+ } else {
+ input = reader.NewNonInteractive()
+ }
+ defer input.Close()
+
+stmtLoop:
+ for {
+ if q, err := input.GetQuery(); err != nil {
+ if err == io.EOF {
+ if isTerminal {
+ // ctrl-d
+ fmt.Println()
+ }
+ break
+ } else {
+ // ctrl-c
+ break
+ }
+ } else {
+ var err error
+ tq := strings.Fields(q)
+ if len(tq) > 0 {
+ switch strings.ToLower(tq[0]) {
+ case "exit", "quit":
+ break stmtLoop
+ case "dump":
+ err = dumpDB(ctx, env.Stdout, d)
+ case "make-demo":
+ err = makeDemoDB(ctx, d)
+ case "select":
+ err = queryExec(ctx, env.Stdout, d, q)
+ default:
+ err = fmt.Errorf("unknown statement: '%s'; expected one of: 'select', 'make-demo', 'dump', 'exit', 'quit'", strings.ToLower(tq[0]))
+ }
+ }
+ if err != nil {
+ if isTerminal {
+ fmt.Fprintln(env.Stderr, "Error:", err)
+ } else {
+ // If running non-interactively, errors stop execution.
+ return err
+ }
+ }
+ }
+ }
+
+ return nil
+}
+
+func openAppDB(ctx *context.T, sbs syncbase.Service, appName, dbName string, createIfNotExists bool) (nosql.Database, error) {
+ app := sbs.App(appName)
+ if exists, err := app.Exists(ctx); err != nil {
+ return nil, fmt.Errorf("failed checking for app %q: %v", app.FullName(), err)
+ } else if !exists {
+ if !createIfNotExists {
+ return nil, fmt.Errorf("app %q does not exist", app.FullName())
+ }
+ if err := app.Create(ctx, nil); err != nil {
+ return nil, err
+ }
+ }
+ d := app.NoSQLDatabase(dbName, nil)
+ if exists, err := d.Exists(ctx); err != nil {
+ return nil, fmt.Errorf("failed checking for db %q: %v", d.FullName(), err)
+ } else if !exists {
+ if !createIfNotExists {
+ return nil, fmt.Errorf("db %q does not exist", d.FullName())
+ }
+ if err := d.Create(ctx, nil); err != nil {
+ return nil, err
+ }
+ }
+ return d, nil
+}
+
+func dumpDB(ctx *context.T, w io.Writer, d nosql.Database) error {
+ tables, err := d.ListTables(ctx)
+ if err != nil {
+ return fmt.Errorf("failed listing tables: %v", err)
+ }
+ var errs []error
+ for _, table := range tables {
+ fmt.Fprintf(w, "table: %s\n", table)
+ if err := queryExec(ctx, w, d, fmt.Sprintf("select k, v from %s", table)); err != nil {
+ errs = append(errs, fmt.Errorf("> %s: %v", table, err))
+ }
+ }
+ if len(errs) > 0 {
+ err := fmt.Errorf("failed dumping %d of %d tables:", len(errs), len(tables))
+ for _, e := range errs {
+ err = fmt.Errorf("%v\n%v", err, e)
+ }
+ return err
+ }
+ return nil
+}
+
+func makeDemoDB(ctx *context.T, d nosql.Database) error {
+ if err := demodb.PopulateDemoDB(ctx, d); err != nil {
+ return fmt.Errorf("failed making demo tables: %v", err)
+ }
+ return nil
+}
+
+// splitError splits an error message into an offset and the remaining message
+// (i.e., the part to the right of the offset).
+// The convention for syncql is "<module><optional-rpc>[offset]<remaining-message>".
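+// For example (hypothetical error string, for illustration only), an error
+// "syncql[13]Expected 'from', found fro." splits into offset 13 and the
+// message "Expected 'from', found fro.".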
+func splitError(err error) (int64, string) {
+ errMsg := err.Error()
+ idx1 := strings.Index(errMsg, "[")
+ idx2 := strings.Index(errMsg, "]")
+ if idx1 == -1 || idx2 == -1 {
+ return 0, errMsg
+ }
+ offsetString := errMsg[idx1+1 : idx2]
+ offset, err := strconv.ParseInt(offsetString, 10, 64)
+ if err != nil {
+ return 0, errMsg
+ }
+ return offset, errMsg[idx2+1:]
+}
+
+func queryExec(ctx *context.T, w io.Writer, d nosql.Database, q string) error {
+ if columnNames, rs, err := d.Exec(ctx, q); err != nil {
+ off, msg := splitError(err)
+ return fmt.Errorf("\n%s\n%s^\n%d: %s", q, strings.Repeat(" ", int(off)), off+1, msg)
+ } else {
+ switch flagFormat {
+ case "table":
+ if err := writer.WriteTable(w, columnNames, rs); err != nil {
+ return err
+ }
+ case "csv":
+ if err := writer.WriteCSV(w, columnNames, rs, flagCSVDelimiter); err != nil {
+ return err
+ }
+ case "json":
+ if err := writer.WriteJson(w, columnNames, rs); err != nil {
+ return err
+ }
+ default:
+ panic(fmt.Sprintf("invalid format flag value: %v", flagFormat))
+ }
+ }
+ return nil
+}
diff --git a/services/syncbase/clock/clock_darwin.go b/services/syncbase/clock/clock_darwin.go
new file mode 100644
index 0000000..4c801cb
--- /dev/null
+++ b/services/syncbase/clock/clock_darwin.go
@@ -0,0 +1,51 @@
+// Copyright 2015 The Vanadium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package clock
+
+import (
+ "bytes"
+ "encoding/binary"
+ "syscall"
+ "time"
+ "unsafe"
+)
+
+// This file contains Darwin-specific implementations of functions for the
+// clock package.
+
+// ElapsedTime returns the time elapsed since last boot.
+// Darwin provides the sysctl "kern.boottime", which returns a Timeval32
+// object containing the boot time for the system. Darwin calculates this
+// boottime based on the current clock and the internal tracking of elapsed
+// time since boot. Hence if the clock is changed, the boot time changes along
+// with it. So the difference between the current time and boot time will always
+// give us the correct elapsed time since boot.
+func (sc *systemClockImpl) ElapsedTime() (time.Duration, error) {
+ tv := syscall.Timeval32{}
+
+ if err := sysctlbyname("kern.boottime", &tv); err != nil {
+ return 0, err
+ }
+ return time.Since(time.Unix(int64(tv.Sec), int64(tv.Usec)*1000)), nil
+}
+
+// Generic Sysctl buffer unmarshalling.
+func sysctlbyname(name string, data interface{}) (err error) {
+ val, err := syscall.Sysctl(name)
+ if err != nil {
+ return err
+ }
+
+ buf := []byte(val)
+
+ switch v := data.(type) {
+ case *uint64:
+ *v = *(*uint64)(unsafe.Pointer(&buf[0]))
+ return
+ }
+
+ bbuf := bytes.NewBuffer([]byte(val))
+ return binary.Read(bbuf, binary.LittleEndian, data)
+}
diff --git a/services/syncbase/clock/clock_linux.go b/services/syncbase/clock/clock_linux.go
new file mode 100644
index 0000000..dabaab4
--- /dev/null
+++ b/services/syncbase/clock/clock_linux.go
@@ -0,0 +1,26 @@
+// Copyright 2015 The Vanadium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package clock
+
+import (
+ "syscall"
+ "time"
+)
+
+// This file contains Linux-specific implementations of functions for the
+// clock package.
+
+// ElapsedTime returns the time elapsed since last boot.
+// Linux stores this information in /proc/uptime as seconds since boot, with a
+// precision of up to 2 decimal places.
+// NOTE: The Go syscall returns the elapsed time in whole seconds, rounding to
+// the closest second. Be careful when using this value, as it can introduce a
+// compounding error.
+func (sc *systemClockImpl) ElapsedTime() (time.Duration, error) {
+ var sysInfo syscall.Sysinfo_t
+ if err := syscall.Sysinfo(&sysInfo); err != nil {
+ return 0, err
+ }
+ return time.Duration(sysInfo.Uptime) * time.Second, nil
+}
diff --git a/services/syncbase/clock/clockservice.go b/services/syncbase/clock/clockservice.go
new file mode 100644
index 0000000..fed9a02
--- /dev/null
+++ b/services/syncbase/clock/clockservice.go
@@ -0,0 +1,127 @@
+// Copyright 2015 The Vanadium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package clock
+
+import (
+ "math"
+ "time"
+
+ "v.io/syncbase/x/ref/services/syncbase/server/util"
+ "v.io/v23/context"
+ "v.io/v23/verror"
+ "v.io/x/lib/vlog"
+)
+
+// This file contains code that checks the current system clock to detect
+// whether it has been changed by an external action.
+
+// runClockCheck estimates the current system time based on saved boottime
+// and elapsed time since boot and checks if the system clock shows the same
+// time. This involves the following steps:
+// 1) Check if system was rebooted since last run. If so update the saved
+// ClockData.
+// 2) Fetch stored ClockData. If none exists, this is the first time
+// runClockCheck has been run. Write new ClockData.
+// 3) Estimate current system clock time and check if the actual system clock
+// agrees with the estimation. If not update the skew value appropriately.
+// 4) Update saved elapsed time since boot. This is used to check if the system
+// was rebooted or not. TODO(jlodhia): work with device manager to provide a
+// way to notify syncbase if the system was just rebooted.
+func (c *VClock) runClockCheck(ctx *context.T) {
+ checkSystemRebooted(ctx, c)
+
+ clockData := &ClockData{}
+ if err := c.sa.GetClockData(ctx, clockData); err != nil {
+ if verror.ErrorID(err) == verror.ErrNoExist.ID {
+ // VClock's cron job to set up UTC time at boot is being run for the
+ // first time. The skew is not yet known, hence 0 is assigned.
+ writeNewClockData(ctx, c, 0)
+ } else {
+ vlog.Errorf("Error while fetching clock data: %v", err)
+ }
+ return
+ }
+
+ systemTime := c.clock.Now()
+ elapsedTime, err := c.clock.ElapsedTime()
+ if err != nil {
+ vlog.Errorf("Error while fetching elapsed time: %v", err)
+ return
+ }
+
+ newClockData := &ClockData{
+ SystemTimeAtBoot: clockData.SystemTimeAtBoot,
+ Skew: clockData.Skew,
+ ElapsedTimeSinceBoot: elapsedTime.Nanoseconds(),
+ }
+
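+ // If the estimated clock time (boot time + elapsed time) disagrees with the
+ // actual system clock by more than the drift threshold, fold the difference
+ // into the skew and recompute the boot time accordingly.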
+ estimatedClockTime := clockData.SystemBootTime().Add(elapsedTime)
+ diff := estimatedClockTime.Sub(systemTime)
+ if math.Abs(float64(diff.Nanoseconds())) > util.LocalClockDriftThreshold {
+ newClockData.Skew = newClockData.Skew + diff.Nanoseconds()
+ newSystemTimeAtBoot := systemTime.Add(-elapsedTime)
+ newClockData.SystemTimeAtBoot = newSystemTimeAtBoot.UnixNano()
+ }
+
+ if err := c.sa.SetClockData(ctx, newClockData); err != nil {
+ vlog.Errorf("Error while setting clock data: %v", err)
+ }
+}
+
+func writeNewClockData(ctx *context.T, c *VClock, skew time.Duration) {
+ systemTime := c.clock.Now()
+ elapsedTime, err := c.clock.ElapsedTime()
+ if err != nil {
+ vlog.Errorf("Error while fetching elapsed time: %v", err)
+ return
+ }
+ systemTimeAtBoot := systemTime.Add(-elapsedTime)
+ clockData := &ClockData{
+ SystemTimeAtBoot: systemTimeAtBoot.UnixNano(),
+ Skew: skew.Nanoseconds(),
+ ElapsedTimeSinceBoot: elapsedTime.Nanoseconds(),
+ }
+ if err := c.sa.SetClockData(ctx, clockData); err != nil {
+ vlog.Errorf("Error while setting clock data: %v", err)
+ }
+}
+
+// checkSystemRebooted compares the elapsed time stored during the last run
+// of runClockCheck() to the current elapsed time since boot provided by the
+// system clock. Since elapsed time is monotonically increasing and cannot be
+// changed unless a reboot happens, if the current value is lower than the
+// previous value then a reboot has happened since the last run. If so, update
+// the boot time and elapsed time since boot appropriately.
+func checkSystemRebooted(ctx *context.T, c *VClock) bool {
+ currentSysTime := c.clock.Now()
+ elapsedTime, err := c.clock.ElapsedTime()
+ if err != nil {
+ vlog.Errorf("Error while fetching elapsed time: %v", err)
+ return false
+ }
+
+ clockData := &ClockData{}
+ if err := c.sa.GetClockData(ctx, clockData); err != nil {
+ if verror.ErrorID(err) != verror.ErrNoExist.ID {
+ vlog.Errorf("Error while fetching clock delta: %v", err)
+ }
+ // In case of verror.ErrNoExist, no clock data is present and nothing needs
+ // to be done here; writeNewClockData() will write new clock data to storage.
+ return false
+ }
+
+ if elapsedTime.Nanoseconds() < clockData.ElapsedTimeSinceBoot {
+ // Since the elapsed time since boot provided by the system is less than
+ // the elapsed time since boot seen the last time the clockservice ran,
+ // the system must have rebooted in between.
+ clockData.SystemTimeAtBoot = currentSysTime.Add(-elapsedTime).UnixNano()
+ clockData.ElapsedTimeSinceBoot = elapsedTime.Nanoseconds()
+ if err := c.sa.SetClockData(ctx, clockData); err != nil {
+ vlog.Errorf("Error while setting clock data: %v", err)
+ }
+ return true
+ }
+ return false
+}
diff --git a/services/syncbase/clock/clockservice_test.go b/services/syncbase/clock/clockservice_test.go
new file mode 100644
index 0000000..86650c1
--- /dev/null
+++ b/services/syncbase/clock/clockservice_test.go
@@ -0,0 +1,191 @@
+// Copyright 2015 The Vanadium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package clock
+
+import (
+ "testing"
+ "time"
+)
+
+const (
+ constElapsedTime int64 = 50
+)
+
+func TestWriteNewClockData(t *testing.T) {
+ sysTs := time.Now()
+ sysClock := MockSystemClock(sysTs, time.Duration(constElapsedTime))
+ stAdapter := MockStorageAdapter()
+
+ clock := NewVClockWithMockServices(stAdapter, sysClock, nil)
+
+ writeNewClockData(nil, clock, 0)
+
+ expectedSystemTimeAtBoot := sysTs.UnixNano() - constElapsedTime
+ verifyClockData(t, stAdapter, 0, expectedSystemTimeAtBoot, constElapsedTime)
+}
+
+// This test runs the following scenarios:
+// 1) Run checkSystemRebooted() with no ClockData stored.
+// Result: no op.
+// 2) Run checkSystemRebooted() with ClockData whose stored elapsed time since
+// boot is higher than the current elapsed time.
+// Result: A new ClockData is written with updated SystemTimeAtBoot and
+// elapsed time.
+// 3) Run checkSystemRebooted() again after moving the sysClock forward
+// Result: no op.
+func TestCheckSystemRebooted(t *testing.T) {
+ sysTs := time.Now()
+ sysClock := MockSystemClock(sysTs, time.Duration(constElapsedTime))
+ stAdapter := MockStorageAdapter()
+
+ clock := NewVClockWithMockServices(stAdapter, sysClock, nil)
+
+ // stAdapter will return ErrNoExist while fetching ClockData
+ // checkSystemRebooted should return false.
+ if checkSystemRebooted(nil, clock) {
+ t.Error("Unexpected return value")
+ }
+
+ // Set clock data with elapsed time greater than constElapsedTime
+ clockData := &ClockData{25003, 25, 34569}
+ stAdapter.SetClockData(nil, clockData)
+
+ if !checkSystemRebooted(nil, clock) {
+ t.Error("Unexpected return value")
+ }
+ expectedSystemTimeAtBoot := sysTs.UnixNano() - constElapsedTime
+ verifyClockData(t, stAdapter, 25, expectedSystemTimeAtBoot, constElapsedTime)
+
+ // move clock forward without reboot and run checkSystemRebooted again
+ var timePassed int64 = 200
+ newSysTs := sysTs.Add(time.Duration(timePassed))
+ sysClock.SetNow(newSysTs)
+ sysClock.SetElapsedTime(time.Duration(constElapsedTime + timePassed))
+
+ if checkSystemRebooted(nil, clock) {
+ t.Error("Unexpected return value")
+ }
+ expectedSystemTimeAtBoot = sysTs.UnixNano() - constElapsedTime
+ verifyClockData(t, stAdapter, 25, expectedSystemTimeAtBoot, constElapsedTime)
+}
+
+// Setup: No prior ClockData present.
+// Result: A new ClockData value gets set.
+func TestRunClockCheck1(t *testing.T) {
+ sysTs := time.Now()
+ sysClock := MockSystemClock(sysTs, time.Duration(constElapsedTime))
+ stAdapter := MockStorageAdapter()
+
+ clock := NewVClockWithMockServices(stAdapter, sysClock, nil)
+
+ clock.runClockCheck(nil)
+ expectedSystemTimeAtBoot := sysTs.UnixNano() - constElapsedTime
+ verifyClockData(t, stAdapter, 0, expectedSystemTimeAtBoot, constElapsedTime)
+}
+
+// Setup: ClockData present, system clock elapsed time is lower than what is
+// stored in the clock data.
+// Result: A new ClockData value gets set with new system boot time and elapsed
+// time, skew remains the same.
+func TestRunClockCheck2(t *testing.T) {
+ sysTs := time.Now()
+ sysClock := MockSystemClock(sysTs, time.Duration(constElapsedTime))
+ stAdapter := MockStorageAdapter()
+ // Set clock data with elapsed time greater than constElapsedTime
+ clockData := &ClockData{25003, 25, 34569}
+ stAdapter.SetClockData(nil, clockData)
+
+ clock := NewVClockWithMockServices(stAdapter, sysClock, nil)
+
+ clock.runClockCheck(nil)
+ expectedSystemTimeAtBoot := sysTs.UnixNano() - constElapsedTime
+ verifyClockData(t, stAdapter, 25, expectedSystemTimeAtBoot, constElapsedTime)
+}
+
+// Setup: ClockData present, system clock gets a skew of 10 seconds
+// Result: A new ClockData value gets set with new elapsed time and skew,
+// system boot time remains the same.
+func TestRunClockCheck3(t *testing.T) {
+ sysTs := time.Now()
+ sysClock := MockSystemClock(sysTs, time.Duration(constElapsedTime))
+ stAdapter := MockStorageAdapter()
+
+ bootTs := sysTs.Add(time.Duration(-constElapsedTime))
+ oldSkew := 25 * time.Second
+ clockData := &ClockData{bootTs.UnixNano(), oldSkew.Nanoseconds(), 40}
+ stAdapter.SetClockData(nil, clockData)
+
+ clock := NewVClockWithMockServices(stAdapter, sysClock, nil)
+
+ // introduce a change in sys clock
+ extraSkew := 10 * time.Second // moves clock closer to UTC
+ changedSysTs := sysTs.Add(extraSkew)
+ sysClock.SetNow(changedSysTs)
+ newSkew := 15 * time.Second
+
+ clock.runClockCheck(nil)
+ expectedSystemTimeAtBoot := bootTs.UnixNano() + extraSkew.Nanoseconds()
+ verifyClockData(t, stAdapter, newSkew.Nanoseconds(), expectedSystemTimeAtBoot, constElapsedTime)
+}
+
+func TestWithRealSysClock(t *testing.T) {
+ stAdapter := MockStorageAdapter()
+ clock := NewVClockWithMockServices(stAdapter, nil, nil)
+
+ writeNewClockData(nil, clock, 0)
+
+ // Verify if clock data was written to StorageAdapter
+ clockData := &ClockData{}
+ if err := stAdapter.GetClockData(nil, clockData); err != nil {
+ t.Errorf("Expected to find clockData, received error: %v", err)
+ }
+
+ // Verify that calling checkSystemRebooted() does nothing
+ if checkSystemRebooted(nil, clock) {
+ t.Error("Unexpected return value")
+ }
+
+ // sleep for 1 second more than the skew threshold
+ time.Sleep(1800 * time.Millisecond)
+
+ // Verify that calling runClockCheck() only updates elapsed time
+ clock.runClockCheck(nil)
+ newClockData := &ClockData{}
+ if err := stAdapter.GetClockData(nil, newClockData); err != nil {
+ t.Errorf("Expected to find clockData, received error: %v", err)
+ }
+ if newClockData.Skew != clockData.Skew {
+ t.Errorf("Unexpected value for skew: %d", newClockData.Skew)
+ }
+ if newClockData.ElapsedTimeSinceBoot <= clockData.ElapsedTimeSinceBoot {
+ t.Errorf("Unexpected value for elapsed time: %d",
+ newClockData.ElapsedTimeSinceBoot)
+ }
+ if newClockData.SystemTimeAtBoot != clockData.SystemTimeAtBoot {
+ t.Errorf("SystemTimeAtBoot expected: %d, found: %d",
+ clockData.SystemTimeAtBoot, newClockData.SystemTimeAtBoot)
+ }
+}
+
+func verifyClockData(t *testing.T, stAdapter StorageAdapter, skew int64,
+ sysTimeAtBoot int64, elapsedTime int64) {
+ // verify ClockData
+ clockData := &ClockData{}
+ if err := stAdapter.GetClockData(nil, clockData); err != nil {
+ t.Errorf("Expected to find clockData, found error: %v", err)
+ }
+
+ if clockData.Skew != skew {
+ t.Errorf("Expected value for skew: %d, found: %d", skew, clockData.Skew)
+ }
+ if clockData.ElapsedTimeSinceBoot != elapsedTime {
+ t.Errorf("Expected value for elapsed time: %d, found: %d", elapsedTime,
+ clockData.ElapsedTimeSinceBoot)
+ }
+ if clockData.SystemTimeAtBoot != sysTimeAtBoot {
+ t.Errorf("Expected value for SystemTimeAtBoot: %d, found: %d",
+ sysTimeAtBoot, clockData.SystemTimeAtBoot)
+ }
+}
diff --git a/services/syncbase/clock/ntp.go b/services/syncbase/clock/ntp.go
new file mode 100644
index 0000000..ba55322
--- /dev/null
+++ b/services/syncbase/clock/ntp.go
@@ -0,0 +1,151 @@
+// Copyright 2015 The Vanadium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package clock
+
+import (
+ "fmt"
+ "net"
+ "time"
+
+ "v.io/syncbase/x/ref/services/syncbase/server/util"
+)
+
+const (
+ udp = "udp"
+ port = "123"
+)
+
+var _ NtpSource = (*ntpSourceImpl)(nil)
+
+func NewNtpSource(clock SystemClock) NtpSource {
+ return &ntpSourceImpl{util.NtpServerPool, clock}
+}
+
+type ntpSourceImpl struct {
+ ntpHost string
+ sc SystemClock
+}
+
+// NtpSync samples data from the NTP server and returns the sample with the
+// lowest network delay. The sample with the lowest network delay will have
+// the least error in the computation of the offset.
+// Param sampleCount is the number of samples this method will fetch.
+func (ns *ntpSourceImpl) NtpSync(sampleCount int) (*NtpData, error) {
+ var canonicalSample *NtpData
+ for i := 0; i < sampleCount; i++ {
+ if sample, err := ns.sample(); err == nil {
+ if (canonicalSample == nil) || (sample.delay < canonicalSample.delay) {
+ canonicalSample = sample
+ }
+ }
+ }
+ if canonicalSample == nil {
+ err := fmt.Errorf("Failed to get any sample from NTP server: %s", ns.ntpHost)
+ return nil, err
+ }
+ return canonicalSample, nil
+}
+
+// sample connects to an NTP server and returns NtpData containing the clock
+// offset and the network delay experienced while talking to the server.
+//
+// NTP protocol involves sending a request of size 48 bytes with the first
+// byte containing protocol version and mode and the last 8 bytes containing
+// transmit timestamp. The current NTP version is 4. A response from NTP server
+// contains original timestamp (client's transmit timestamp from request) from
+// bytes 24 to 31, server's receive timestamp from byte 32 to 39 and server's
+// transmit time from byte 40 to 47. The client can record the response receive
+// time as soon as it receives a response from the server.
+// Based on these four timestamps the client can compute the offset between the
+// two clocks and the roundtrip network delay for the request.
+func (ns *ntpSourceImpl) sample() (*NtpData, error) {
+ raddr, err := net.ResolveUDPAddr(udp, ns.ntpHost+":"+port)
+ if err != nil {
+ return nil, err
+ }
+
+ con, err := net.DialUDP("udp", nil, raddr)
+ if err != nil {
+ return nil, err
+ }
+ defer con.Close()
+
+ msg := ns.createRequest()
+ _, err = con.Write(msg)
+ if err != nil {
+ return nil, err
+ }
+
+ con.SetDeadline(time.Now().Add(5 * time.Second))
+ _, err = con.Read(msg)
+ if err != nil {
+ return nil, err
+ }
+
+ clientReceiveTs := ns.sc.Now()
+ clientTransmitTs := extractTime(msg[24:32])
+ serverReceiveTs := extractTime(msg[32:40])
+ serverTransmitTs := extractTime(msg[40:48])
+
+ // Following code extracts the clock offset and network delay based on the
+ // transmit and receive timestamps on the client and the server as per
+ // the formula explained at http://www.eecis.udel.edu/~mills/time.html
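+ // With T1 = clientTransmitTs, T2 = serverReceiveTs, T3 = serverTransmitTs,
+ // and T4 = clientReceiveTs:
+ //   offset = ((T2 - T1) + (T3 - T4)) / 2
+ //   delay  = (T4 - T1) - (T3 - T2)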
+ data := NtpData{}
+ data.offset = (serverReceiveTs.Sub(clientTransmitTs) + serverTransmitTs.Sub(clientReceiveTs)) / 2
+ data.delay = clientReceiveTs.Sub(clientTransmitTs) - serverTransmitTs.Sub(serverReceiveTs)
+
+ return &data, nil
+}
+
+func (ns *ntpSourceImpl) createRequest() []byte {
+ data := make([]byte, 48)
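+ // 0x23 is 0b00100011: leap indicator 0, version 4, mode 3 (client).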
+ data[0] = 0x23 // protocol version = 4, mode = 3 (Client)
+
+ // For NTP the prime epoch, or base date of era 0, is 0 h 1 January 1900 UTC
+ t0 := time.Date(1900, 1, 1, 0, 0, 0, 0, time.UTC)
+ tnow := ns.sc.Now()
+ d := tnow.Sub(t0)
+ nsec := d.Nanoseconds()
+
+ // The encoding of the timestamp below is the exact inverse of the decoding
+ // done in extractTime(). Refer to extractTime() for more explanation.
+ sec := nsec / 1e9 // Integer part of seconds since epoch
+ frac := ((nsec % 1e9) << 32) / 1e9 // fractional part of seconds since epoch
+
+ // write the timestamp to Transmit Timestamp section of request.
+ data[43] = byte(sec)
+ data[42] = byte(sec >> 8)
+ data[41] = byte(sec >> 16)
+ data[40] = byte(sec >> 24)
+
+ data[47] = byte(frac)
+ data[46] = byte(frac >> 8)
+ data[45] = byte(frac >> 16)
+ data[44] = byte(frac >> 24)
+ return data
+}
+
+// extractTime takes a byte slice which contains an encoded timestamp from the
+// NTP server, starting at the 0th byte and 8 bytes long. The encoded timestamp
+// is in seconds since 1900. The first 4 bytes contain the integer part of the
+// seconds while the last 4 bytes contain the fractional part of the seconds,
+// where (FFFFFFFF + 1) represents 1 second while 00000001 represents 2^(-32)
+// of a second.
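+// For example, a fractional field of 0x80000000 represents half a second and
+// converts to 500000000 ns.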
+func extractTime(data []byte) time.Time {
+ var sec, frac uint64
+ sec = uint64(data[3]) | uint64(data[2])<<8 | uint64(data[1])<<16 | uint64(data[0])<<24
+ frac = uint64(data[7]) | uint64(data[6])<<8 | uint64(data[5])<<16 | uint64(data[4])<<24
+
+ // Multiply the integral seconds part by 1 billion to convert to nanoseconds.
+ nsec := sec * 1e9
+ // Multiply the frac part by 2^(-32) to get the correct value in seconds and
+ // then multiply by 1 billion to convert to nanoseconds. The multiplication by
+ // 1 billion is done first to make sure that we don't lose precision.
+ nsec += (frac * 1e9) >> 32
+
+ t := time.Date(1900, 1, 1, 0, 0, 0, 0, time.UTC).Add(time.Duration(nsec)).Local()
+
+ return t
+}
diff --git a/services/syncbase/clock/ntpservice.go b/services/syncbase/clock/ntpservice.go
new file mode 100644
index 0000000..79045f3
--- /dev/null
+++ b/services/syncbase/clock/ntpservice.go
@@ -0,0 +1,46 @@
+// Copyright 2015 The Vanadium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package clock
+
+import (
+ "math"
+
+ "v.io/syncbase/x/ref/services/syncbase/server/util"
+ "v.io/v23/context"
+ "v.io/v23/verror"
+ "v.io/x/lib/vlog"
+)
+
+// runNtpCheck talks to an NTP server, fetches the current UTC time from it,
+// and corrects the VClock time.
+func (c *VClock) runNtpCheck(ctx *context.T) error {
+ ntpData, err := c.ntpSource.NtpSync(util.NtpSampleCount)
+ if err != nil {
+ vlog.Errorf("Error while fetching ntp time: %v", err)
+ return err
+ }
+ offset := ntpData.offset
+
+ data := &ClockData{}
+ if err := c.sa.GetClockData(ctx, data); err != nil {
+ if verror.ErrorID(err) == verror.ErrNoExist.ID {
+ // No ClockData found, write a new one.
+ writeNewClockData(ctx, c, offset)
+ return nil
+ }
+ vlog.Info("Error while fetching clock data: %v", err)
+ vlog.Info("Overwriting clock data with NTP")
+ writeNewClockData(ctx, c, offset)
+ return nil
+ }
+
+ // Update clock skew if the difference between offset and skew is larger
+ // than NtpDiffThreshold. NtpDiffThreshold helps avoid constant tweaking of
+ // the syncbase clock.
+ if math.Abs(float64(offset.Nanoseconds() - data.Skew)) > util.NtpDiffThreshold {
+ writeNewClockData(ctx, c, offset)
+ }
+ return nil
+}
diff --git a/services/syncbase/clock/ntpservice_test.go b/services/syncbase/clock/ntpservice_test.go
new file mode 100644
index 0000000..e505e01
--- /dev/null
+++ b/services/syncbase/clock/ntpservice_test.go
@@ -0,0 +1,192 @@
+// Copyright 2015 The Vanadium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package clock
+
+import (
+ "net"
+ "testing"
+ "time"
+)
+
+func TestWithMockNtpForErr(t *testing.T) {
+ sysClock := MockSystemClock(time.Now(), 0)
+ stAdapter := MockStorageAdapter()
+ ntpSource := MockNtpSource()
+ ntpSource.Err = net.UnknownNetworkError("network err")
+
+ vclock := NewVClockWithMockServices(stAdapter, sysClock, ntpSource)
+
+ if err := vclock.runNtpCheck(nil); err == nil {
+ t.Error("Network error expected but not found")
+ }
+
+ if stAdapter.clockData != nil {
+ t.Error("Non-nil clock data found.")
+ }
+}
+
+func TestWithMockNtpForDiffBelowThreshold(t *testing.T) {
+ sysClock := MockSystemClock(time.Now(), 0) // not used
+ stAdapter := MockStorageAdapter()
+ originalData := NewClockData(0)
+ stAdapter.SetClockData(nil, &originalData)
+
+ ntpSource := MockNtpSource()
+ offset := 1800 * time.Millisecond // error threshold is 2 seconds
+ ntpSource.Data = &NtpData{offset: offset, delay: 5 * time.Millisecond}
+
+ vclock := NewVClockWithMockServices(stAdapter, sysClock, ntpSource)
+ if err := vclock.runNtpCheck(nil); err != nil {
+ t.Errorf("Unexpected err: %v", err)
+ }
+ if isClockDataChanged(stAdapter, &originalData) {
+ t.Error("ClockData expected to be unchanged but found updated")
+ }
+}
+
+func TestWithMockNtpForDiffAboveThreshold(t *testing.T) {
+ sysTs := time.Now()
+ elapsedTime := 10 * time.Minute
+ sysClock := MockSystemClock(sysTs, elapsedTime)
+
+ stAdapter := MockStorageAdapter()
+ originalData := NewClockData(0)
+ stAdapter.SetClockData(nil, &originalData)
+
+ ntpSource := MockNtpSource()
+ skew := 2100 * time.Millisecond // error threshold is 2 seconds
+ ntpSource.Data = &NtpData{offset: skew, delay: 5 * time.Millisecond}
+
+ vclock := NewVClockWithMockServices(stAdapter, sysClock, ntpSource)
+ if err := vclock.runNtpCheck(nil); err != nil {
+ t.Errorf("Unexpected err: %v", err)
+ }
+ if !isClockDataChanged(stAdapter, &originalData) {
+ t.Error("ClockData expected to be updated but found unchanged")
+ }
+ expectedBootTime := sysTs.Add(-elapsedTime).UnixNano()
+ if stAdapter.clockData.Skew != skew.Nanoseconds() {
+ t.Errorf("Skew expected to be %d but found %d",
+ skew.Nanoseconds(), stAdapter.clockData.Skew)
+ }
+ if stAdapter.clockData.ElapsedTimeSinceBoot != elapsedTime.Nanoseconds() {
+ t.Errorf("ElapsedTime expected to be %d but found %d",
+ elapsedTime.Nanoseconds(), stAdapter.clockData.ElapsedTimeSinceBoot)
+ }
+ if stAdapter.clockData.SystemTimeAtBoot != expectedBootTime {
+ t.Errorf("Skew expected to be %d but found %d",
+ expectedBootTime, stAdapter.clockData.SystemTimeAtBoot)
+ }
+}
+
+func TestWithMockNtpForDiffBelowThresholdAndExistingLargeSkew(t *testing.T) {
+ sysTs := time.Now()
+ elapsedTime := 10 * time.Minute
+ sysClock := MockSystemClock(sysTs, elapsedTime)
+
+ stAdapter := MockStorageAdapter()
+ originalData := NewClockData(2300 * time.Millisecond.Nanoseconds()) // large skew
+ stAdapter.SetClockData(nil, &originalData)
+
+ ntpSource := MockNtpSource()
+ skew := 200 * time.Millisecond // error threshold is 2 seconds
+ ntpSource.Data = &NtpData{offset: skew, delay: 5 * time.Millisecond}
+
+ vclock := NewVClockWithMockServices(stAdapter, sysClock, ntpSource)
+ if err := vclock.runNtpCheck(nil); err != nil {
+ t.Errorf("Unexpected err: %v", err)
+ }
+ if !isClockDataChanged(stAdapter, &originalData) {
+ t.Error("ClockData expected to be updated but found unchanged")
+ }
+ expectedBootTime := sysTs.Add(-elapsedTime).UnixNano()
+ if stAdapter.clockData.Skew != skew.Nanoseconds() {
+ t.Errorf("Skew expected to be %d but found %d",
+ skew.Nanoseconds(), stAdapter.clockData.Skew)
+ }
+ if stAdapter.clockData.ElapsedTimeSinceBoot != elapsedTime.Nanoseconds() {
+ t.Errorf("ElapsedTime expected to be %d but found %d",
+ elapsedTime.Nanoseconds(), stAdapter.clockData.ElapsedTimeSinceBoot)
+ }
+ if stAdapter.clockData.SystemTimeAtBoot != expectedBootTime {
+ t.Errorf("Skew expected to be %d but found %d",
+ expectedBootTime, stAdapter.clockData.SystemTimeAtBoot)
+ }
+}
+
+func TestWithMockNtpForDiffBelowThresholdWithNoStoredClockData(t *testing.T) {
+ sysTs := time.Now()
+ elapsedTime := 10 * time.Minute
+ sysClock := MockSystemClock(sysTs, elapsedTime)
+
+ stAdapter := MockStorageAdapter() // no skew data stored
+
+ ntpSource := MockNtpSource()
+ skew := 200 * time.Millisecond // error threshold is 2 seconds
+ ntpSource.Data = &NtpData{offset: skew, delay: 5 * time.Millisecond}
+
+ vclock := NewVClockWithMockServices(stAdapter, sysClock, ntpSource)
+ if err := vclock.runNtpCheck(nil); err != nil {
+ t.Errorf("Unexpected err: %v", err)
+ }
+ if !isClockDataChanged(stAdapter, nil) {
+ t.Error("ClockData expected to be updated but found unchanged")
+ }
+ expectedBootTime := sysTs.Add(-elapsedTime).UnixNano()
+ if stAdapter.clockData.Skew != skew.Nanoseconds() {
+ t.Errorf("Skew expected to be %d but found %d",
+ skew.Nanoseconds(), stAdapter.clockData.Skew)
+ }
+ if stAdapter.clockData.ElapsedTimeSinceBoot != elapsedTime.Nanoseconds() {
+ t.Errorf("ElapsedTime expected to be %d but found %d",
+ elapsedTime.Nanoseconds(), stAdapter.clockData.ElapsedTimeSinceBoot)
+ }
+ if stAdapter.clockData.SystemTimeAtBoot != expectedBootTime {
+ t.Errorf("Skew expected to be %d but found %d",
+ expectedBootTime, stAdapter.clockData.SystemTimeAtBoot)
+ }
+}
+
+/*
+The following two tests are commented out as they hit real NTP servers
+and can result in flakiness if the clock of the machine running the continuous
+test has a skew of more than 2 seconds.
+
+func TestWithRealNtp(t *testing.T) {
+ stAdapter := MockStorageAdapter()
+ originalData := NewClockData(100 * time.Millisecond.Nanoseconds()) // small skew
+ stAdapter.SetClockData(nil, &originalData)
+ vclock := NewVClockWithMockServices(stAdapter, nil, nil)
+ if err := vclock.runNtpCheck(nil); err != nil {
+ t.Errorf("Unexpected err: %v", err)
+ }
+ if isClockDataChanged(stAdapter, &originalData) {
+ t.Error("ClockData expected to be unchanged but found updated")
+ }
+}
+
+func TestWithRealNtpForNoClockData(t *testing.T) {
+ stAdapter := MockStorageAdapter()
+ vclock := NewVClockWithMockServices(stAdapter, nil, nil)
+ if err := vclock.runNtpCheck(nil); err != nil {
+ t.Errorf("Unexpected err: %v", err)
+ }
+ if !isClockDataChanged(stAdapter, nil) {
+ t.Error("ClockData expected to be updated but found unchanged")
+ }
+}
+*/
+
+func NewClockData(skew int64) ClockData {
+ return ClockData{
+ SystemTimeAtBoot: 0,
+ Skew: skew,
+ ElapsedTimeSinceBoot: 0,
+ }
+}
+
+func isClockDataChanged(stAdapter *storageAdapterMockImpl, originalData *ClockData) bool {
+ return stAdapter.clockData != originalData // check for same pointer
+}
diff --git a/services/syncbase/clock/storage_adapter.go b/services/syncbase/clock/storage_adapter.go
new file mode 100644
index 0000000..33b4cda
--- /dev/null
+++ b/services/syncbase/clock/storage_adapter.go
@@ -0,0 +1,33 @@
+// Copyright 2015 The Vanadium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package clock
+
+import (
+ "v.io/syncbase/x/ref/services/syncbase/server/util"
+ "v.io/syncbase/x/ref/services/syncbase/store"
+ "v.io/v23/context"
+)
+
+var _ StorageAdapter = (*storageAdapterImpl)(nil)
+
+func NewStorageAdapter(st store.Store) StorageAdapter {
+ return &storageAdapterImpl{st}
+}
+
+type storageAdapterImpl struct {
+ st store.Store
+}
+
+func (sa *storageAdapterImpl) GetClockData(ctx *context.T, data *ClockData) error {
+ return util.Get(ctx, sa.st, clockDataKey(), data)
+}
+
+func (sa *storageAdapterImpl) SetClockData(ctx *context.T, data *ClockData) error {
+ return util.Put(ctx, sa.st, clockDataKey(), data)
+}
+
+func clockDataKey() string {
+ return util.ClockPrefix
+}
diff --git a/services/syncbase/clock/test_util.go b/services/syncbase/clock/test_util.go
new file mode 100644
index 0000000..6027f49
--- /dev/null
+++ b/services/syncbase/clock/test_util.go
@@ -0,0 +1,119 @@
+// Copyright 2015 The Vanadium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package clock
+
+// Utilities for testing the clock package.
+
+import (
+ "time"
+
+ "v.io/v23/context"
+ "v.io/v23/verror"
+)
+
+/////////////////////////////////////////////////
+// Mock for StorageAdapter
+
+var _ StorageAdapter = (*storageAdapterMockImpl)(nil)
+
+func MockStorageAdapter() *storageAdapterMockImpl {
+ return &storageAdapterMockImpl{}
+}
+
+type storageAdapterMockImpl struct {
+ clockData *ClockData
+ err error
+}
+
+func (sa *storageAdapterMockImpl) GetClockData(ctx *context.T, data *ClockData) error {
+ if sa.err != nil {
+ return sa.err
+ }
+ if sa.clockData == nil {
+ return verror.NewErrNoExist(ctx)
+ }
+ *data = *sa.clockData
+ return nil
+}
+
+func (sa *storageAdapterMockImpl) SetClockData(ctx *context.T, data *ClockData) error {
+ if sa.err != nil {
+ return sa.err
+ }
+ sa.clockData = data
+ return nil
+}
+
+func (sa *storageAdapterMockImpl) SetError(err error) {
+ sa.err = err
+}
+
+/////////////////////////////////////////////////
+// Mock for SystemClock
+
+var _ SystemClock = (*systemClockMockImpl)(nil)
+
+func MockSystemClock(now time.Time, elapsedTime time.Duration) *systemClockMockImpl {
+ return &systemClockMockImpl{
+ now: now,
+ elapsedTime: elapsedTime,
+ }
+}
+
+type systemClockMockImpl struct {
+ now time.Time
+ elapsedTime time.Duration
+}
+
+func (sc *systemClockMockImpl) Now() time.Time {
+ return sc.now
+}
+
+func (sc *systemClockMockImpl) SetNow(now time.Time) {
+ sc.now = now
+}
+
+func (sc *systemClockMockImpl) ElapsedTime() (time.Duration, error) {
+ return sc.elapsedTime, nil
+}
+
+func (sc *systemClockMockImpl) SetElapsedTime(elapsed time.Duration) {
+ sc.elapsedTime = elapsed
+}
+
+/////////////////////////////////////////////////
+// Mock for NtpSource
+
+var _ NtpSource = (*ntpSourceMockImpl)(nil)
+
+func MockNtpSource() *ntpSourceMockImpl {
+ return &ntpSourceMockImpl{}
+}
+
+type ntpSourceMockImpl struct {
+ Err error
+ Data *NtpData
+}
+
+func (ns *ntpSourceMockImpl) NtpSync(sampleCount int) (*NtpData, error) {
+ if ns.Err != nil {
+ return nil, ns.Err
+ }
+ return ns.Data, nil
+}
+
+func NewVClockWithMockServices(sa StorageAdapter, sc SystemClock, ns NtpSource) *VClock {
+ if sc == nil {
+ sc = newSystemClock()
+ }
+ if ns == nil {
+ ns = NewNtpSource(sc)
+ }
+ return &VClock{
+ clock: sc,
+ sa: sa,
+ ntpSource: ns,
+ }
+}
diff --git a/services/syncbase/clock/types.go b/services/syncbase/clock/types.go
new file mode 100644
index 0000000..6934f11
--- /dev/null
+++ b/services/syncbase/clock/types.go
@@ -0,0 +1,53 @@
+// Copyright 2015 The Vanadium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package clock
+
+import (
+ "time"
+
+ "v.io/v23/context"
+)
+
+// SystemClock is a wrapper over the system clock that allows easy testing of
+// VClock and other code that uses timestamps.
+type SystemClock interface {
+ // Now returns the current UTC time as known by the system.
+ // This may not reflect the NTP time if the system clock is out of
+ // sync with NTP.
+ Now() time.Time
+
+ // ElapsedTime returns a duration representing the time elapsed since the device
+ // rebooted.
+ ElapsedTime() (time.Duration, error)
+}
+
+type StorageAdapter interface {
+ GetClockData(ctx *context.T, data *ClockData) error
+ SetClockData(ctx *context.T, data *ClockData) error
+}
+
+type NtpSource interface {
+ // NtpSync obtains NtpData samples from an NTP server and returns the one
+ // which has the lowest network delay.
+ // Param sampleCount is the number of samples this method will fetch.
+ // NtpData contains the clock offset and the network delay experienced while
+ // talking to the server.
+ NtpSync(sampleCount int) (*NtpData, error)
+}
+
+type NtpData struct {
+ // Offset is the difference between the NTP time and the system clock.
+ // Adding the offset to the system clock gives the estimated NTP time.
+ offset time.Duration
+
+ // Delay is the round trip network delay experienced while talking to NTP
+ // server. The smaller the delay, the more accurate the offset is.
+ delay time.Duration
+}
+
+func (cd *ClockData) SystemBootTime() time.Time {
+ ns := time.Second.Nanoseconds()
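+ // Split the nanosecond timestamp into whole seconds and leftover
+ // nanoseconds, as expected by time.Unix.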
+ return time.Unix(cd.SystemTimeAtBoot/ns, cd.SystemTimeAtBoot%ns)
+}
diff --git a/services/syncbase/clock/types.vdl b/services/syncbase/clock/types.vdl
new file mode 100644
index 0000000..b12f8f9
--- /dev/null
+++ b/services/syncbase/clock/types.vdl
@@ -0,0 +1,20 @@
+// Copyright 2015 The Vanadium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package clock
+
+// ClockData is the persistent state of syncbase clock used to estimate current
+// NTP time and catch any unexpected changes to system clock.
+type ClockData struct {
+ // UTC time in Unix nanoseconds obtained from the system clock at boot.
+ SystemTimeAtBoot int64
+
+ // Skew between the system clock and NTP time.
+ Skew int64
+
+ // The elapsed time since boot as last seen during a run of clockservice.
+ // This is used to determine if the device rebooted since the last run of
+ // clockservice.
+ ElapsedTimeSinceBoot int64
+}
\ No newline at end of file
diff --git a/services/syncbase/clock/types.vdl.go b/services/syncbase/clock/types.vdl.go
new file mode 100644
index 0000000..4749d42
--- /dev/null
+++ b/services/syncbase/clock/types.vdl.go
@@ -0,0 +1,35 @@
+// Copyright 2015 The Vanadium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// This file was auto-generated by the vanadium vdl tool.
+// Source: types.vdl
+
+package clock
+
+import (
+ // VDL system imports
+ "v.io/v23/vdl"
+)
+
+// ClockData is the persistent state of syncbase clock used to estimate current
+// NTP time and catch any unexpected changes to system clock.
+type ClockData struct {
+ // UTC time in Unix nanoseconds obtained from the system clock at boot.
+ SystemTimeAtBoot int64
+ // Skew between the system clock and NTP time.
+ Skew int64
+ // The elapsed time since boot as last seen during a run of clockservice.
+ // This is used to determine if the device rebooted since the last run of
+ // clockservice.
+ ElapsedTimeSinceBoot int64
+}
+
+func (ClockData) __VDLReflect(struct {
+ Name string `vdl:"v.io/syncbase/x/ref/services/syncbase/clock.ClockData"`
+}) {
+}
+
+func init() {
+ vdl.Register((*ClockData)(nil))
+}
diff --git a/services/syncbase/clock/vclock.go b/services/syncbase/clock/vclock.go
new file mode 100644
index 0000000..95719f4
--- /dev/null
+++ b/services/syncbase/clock/vclock.go
@@ -0,0 +1,73 @@
+// Copyright 2015 The Vanadium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package clock
+
+import (
+ "time"
+
+ "v.io/syncbase/x/ref/services/syncbase/store"
+ "v.io/v23/context"
+ "v.io/v23/verror"
+ "v.io/x/lib/vlog"
+)
+
+// VClock holds data required to provide an estimate of the UTC time at any
+// given point. The fields contained here are:
+// - systemTimeAtBoot : the time shown by the system clock at boot.
+// - skew : the difference between the system clock and UTC time.
+// - clock : Instance of clock.SystemClock interface providing access
+// to the system time.
+// - sa : adapter for storage of clock data.
+// - ntpSource : source for fetching NTP data.
+type VClock struct {
+ systemTimeAtBoot time.Time
+ skew time.Duration
+ clock SystemClock
+ sa StorageAdapter
+ ntpSource NtpSource
+}
+
+func NewVClock(st store.Store) *VClock {
+ sysClock := newSystemClock()
+ return &VClock{
+ clock: sysClock,
+ sa: NewStorageAdapter(st),
+ ntpSource: NewNtpSource(sysClock),
+ }
+}
+
+// Now returns the current UTC time based on the estimate of the skew that
+// the system clock has with respect to NTP time.
+func (c *VClock) Now(ctx *context.T) time.Time {
+ clockData := &ClockData{}
+ if err := c.sa.GetClockData(ctx, clockData); err != nil {
+ if verror.ErrorID(err) == verror.ErrNoExist.ID {
+ // VClock's cron job to set up UTC time at boot has not been run yet.
+ vlog.Error("No ClockData found while creating a timestamp")
+ } else {
+ vlog.Errorf("Error while fetching clock data: %v", err)
+ }
+ vlog.Error("Returning current system clock time")
+ return c.clock.Now()
+ }
+ skew := time.Duration(clockData.Skew)
+ return c.clock.Now().Add(skew)
+}
+
+///////////////////////////////////////////////////
+// Implementation for SystemClock.
+
+type systemClockImpl struct{}
+
+// Now returns the system time in UTC.
+func (sc *systemClockImpl) Now() time.Time {
+ return time.Now().UTC()
+}
+
+var _ SystemClock = (*systemClockImpl)(nil)
+
+func newSystemClock() SystemClock {
+ return &systemClockImpl{}
+}
diff --git a/services/syncbase/clock/vclock_test.go b/services/syncbase/clock/vclock_test.go
new file mode 100644
index 0000000..bf92f5c
--- /dev/null
+++ b/services/syncbase/clock/vclock_test.go
@@ -0,0 +1,69 @@
+// Copyright 2015 The Vanadium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package clock
+
+import (
+ "testing"
+ "time"
+
+ "v.io/v23/verror"
+)
+
+func TestVClock(t *testing.T) {
+ sysTs := time.Now()
+ sysClock := MockSystemClock(sysTs, 0)
+ stAdapter := MockStorageAdapter()
+ stAdapter.SetClockData(nil, &ClockData{0, 0, 0})
+ clock := NewVClockWithMockServices(stAdapter, sysClock, nil)
+
+ ts := clock.Now(nil)
+ if ts != sysTs {
+ t.Errorf("timestamp expected to be %q but found to be %q", sysTs, ts)
+ }
+}
+
+func TestVClockWithSkew(t *testing.T) {
+ // test with positive skew
+ checkSkew(t, 5)
+ // test with negative skew
+ checkSkew(t, -5)
+}
+
+func checkSkew(t *testing.T, skew int64) {
+ sysTs := time.Now()
+ sysClock := MockSystemClock(sysTs, 0)
+
+ var elapsedTime int64 = 100
+ stAdapter := MockStorageAdapter()
+ bootTime := sysTs.UnixNano() - elapsedTime
+ clockData := ClockData{bootTime, skew, elapsedTime}
+ stAdapter.SetClockData(nil, &clockData)
+
+ clock := NewVClockWithMockServices(stAdapter, sysClock, nil)
+
+ ts := clock.Now(nil)
+ if ts == sysTs {
+ t.Errorf("timestamp expected to be %q but found to be %q", sysTs, ts)
+ }
+ if ts.UnixNano() != (sysTs.UnixNano() + skew) {
+ t.Errorf("Unexpected vclock timestamp. vclock: %v, sysclock: %v, skew: %v", ts, sysTs, skew)
+ }
+}
+
+func TestVClockWithInternalErr(t *testing.T) {
+ sysTs := time.Now()
+ sysClock := MockSystemClock(sysTs, 0)
+
+ stAdapter := MockStorageAdapter()
+ stAdapter.SetError(verror.NewErrInternal(nil))
+
+ clock := NewVClockWithMockServices(stAdapter, sysClock, nil)
+
+ // Internal err should result in vclock falling back to the system clock.
+ ts := clock.Now(nil)
+ if ts != sysTs {
+ t.Errorf("timestamp expected to be %q but found to be %q", sysTs, ts)
+ }
+}
diff --git a/services/syncbase/localblobstore/blobmap/blobmap.go b/services/syncbase/localblobstore/blobmap/blobmap.go
new file mode 100644
index 0000000..c674f68
--- /dev/null
+++ b/services/syncbase/localblobstore/blobmap/blobmap.go
@@ -0,0 +1,480 @@
+// Copyright 2015 The Vanadium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// Package blobmap implements a map from chunk checksums to chunk locations
+// and vice versa, using a store.Store (currently, one implemented with
+// leveldb).
+package blobmap
+
+import "encoding/binary"
+import "sync"
+
+import "v.io/syncbase/x/ref/services/syncbase/store"
+import "v.io/syncbase/x/ref/services/syncbase/store/leveldb"
+import "v.io/v23/context"
+import "v.io/v23/verror"
+
+const pkgPath = "v.io/syncbase/x/ref/services/syncbase/localblobstore/blobmap"
+
+var (
+ errBadBlobIDLen = verror.Register(pkgPath+".errBadBlobIDLen", verror.NoRetry, "{1:}{2:} blobmap {3}: bad blob length {4} should be {5}{:_}")
+ errBadChunkHashLen = verror.Register(pkgPath+".errBadChunkHashLen", verror.NoRetry, "{1:}{2:} blobmap {3}: bad chunk hash length {4} should be {5}{:_}")
+ errNoSuchBlob = verror.Register(pkgPath+".errNoSuchBlob", verror.NoRetry, "{1:}{2:} blobmap {3}: no such blob{:_}")
+ errMalformedChunkEntry = verror.Register(pkgPath+".errMalformedChunkEntry", verror.NoRetry, "{1:}{2:} blobmap {3}: malformed chunk entry{:_}")
+ errNoSuchChunk = verror.Register(pkgPath+".errNoSuchChunk", verror.NoRetry, "{1:}{2:} blobmap {3}: no such chunk{:_}")
+ errMalformedBlobEntry = verror.Register(pkgPath+".errMalformedBlobEntry", verror.NoRetry, "{1:}{2:} blobmap {3}: malformed blob entry{:_}")
+)
+
+// There are two tables: chunk-to-location, and blob-to-chunk.
+// Each chunk is represented by one entry in each table.
+// On deletion, the latter is used to find the former, so the latter is added
+// first, and deleted last.
+//
+// chunk-to-location:
+// Key: 1-byte containing chunkPrefix, 16-byte chunk hash, 16-byte blob ID
+// Value: Varint offset, Varint length.
+// The chunk with the specified 16-byte hash had the specified length, and is
+// (or was) found at the specified offset in the blob.
+//
+// blob-to-chunk:
+// Key: 1-byte containing blobPrefix, 16-byte blob ID, 8-byte bigendian offset
+// Value: 16-byte chunk hash, Varint length.
+//
+// The varint encoded fields are written/read with
+// encoding/binary.{Put,Read}Varint. The blob-to-chunk keys encode the offset
+// as raw big-endian (encoding/binary.{Put,}Uint64) so that it will sort in
+// increasing offset order.
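+//
+// For example, a chunk with hash H at offset 1024 and length 4096 in blob B
+// would be recorded as:
+//   chunk-to-location: key = chunkPrefix + H + B, value = varint(1024) varint(4096)
+//   blob-to-chunk:     key = blobPrefix + B + bigendian64(1024), value = H varint(4096)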
+
+const chunkHashLen = 16 // length of chunk hash
+const blobIDLen = 16 // length of blob ID
+const offsetLen = 8 // length of offset in blob-to-chunk key
+
+const maxKeyLen = 64 // conservative maximum key length
+const maxValLen = 64 // conservative maximum value length
+
+var chunkPrefix []byte = []byte{0} // key prefix for chunk-to-location
+var blobPrefix []byte = []byte{1} // key prefix for blob-to-chunk
+
+// offsetLimit is an offset that's greater than, and one byte longer than, any
+// real offset.
+var offsetLimit []byte = []byte{
+ 0xff, 0xff, 0xff, 0xff,
+ 0xff, 0xff, 0xff, 0xff,
+ 0xff,
+}
+
+// blobLimit is a blobID that's greater than, and one byte longer than, any
+// real blob ID
+var blobLimit []byte = []byte{
+ 0xff, 0xff, 0xff, 0xff,
+ 0xff, 0xff, 0xff, 0xff,
+ 0xff, 0xff, 0xff, 0xff,
+ 0xff, 0xff, 0xff, 0xff,
+ 0xff,
+}
+
+// A Location describes a chunk's location within a blob.
+type Location struct {
+ BlobID []byte // ID of blob
+ Offset int64 // byte offset of chunk within blob
+ Size int64 // size of chunk
+}
+
+// A BlobMap maps chunk checksums to Locations, and vice versa.
+type BlobMap struct {
+ dir string // the directory where the store is held
+ st store.Store // private store that holds the mapping.
+}
+
+// New() returns a pointer to a BlobMap, backed by storage in directory dir.
+func New(ctx *context.T, dir string) (bm *BlobMap, err error) {
+ bm = new(BlobMap)
+ bm.dir = dir
+ bm.st, err = leveldb.Open(dir, leveldb.OpenOptions{CreateIfMissing: true, ErrorIfExists: false})
+ return bm, err
+}
+
+// Close() closes any files or other resources associated with *bm.
+// No other methods on bm may be called after Close().
+func (bm *BlobMap) Close() error {
+ return bm.st.Close()
+}
+
+// AssociateChunkWithLocation() remembers that the specified chunk hash is
+// associated with the specified Location.
+func (bm *BlobMap) AssociateChunkWithLocation(ctx *context.T, chunk []byte, loc Location) (err error) {
+ // Check expected lengths explicitly in routines that modify the database.
+ if len(loc.BlobID) != blobIDLen {
+ err = verror.New(errBadBlobIDLen, ctx, bm.dir, len(loc.BlobID), blobIDLen)
+ } else if len(chunk) != chunkHashLen {
+ err = verror.New(errBadChunkHashLen, ctx, bm.dir, len(chunk), chunkHashLen)
+ } else {
+ var key [maxKeyLen]byte
+ var val [maxValLen]byte
+
+ // Put the blob-to-chunk entry first, since it's used
+ // to garbage collect the other.
+ keyLen := copy(key[:], blobPrefix)
+ keyLen += copy(key[keyLen:], loc.BlobID)
+ binary.BigEndian.PutUint64(key[keyLen:], uint64(loc.Offset))
+ keyLen += offsetLen
+
+ valLen := copy(val[:], chunk)
+ valLen += binary.PutVarint(val[valLen:], loc.Size)
+ err = bm.st.Put(key[:keyLen], val[:valLen])
+
+ if err == nil {
+ keyLen = copy(key[:], chunkPrefix)
+ keyLen += copy(key[keyLen:], chunk)
+ keyLen += copy(key[keyLen:], loc.BlobID)
+
+ valLen = binary.PutVarint(val[:], loc.Offset)
+ valLen += binary.PutVarint(val[valLen:], loc.Size)
+
+ err = bm.st.Put(key[:keyLen], val[:valLen])
+ }
+ }
+
+ return err
+}
+
+// DeleteBlob() deletes any chunk associations for the specified blob that were
+// previously added with AssociateChunkWithLocation().
+func (bm *BlobMap) DeleteBlob(ctx *context.T, blob []byte) (err error) {
+ // Check expected lengths explicitly in routines that modify the database.
+ if len(blob) != blobIDLen {
+ err = verror.New(errBadBlobIDLen, ctx, bm.dir, len(blob), blobIDLen)
+ } else {
+ var start [maxKeyLen]byte
+ var limit [maxKeyLen]byte
+
+ startLen := copy(start[:], blobPrefix)
+ startLen += copy(start[startLen:], blob)
+
+ limitLen := copy(limit[:], start[:startLen])
+ limitLen += copy(limit[limitLen:], offsetLimit)
+
+ var keyBuf [maxKeyLen]byte // buffer for keys returned by stream
+ var valBuf [maxValLen]byte // buffer for values returned by stream
+ var deleteKey [maxKeyLen]byte // buffer to construct chunk-to-location keys to delete
+
+ deletePrefixLen := copy(deleteKey[:], chunkPrefix)
+
+ seenAValue := false
+
+ s := bm.st.Scan(start[:startLen], limit[:limitLen])
+ for s.Advance() && err == nil {
+ seenAValue = true
+
+ key := s.Key(keyBuf[:])
+ value := s.Value(valBuf[:])
+
+ if len(value) >= chunkHashLen {
+ deleteKeyLen := deletePrefixLen
+ deleteKeyLen += copy(deleteKey[deleteKeyLen:], value[:chunkHashLen])
+ deleteKeyLen += copy(deleteKey[deleteKeyLen:], blob)
+ err = bm.st.Delete(deleteKey[:deleteKeyLen])
+ }
+
+ if err == nil {
+ // Delete the blob-to-chunk entry last, as it's
+ // used to find the chunk-to-location entry.
+ err = bm.st.Delete(key)
+ }
+ }
+
+ if err != nil {
+ s.Cancel()
+ } else {
+ err = s.Err()
+ if err == nil && !seenAValue {
+ err = verror.New(errNoSuchBlob, ctx, bm.dir, blob)
+ }
+ }
+ }
+
+ return err
+}
+
+// LookupChunk() returns a Location for the specified chunk. Only one Location
+// is returned, even if several are available in the database. If the client
+// finds that the Location is not available, perhaps because its blob has
+// been deleted, the client should remove the blob from the BlobMap using
+// DeleteBlob(loc.Blob), and try again. (The client may also wish to
+// arrange at some point to call GC() on the blob store.)
+func (bm *BlobMap) LookupChunk(ctx *context.T, chunkHash []byte) (loc Location, err error) {
+ var start [maxKeyLen]byte
+ var limit [maxKeyLen]byte
+
+ startLen := copy(start[:], chunkPrefix)
+ startLen += copy(start[startLen:], chunkHash)
+
+ limitLen := copy(limit[:], start[:startLen])
+ limitLen += copy(limit[limitLen:], blobLimit)
+
+ var keyBuf [maxKeyLen]byte // buffer for keys returned by stream
+ var valBuf [maxValLen]byte // buffer for values returned by stream
+
+ s := bm.st.Scan(start[:startLen], limit[:limitLen])
+ if s.Advance() {
+ var n int
+ key := s.Key(keyBuf[:])
+ value := s.Value(valBuf[:])
+ loc.BlobID = key[len(chunkPrefix)+chunkHashLen:]
+ loc.Offset, n = binary.Varint(value)
+ if n > 0 {
+ loc.Size, n = binary.Varint(value[n:])
+ }
+ if n <= 0 {
+ err = verror.New(errMalformedChunkEntry, ctx, bm.dir, chunkHash, key, value)
+ }
+ s.Cancel()
+ } else {
+ if err == nil {
+ err = s.Err()
+ }
+ if err == nil {
+ err = verror.New(errNoSuchChunk, ctx, bm.dir, chunkHash)
+ }
+ }
+
+ return loc, err
+}
+
+// A ChunkStream allows the client to iterate over the chunks in a blob:
+// cs := bm.NewChunkStream(ctx, blob)
+// for cs.Advance() {
+// chunkHash := cs.Value()
+// ...process chunkHash...
+// }
+// if cs.Err() != nil {
+// ...there was an error...
+// }
+type ChunkStream struct {
+ bm *BlobMap
+ ctx *context.T
+ stream store.Stream
+
+ keyBuf [maxKeyLen]byte // buffer for keys
+ valBuf [maxValLen]byte // buffer for values
+ key []byte // key for current element
+ value []byte // value of current element
+ loc Location // location of current element
+ err error // error encountered.
+ more bool // whether stream may be consulted again
+}
+
+// NewChunkStream() returns a pointer to a new ChunkStream that allows the client
+// to enumerate the chunk hashes in a blob, in order.
+func (bm *BlobMap) NewChunkStream(ctx *context.T, blob []byte) *ChunkStream {
+ var start [maxKeyLen]byte
+ var limit [maxKeyLen]byte
+
+ startLen := copy(start[:], blobPrefix)
+ startLen += copy(start[startLen:], blob)
+
+ limitLen := copy(limit[:], start[:startLen])
+ limitLen += copy(limit[limitLen:], offsetLimit)
+
+ cs := new(ChunkStream)
+ cs.bm = bm
+ cs.ctx = ctx
+ cs.stream = bm.st.Scan(start[:startLen], limit[:limitLen])
+ cs.more = true
+
+ return cs
+}
+
+// Advance() stages an element so the client can retrieve the chunk hash with
+// Value(), or its Location with Location(). Advance() returns true iff there
+// is an element to retrieve. The client must call Advance() before calling
+// Value() or Location(). The client must call Cancel() if it does not iterate
+// through all elements (i.e. until Advance() returns false). Advance() may
+// block if an element is not immediately available.
+func (cs *ChunkStream) Advance() (ok bool) {
+ if cs.more && cs.err == nil {
+ if !cs.stream.Advance() {
+ cs.err = cs.stream.Err()
+ cs.more = false // no more stream, even if no error
+ } else {
+ cs.key = cs.stream.Key(cs.keyBuf[:])
+ cs.value = cs.stream.Value(cs.valBuf[:])
+ ok = (len(cs.value) >= chunkHashLen) &&
+ (len(cs.key) == len(blobPrefix)+blobIDLen+offsetLen)
+ if ok {
+ var n int
+ cs.loc.BlobID = make([]byte, blobIDLen)
+ copy(cs.loc.BlobID, cs.key[len(blobPrefix):len(blobPrefix)+blobIDLen])
+ cs.loc.Offset = int64(binary.BigEndian.Uint64(cs.key[len(blobPrefix)+blobIDLen:]))
+ cs.loc.Size, n = binary.Varint(cs.value[chunkHashLen:])
+ ok = (n > 0)
+ }
+ if !ok {
+ cs.err = verror.New(errMalformedBlobEntry, cs.ctx, cs.bm.dir, cs.key, cs.value)
+ cs.stream.Cancel()
+ }
+ }
+ }
+ return ok
+}
+
+// Value() returns the content hash of the chunk staged by
+// Advance(). The returned slice may be a sub-slice of buf if buf is large
+// enough to hold the entire value. Otherwise, a newly allocated slice will be
+// returned. It is valid to pass a nil buf. Value() may panic if Advance()
+// returned false or was not called at all. Value() does not block.
+func (cs *ChunkStream) Value(buf []byte) (result []byte) {
+ if len(buf) < chunkHashLen {
+ buf = make([]byte, chunkHashLen)
+ }
+ copy(buf, cs.value[:chunkHashLen])
+ return buf[:chunkHashLen]
+}
+
+// Location() returns the Location associated with the chunk staged by
+// Advance(). Location() may panic if Advance() returned false or was not
+// called at all. Location() does not block.
+func (cs *ChunkStream) Location() Location {
+ return cs.loc
+}
+
+// Err() returns a non-nil error iff the stream encountered any errors. Err()
+// does not block.
+func (cs *ChunkStream) Err() error {
+ return cs.err
+}
+
+// Cancel() notifies the stream provider that it can stop producing elements.
+// The client must call Cancel() if it does not iterate through all elements
+// (i.e. until Advance() returns false). Cancel() is idempotent and can be
+// called concurrently with a goroutine that is iterating via Advance() and
+// Value(). Cancel() causes Advance() to subsequently return false.
+// Cancel() does not block.
+func (cs *ChunkStream) Cancel() {
+ cs.stream.Cancel()
+}
+
+// A BlobStream allows the client to iterate over the blobs in BlobMap:
+// bs := bm.NewBlobStream(ctx)
+// for bs.Advance() {
+// blobID := bs.Value()
+// ...process blobID...
+// }
+// if bs.Err() != nil {
+// ...there was an error...
+// }
+type BlobStream struct {
+ bm *BlobMap
+ ctx *context.T
+
+ key []byte // key for current element
+ keyBuf [maxKeyLen]byte // buffer for keys
+ err error // error encountered.
+ mu sync.Mutex // protects "more", which may be written in Cancel()
+ more bool // whether stream may be consulted again
+}
+
+// keyLimit is the limit key used in store.Scan() calls within a BlobStream.
+var keyLimit []byte
+
+func init() {
+ // The limit key is the maximum length key, all ones after the blobPrefix.
+ keyLimit = make([]byte, maxKeyLen)
+ for i := copy(keyLimit, blobPrefix); i != len(keyLimit); i++ {
+ keyLimit[i] = 0xff
+ }
+}
+
+// NewBlobStream() returns a pointer to a new BlobStream that allows the client
+// to enumerate the blobs in the BlobMap, in lexicographic order.
+func (bm *BlobMap) NewBlobStream(ctx *context.T) *BlobStream {
+ bs := new(BlobStream)
+ bs.bm = bm
+ bs.ctx = ctx
+ bs.more = true
+ return bs
+}
+
+// Advance() stages an element so the client can retrieve the next blob ID with
+// Value(). Advance() returns true iff there is an element to retrieve. The
+// client must call Advance() before calling Value(). The client must call
+// Cancel if it does not iterate through all elements (i.e. until Advance()
+// returns false). Advance() may block if an element is not immediately
+// available.
+func (bs *BlobStream) Advance() (ok bool) {
+ bs.mu.Lock()
+ ok = bs.more
+ bs.mu.Unlock()
+ if ok {
+ prefixAndKeyLen := len(blobPrefix) + blobIDLen
+ // Compute the next key to search for.
+ if len(bs.key) == 0 { // First time through: anything starting with blobPrefix.
+ n := copy(bs.keyBuf[:], blobPrefix)
+ bs.key = bs.keyBuf[:n]
+ } else {
+ // Increment the blobID to form the next possible key.
+ i := prefixAndKeyLen - 1
+ for ; i != len(blobPrefix)-1 && bs.keyBuf[i] == 0xff; i-- {
+ bs.keyBuf[i] = 0
+ }
+ if i == len(blobPrefix)-1 { // End of database
+ ok = false
+ } else {
+ bs.keyBuf[i]++
+ }
+ bs.key = bs.keyBuf[:prefixAndKeyLen]
+ }
+ if ok {
+ stream := bs.bm.st.Scan(bs.key, keyLimit)
+ if !stream.Advance() {
+ bs.err = stream.Err()
+ ok = false // no more stream, even if no error
+ } else {
+ bs.key = stream.Key(bs.keyBuf[:])
+ if len(bs.key) < prefixAndKeyLen {
+ bs.err = verror.New(errMalformedBlobEntry, bs.ctx, bs.bm.dir, bs.key, stream.Value(nil))
+ ok = false
+ }
+ stream.Cancel() // We get at most one element from each stream.
+ }
+ }
+ if !ok {
+ bs.mu.Lock()
+ bs.more = false
+ bs.mu.Unlock()
+ }
+ }
+ return ok
+}
+
+// Value() returns the blob ID staged by Advance(). The returned slice may be
+// a sub-slice of buf if buf is large enough to hold the entire value.
+// Otherwise, a newly allocated slice will be returned. It is valid to pass a
+// nil buf. Value() may panic if Advance() returned false or was not called at
+// all. Value() does not block.
+func (bs *BlobStream) Value(buf []byte) (result []byte) {
+ if len(buf) < blobIDLen {
+ buf = make([]byte, blobIDLen)
+ }
+ copy(buf, bs.key[len(blobPrefix):len(blobPrefix)+blobIDLen])
+ return buf[:blobIDLen]
+}
+
+// Err() returns a non-nil error iff the stream encountered any errors. Err()
+// does not block.
+func (bs *BlobStream) Err() error {
+ return bs.err
+}
+
+// Cancel() notifies the stream provider that it can stop producing elements.
+// The client must call Cancel() if it does not iterate through all elements
+// (i.e. until Advance() returns false). Cancel() is idempotent and can be
+// called concurrently with a goroutine that is iterating via Advance() and
+// Value(). Cancel() causes Advance() to subsequently return false.
+// Cancel() does not block.
+func (bs *BlobStream) Cancel() {
+ bs.mu.Lock()
+ bs.more = false
+ bs.mu.Unlock()
+}
diff --git a/services/syncbase/localblobstore/blobmap/blobmap_test.go b/services/syncbase/localblobstore/blobmap/blobmap_test.go
new file mode 100644
index 0000000..450049a
--- /dev/null
+++ b/services/syncbase/localblobstore/blobmap/blobmap_test.go
@@ -0,0 +1,278 @@
+// Copyright 2015 The Vanadium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// A test for blobmap.
+package blobmap_test
+
+import "bytes"
+import "io/ioutil"
+import "math/rand"
+import "os"
+import "runtime"
+import "testing"
+
+import "v.io/syncbase/x/ref/services/syncbase/localblobstore/blobmap"
+import "v.io/v23/context"
+
+import "v.io/x/ref/test"
+import _ "v.io/x/ref/runtime/factories/generic"
+
+// id() returns a new random 16-byte byte vector.
+func id() []byte {
+ v := make([]byte, 16)
+ for i := 0; i != len(v); i++ {
+ v[i] = byte(rand.Int31n(256))
+ }
+ return v
+}
+
+// verifyBlobs() tests that the blobs in *bm are those in b[], as revealed via
+// the BlobStream() interface.
+func verifyBlobs(t *testing.T, ctx *context.T, bm *blobmap.BlobMap, b [][]byte) {
+ _, _, callerLine, _ := runtime.Caller(1)
+ seen := make([]bool, len(b)) // seen[i] == whether b[i] seen in *bm
+ bs := bm.NewBlobStream(ctx)
+ var i int
+ for i = 0; bs.Advance(); i++ {
+ blob := bs.Value(nil)
+ var j int
+ for j = 0; j != len(b) && bytes.Compare(b[j], blob) != 0; j++ {
+ }
+ if j == len(b) {
+ t.Errorf("blobmap_test: line %d: unexpected blob %v present in BlobMap",
+ callerLine, blob)
+ } else if seen[j] {
+ t.Errorf("blobmap_test: line %d: blob %v seen twice in BlobMap",
+ callerLine, blob)
+ } else {
+ seen[j] = true
+ }
+ }
+ if i != len(b) {
+ t.Errorf("blobmap_test: line %d: found %d blobs in BlobMap, but expected %d",
+ callerLine, i, len(b))
+ }
+ for j := range seen {
+ if !seen[j] {
+ t.Errorf("blobmap_test: line %d: blob %v not seen un BlobMap",
+ callerLine, b[j])
+ }
+ }
+ if bs.Err() != nil {
+ t.Errorf("blobmap_test: line %d: BlobStream.Advance: unexpected error %v",
+ callerLine, bs.Err())
+ }
+}
+
+// verifyNoChunksInBlob() tests that blob b[blobi] has no chunks in *bm, as
+// revealed by the ChunkStream interface.
+func verifyNoChunksInBlob(t *testing.T, ctx *context.T, bm *blobmap.BlobMap, blobi int, b [][]byte) {
+ _, _, callerLine, _ := runtime.Caller(1)
+ cs := bm.NewChunkStream(ctx, b[blobi])
+ for i := 0; cs.Advance(); i++ {
+ t.Errorf("blobmap_test: line %d: blob %d: chunk %d: %v",
+ callerLine, blobi, i, cs.Value(nil))
+ }
+ if cs.Err() != nil {
+ t.Errorf("blobmap_test: line %d: blob %d: ChunkStream.Advance: unexpected error %v",
+ callerLine, blobi, cs.Err())
+ }
+}
+
+// verifyChunksInBlob() tests that blob b[blobi] in *bm contains the expected
+// chunks from c[]. Each blob is expected to have 8 chunks, 0...7, except that
+// b[1] has c[8] instead of c[4] for chunk 4.
+func verifyChunksInBlob(t *testing.T, ctx *context.T, bm *blobmap.BlobMap, blobi int, b [][]byte, c [][]byte) {
+ _, _, callerLine, _ := runtime.Caller(1)
+ var err error
+ var i int
+ cs := bm.NewChunkStream(ctx, b[blobi])
+ for i = 0; cs.Advance(); i++ {
+ chunk := cs.Value(nil)
+ chunki := i
+ if blobi == 1 && i == 4 { // In blob 1, c[4] is replaced by c[8]
+ chunki = 8
+ }
+ if bytes.Compare(c[chunki], chunk) != 0 {
+ t.Errorf("blobmap_test: line %d: blob %d: chunk %d: got %v, expected %v",
+ callerLine, blobi, i, chunk, c[chunki])
+ }
+
+ var loc blobmap.Location
+ loc, err = bm.LookupChunk(ctx, chunk)
+ if err != nil {
+ t.Errorf("blobmap_test: line %d: blob %d: chunk %d: LookupChunk got unexpected error: %v",
+ callerLine, blobi, i, err)
+ } else {
+ if i == 4 {
+ if bytes.Compare(loc.BlobID, b[blobi]) != 0 {
+ t.Errorf("blobmap_test: line %d: blob %d: chunk %d: Location.BlobID got %v, expected %v",
+ callerLine, blobi, i, loc.BlobID, b[blobi])
+ }
+ } else {
+ if bytes.Compare(loc.BlobID, b[0]) != 0 && bytes.Compare(loc.BlobID, b[1]) != 0 {
+ t.Errorf("blobmap_test: line %d: blob %d: chunk %d: Location.BlobID got %v, expected %v",
+ callerLine, blobi, i, loc.BlobID, b[blobi])
+ }
+ }
+ if loc.Offset != int64(i) {
+ t.Errorf("blobmap_test: line %d: blob %d: chunk %d: Location.Offset got %d, expected %d",
+ callerLine, blobi, i, loc.Offset, i)
+ }
+ if loc.Size != 1 {
+ t.Errorf("blobmap_test: line %d: blob %d: chunk %d: Location.Size got %d, expected 1",
+ callerLine, blobi, i, loc.Size)
+ }
+
+ // The offsets and sizes will match, between the result
+ // from the stream and the result from LookupChunk(),
+ // because for all chunks written to both, they are
+ // written to the same places. However, the blob need
+ // not match, since LookupChunk() will return an
+ // arbitrary Location in the store that contains the
+ // chunk.
+ loc2 := cs.Location()
+ if loc.Offset != loc2.Offset || loc.Size != loc2.Size {
+ t.Errorf("blobmap_test: line %d: blob %d: chunk %d: disagreement about location: LookupChunk %v vs ChunkStream %v",
+ callerLine, blobi, i, loc, loc2)
+ }
+ }
+ }
+ if cs.Err() != nil {
+ t.Errorf("blobmap_test: line %d: blob %d: ChunkStream.Err() unepxected error %v",
+ callerLine, blobi, cs.Err())
+ }
+ if i != 8 {
+ t.Errorf("blobmap_test: line %d: blob %d: ChunkStream.Advance unexpectedly saw %d chunks, expected 8",
+ callerLine, blobi, i)
+ }
+}
+
+// TestAddRetrieveAndDelete() tests insertion, retrieval, and deletion of blobs
+// from a BlobMap. It's all done in one test case, because one cannot retrieve
+// or delete blobs that have not been inserted.
+func TestAddRetrieveAndDelete(t *testing.T) {
+ ctx, shutdown := test.V23Init()
+ defer shutdown()
+
+ // Make a temporary directory.
+ var err error
+ var testDirName string
+ testDirName, err = ioutil.TempDir("", "blobmap_test")
+ if err != nil {
+ t.Fatalf("blobmap_test: can't make tmp directory: %v", err)
+ }
+ defer os.RemoveAll(testDirName)
+
+ // Create a blobmap.
+ var bm *blobmap.BlobMap
+ bm, err = blobmap.New(ctx, testDirName)
+ if err != nil {
+ t.Fatalf("blobmap_test: blobmap.New failed: %v", err)
+ }
+
+ // Two blobs: b[0] and b[1].
+ b := [][]byte{id(), id()}
+
+ // Nine chunks: c[0 .. 8]
+ c := [][]byte{id(), id(), id(), id(), id(), id(), id(), id(), id()}
+
+ // Verify that there are no blobs, or chunks in blobs initially.
+ verifyBlobs(t, ctx, bm, nil)
+ verifyNoChunksInBlob(t, ctx, bm, 0, b)
+ verifyNoChunksInBlob(t, ctx, bm, 1, b)
+
+ // Verify that all chunks have no locations initially.
+ for chunki := range c {
+ _, err = bm.LookupChunk(ctx, c[chunki])
+ if err == nil {
+ t.Errorf("blobmap_test: chunk %d: LookupChunk: unexpected lack of error", chunki)
+ }
+ }
+
+ // Put chunks 0..7 into blob 0, and chunks 0..3, 8, 5..7 into blob 1.
+ // Each chunk is treated as having size 1.
+ for blobi := 0; blobi != 2; blobi++ {
+ for i := 0; i != 8; i++ {
+ chunki := i
+ if blobi == 1 && i == 4 { // In blob 1, c[4] 4 is replaced by c[8]
+ chunki = 8
+ }
+ err = bm.AssociateChunkWithLocation(ctx, c[chunki],
+ blobmap.Location{BlobID: b[blobi], Offset: int64(i), Size: 1})
+ if err != nil {
+ t.Errorf("blobmap_test: blob %d: AssociateChunkWithLocation: unexpected error: %v",
+ blobi, err)
+ }
+ }
+ }
+
+ // Verify that the blobs are present, with the chunks specified.
+ verifyBlobs(t, ctx, bm, b)
+ verifyChunksInBlob(t, ctx, bm, 0, b, c)
+ verifyChunksInBlob(t, ctx, bm, 1, b, c)
+
+ // Verify that all chunks now have locations.
+ for chunki := range c {
+ _, err = bm.LookupChunk(ctx, c[chunki])
+ if err != nil {
+ t.Errorf("blobmap_test: chunk %d: LookupChunk: unexpected error: %v",
+ chunki, err)
+ }
+ }
+
+ // Delete b[0].
+ err = bm.DeleteBlob(ctx, b[0])
+ if err != nil {
+ t.Errorf("blobmap_test: blob 0: DeleteBlob: unexpected error: %v", err)
+ }
+
+ // Verify that all chunks except chunk 4 (which was in only blob 0)
+ // still have locations.
+ for chunki := range c {
+ _, err = bm.LookupChunk(ctx, c[chunki])
+ if chunki == 4 {
+ if err == nil {
+ t.Errorf("blobmap_test: chunk %d: LookupChunk: expected lack of error",
+ chunki)
+ }
+ } else {
+ if err != nil {
+ t.Errorf("blobmap_test: chunk %d: LookupChunk: unexpected error: %v",
+ chunki, err)
+ }
+ }
+ }
+
+ // Verify that blob 0 is gone, but blob 1 remains.
+ verifyBlobs(t, ctx, bm, b[1:])
+ verifyNoChunksInBlob(t, ctx, bm, 0, b)
+ verifyChunksInBlob(t, ctx, bm, 1, b, c)
+
+ // Delete b[1].
+ err = bm.DeleteBlob(ctx, b[1])
+ if err != nil {
+ t.Errorf("blobmap_test: blob 1: DeleteBlob: unexpected error: %v",
+ err)
+ }
+
+ // Verify that there are no blobs, or chunks in blobs once more.
+ verifyBlobs(t, ctx, bm, nil)
+ verifyNoChunksInBlob(t, ctx, bm, 0, b)
+ verifyNoChunksInBlob(t, ctx, bm, 1, b)
+
+ // Verify that all chunks have no locations once more.
+ for chunki := range c {
+ _, err = bm.LookupChunk(ctx, c[chunki])
+ if err == nil {
+ t.Errorf("blobmap_test: chunk %d: LookupChunk: unexpected lack of error",
+ chunki)
+ }
+ }
+
+ err = bm.Close()
+ if err != nil {
+ t.Errorf("blobmap_test: unexpected error closing BlobMap: %v", err)
+ }
+}
diff --git a/services/syncbase/localblobstore/chunker/chunker.go b/services/syncbase/localblobstore/chunker/chunker.go
new file mode 100644
index 0000000..cb07533
--- /dev/null
+++ b/services/syncbase/localblobstore/chunker/chunker.go
@@ -0,0 +1,284 @@
+// Copyright 2015 The Vanadium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// Package chunker breaks a stream of bytes into context-defined chunks whose
+// boundaries are chosen based on content checksums of a window that slides
+// over the data. An edited sequence with insertions and removals can share
+// many chunks with the original sequence.
+//
+// The intent is that when a sequence of bytes is to be transmitted to a
+// recipient that may have much of the data, the sequence can be broken down
+// into chunks. The checksums of the resulting chunks may then be transmitted
+// to the recipient, which can then discover which of the chunks it has, and
+// which it needs.
+//
+// Example:
+// var s *chunker.Stream = chunker.NewStream(ctx, &chunker.DefaultParam, anIOReader)
+// for s.Advance() {
+// chunk := s.Value()
+// // process chunk
+// }
+// if s.Err() != nil {
+// // anIOReader generated an error.
+// }
+package chunker
+
+// The design is from:
+// "A Framework for Analyzing and Improving Content-Based Chunking Algorithms";
+// Kave Eshghi, Hsiu Khuern Tang; HPL-2005-30(R.1); Sep, 2005;
+// http://www.hpl.hp.com/techreports/2005/HPL-2005-30R1.pdf
+
+import "io"
+import "sync"
+
+import "v.io/syncbase/x/ref/services/syncbase/localblobstore/crc64window"
+import "v.io/v23/context"
+import "v.io/v23/verror"
+
+const pkgPath = "v.io/syncbase/x/ref/services/syncbase/localblobstore/chunker"
+
+var (
+ errStreamCancelled = verror.Register(pkgPath+".errStreamCancelled", verror.NoRetry, "{1:}{2:} Advance() called on cancelled stream{:_}")
+)
+
+// A Param contains the parameters for chunking.
+//
+// Chunks are broken based on a hash of a sliding window of width WindowWidth
+// bytes.
+// Each chunk is at most MaxChunk bytes long, and, unless end-of-file or an
+// error is reached, at least MinChunk bytes long.
+//
+// Subject to those constraints, a chunk boundary is introduced at the first point
+// where the hash of the sliding window is 1 mod Primary, or if that doesn't
+// occur before MaxChunk bytes, at the last position where the hash is 1 mod
+// Secondary, or if that does not occur, after MaxChunk bytes.
+// Normally, MinChunk < Primary < MaxChunk.
+// Primary is the expected chunk size.
+// The Secondary divisor exists to make it more likely that a chunk boundary is
+// selected based on the local data when the Primary divisor by chance does not
+// find a match for a long distance. It should be a few times smaller than
+// Primary.
+//
+// Using primes for Primary and Secondary is not essential, but recommended
+// because it guarantees mixing of the checksum bits should their distribution
+// be non-uniform.
+type Param struct {
+ WindowWidth int // the window size to use when looking for chunk boundaries
+ MinChunk int64 // minimum chunk size
+ MaxChunk int64 // maximum chunk size
+ Primary uint64 // primary divisor; the expected chunk size
+ Secondary uint64 // secondary divisor
+}
+
+// DefaultParam contains default chunking parameters.
+var DefaultParam Param = Param{WindowWidth: 48, MinChunk: 512, MaxChunk: 3072, Primary: 601, Secondary: 307}
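+
+// A client that wants larger chunks than DefaultParam produces can supply its
+// own Param to NewStream() (a sketch only; ctx and rd are assumed to exist,
+// and these particular values are illustrative, not tuned):
+// param := chunker.Param{WindowWidth: 48, MinChunk: 2048, MaxChunk: 12288, Primary: 2053, Secondary: 1021}
+// s := chunker.NewStream(ctx, &param, rd)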
+
+// A Stream allows a client to iterate over the chunks within an io.Reader byte
+// stream.
+type Stream struct {
+ param Param // chunking parameters
+ ctx *context.T // context of creator
+ window *crc64window.Window // sliding window for computing the hash
+ buf []byte // buffer of data
+ rd io.Reader // source of data
+ err error // error from rd
+ mu sync.Mutex // protects cancelled
+ cancelled bool // whether the stream has been cancelled
+ bufferChunks bool // whether to buffer entire chunks
+ // Invariant: bufStart <= chunkStart <= chunkEnd <= bufEnd
+ bufStart int64 // offset in rd of first byte in buf[]
+ bufEnd int64 // offset in rd of next byte after those in buf[]
+ chunkStart int64 // offset in rd of first byte of current chunk
+ chunkEnd int64 // offset in rd of next byte after current chunk
+ windowEnd int64 // offset in rd of next byte to be given to window
+ hash uint64 // hash of sliding window
+}
+
+// newStream() returns a pointer to a new Stream instance, with the
+// parameters in *param. This internal version of NewStream() allows the caller
+// to specify via bufferChunks whether entire chunks should be buffered.
+func newStream(ctx *context.T, param *Param, rd io.Reader, bufferChunks bool) *Stream {
+ s := new(Stream)
+ s.param = *param
+ s.ctx = ctx
+ s.window = crc64window.New(crc64window.ECMA, s.param.WindowWidth)
+ bufSize := int64(8192)
+ if bufferChunks {
+ // If we must buffer entire chunks, arrange that the buffer
+ // size is considerably larger than the max chunk size to avoid
+ // copying data repeatedly.
+ for bufSize < 4*s.param.MaxChunk {
+ bufSize *= 2
+ }
+ }
+ s.buf = make([]byte, bufSize)
+ s.rd = rd
+ s.bufferChunks = bufferChunks
+ return s
+}
+
+// NewStream() returns a pointer to a new Stream instance, with the
+// parameters in *param.
+func NewStream(ctx *context.T, param *Param, rd io.Reader) *Stream {
+ return newStream(ctx, param, rd, true)
+}
+
+// isCancelled() returns whether s.Cancel() has been called.
+func (s *Stream) isCancelled() (cancelled bool) {
+ s.mu.Lock()
+ cancelled = s.cancelled
+ s.mu.Unlock()
+ return cancelled
+}
+
+// Advance() stages the next chunk so that it may be retrieved via Value().
+// Returns true iff there is an item to retrieve. Advance() must be called
+// before Value() is called.
+func (s *Stream) Advance() bool {
+ // Remember that s.{bufStart,bufEnd,chunkStart,chunkEnd,windowEnd}
+ // are all offsets within s.rd, not indices into s.buf.
+ // Therefore, these starts and ends can easily be compared
+ // with each other, but we must subtract bufStart when
+ // indexing into buf. (Other schemes were considered, but
+ // nothing seems uniformly better.)
+
+ // If buffering entire chunks, ensure there's enough data in the buffer
+ // for the next chunk.
+ if s.bufferChunks && s.bufEnd < s.chunkEnd+s.param.MaxChunk && s.err == nil {
+ // Next chunk might need more data.
+ if s.bufStart < s.chunkEnd {
+ // Move any remaining buffered data to start of buffer.
+ copy(s.buf, s.buf[s.chunkEnd-s.bufStart:s.bufEnd-s.bufStart])
+ s.bufStart = s.chunkEnd
+ }
+ // Fill buffer with data, unless error/EOF.
+ for s.err == nil && s.bufEnd < s.bufStart+int64(len(s.buf)) && !s.isCancelled() {
+ var n int
+ n, s.err = s.rd.Read(s.buf[s.bufEnd-s.bufStart:])
+ s.bufEnd += int64(n)
+ }
+ }
+
+ // Make the next chunk current.
+ s.chunkStart = s.chunkEnd
+ minChunk := s.chunkStart + s.param.MinChunk
+ maxChunk := s.chunkStart + s.param.MaxChunk
+ lastSecondaryBreak := maxChunk
+
+ // While not end of chunk...
+ for s.windowEnd != maxChunk &&
+ (s.windowEnd < minChunk || (s.hash%s.param.Primary) != 1) &&
+ (s.windowEnd != s.bufEnd || s.err == nil) && !s.isCancelled() {
+
+ // Fill the buffer if empty, and there's more data to read.
+ if s.windowEnd == s.bufEnd && s.err == nil {
+ if s.bufferChunks {
+ panic("chunker.Advance had to fill buffer in bufferChunks mode")
+ }
+ s.bufStart = s.bufEnd
+ var n int
+ n, s.err = s.rd.Read(s.buf)
+ s.bufEnd += int64(n)
+ }
+
+ // bufLimit is the lesser of the maximum possible chunk end (maxChunk) and the end of the buffered data (s.bufEnd).
+ bufLimit := maxChunk
+ if s.bufEnd < bufLimit {
+ bufLimit = s.bufEnd
+ }
+ // Advance window until both MinChunk reached and primary boundary found.
+ for s.windowEnd != bufLimit &&
+ (s.windowEnd < minChunk || (s.hash%s.param.Primary) != 1) &&
+ !s.isCancelled() {
+
+ // Advance the window by one byte.
+ s.hash = s.window.Advance(s.buf[s.windowEnd-s.bufStart])
+ s.windowEnd++
+ if (s.hash % s.param.Secondary) == 1 {
+ lastSecondaryBreak = s.windowEnd
+ }
+ }
+ }
+
+ if s.windowEnd == maxChunk && (s.hash%s.param.Primary) != 1 && lastSecondaryBreak != maxChunk {
+ // The primary break point was not found in the maximum chunk
+ // size, and a secondary break point was found; use it.
+ s.chunkEnd = lastSecondaryBreak
+ } else {
+ s.chunkEnd = s.windowEnd
+ }
+
+ return !s.isCancelled() && s.chunkStart != s.chunkEnd // We have a non-empty chunk to return.
+}
+
+// Value() returns the chunk that was staged by Advance(). May panic if
+// Advance() returned false or was not called. Never blocks.
+func (s *Stream) Value() []byte {
+ return s.buf[s.chunkStart-s.bufStart : s.chunkEnd-s.bufStart]
+}
+
+// Err() returns any error encountered by Advance(). Never blocks.
+func (s *Stream) Err() (err error) {
+ s.mu.Lock()
+ if s.cancelled && (s.err == nil || s.err == io.EOF) {
+ s.err = verror.New(errStreamCancelled, s.ctx)
+ }
+ s.mu.Unlock()
+ if s.err != io.EOF { // Do not consider EOF to be an error.
+ err = s.err
+ }
+ return err
+}
+
+// Cancel() causes the next call to Advance() to return false.
+// It should be used when the client does not wish to iterate to the end of the stream.
+// Never blocks. May be called concurrently with other method calls on s.
+func (s *Stream) Cancel() {
+ s.mu.Lock()
+ s.cancelled = true
+ s.mu.Unlock()
+}
+
+// ----------------------------------
+
+// A PosStream is just like a Stream, except that the Value() method returns only
+// the byte offsets of the ends of chunks, rather than the chunks themselves.
+// It can be used when chunks are too large for even a small number of them to
+// be buffered comfortably in memory.
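+// A PosStream is used like a Stream (a sketch; ctx and anIOReader are assumed
+// to exist):
+// ps := chunker.NewPosStream(ctx, &chunker.DefaultParam, anIOReader)
+// for ps.Advance() {
+//   end := ps.Value() // offset just past the current chunk
+//   // ...use end...
+// }
+// if ps.Err() != nil {
+//   // anIOReader generated an error.
+// }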
+type PosStream struct {
+ s *Stream
+}
+
+// NewPosStream() returns a pointer to a new PosStream instance, with the
+// parameters in *param.
+func NewPosStream(ctx *context.T, param *Param, rd io.Reader) *PosStream {
+ ps := new(PosStream)
+ ps.s = newStream(ctx, param, rd, false)
+ return ps
+}
+
+// Advance() stages the offset of the end of the next chunk so that it may be
+// retrieved via Value(). Returns true iff there is an item to retrieve.
+// Advance() must be called before Value() is called.
+func (ps *PosStream) Advance() bool {
+ return ps.s.Advance()
+}
+
+// Value() returns the offset just past the end of the chunk staged by
+// Advance(). May panic if Advance() returned false or was not called. Never blocks.
+func (ps *PosStream) Value() int64 {
+ return ps.s.chunkEnd
+}
+
+// Err() returns any error encountered by Advance(). Never blocks.
+func (ps *PosStream) Err() error {
+ return ps.s.Err()
+}
+
+// Cancel() causes the next call to Advance() to return false.
+// It should be used when the client does not wish to iterate to the end of the stream.
+// Never blocks. May be called concurrently with other method calls on ps.
+func (ps *PosStream) Cancel() {
+ ps.s.Cancel()
+}
diff --git a/services/syncbase/localblobstore/chunker/chunker_test.go b/services/syncbase/localblobstore/chunker/chunker_test.go
new file mode 100644
index 0000000..57c6fed
--- /dev/null
+++ b/services/syncbase/localblobstore/chunker/chunker_test.go
@@ -0,0 +1,197 @@
+// Copyright 2015 The Vanadium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// A test for the chunker package.
+package chunker_test
+
+import "bytes"
+import "crypto/md5"
+import "fmt"
+import "io"
+import "testing"
+
+import "v.io/syncbase/x/ref/services/syncbase/localblobstore/chunker"
+import "v.io/syncbase/x/ref/services/syncbase/localblobstore/localblobstore_testlib"
+import "v.io/v23/context"
+import "v.io/x/ref/test"
+import _ "v.io/x/ref/runtime/factories/generic"
+
+// TestChunksPartitionStream() tests that the chunker partitions its input
+// stream into reasonable sized chunks, which when concatenated form the
+// original stream.
+func TestChunksPartitionStream(t *testing.T) {
+ ctx, shutdown := test.V23Init()
+ defer shutdown()
+
+ var err error
+ totalLength := 1024 * 1024
+
+ // Compute the md5 of an arbitrary stream. We will later compare this
+ // with the md5 of the concatenation of chunks from an equivalent
+ // stream.
+ r := localblobstore_testlib.NewRandReader(1, totalLength, 0, io.EOF)
+ hStream := md5.New()
+ buf := make([]byte, 8192)
+ for err == nil {
+ var n int
+ n, err = r.Read(buf)
+ hStream.Write(buf[0:n])
+ }
+ checksumStream := hStream.Sum(nil)
+
+ // Using an equivalent stream, break it into chunks.
+ r = localblobstore_testlib.NewRandReader(1, totalLength, 0, io.EOF)
+ param := &chunker.DefaultParam
+ hChunked := md5.New()
+
+ length := 0
+ s := chunker.NewStream(ctx, param, r)
+ for s.Advance() {
+ chunk := s.Value()
+ length += len(chunk)
+ // The last chunk is permitted to be short, hence the second
+ // conjunct in the following predicate.
+ if int64(len(chunk)) < param.MinChunk && length != totalLength {
+ t.Errorf("chunker_test: chunk length %d below minimum %d", len(chunk), param.MinChunk)
+ }
+ if int64(len(chunk)) > param.MaxChunk {
+ t.Errorf("chunker_test: chunk length %d above maximum %d", len(chunk), param.MaxChunk)
+ }
+ hChunked.Write(chunk)
+ }
+ if s.Err() != nil {
+ t.Errorf("chunker_test: got error from chunker: %v\n", err)
+ }
+
+ if length != totalLength {
+ t.Errorf("chunker_test: chunk lengths summed to %d, expected %d", length, totalLength)
+ }
+
+ checksumChunked := hChunked.Sum(nil)
+ if bytes.Compare(checksumStream, checksumChunked) != 0 {
+ t.Errorf("chunker_test: md5 of stream is %v, but md5 of chunks is %v", checksumStream, checksumChunked)
+ }
+}
+
+// TestPosStream() tests that a PosStream leads to the same chunks as a Stream.
+func TestPosStream(t *testing.T) {
+ ctx, shutdown := test.V23Init()
+ defer shutdown()
+
+ totalLength := 1024 * 1024
+
+ s := chunker.NewStream(ctx, &chunker.DefaultParam,
+ localblobstore_testlib.NewRandReader(1, totalLength, 0, io.EOF))
+ ps := chunker.NewPosStream(ctx, &chunker.DefaultParam,
+ localblobstore_testlib.NewRandReader(1, totalLength, 0, io.EOF))
+
+ itReady := s.Advance()
+ pitReady := ps.Advance()
+ itPos := 0
+ chunkCount := 0
+ for itReady && pitReady {
+ itPos += len(s.Value())
+ if int64(itPos) != ps.Value() {
+ t.Fatalf("chunker_test: Stream and PosStream positions diverged at chunk %d: %d vs %d", chunkCount, itPos, ps.Value())
+ }
+ chunkCount++
+ itReady = s.Advance()
+ pitReady = ps.Advance()
+ }
+ if itReady {
+ t.Error("chunker_test: Stream ended before PosStream")
+ }
+ if pitReady {
+ t.Error("chunker_test: PosStream ended before Stream")
+ }
+ if s.Err() != nil {
+ t.Errorf("chunker_test: Stream got unexpected error: %v", s.Err())
+ }
+ if ps.Err() != nil {
+ t.Errorf("chunker_test: PosStream got unexpected error: %v", ps.Err())
+ }
+}
+
+// chunkSums() returns a vector of md5 checksums for the chunks of the
+// specified Reader, using the default chunking parameters.
+func chunkSums(ctx *context.T, r io.Reader) (sums [][md5.Size]byte) {
+ s := chunker.NewStream(ctx, &chunker.DefaultParam, r)
+ for s.Advance() {
+ sums = append(sums, md5.Sum(s.Value()))
+ }
+ return sums
+}
+
+// TestInsertions() tests how chunk sequences differ when bytes are
+// periodically inserted into a stream.
+func TestInsertions(t *testing.T) {
+ ctx, shutdown := test.V23Init()
+ defer shutdown()
+
+ totalLength := 1024 * 1024
+ insertionInterval := 20 * 1024
+ bytesInserted := totalLength / insertionInterval
+
+ // Get the md5 sums of the chunks of two similar streams, where the
+ // second has an extra byte every 20k bytes.
+ sums0 := chunkSums(ctx, localblobstore_testlib.NewRandReader(1, totalLength, 0, io.EOF))
+ sums1 := chunkSums(ctx, localblobstore_testlib.NewRandReader(1, totalLength, insertionInterval, io.EOF))
+
+ // Iterate over chunks of second stream, counting which are in common
+ // with first stream. We expect to find common chunks within 10 of the
+ // last chunk in common, since insertions are single bytes, widely
+ // separated.
+ same := 0 // Number of chunks in sums1 that are the same as chunks in sums0.
+ i0 := 0 // Where to search for a match in sums0.
+ for i1 := 0; i1 != len(sums1); i1++ {
+ // Be prepared to search up to the next 10 elements of sums0 from the most recent match.
+ limit := len(sums0) - i0
+ if limit > 10 {
+ limit = 10
+ }
+ var d int
+ for d = 0; d != limit && bytes.Compare(sums0[i0+d][:], sums1[i1][:]) != 0; d++ {
+ }
+ if d != limit { // found
+ same++
+ i0 += d // Advance i0 to the most recent match.
+ }
+ }
+ // The number of chunks that aren't the same as one in the original stream should be at least as large
+ // as the number of bytes inserted, and not too many more.
+ different := len(sums1) - same
+ if different < bytesInserted {
+ t.Errorf("chunker_test: saw %d different chunks, but expected at least %d", different, bytesInserted)
+ }
+ if bytesInserted+(bytesInserted/2) < different {
+ t.Errorf("chunker_test: saw %d different chunks, but expected at most %d", different, bytesInserted+(bytesInserted/2))
+ }
+ // Require that most chunks are the same, by a substantial margin.
+ if same < 5*different {
+ t.Errorf("chunker_test: saw %d different chunks, and %d same, but expected at least a factor of 5 more same than different", different, same)
+ }
+}
+
+// TestError() tests the behaviour of a chunker when given an error by its
+// reader.
+func TestError(t *testing.T) {
+ ctx, shutdown := test.V23Init()
+ defer shutdown()
+
+ notEOF := fmt.Errorf("not EOF")
+ totalLength := 50 * 1024
+ r := localblobstore_testlib.NewRandReader(1, totalLength, 0, notEOF)
+ s := chunker.NewStream(ctx, &chunker.DefaultParam, r)
+ length := 0
+ for s.Advance() {
+ chunk := s.Value()
+ length += len(chunk)
+ }
+ if s.Err() != notEOF {
+ t.Errorf("chunker_test: chunk stream ended with error %v, expected %v", s.Err(), notEOF)
+ }
+ if length != totalLength {
+ t.Errorf("chunker_test: chunk lengths summed to %d, expected %d", length, totalLength)
+ }
+}
diff --git a/services/syncbase/localblobstore/crc64window/crc64window.go b/services/syncbase/localblobstore/crc64window/crc64window.go
new file mode 100644
index 0000000..b27dbb9
--- /dev/null
+++ b/services/syncbase/localblobstore/crc64window/crc64window.go
@@ -0,0 +1,153 @@
+// Copyright 2015 The Vanadium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// Package crc64window provides CRCs over fixed-sized, rolling windows of bytes.
+//
+// It uses the same polynomial representation and CRC conditioning as
+// hash/crc64, so the results are the same as computing hash/crc64 over the
+// window of the last sz bytes added, where sz is the window size. Thus, in
+// the example below, rolling and nonRolling receive the same value.
+// w := crc64window.New(crc64window.ECMA, 3) // Window size is 3 bytes.
+// w.Advance(0x17)
+// w.Advance(0x92)
+// w.Advance(0x04)
+// rolling := w.Advance(0x28) // Rolls 0x17 out, and 0x28 in.
+//
+// nonRolling := crc64.Update(0, crc64.MakeTable(crc64.ECMA), []byte{0x92, 0x04, 0x28})
+//
+// Strangely, hash/crc64's specification does not mention which of the many
+// possible bit representations and conditioning choices it uses. We assume it
+// will not change from the following, which was gleaned from the hash/crc64
+// source code:
+//
+// - All messages to be processed, CRC values, and CRC polynomials are
+// polynomials in x whose coefficients are in Z(2).
+// - CRC values are represented by uint64 values in which bit i of the integer
+// represents the coefficient of x**(63-i) in the polynomial.
+// - CRC polynomials are represented like CRC values, except that the x**64
+// coefficient of the CRC polynomial is implicitly 1, and not stored.
+// - Messages to be processed are represented by byte vectors in which the
+// lowest-order bit of the first byte is the highest-degree polynomial
+// coefficient.
+// - For a CRC polynomial p and a message m, the CRC value:
+// CRC(p, m) = c + ((c * (x**len(m)) + (m * x**64)) mod p)
+// where the conditioning constant c = x**63 + x**62 + x**61 + ... + x + 1,
+// and len(m) is the number of bits in m.
+package crc64window
+
+import "sync"
+
+// The ECMA-64 polynomial, defined in ECMA 182.
+// This polynomial is recommended for use with this package, though other
+// polynomials found in hash/crc64 will also work.
+const ECMA = 0xc96c5795d7870f42
+
+// A Window contains the state needed to compute a CRC over a fixed-sized,
+// rolling window of data.
+type Window struct {
+ crc uint64 // CRC of window, unconditioned (i.e., just the window mod the CRC polynomial).
+ window []byte // The bytes in the window.
+ pos int // Index in window[] of first byte, which is the next byte to be overwritten.
+ crcData *crcData // Pointer to the immutable CRC tables for the CRC.
+}
+
+// A crcData is immutable after initialization, and contains tables for
+// computing a particular CRC over a particular window size. Pre-computed
+// copies of crcData are stored in tables[] so that CRC tables need be computed
+// only once per (polynomial, window size) pair.
+type crcData struct {
+ conditioning uint64
+ crcTableFront [256]uint64
+ crcTableRear [256]uint64
+}
+
+var mu sync.Mutex // Protects "tables", the cache of CRC tables already computed.
+
+// A polySize represents a pair of a CRC polynomial and a window size.
+type polySize struct {
+ poly uint64
+ size int
+}
+
+// tables[] maps (polynomial,window size) pairs to computed tables, so tables
+// are computed only once. It's accessed only under mu.
+var tables map[polySize]*crcData
+
+// getCRCData() returns a pointer to a crcData for the given CRC polynomial
+// and window size, either by cache lookup or by calculating it. Requires
+// size > 0.
+func getCRCData(poly uint64, size int) *crcData {
+ mu.Lock()
+ // Use cached CRC tables if available.
+ if tables == nil {
+ tables = make(map[polySize]*crcData)
+ }
+ ps := polySize{poly: poly, size: size}
+ c, found := tables[ps]
+ if !found { // Compute and save the CRC tables.
+ c = new(crcData)
+ // Loop ensures: c.crcTableFront[m & 0xff] ^ (m >> 8)==CRC(m * x**8)
+ zeroOrPoly := []uint64{0, poly}
+ for i := 1; i != 256; i <<= 1 {
+ crc := uint64(i)
+ for j := 0; j != 8; j++ {
+ crc = (crc >> 1) ^ zeroOrPoly[crc&1]
+ }
+ for j := 0; j != i; j++ {
+ c.crcTableFront[j+i] = crc ^ c.crcTableFront[j]
+ }
+ }
+ // Loop ensures: c.crcTableRear[b] == CRC(b * x**(size*8))
+ for i := 1; i != 256; i <<= 1 {
+ crc := c.crcTableFront[i]
+ for j := 1; j != size; j++ {
+ crc = c.crcTableFront[byte(crc)] ^ (crc >> 8)
+ }
+ for j := 0; j != i; j++ {
+ c.crcTableRear[j+i] = crc ^ c.crcTableRear[j]
+ }
+ }
+
+ // Loop ensures: c.conditioning == CRC(all-ones * x**(size*8))
+ conditioning := ^uint64(0)
+ for i := 0; i != size; i++ {
+ conditioning = c.crcTableFront[byte(conditioning)] ^ (conditioning >> 8)
+ }
+ c.conditioning = conditioning
+
+ tables[ps] = c
+ }
+ mu.Unlock()
+ return c
+}
+
+// New() returns a Window with the given size and CRC polynomial.
+// Initially, all the bytes in the window are zero. Requires size > 0.
+func New(poly uint64, size int) *Window {
+ if size <= 0 {
+ panic("crc64window.New() called with size <= 0")
+ }
+ w := new(Window)
+ w.window = make([]byte, size)
+ w.crc = 0
+ w.crcData = getCRCData(poly, size)
+ return w
+}
+
+// Advance() removes the first byte from window *w, adds b as the new last
+// byte, and returns the CRC of the window.
+func (w *Window) Advance(b byte) uint64 {
+ c := w.crcData
+ pos := w.pos
+ crc := w.crc
+ crc ^= c.crcTableRear[w.window[pos]]
+ w.crc = c.crcTableFront[byte(crc)^b] ^ (crc >> 8)
+ w.window[pos] = b
+ pos++
+ if pos == len(w.window) {
+ pos = 0
+ }
+ w.pos = pos
+ return ^(c.conditioning ^ w.crc)
+}
diff --git a/services/syncbase/localblobstore/crc64window/crc64window_test.go b/services/syncbase/localblobstore/crc64window/crc64window_test.go
new file mode 100644
index 0000000..9969125
--- /dev/null
+++ b/services/syncbase/localblobstore/crc64window/crc64window_test.go
@@ -0,0 +1,51 @@
+// Copyright 2015 The Vanadium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// A test for crc64window.
+package crc64window_test
+
+import "hash/crc64"
+import "math/rand"
+import "testing"
+
+import "v.io/syncbase/x/ref/services/syncbase/localblobstore/crc64window"
+
+// A test for the example given in the package's specification.
+func TestCRC64WindowExample(t *testing.T) {
+ w := crc64window.New(crc64.ECMA, 3)
+ w.Advance(0x17)
+ w.Advance(0x92)
+ w.Advance(0x04)
+ rolling := w.Advance(0x28) // Rolls 0x17 out, and 0x28 in.
+ nonRolling := crc64.Update(0, crc64.MakeTable(crc64.ECMA), []byte{0x92, 0x04, 0x28})
+ if rolling != nonRolling {
+ t.Errorf("crc64window: rolling(0x92, 0x04, 0x28)==%x nonRolling(0x92, 0x04, 0x28)==%x\n", rolling, nonRolling)
+ }
+}
+
+func TestCRC64Window(t *testing.T) {
+ winSize := 16
+ iterations := 1000
+
+ w := crc64window.New(crc64.ECMA, winSize)
+
+ table := crc64.MakeTable(crc64.ECMA)
+ block := make([]byte, winSize-1+iterations)
+
+ for i := 0; i != len(block); i++ {
+ block[i] = byte(rand.Int31n(256))
+ }
+
+ i := 0
+ for ; i != winSize-1; i++ {
+ w.Advance(block[i])
+ }
+ for ; i != len(block); i++ {
+ expect := crc64.Update(0, table, block[i+1-winSize:i+1])
+ got := w.Advance(block[i])
+ if expect != got {
+ t.Errorf("crc64window: i %d winSize %d got %x, expect %x\n", i, winSize, got, expect)
+ }
+ }
+}
diff --git a/services/syncbase/localblobstore/fs_cablobstore/fs_cablobstore.go b/services/syncbase/localblobstore/fs_cablobstore/fs_cablobstore.go
new file mode 100644
index 0000000..4fb7500
--- /dev/null
+++ b/services/syncbase/localblobstore/fs_cablobstore/fs_cablobstore.go
@@ -0,0 +1,1521 @@
+// Copyright 2015 The Vanadium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// Package fs_cablobstore implements a content addressable blob store
+// on top of a file system. It assumes that either os.Link() or
+// os.Rename() is available.
+package fs_cablobstore
+
+// Internals:
+// Blobs are partitioned into two types of unit: "fragments" and "chunks".
+// A fragment is stored in a single file on disc. A chunk is a unit of network
+// transmission.
+//
+// The blobstore consists of a directory with "blob", "cas", "chunk", and
+// "tmp" subdirectories.
+// - "tmp" is used for temporary files that are moved into place via
+// link()/unlink() or rename(), depending on what's available.
+// - "cas" contains files whose names are content hashes of the files being
+// named. A few slashes are thrown into the name near the front so that no
+// single directory gets too large. These files are called "fragments".
+// - "blob" contains files whose names are random numbers. These names are
+// visible externally as "blob names". Again, a few slashes are thrown
+// into the name near the front so that no single directory gets too large.
+// Each of these files contains a series of lines of the form:
+// d <size> <offset> <cas-fragment>
+// followed optionally by a line of the form:
+// f <md5-hash>
+// Each "d" line indicates that the next <size> bytes of the blob appear at
+// <offset> bytes into <cas-fragment>, which is in the "cas" subtree. The
+// "f" line indicates that the blob is "finalized" and gives its complete
+// md5 hash. No fragments may be appended to a finalized blob.
+// - "chunk" contains a store (currently implemented with leveldb) that
+// maps chunks of blobs to content hashes and vice versa.
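+//
+// For illustration only (these hashes, sizes, and offsets are made up), a
+// finalized blob file might contain:
+// d 4096 0 cas/d4/1d/8c/d98f00b204e9800998ecf8427e
+// d 1024 0 cas/9e/10/7d/9d372bb6826bd81d3542a419d6
+// f 0cc175b9c0f1b6a831c399e269772661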
+
+import "bufio"
+import "bytes"
+import "crypto/md5"
+import "fmt"
+import "hash"
+import "io"
+import "io/ioutil"
+import "math"
+import "math/rand"
+import "os"
+import "path/filepath"
+import "strconv"
+import "strings"
+import "sync"
+import "time"
+
+import "v.io/syncbase/x/ref/services/syncbase/localblobstore"
+import "v.io/syncbase/x/ref/services/syncbase/localblobstore/chunker"
+import "v.io/syncbase/x/ref/services/syncbase/localblobstore/blobmap"
+import "v.io/v23/context"
+import "v.io/v23/verror"
+
+const pkgPath = "v.io/syncbase/x/ref/services/syncbase/localblobstore/fs_cablobstore"
+
+var (
+ errNotADir = verror.Register(pkgPath+".errNotADir", verror.NoRetry, "{1:}{2:} Not a directory{:_}")
+ errAppendFailed = verror.Register(pkgPath+".errAppendFailed", verror.NoRetry, "{1:}{2:} fs_cablobstore.Append failed{:_}")
+ errMalformedField = verror.Register(pkgPath+".errMalformedField", verror.NoRetry, "{1:}{2:} Malformed field in blob specification{:_}")
+ errAlreadyClosed = verror.Register(pkgPath+".errAlreadyClosed", verror.NoRetry, "{1:}{2:} BlobWriter is already closed{:_}")
+ errBlobAlreadyFinalized = verror.Register(pkgPath+".errBlobAlreadyFinalized", verror.NoRetry, "{1:}{2:} Blob is already finalized{:_}")
+ errIllegalPositionForRead = verror.Register(pkgPath+".errIllegalPositionForRead", verror.NoRetry, "{1:}{2:} BlobReader: illegal position {3} on Blob of size {4}{:_}")
+ errBadSeekWhence = verror.Register(pkgPath+".errBadSeekWhence", verror.NoRetry, "{1:}{2:} BlobReader: Bad value for 'whence' in Seek{:_}")
+ errNegativeSeekPosition = verror.Register(pkgPath+".errNegativeSeekPosition", verror.NoRetry, "{1:}{2:} BlobReader: negative position for Seek: offset {3}, whence {4}{:_}")
+ errBadSizeOrOffset = verror.Register(pkgPath+".errBadSizeOrOffset", verror.NoRetry, "{1:}{2:} Bad size ({3}) or offset ({4}) in blob {5} (size {6}){:_}")
+ errMalformedBlobHash = verror.Register(pkgPath+".errMalformedBlobHash", verror.NoRetry, "{1:}{2:} Blob {3} has malformed hash{:_}")
+ errInvalidBlobName = verror.Register(pkgPath+".errInvalidBlobName", verror.NoRetry, "{1:}{2:} Invalid blob name {3}{:_}")
+ errCantDeleteBlob = verror.Register(pkgPath+".errCantDeleteBlob", verror.NoRetry, "{1:}{2:} Can't delete blob {3}{:_}")
+ errBlobDeleted = verror.Register(pkgPath+".errBlobDeleted", verror.NoRetry, "{1:}{2:} Blob is deleted{:_}")
+ errSizeTooBigForFragment = verror.Register(pkgPath+".errSizeTooBigForFragment", verror.NoRetry, "{1:}{2:} writing blob {3}, size too big for fragment{:_}")
+ errStreamCancelled = verror.Register(pkgPath+".errStreamCancelled", verror.NoRetry, "{1:}{2:} Advance() called on cancelled stream{:_}")
+)
+
+// For the moment, we disallow others from accessing the tree where blobs are
+// stored. We could in the future relax this to 0711/0755, and 0644.
+const dirPermissions = 0700
+const filePermissions = 0600
+
+// Subdirectories of the blobstore's tree
+const (
+ blobDir = "blob" // Subdirectory where blobs are indexed by blob id.
+ casDir = "cas" // Subdirectory where fragments are indexed by content hash.
+ chunkDir = "chunk" // Subdirectory where chunks are indexed by content hash.
+ tmpDir = "tmp" // Subdirectory where temporary files are created.
+)
+
+// An FsCaBlobStore represents a simple, content-addressable store.
+type FsCaBlobStore struct {
+ rootName string // The name of the root of the store.
+ bm *blobmap.BlobMap // Mapping from chunks to blob locations and vice versa.
+
+ // mu protects fields below, plus most fields in each blobDesc when used from a BlobWriter.
+ mu sync.Mutex
+ activeDesc []*blobDesc // The blob descriptors in use by active BlobReaders and BlobWriters.
+ toDelete []*map[string]bool // Sets of items that active GC threads are about to delete. (Pointers to maps, to allow pointer comparison.)
+}
+
+// hashToFileName() returns the relative file name used to store the given
+// binary hash under the specified prefix directory. Requires len(hash)==16.
+// An md5 hash is suitable.
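+// For illustration (an arbitrary, made-up hash), the 16-byte hash
+// 0a1b2c3d4e5f60718293a4b5c6d7e8f9 with prefix "cas" yields (on Unix):
+// cas/0a/1b/2c/3d4e5f60718293a4b5c6d7e8f9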
+func hashToFileName(prefix string, hash []byte) string {
+ return filepath.Join(prefix,
+ fmt.Sprintf("%02x", hash[0]),
+ fmt.Sprintf("%02x", hash[1]),
+ fmt.Sprintf("%02x", hash[2]),
+ fmt.Sprintf("%02x%02x%02x%02x%02x%02x%02x%02x%02x%02x%02x%02x%02x",
+ hash[3],
+ hash[4], hash[5], hash[6], hash[7],
+ hash[8], hash[9], hash[10], hash[11],
+ hash[12], hash[13], hash[14], hash[15]))
+}
+
+// fileNameToHash() converts a file name in the format generated by
+// hashToFileName(prefix, ...) to a vector of 16 bytes. If the string is
+// malformed, the nil slice is returned.
+func fileNameToHash(prefix string, s string) []byte {
+ idStr := strings.TrimPrefix(filepath.ToSlash(s), prefix+"/")
+ hash := make([]byte, 16, 16)
+ n, err := fmt.Sscanf(idStr, "%02x/%02x/%02x/%02x%02x%02x%02x%02x%02x%02x%02x%02x%02x%02x%02x%02x",
+ &hash[0], &hash[1], &hash[2], &hash[3],
+ &hash[4], &hash[5], &hash[6], &hash[7],
+ &hash[8], &hash[9], &hash[10], &hash[11],
+ &hash[12], &hash[13], &hash[14], &hash[15])
+ if n != 16 || err != nil {
+ hash = nil
+ }
+ return hash
+}
+
+// newBlobName() returns a new random name for a blob.
+func newBlobName() string {
+ return filepath.Join(blobDir,
+ fmt.Sprintf("%02x", rand.Int31n(256)),
+ fmt.Sprintf("%02x", rand.Int31n(256)),
+ fmt.Sprintf("%02x", rand.Int31n(256)),
+ fmt.Sprintf("%02x%02x%02x%02x%02x%02x%02x%02x%02x%02x%02x%02x%02x",
+ rand.Int31n(256),
+ rand.Int31n(256), rand.Int31n(256), rand.Int31n(256), rand.Int31n(256),
+ rand.Int31n(256), rand.Int31n(256), rand.Int31n(256), rand.Int31n(256),
+ rand.Int31n(256), rand.Int31n(256), rand.Int31n(256), rand.Int31n(256)))
+}
+
+// hashToString() returns a string representation of the hash.
+// Requires len(hash)==16. An md5 hash is suitable.
+func hashToString(hash []byte) string {
+ return fmt.Sprintf("%02x%02x%02x%02x%02x%02x%02x%02x%02x%02x%02x%02x%02x%02x%02x%02x",
+ hash[0], hash[1], hash[2], hash[3],
+ hash[4], hash[5], hash[6], hash[7],
+ hash[8], hash[9], hash[10], hash[11],
+ hash[12], hash[13], hash[14], hash[15])
+}
+
+// stringToHash() converts a string in the format generated by hashToString()
+// to a vector of 16 bytes. If the string is malformed, the nil slice is
+// returned.
+func stringToHash(s string) []byte {
+ hash := make([]byte, 16, 16)
+ n, err := fmt.Sscanf(s, "%02x%02x%02x%02x%02x%02x%02x%02x%02x%02x%02x%02x%02x%02x%02x%02x",
+ &hash[0], &hash[1], &hash[2], &hash[3],
+ &hash[4], &hash[5], &hash[6], &hash[7],
+ &hash[8], &hash[9], &hash[10], &hash[11],
+ &hash[12], &hash[13], &hash[14], &hash[15])
+ if n != 16 || err != nil {
+ hash = nil
+ }
+ return hash
+}
+
+// Create() returns a pointer to an FsCaBlobStore stored in the file system at
+// "rootName". If the directory rootName does not exist, it is created.
+func Create(ctx *context.T, rootName string) (fscabs *FsCaBlobStore, err error) {
+ dir := []string{tmpDir, casDir, chunkDir, blobDir}
+ for i := 0; i != len(dir) && err == nil; i++ {
+ fullName := filepath.Join(rootName, dir[i])
+ os.MkdirAll(fullName, dirPermissions)
+ var fi os.FileInfo
+ fi, err = os.Stat(fullName)
+ if err == nil && !fi.IsDir() {
+ err = verror.New(errNotADir, ctx, fullName)
+ }
+ }
+ var bm *blobmap.BlobMap
+ if err == nil {
+ bm, err = blobmap.New(ctx, filepath.Join(rootName, chunkDir))
+ }
+ if err == nil {
+ fscabs = new(FsCaBlobStore)
+ fscabs.rootName = rootName
+ fscabs.bm = bm
+ }
+ return fscabs, err
+}
+
+// Close() closes the FsCaBlobStore.
+func (fscabs *FsCaBlobStore) Close() error {
+ return fscabs.bm.Close()
+}
+
+// Root() returns the name of the root directory where *fscabs is stored.
+func (fscabs *FsCaBlobStore) Root() string {
+ return fscabs.rootName
+}
+
+// DeleteBlob() deletes the named blob from *fscabs.
+func (fscabs *FsCaBlobStore) DeleteBlob(ctx *context.T, blobName string) (err error) {
+ // Disallow deletions of things outside the blob tree, or that may contain "..".
+ // For simplicity, the code currently disallows '.'.
+ blobID := fileNameToHash(blobDir, blobName)
+ if blobID == nil || strings.IndexByte(blobName, '.') != -1 {
+ err = verror.New(errInvalidBlobName, ctx, blobName)
+ } else {
+ err = os.Remove(filepath.Join(fscabs.rootName, blobName))
+ if err != nil {
+ err = verror.New(errCantDeleteBlob, ctx, blobName, err)
+ } else {
+ err = fscabs.bm.DeleteBlob(ctx, blobID)
+ }
+ }
+ return err
+}
+
+// -----------------------------------------------------------
+
+// A file encapsulates both an os.File and a bufio.Writer on that file.
+type file struct {
+ fh *os.File
+ writer *bufio.Writer
+}
+
+// newFile() returns a *file containing fh and a bufio.Writer on that file, if
+// err is nil.
+func newFile(fh *os.File, err error) (*file, error) {
+ var f *file
+ if err == nil {
+ f = new(file)
+ f.fh = fh
+ f.writer = bufio.NewWriter(f.fh)
+ }
+ return f, err
+}
+
+// newTempFile() returns a *file on a new temporary file created in the
+// directory dir.
+func newTempFile(ctx *context.T, dir string) (*file, error) {
+ return newFile(ioutil.TempFile(dir, "newfile"))
+}
+
+// close() flushes buffers (if err==nil initially) and closes the file,
+// returning its name.
+func (f *file) close(ctx *context.T, err error) (string, error) {
+ name := f.fh.Name()
+ // Flush the data out to disc and close the file.
+ if err == nil {
+ err = f.writer.Flush()
+ }
+ if err == nil {
+ err = f.fh.Sync()
+ }
+ err2 := f.fh.Close()
+ if err == nil {
+ err = err2
+ }
+ return name, err
+}
+
+// closeAndRename() calls f.close(), and if err==nil initially and no new
+// errors are seen, renames the file to newName.
+func (f *file) closeAndRename(ctx *context.T, newName string, err error) error {
+ var oldName string
+ oldName, err = f.close(ctx, err)
+ if err == nil { // if temp file written successfully...
+ // Link or rename the file into place, hoping at least one is
+ // supported on this file system.
+ os.MkdirAll(filepath.Dir(newName), dirPermissions)
+ err = os.Link(oldName, newName)
+ if err == nil {
+ os.Remove(oldName)
+ } else {
+ err = os.Rename(oldName, newName)
+ }
+ }
+ if err != nil {
+ os.Remove(oldName)
+ }
+ return err
+}
+
+// -----------------------------------------------------------
+
+// addFragment() ensures that the store *fscabs contains a fragment comprising
+// the catenation of the byte vectors named by item[..].Block and the contents
+// of the files named by item[..].FileName. The Block field is ignored if
+// FileName!="". The fragment is not physically added if already present.
+// The fragment is added to the fragment list of the descriptor *desc.
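+// For example (a sketch; the values are illustrative, and FileName is
+// interpreted relative to the store's root directory), a caller might pass:
+// item := []localblobstore.BlockOrFile{
+//   {Block: []byte("some bytes")},
+//   {FileName: "cas/00/01/02/030405060708090a0b0c0d0e0f", Offset: 0, Size: 1024},
+// }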
+func (fscabs *FsCaBlobStore) addFragment(ctx *context.T, extHasher hash.Hash,
+ desc *blobDesc, item ...localblobstore.BlockOrFile) (fileName string, size int64, err error) {
+
+ hasher := md5.New()
+ var buf []byte
+ var fileHandleList []*os.File
+
+ // Hash the inputs.
+ for i := 0; i != len(item) && err == nil; i++ {
+ if len(item[i].FileName) != 0 {
+ if buf == nil {
+ buf = make([]byte, 8192, 8192)
+ fileHandleList = make([]*os.File, 0, len(item))
+ }
+ var fileHandle *os.File
+ fileHandle, err = os.Open(filepath.Join(fscabs.rootName, item[i].FileName))
+ if err == nil {
+ fileHandleList = append(fileHandleList, fileHandle)
+ at := item[i].Offset
+ toRead := item[i].Size
+ var haveRead int64
+ for err == nil && (toRead == -1 || haveRead < toRead) {
+ var n int
+ n, err = fileHandle.ReadAt(buf, at)
+ if err == nil {
+ if toRead != -1 && int64(n)+haveRead > toRead {
+ n = int(toRead - haveRead)
+ }
+ haveRead += int64(n)
+ at += int64(n)
+ size += int64(n)
+ hasher.Write(buf[0:n]) // Cannot fail; see Hash interface.
+ extHasher.Write(buf[0:n])
+ }
+ }
+ if err == io.EOF {
+ if toRead == -1 || haveRead == toRead {
+ err = nil // The loop read all that was asked; EOF is a possible outcome.
+ } else { // The loop read less than was asked; request must have been too big.
+ err = verror.New(errSizeTooBigForFragment, ctx, desc.name, item[i].FileName)
+ }
+ }
+ }
+ } else {
+ hasher.Write(item[i].Block) // Cannot fail; see Hash interface.
+ extHasher.Write(item[i].Block)
+ size += int64(len(item[i].Block))
+ }
+ }
+
+ // Compute the hash, and form the file name in the repository.
+ hash := hasher.Sum(nil)
+ relFileName := hashToFileName(casDir, hash)
+ absFileName := filepath.Join(fscabs.rootName, relFileName)
+
+ // Add the fragment's name to *desc's fragments so the garbage
+ // collector will not delete it.
+ fscabs.mu.Lock()
+ desc.fragment = append(desc.fragment, blobFragment{
+ pos: desc.size,
+ size: size,
+ offset: 0,
+ fileName: relFileName})
+ fscabs.mu.Unlock()
+
+ // If the file does not already exist, ...
+ if _, statErr := os.Stat(absFileName); err == nil && os.IsNotExist(statErr) {
+ // ... try to create it by writing to a temp file and renaming.
+ var t *file
+ t, err = newTempFile(ctx, filepath.Join(fscabs.rootName, tmpDir))
+ if err == nil {
+ // Copy the byte-sequences and input files to the temp file.
+ j := 0
+ for i := 0; i != len(item) && err == nil; i++ {
+ if len(item[i].FileName) != 0 {
+ at := item[i].Offset
+ toRead := item[i].Size
+ var haveRead int64
+ for err == nil && (toRead == -1 || haveRead < toRead) {
+ var n int
+ n, err = fileHandleList[j].ReadAt(buf, at)
+ if err == nil {
+ if toRead != -1 && int64(n)+haveRead > toRead {
+ n = int(toRead - haveRead)
+ }
+ haveRead += int64(n)
+ at += int64(n)
+ _, err = t.writer.Write(buf[0:n])
+ }
+ }
+ if err == io.EOF { // EOF is the expected outcome.
+ err = nil
+ }
+ j++
+ } else {
+ _, err = t.writer.Write(item[i].Block)
+ }
+ }
+ err = t.closeAndRename(ctx, absFileName, err)
+ }
+ } // else file already exists, nothing more to do.
+
+ for i := 0; i != len(fileHandleList); i++ {
+ fileHandleList[i].Close()
+ }
+
+ if err != nil {
+ err = verror.New(errAppendFailed, ctx, fscabs.rootName, err)
+ // Remove the entry added to fragment list above.
+ fscabs.mu.Lock()
+ desc.fragment = desc.fragment[0 : len(desc.fragment)-1]
+ fscabs.mu.Unlock()
+ } else { // commit the change by updating the size
+ fscabs.mu.Lock()
+ desc.size += size
+ desc.cv.Broadcast() // Tell blobmap BlobReader there's more to read.
+ fscabs.mu.Unlock()
+ }
+
+ return relFileName, size, err
+}
+
+// A blobFragment represents a vector of bytes and its position within a blob.
+type blobFragment struct {
+ pos int64 // position of this fragment within its containing blob.
+ size int64 // size of this fragment.
+ offset int64 // offset within fileName.
+ fileName string // name of file describing this fragment.
+}
+
+// A blobDesc is the in-memory representation of a blob.
+type blobDesc struct {
+ activeDescIndex int // Index into fscabs.activeDesc if refCount>0; under fscabs.mu.
+ refCount int // Reference count; under fscabs.mu.
+
+ name string // Name of the blob.
+
+ // The following fields are modified under fscabs.mu and in BlobWriter
+ // owner's thread; they may be read by GC (when obtained from
+ // fscabs.activeDesc) and the chunk writer under fscabs.mu. In the
+ // BlobWriter owner's thread, reading does not require a lock, but
+ // writing does. In other contexts (BlobReader, or a desc that has
+ // just been allocated by getBlob()), no locking is needed.
+
+ fragment []blobFragment // All the fragments in this blob.
+ size int64 // Total size of the blob.
+ finalized bool // Whether the blob has been finalized.
+ // A finalized blob has a valid hash field, and no new bytes may be added
+ // to it. A well-formed hash has 16 bytes.
+ hash []byte
+
+ openWriter bool // Whether this descriptor is being written by an open BlobWriter.
+ cv *sync.Cond // signalled when a BlobWriter writes or closes.
+}
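+// For illustration only: a blob assembled from two fragments of 10 and 5
+// bytes would carry
+// fragment[0] = blobFragment{pos: 0, size: 10, offset: 0, fileName: "cas/..."}
+// fragment[1] = blobFragment{pos: 10, size: 5, offset: 0, fileName: "cas/..."}
+// and size == 15; the fileName values shown are placeholders.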
+
+// isBeingDeleted() returns whether fragment fragName is about to be deleted
+// by the garbage collector. Requires fscabs.mu held.
+func (fscabs *FsCaBlobStore) isBeingDeleted(fragName string) (beingDeleted bool) {
+ for i := 0; i != len(fscabs.toDelete) && !beingDeleted; i++ {
+ _, beingDeleted = (*(fscabs.toDelete[i]))[fragName]
+ }
+ return beingDeleted
+}
+
+// descRef() increments the reference count of *desc and returns whether
+// successful. It may fail if the fragments referenced by the descriptor are
+// being deleted by the garbage collector.
+func (fscabs *FsCaBlobStore) descRef(desc *blobDesc) bool {
+ beingDeleted := false
+ fscabs.mu.Lock()
+ if desc.refCount == 0 {
+ // On the first reference, check whether the fragments are
+ // being deleted, and if not, add *desc to the
+ // fscabs.activeDesc vector.
+ for i := 0; i != len(desc.fragment) && !beingDeleted; i++ {
+ beingDeleted = fscabs.isBeingDeleted(desc.fragment[i].fileName)
+ }
+ if !beingDeleted {
+ desc.activeDescIndex = len(fscabs.activeDesc)
+ fscabs.activeDesc = append(fscabs.activeDesc, desc)
+ }
+ }
+ if !beingDeleted {
+ desc.refCount++
+ }
+ fscabs.mu.Unlock()
+ return !beingDeleted
+}
+
+// descUnref() decrements the reference count of *desc if desc!=nil; if that
+// removes the last reference, *desc is removed from the fscabs.activeDesc
+// vector.
+func (fscabs *FsCaBlobStore) descUnref(desc *blobDesc) {
+ if desc != nil {
+ fscabs.mu.Lock()
+ desc.refCount--
+ if desc.refCount < 0 {
+ panic("negative reference count")
+ } else if desc.refCount == 0 {
+ // Remove desc from fscabs.activeDesc by moving the
+ // last entry in fscabs.activeDesc to desc's slot.
+ n := len(fscabs.activeDesc)
+ lastDesc := fscabs.activeDesc[n-1]
+ lastDesc.activeDescIndex = desc.activeDescIndex
+ fscabs.activeDesc[desc.activeDescIndex] = lastDesc
+ fscabs.activeDesc = fscabs.activeDesc[0 : n-1]
+ desc.activeDescIndex = -1
+ }
+ fscabs.mu.Unlock()
+ }
+}
+
+// getBlob() returns the in-memory blob descriptor for the named blob.
+func (fscabs *FsCaBlobStore) getBlob(ctx *context.T, blobName string) (desc *blobDesc, err error) {
+ slashBlobName := filepath.ToSlash(blobName)
+ if !strings.HasPrefix(slashBlobName, blobDir+"/") || strings.IndexByte(blobName, '.') != -1 {
+ err = verror.New(errInvalidBlobName, ctx, blobName)
+ } else {
+ absBlobName := filepath.Join(fscabs.rootName, blobName)
+ var fh *os.File
+ fh, err = os.Open(absBlobName)
+ if err == nil {
+ var line string
+ desc = new(blobDesc)
+ desc.activeDescIndex = -1
+ desc.name = blobName
+ desc.cv = sync.NewCond(&fscabs.mu)
+ scanner := bufio.NewScanner(fh)
+ for scanner.Scan() {
+ line = scanner.Text() // Keep the raw line for error reporting below.
+ field := strings.Split(line, " ")
+ if len(field) == 4 && field[0] == "d" {
+ var fragSize int64
+ var fragOffset int64
+ fragSize, err = strconv.ParseInt(field[1], 0, 64)
+ if err == nil {
+ fragOffset, err = strconv.ParseInt(field[2], 0, 64)
+ }
+ if err == nil {
+ // No locking needed here because desc
+ // is newly allocated and not yet passed to descRef().
+ desc.fragment = append(desc.fragment,
+ blobFragment{
+ fileName: field[3],
+ pos: desc.size,
+ size: fragSize,
+ offset: fragOffset})
+ }
+ desc.size += fragSize
+ } else if len(field) == 2 && field[0] == "f" {
+ desc.hash = stringToHash(field[1])
+ desc.finalized = true
+ if desc.hash == nil {
+ err = verror.New(errMalformedBlobHash, ctx, blobName, field[1])
+ }
+ } else if len(field) > 0 && len(field[0]) == 1 && "a" <= field[0] && field[0] <= "z" {
+ // unrecognized line, reserved for extensions: ignore.
+ } else {
+ err = verror.New(errMalformedField, ctx, line)
+ }
+ }
+ err = scanner.Err()
+ fh.Close()
+ }
+ }
+ // Ensure that we return either a properly referenced desc, or nil.
+ if err != nil {
+ desc = nil
+ } else if !fscabs.descRef(desc) {
+ err = verror.New(errBlobDeleted, ctx, blobName)
+ desc = nil
+ }
+ return desc, err
+}
+
+// -----------------------------------------------------------
+
+// A BlobWriter allows a blob to be written. If a blob has not yet been
+// finalized, it also allows that blob to be extended. A BlobWriter may be
+// created with NewBlobWriter(), and should be closed with Close() or
+// CloseWithoutFinalize().
+type BlobWriter struct {
+ // The BlobWriter exists within a particular FsCaBlobStore and context.T
+ fscabs *FsCaBlobStore
+ ctx *context.T
+
+ desc *blobDesc // Description of the blob being written.
+ f *file // The file being written.
+ hasher hash.Hash // Running hash of blob.
+
+ // Fields to allow the BlobMap to be written.
+ csBr *BlobReader // Reader over the blob that's currently being written.
+ cs *chunker.Stream // Stream of chunks derived from csBr
+ csErr chan error // writeBlobMap() sends its result here; Close/CloseWithoutFinalize receives it.
+}
+
+// NewBlobWriter() returns a pointer to a newly allocated BlobWriter on
+// a newly created blob. If "name" is non-empty, it is used to name
+// the blob, and it must be in the format of a name returned by this
+// interface (probably by another instance on another device).
+// Otherwise, a new name is created, which can be found using
+// the Name() method. It is an error to attempt to overwrite a blob
+// that already exists in this blob store. BlobWriters should not be
+// used concurrently by multiple threads. The returned handle should
+// be closed with either the Close() or CloseWithoutFinalize() method
+// to avoid leaking file handles.
+func (fscabs *FsCaBlobStore) NewBlobWriter(ctx *context.T, name string) (localblobstore.BlobWriter, error) {
+ var bw *BlobWriter
+ if name == "" {
+ name = newBlobName()
+ }
+ fileName := filepath.Join(fscabs.rootName, name)
+ os.MkdirAll(filepath.Dir(fileName), dirPermissions)
+ f, err := newFile(os.OpenFile(fileName, os.O_RDWR|os.O_CREATE|os.O_EXCL, filePermissions))
+ if err == nil {
+ bw = new(BlobWriter)
+ bw.fscabs = fscabs
+ bw.ctx = ctx
+ bw.desc = new(blobDesc)
+ bw.desc.activeDescIndex = -1
+ bw.desc.name = name
+ bw.desc.cv = sync.NewCond(&fscabs.mu)
+ bw.desc.openWriter = true
+ bw.f = f
+ bw.hasher = md5.New()
+ if !fscabs.descRef(bw.desc) {
+ // Can't happen; descriptor refers to no fragments.
+ panic(verror.New(errBlobDeleted, ctx, bw.desc.name))
+ }
+ // Write the chunks of this blob into the BlobMap, as they are
+ // written by this writer.
+ bw.forkWriteBlobMap()
+ }
+ return bw, err
+}
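+// Expected use (a sketch; error handling elided, and "hello" is just an
+// arbitrary payload):
+// bw, err := fscabs.NewBlobWriter(ctx, "")
+// if err == nil {
+// err = bw.AppendFragment(localblobstore.BlockOrFile{Block: []byte("hello")})
+// }
+// if err == nil {
+// err = bw.Close() // Finalizes the blob; bw.Name() identifies it thereafter.
+// }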
+
+// ResumeBlobWriter() returns a pointer to a newly allocated BlobWriter for an
+// existing, but not yet finalized, blob with the given name.
+func (fscabs *FsCaBlobStore) ResumeBlobWriter(ctx *context.T, blobName string) (localblobstore.BlobWriter, error) {
+ var err error
+ var bw *BlobWriter
+ var desc *blobDesc
+ desc, err = fscabs.getBlob(ctx, blobName)
+ if err == nil && desc.finalized {
+ err = verror.New(errBlobAlreadyFinalized, ctx, blobName)
+ } else if err == nil {
+ bw = new(BlobWriter)
+ bw.fscabs = fscabs
+ bw.ctx = ctx
+ bw.desc = desc
+ bw.desc.openWriter = true
+ fileName := filepath.Join(fscabs.rootName, bw.desc.name)
+ bw.f, err = newFile(os.OpenFile(fileName, os.O_WRONLY|os.O_APPEND, 0666))
+ bw.hasher = md5.New()
+ // Add the existing fragments to the running hash.
+ // The descRef's ref count is incremented here to compensate
+ // for the decrement it will receive in br.Close(), below.
+ if !fscabs.descRef(bw.desc) {
+ // Can't happen; descriptor's ref count was already
+ // non-zero.
+ panic(verror.New(errBlobDeleted, ctx, fileName))
+ }
+ br := fscabs.blobReaderFromDesc(ctx, bw.desc, dontWaitForWriter)
+ buf := make([]byte, 8192, 8192)
+ for err == nil {
+ var n int
+ n, err = br.Read(buf)
+ bw.hasher.Write(buf[0:n])
+ }
+ br.Close()
+ if err == io.EOF { // EOF is expected.
+ err = nil
+ }
+ if err == nil {
+ // Write the chunks of this blob into the BlobMap, as
+ // they are written by this writer.
+ bw.forkWriteBlobMap()
+ }
+ }
+ return bw, err
+}
+
+// forkWriteBlobMap() creates a new thread to run writeBlobMap(). It adds
+// the chunks written to *bw to the blob store's BlobMap. The caller is
+// expected to call joinWriteBlobMap() at some later point.
+func (bw *BlobWriter) forkWriteBlobMap() {
+ // The descRef's ref count is incremented here to compensate
+ // for the decrement it will receive in br.Close() in joinWriteBlobMap.
+ if !bw.fscabs.descRef(bw.desc) {
+ // Can't happen; descriptor's ref count was already non-zero.
+ panic(verror.New(errBlobDeleted, bw.ctx, bw.desc.name))
+ }
+ bw.csBr = bw.fscabs.blobReaderFromDesc(bw.ctx, bw.desc, waitForWriter)
+ bw.cs = chunker.NewStream(bw.ctx, &chunker.DefaultParam, bw.csBr)
+ bw.csErr = make(chan error)
+ go bw.writeBlobMap()
+}
+
+// insertChunk() inserts chunk into the blob store's BlobMap, associating it
+// with the specified byte offset in the blob blobID being written by *bw. The byte
+// offset of the next chunk is returned.
+func (bw *BlobWriter) insertChunk(blobID []byte, chunkHash []byte, offset int64, size int64) (int64, error) {
+ err := bw.fscabs.bm.AssociateChunkWithLocation(bw.ctx, chunkHash[:],
+ blobmap.Location{BlobID: blobID, Offset: offset, Size: size})
+ if err != nil {
+ bw.cs.Cancel()
+ }
+ return offset + size, err
+}
+
+// writeBlobMap() iterates over the chunks in stream bw.cs, and associates each
+// one with the blob being written.
+func (bw *BlobWriter) writeBlobMap() {
+ var err error
+ var offset int64
+ blobID := fileNameToHash(blobDir, bw.desc.name)
+ // Associate each chunk only after the next chunk has been seen (or
+ // the blob finalized), to avoid recording an artificially short chunk
+ // at the end of a partial transfer.
+ var chunkHash [md5.Size]byte
+ var chunkLen int64
+ if bw.cs.Advance() {
+ chunk := bw.cs.Value()
+ // Record the hash and size, since chunk's underlying buffer
+ // may be reused by the next call to Advance().
+ chunkHash = md5.Sum(chunk)
+ chunkLen = int64(len(chunk))
+ for bw.cs.Advance() {
+ offset, err = bw.insertChunk(blobID, chunkHash[:], offset, chunkLen)
+ chunk = bw.cs.Value()
+ chunkHash = md5.Sum(chunk)
+ chunkLen = int64(len(chunk))
+ }
+ }
+ if err == nil {
+ err = bw.cs.Err()
+ }
+ bw.fscabs.mu.Lock()
+ if err == nil && chunkLen != 0 && bw.desc.finalized {
+ offset, err = bw.insertChunk(blobID, chunkHash[:], offset, chunkLen)
+ }
+ bw.fscabs.mu.Unlock()
+ bw.csErr <- err // wake joinWriteBlobMap()
+}
+
+// joinWriteBlobMap waits for the completion of the thread forked by forkWriteBlobMap().
+// It returns when the chunks in the blob have been written to the blob store's BlobMap.
+func (bw *BlobWriter) joinWriteBlobMap(err error) error {
+ err2 := <-bw.csErr // read error from end of writeBlobMap()
+ if err == nil {
+ err = err2
+ }
+ bw.csBr.Close()
+ return err
+}
+
+// Close() finalizes *bw, and indicates that the client will perform no further
+// append operations on *bw. Any internal open file handles are closed.
+func (bw *BlobWriter) Close() (err error) {
+ if bw.f == nil {
+ err = verror.New(errAlreadyClosed, bw.ctx, bw.desc.name)
+ } else if bw.desc.finalized {
+ err = verror.New(errBlobAlreadyFinalized, bw.ctx, bw.desc.name)
+ } else {
+ h := bw.hasher.Sum(nil)
+ _, err = fmt.Fprintf(bw.f.writer, "f %s\n", hashToString(h)) // finalize
+ _, err = bw.f.close(bw.ctx, err)
+ bw.f = nil
+ bw.fscabs.mu.Lock()
+ bw.desc.finalized = true
+ bw.desc.openWriter = false
+ bw.desc.cv.Broadcast() // Tell blobmap BlobReader that writing has ceased.
+ bw.fscabs.mu.Unlock()
+ err = bw.joinWriteBlobMap(err)
+ bw.fscabs.descUnref(bw.desc)
+ }
+ return err
+}
+
+// CloseWithoutFinalize() indicates that the client will perform no further
+// append operations on *bw, but does not finalize the blob. Any internal open
+// file handles are closed. Clients are expected to need this operation
+// infrequently.
+func (bw *BlobWriter) CloseWithoutFinalize() (err error) {
+ if bw.f == nil {
+ err = verror.New(errAlreadyClosed, bw.ctx, bw.desc.name)
+ } else {
+ bw.fscabs.mu.Lock()
+ bw.desc.openWriter = false
+ bw.desc.cv.Broadcast() // Tell blobmap BlobReader that writing has ceased.
+ bw.fscabs.mu.Unlock()
+ _, err = bw.f.close(bw.ctx, err)
+ bw.f = nil
+ err = bw.joinWriteBlobMap(err)
+ bw.fscabs.descUnref(bw.desc)
+ }
+ return err
+}
+
+// AppendFragment() appends a fragment to the blob being written by *bw, where
+// the fragment is composed of the byte vectors described by the elements of
+// item[]. The fragment is copied into the blob store.
+func (bw *BlobWriter) AppendFragment(item ...localblobstore.BlockOrFile) (err error) {
+ if bw.f == nil {
+ panic("fs_cablobstore.BlobWriter programming error: AppendFragment() after Close()")
+ }
+ var fragmentName string
+ var size int64
+ fragmentName, size, err = bw.fscabs.addFragment(bw.ctx, bw.hasher, bw.desc, item...)
+ if err == nil {
+ _, err = fmt.Fprintf(bw.f.writer, "d %d %d %s\n", size, 0 /*offset*/, fragmentName)
+ }
+ if err == nil {
+ err = bw.f.writer.Flush()
+ }
+ return err
+}
+
+// AppendBlob() adds a (substring of a) pre-existing blob to the blob being
+// written by *bw. The fragments of the pre-existing blob are not physically
+// copied; they are referenced by both blobs.
+func (bw *BlobWriter) AppendBlob(blobName string, size int64, offset int64) (err error) {
+ if bw.f == nil {
+ panic("fs_cablobstore.BlobWriter programming error: AppendBlob() after Close()")
+ }
+ var desc *blobDesc
+ desc, err = bw.fscabs.getBlob(bw.ctx, blobName)
+ origSize := bw.desc.size
+ if err == nil {
+ if size == -1 {
+ size = desc.size - offset
+ }
+ if offset < 0 || desc.size < offset+size {
+ err = verror.New(errBadSizeOrOffset, bw.ctx, size, offset, blobName, desc.size)
+ }
+ for i := 0; i != len(desc.fragment) && err == nil && size > 0; i++ {
+ if desc.fragment[i].size <= offset {
+ offset -= desc.fragment[i].size
+ } else {
+ consume := desc.fragment[i].size - offset
+ if size < consume {
+ consume = size
+ }
+ _, err = fmt.Fprintf(bw.f.writer, "d %d %d %s\n",
+ consume, offset+desc.fragment[i].offset, desc.fragment[i].fileName)
+ if err == nil {
+ // Add fragment so garbage collector can see it.
+ // The garbage collector cannot be
+ // about to delete the fragment, because
+ // getBlob() already checked for that
+ // above, and kept a reference.
+ bw.fscabs.mu.Lock()
+ bw.desc.fragment = append(bw.desc.fragment, blobFragment{
+ pos: bw.desc.size,
+ size: consume,
+ offset: offset + desc.fragment[i].offset,
+ fileName: desc.fragment[i].fileName})
+ bw.desc.size += consume
+ bw.desc.cv.Broadcast() // Tell blobmap BlobReader there's more to read.
+ bw.fscabs.mu.Unlock()
+ }
+ offset = 0
+ size -= consume
+ }
+ }
+ bw.fscabs.descUnref(desc)
+ // Add the new fragments to the running hash.
+ if !bw.fscabs.descRef(bw.desc) {
+ // Can't happen; descriptor's ref count was already
+ // non-zero.
+ panic(verror.New(errBlobDeleted, bw.ctx, blobName))
+ }
+ br := bw.fscabs.blobReaderFromDesc(bw.ctx, bw.desc, dontWaitForWriter)
+ if err == nil {
+ _, err = br.Seek(origSize, 0)
+ }
+ buf := make([]byte, 8192, 8192)
+ for err == nil {
+ var n int
+ n, err = br.Read(buf)
+ bw.hasher.Write(buf[0:n]) // Cannot fail; see Hash interface.
+ }
+ br.Close()
+ if err == io.EOF { // EOF is expected.
+ err = nil
+ }
+ if err == nil {
+ err = bw.f.writer.Flush()
+ }
+ }
+ return err
+}
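+// Sketch of use (existingBlobName is a placeholder for a blob already in this
+// store; a size of -1 means "the rest of the blob"):
+// bw, err := fscabs.NewBlobWriter(ctx, "")
+// if err == nil {
+// // Reference all of the existing blob's bytes without copying its fragments.
+// err = bw.AppendBlob(existingBlobName, -1, 0)
+// }
+// if err == nil {
+// err = bw.Close()
+// }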
+
+// IsFinalized() returns whether *bw has been finalized.
+func (bw *BlobWriter) IsFinalized() bool {
+ return bw.desc.finalized
+}
+
+// Size() returns *bw's size.
+func (bw *BlobWriter) Size() int64 {
+ return bw.desc.size
+}
+
+// Name() returns *bw's name.
+func (bw *BlobWriter) Name() string {
+ return bw.desc.name
+}
+
+// Hash() returns *bw's hash, reflecting the bytes written so far.
+func (bw *BlobWriter) Hash() []byte {
+ return bw.hasher.Sum(nil)
+}
+
+// -----------------------------------------------------------
+
+// A BlobReader allows a blob to be read using the standard ReadAt(), Read(),
+// and Seek() calls. A BlobReader can be created with NewBlobReader(), and
+// should be closed with the Close() method to avoid leaking file handles.
+type BlobReader struct {
+ // The BlobReader exists within a particular FsCaBlobStore and context.T.
+ fscabs *FsCaBlobStore
+ ctx *context.T
+
+ desc *blobDesc // A description of the blob being read.
+ waitForWriter bool // whether this reader should wait for a concurrent BlobWriter
+
+ pos int64 // The next position we will read from (used by Read/Seek, not ReadAt).
+
+ // The fields below represent a cached open fragment desc.fragment[fragmentIndex].
+ fragmentIndex int // -1 or 0 <= fragmentIndex < len(desc.fragment).
+ fh *os.File // non-nil iff fragmentIndex != -1.
+}
+
+// constants to make calls to blobReaderFromDesc() more readable
+const (
+ dontWaitForWriter = false
+ waitForWriter = true
+)
+
+// blobReaderFromDesc() returns a pointer to a newly allocated BlobReader given
+// a pre-existing blobDesc. If waitForWriter is true, the reader will wait for
+// any BlobWriter to finish writing the part of the blob the reader is trying
+// to read.
+func (fscabs *FsCaBlobStore) blobReaderFromDesc(ctx *context.T, desc *blobDesc, waitForWriter bool) *BlobReader {
+ br := new(BlobReader)
+ br.fscabs = fscabs
+ br.ctx = ctx
+ br.fragmentIndex = -1
+ br.desc = desc
+ br.waitForWriter = waitForWriter
+ return br
+}
+
+// NewBlobReader() returns a pointer to a newly allocated BlobReader on the
+// specified blobName. BlobReaders should not be used concurrently by multiple
+// threads. Returned handles should be closed with Close().
+func (fscabs *FsCaBlobStore) NewBlobReader(ctx *context.T, blobName string) (br localblobstore.BlobReader, err error) {
+ var desc *blobDesc
+ desc, err = fscabs.getBlob(ctx, blobName)
+ if err == nil {
+ br = fscabs.blobReaderFromDesc(ctx, desc, dontWaitForWriter)
+ }
+ return br, err
+}
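+// Expected use (a sketch; error handling abbreviated):
+// br, err := fscabs.NewBlobReader(ctx, blobName)
+// if err == nil {
+// buf := make([]byte, 8192)
+// for err == nil {
+// var n int
+// n, err = br.Read(buf)
+// // Process buf[0:n] here.
+// }
+// if err == io.EOF {
+// err = nil // EOF is the expected outcome.
+// }
+// br.Close()
+// }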
+
+// closeInternal() closes any open file handles within *br.
+func (br *BlobReader) closeInternal() {
+ if br.fh != nil {
+ br.fh.Close()
+ br.fh = nil
+ }
+ br.fragmentIndex = -1
+}
+
+// Close() indicates that the client will perform no further operations on *br.
+// It closes any open file handles within a BlobReader.
+func (br *BlobReader) Close() error {
+ br.closeInternal()
+ br.fscabs.descUnref(br.desc)
+ return nil
+}
+
+// findFragment() returns the index of the first element of fragment[] that may
+// contain "offset", based on the "pos" fields of each element.
+// Requires that fragment[] be sorted on the "pos" fields of the elements.
+func findFragment(fragment []blobFragment, offset int64) int {
+ lo := 0
+ hi := len(fragment)
+ for lo < hi {
+ mid := (lo + hi) >> 1
+ if offset < fragment[mid].pos {
+ hi = mid
+ } else {
+ lo = mid + 1
+ }
+ }
+ if lo > 0 {
+ lo--
+ }
+ return lo
+}
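+// For example (illustrative values): with fragments at pos 0, 10, and 25,
+// findFragment(fragment, 12) returns 1, because fragment[1] (pos 10) is the
+// last fragment that starts at or before offset 12.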
+
+// waitUntilAvailable() waits until position pos within *br is available for
+// reading, if this reader is waiting for writers. The position is available if:
+// - *br is on an already written blob, or
+// - *br is on a blob being written whose writer has been closed, or whose
+// writes have passed position pos.
+// The value pos==math.MaxInt64 can be used to mean "until the writer is closed".
+// Requires br.fscabs.mu held.
+func (br *BlobReader) waitUntilAvailable(pos int64) {
+ for br.waitForWriter && br.desc.openWriter && br.desc.size < pos {
+ br.desc.cv.Wait()
+ }
+}
+
+// ReadAt() fills b[] with up to len(b) bytes of data starting at position "at"
+// within the blob that *br indicates, and returns the number of bytes read.
+func (br *BlobReader) ReadAt(b []byte, at int64) (n int, err error) {
+ br.fscabs.mu.Lock()
+ br.waitUntilAvailable(at + int64(len(b)))
+ i := findFragment(br.desc.fragment, at)
+ if i < len(br.desc.fragment) && at <= br.desc.size {
+ fragmenti := br.desc.fragment[i] // copy fragment data to allow releasing lock
+ br.fscabs.mu.Unlock()
+ if i != br.fragmentIndex {
+ br.closeInternal()
+ }
+ if br.fragmentIndex == -1 {
+ br.fh, err = os.Open(filepath.Join(br.fscabs.rootName, fragmenti.fileName))
+ if err == nil {
+ br.fragmentIndex = i
+ } else {
+ br.closeInternal()
+ }
+ }
+ var offset int64 = at - fragmenti.pos + fragmenti.offset
+ consume := fragmenti.size - (at - fragmenti.pos)
+ if int64(len(b)) < consume {
+ consume = int64(len(b))
+ }
+ if br.fh != nil {
+ n, err = br.fh.ReadAt(b[0:consume], offset)
+ } else if err == nil {
+ panic("failed to open blob fragment")
+ }
+ br.fscabs.mu.Lock()
+ // Return io.EOF if the Read reached the end of the last
+ // fragment, but not if it's merely the end of some interior
+ // fragment or the blob is still being extended.
+ if int64(n)+at >= br.desc.size && !(br.waitForWriter && br.desc.openWriter) {
+ if err == nil {
+ err = io.EOF
+ }
+ } else if err == io.EOF {
+ err = nil
+ }
+ } else if at == br.desc.size { // Reading at the end of the file, past the last fragment.
+ err = io.EOF
+ } else {
+ err = verror.New(errIllegalPositionForRead, br.ctx, br.pos, br.desc.size)
+ }
+ br.fscabs.mu.Unlock()
+ return n, err
+}
+
+// Read() fills b[] with up to len(b) bytes of data starting at the current
+// seek position of *br within the blob that *br indicates, and then both
+// returns the number of bytes read and advances *br's seek position by that
+// amount.
+func (br *BlobReader) Read(b []byte) (n int, err error) {
+ n, err = br.ReadAt(b, br.pos)
+ if err == nil {
+ br.pos += int64(n)
+ }
+ return n, err
+}
+
+// Seek() sets the seek position of *br to offset if whence==0,
+// offset+current_seek_position if whence==1, and offset+end_of_blob if
+// whence==2, and then returns the current seek position.
+func (br *BlobReader) Seek(offset int64, whence int) (result int64, err error) {
+ br.fscabs.mu.Lock()
+ if whence == 0 {
+ result = offset
+ } else if whence == 1 {
+ result = offset + br.pos
+ } else if whence == 2 {
+ br.waitUntilAvailable(math.MaxInt64)
+ result = offset + br.desc.size
+ } else {
+ err = verror.New(errBadSeekWhence, br.ctx, whence)
+ result = br.pos
+ }
+ if result < 0 {
+ err = verror.New(errNegativeSeekPosition, br.ctx, offset, whence)
+ result = br.pos
+ } else if result > br.desc.size {
+ err = verror.New(errIllegalPositionForRead, br.ctx, result, br.desc.size)
+ result = br.pos
+ } else if err == nil {
+ br.pos = result
+ }
+ br.fscabs.mu.Unlock()
+ return result, err
+}
+
+// IsFinalized() returns whether *br has been finalized.
+func (br *BlobReader) IsFinalized() bool {
+ br.fscabs.mu.Lock()
+ br.waitUntilAvailable(math.MaxInt64)
+ finalized := br.desc.finalized
+ br.fscabs.mu.Unlock()
+ return finalized
+}
+
+// Size() returns *br's size.
+func (br *BlobReader) Size() int64 {
+ br.fscabs.mu.Lock()
+ br.waitUntilAvailable(math.MaxInt64)
+ size := br.desc.size
+ br.fscabs.mu.Unlock()
+ return size
+}
+
+// Name() returns *br's name.
+func (br *BlobReader) Name() string {
+ return br.desc.name
+}
+
+// Hash() returns *br's hash. It may be nil if the blob is not finalized.
+func (br *BlobReader) Hash() []byte {
+ br.fscabs.mu.Lock()
+ br.waitUntilAvailable(math.MaxInt64)
+ hash := br.desc.hash
+ br.fscabs.mu.Unlock()
+ return hash
+}
+
+// -----------------------------------------------------------
+
+// A dirListing is a list of names in a directory, plus a position, which
+// indexes the last item in nameList that has been processed.
+type dirListing struct {
+ pos int // Current position in nameList; may be -1 at the start of iteration.
+ nameList []string // List of directory entries.
+}
+
+// An FsCasIter represents an iterator that allows the client to enumerate all
+// the blobs or fragments in a FsCaBlobStore.
+type FsCasIter struct {
+ fscabs *FsCaBlobStore // The parent FsCaBlobStore.
+ err error // If non-nil, the error that terminated iteration.
+ stack []dirListing // The stack of dirListings leading to the current entry.
+ ctx *context.T // context passed to ListBlobIds() or ListCAIds()
+
+ mu sync.Mutex // Protects cancelled.
+ cancelled bool // Whether Cancel() has been called.
+}
+
+// ListBlobIds() returns an iterator that can be used to enumerate the blobs in
+// an FsCaBlobStore. Expected use is:
+// fscabsi := fscabs.ListBlobIds(ctx)
+// for fscabsi.Advance() {
+// // Process fscabsi.Value() here.
+// }
+// if fscabsi.Err() != nil {
+// // The loop terminated early due to an error.
+// }
+func (fscabs *FsCaBlobStore) ListBlobIds(ctx *context.T) localblobstore.Stream {
+ stack := make([]dirListing, 1)
+ stack[0] = dirListing{pos: -1, nameList: []string{blobDir}}
+ return &FsCasIter{fscabs: fscabs, stack: stack, ctx: ctx}
+}
+
+// ListCAIds() returns an iterator that can be used to enumerate the
+// content-addressable fragments in an FsCaBlobStore.
+// Expected use is:
+// fscabsi := fscabs.ListCAIds(ctx)
+// for fscabsi.Advance() {
+// // Process fscabsi.Value() here.
+// }
+// if fscabsi.Err() != nil {
+// // The loop terminated early due to an error.
+// }
+func (fscabs *FsCaBlobStore) ListCAIds(ctx *context.T) localblobstore.Stream {
+ stack := make([]dirListing, 1)
+ stack[0] = dirListing{pos: -1, nameList: []string{casDir}}
+ return &FsCasIter{fscabs: fscabs, stack: stack, ctx: ctx}
+}
+
+// isCancelled() returns whether Cancel() has been called.
+func (fscabsi *FsCasIter) isCancelled() bool {
+ fscabsi.mu.Lock()
+ cancelled := fscabsi.cancelled
+ fscabsi.mu.Unlock()
+ return cancelled
+}
+
+// Advance() stages an item so that it may be retrieved via Value. Returns
+// true iff there is an item to retrieve. Advance must be called before Value
+// is called.
+func (fscabsi *FsCasIter) Advance() (advanced bool) {
+ stack := fscabsi.stack
+ err := fscabsi.err
+
+ for err == nil && !advanced && len(stack) != 0 && !fscabsi.isCancelled() {
+ last := len(stack) - 1
+ stack[last].pos++
+ if stack[last].pos == len(stack[last].nameList) {
+ stack = stack[0:last]
+ fscabsi.stack = stack
+ } else {
+ fullName := filepath.Join(fscabsi.fscabs.rootName, fscabsi.Value())
+ var fi os.FileInfo
+ fi, err = os.Lstat(fullName)
+ if err != nil {
+ // error: nothing to do
+ } else if fi.IsDir() {
+ var dirHandle *os.File
+ dirHandle, err = os.Open(fullName)
+ if err == nil {
+ var nameList []string
+ nameList, err = dirHandle.Readdirnames(0)
+ dirHandle.Close()
+ stack = append(stack, dirListing{pos: -1, nameList: nameList})
+ fscabsi.stack = stack
+ last = len(stack) - 1
+ }
+ } else {
+ advanced = true
+ }
+ }
+ }
+
+ if fscabsi.isCancelled() {
+ if err == nil {
+ fscabsi.err = verror.New(errStreamCancelled, fscabsi.ctx)
+ }
+ advanced = false
+ }
+
+ fscabsi.err = err
+ return advanced
+}
+
+// Value() returns the item that was staged by Advance. May panic if Advance
+// returned false or was not called. Never blocks.
+func (fscabsi *FsCasIter) Value() (name string) {
+ stack := fscabsi.stack
+ if fscabsi.err == nil && len(stack) != 0 && stack[0].pos >= 0 {
+ name = stack[0].nameList[stack[0].pos]
+ for i := 1; i != len(stack); i++ {
+ name = filepath.Join(name, stack[i].nameList[stack[i].pos])
+ }
+ }
+ return name
+}
+
+// Err() returns any error encountered by Advance. Never blocks.
+func (fscabsi *FsCasIter) Err() error {
+ return fscabsi.err
+}
+
+// Cancel() indicates that the iteration stream should terminate early.
+// Never blocks. May be called concurrently with other methods on fscabsi.
+func (fscabsi *FsCasIter) Cancel() {
+ fscabsi.mu.Lock()
+ fscabsi.cancelled = true
+ fscabsi.mu.Unlock()
+}
+
+// -----------------------------------------------------------
+
+// An errorChunkStream is a localblobstore.ChunkStream that yields an error.
+type errorChunkStream struct {
+ err error
+}
+
+func (*errorChunkStream) Advance() bool { return false }
+func (*errorChunkStream) Value([]byte) []byte { return nil }
+func (ecs *errorChunkStream) Err() error { return ecs.err }
+func (*errorChunkStream) Cancel() {}
+
+// BlobChunkStream() returns a ChunkStream that can be used to read the ordered
+// list of content hashes of chunks in blob blobName. It is expected that this
+// list will be presented to RecipeStreamFromChunkStream() on another device, to create a
+// recipe for transmitting the blob efficiently to that other device.
+func (fscabs *FsCaBlobStore) BlobChunkStream(ctx *context.T, blobName string) (cs localblobstore.ChunkStream) {
+ blobID := fileNameToHash(blobDir, blobName)
+ if blobID == nil {
+ cs = &errorChunkStream{err: verror.New(errInvalidBlobName, ctx, blobName)}
+ } else {
+ cs = fscabs.bm.NewChunkStream(ctx, blobID)
+ }
+ return cs
+}
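+// Expected use (a sketch):
+// var buf [md5.Size]byte
+// cs := fscabs.BlobChunkStream(ctx, blobName)
+// for cs.Advance() {
+// chunkHash := cs.Value(buf[:])
+// // Send chunkHash to the other device here.
+// }
+// if cs.Err() != nil {
+// // The stream terminated early due to an error.
+// }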
+
+// -----------------------------------------------------------
+
+// LookupChunk returns the location of a chunk with the specified chunk hash
+// within the store.
+func (fscabs *FsCaBlobStore) LookupChunk(ctx *context.T, chunkHash []byte) (loc localblobstore.Location, err error) {
+ var chunkMapLoc blobmap.Location
+ chunkMapLoc, err = fscabs.bm.LookupChunk(ctx, chunkHash)
+ if err == nil {
+ loc.BlobName = hashToFileName(blobDir, chunkMapLoc.BlobID)
+ loc.Size = chunkMapLoc.Size
+ loc.Offset = chunkMapLoc.Offset
+ }
+ return loc, err
+}
+
+// -----------------------------------------------------------
+
+// A RecipeStream implements localblobstore.RecipeStream. It allows the client
+// to iterate over the recipe steps to recreate a blob identified by a stream
+// of chunk hashes (from chunkStream), but using parts of blobs in the current
+// blob store where possible.
+type RecipeStream struct {
+ fscabs *FsCaBlobStore
+ ctx *context.T
+
+ chunkStream localblobstore.ChunkStream // the stream of chunks in the blob
+ pendingChunkBuf [16]byte // a buffer for pendingChunk
+ pendingChunk []byte // the last unprocessed chunk hash read from chunkStream, or nil if none
+ step localblobstore.RecipeStep // the recipe step to be returned by Value()
+ mu sync.Mutex // protects cancelled
+ cancelled bool // whether Cancel() has been called
+}
+
+// RecipeStreamFromChunkStream() returns a pointer to a RecipeStream that allows
+// the client to iterate over each RecipeStep needed to create the blob formed
+// by the chunks in chunkStream.
+func (fscabs *FsCaBlobStore) RecipeStreamFromChunkStream(ctx *context.T, chunkStream localblobstore.ChunkStream) localblobstore.RecipeStream {
+ rs := new(RecipeStream)
+ rs.fscabs = fscabs
+ rs.ctx = ctx
+ rs.chunkStream = chunkStream
+ return rs
+}
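+// Expected use (a sketch; chunkStream would typically carry the chunk hashes
+// produced by BlobChunkStream() on another device):
+// rs := fscabs.RecipeStreamFromChunkStream(ctx, chunkStream)
+// for rs.Advance() {
+// step := rs.Value()
+// if step.Chunk == nil {
+// // The bytes are already present locally in blob step.Blob at
+// // [step.Offset, step.Offset+step.Size).
+// } else {
+// // The chunk with hash step.Chunk must be fetched from the sender.
+// }
+// }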
+
+// isCancelled() returns whether rs.Cancel() has been called.
+func (rs *RecipeStream) isCancelled() bool {
+ rs.mu.Lock()
+ cancelled := rs.cancelled
+ rs.mu.Unlock()
+ return cancelled
+}
+
+// Advance() stages an item so that it may be retrieved via Value().
+// Returns true iff there is an item to retrieve. Advance() must be
+// called before Value() is called. The caller is expected to read
+// until Advance() returns false, or to call Cancel().
+func (rs *RecipeStream) Advance() (ok bool) {
+ if rs.pendingChunk == nil && rs.chunkStream.Advance() {
+ rs.pendingChunk = rs.chunkStream.Value(rs.pendingChunkBuf[:])
+ }
+ for !ok && rs.pendingChunk != nil && !rs.isCancelled() {
+ var err error
+ var loc0 blobmap.Location
+ loc0, err = rs.fscabs.bm.LookupChunk(rs.ctx, rs.pendingChunk)
+ if err == nil {
+ blobName := hashToFileName(blobDir, loc0.BlobID)
+ var blobDesc *blobDesc
+ if blobDesc, err = rs.fscabs.getBlob(rs.ctx, blobName); err != nil {
+ // The BlobMap contained a reference to a
+ // deleted blob. Delete the reference in the
+ // BlobMap; the next loop iteration will
+ // consider the chunk again.
+ rs.fscabs.bm.DeleteBlob(rs.ctx, loc0.BlobID)
+ } else {
+ rs.fscabs.descUnref(blobDesc)
+ // The chunk is in a known blob. Combine
+ // contiguous chunks into a single recipe
+ // entry.
+ rs.pendingChunk = nil // consumed
+ for rs.pendingChunk == nil && rs.chunkStream.Advance() {
+ rs.pendingChunk = rs.chunkStream.Value(rs.pendingChunkBuf[:])
+ var loc blobmap.Location
+ loc, err = rs.fscabs.bm.LookupChunk(rs.ctx, rs.pendingChunk)
+ if err == nil && bytes.Compare(loc0.BlobID, loc.BlobID) == 0 && loc.Offset == loc0.Offset+loc0.Size {
+ loc0.Size += loc.Size
+ rs.pendingChunk = nil // consumed
+ }
+ }
+ rs.step = localblobstore.RecipeStep{Blob: blobName, Offset: loc0.Offset, Size: loc0.Size}
+ ok = true
+ }
+ } else { // The chunk is not in the BlobMap; yield a single chunk hash.
+ rs.step = localblobstore.RecipeStep{Chunk: rs.pendingChunk}
+ rs.pendingChunk = nil // consumed
+ ok = true
+ }
+ }
+ return ok && !rs.isCancelled()
+}
+
+// Value() returns the item that was staged by Advance(). May panic if
+// Advance() returned false or was not called. Never blocks.
+func (rs *RecipeStream) Value() localblobstore.RecipeStep {
+ return rs.step
+}
+
+// Err() returns any error encountered by Advance. Never blocks.
+func (rs *RecipeStream) Err() error {
+ // There are no errors to return here. The errors encountered in
+ // Advance() are expected and recoverable.
+ return nil
+}
+
+// Cancel() indicates that the client wishes to cease reading from the stream.
+// It causes the next call to Advance() to return false. Never blocks.
+// It may be called concurrently with other calls on the stream.
+func (rs *RecipeStream) Cancel() {
+ rs.mu.Lock()
+ rs.cancelled = true
+ rs.mu.Unlock()
+ rs.chunkStream.Cancel()
+}
+
+// -----------------------------------------------------------
+
+// gcTemp() attempts to delete files in dirName older than threshold.
+// Errors are ignored.
+func gcTemp(dirName string, threshold time.Time) {
+ fh, err := os.Open(dirName)
+ if err == nil {
+ fi, _ := fh.Readdir(0)
+ fh.Close()
+ for i := 0; i < len(fi); i++ {
+ if fi[i].ModTime().Before(threshold) {
+ os.Remove(filepath.Join(dirName, fi[i].Name()))
+ }
+ }
+ }
+}
+
+// GC() removes old temp files and content-addressed blocks that are no longer
+// referenced by any blob. It may be called concurrently with other calls to
+// GC(), and with uses of BlobReaders and BlobWriters.
+func (fscabs *FsCaBlobStore) GC(ctx *context.T) (err error) {
+ // Remove old temporary files.
+ gcTemp(filepath.Join(fscabs.rootName, tmpDir), time.Now().Add(-10*time.Hour))
+
+ // Add a key to caSet for each content-addressed fragment in *fscabs.
+ caSet := make(map[string]bool)
+ caIter := fscabs.ListCAIds(ctx)
+ for caIter.Advance() {
+ caSet[caIter.Value()] = true
+ }
+ err = caIter.Err()
+
+ // cmBlobs maps the names of blobs found in the BlobMap to their IDs.
+ // (The IDs can be derived from the names; the map is really being used
+ // to record which blobs exist, and the value merely avoids repeated
+ // conversions.)
+ cmBlobs := make(map[string][]byte)
+ if err == nil {
+ // Record all the blobs known to the BlobMap;
+ bs := fscabs.bm.NewBlobStream(ctx)
+ for bs.Advance() {
+ blobID := bs.Value(nil)
+ cmBlobs[hashToFileName(blobDir, blobID)] = blobID
+ }
+ }
+
+ if err == nil {
+ // Remove from cmBlobs all extant blobs, and remove from
+ // caSet all their fragments.
+ blobIter := fscabs.ListBlobIds(ctx)
+ for blobIter.Advance() {
+ var blobDesc *blobDesc
+ if blobDesc, err = fscabs.getBlob(ctx, blobIter.Value()); err == nil {
+ delete(cmBlobs, blobDesc.name)
+ for i := range blobDesc.fragment {
+ delete(caSet, blobDesc.fragment[i].fileName)
+ }
+ fscabs.descUnref(blobDesc)
+ }
+ }
+ }
+
+ if err == nil {
+ // Remove all blobs still mentioned in cmBlobs from the BlobMap;
+ // these are the ones that no longer exist in the blobs directory.
+ for _, blobID := range cmBlobs {
+ err = fscabs.bm.DeleteBlob(ctx, blobID)
+ if err != nil {
+ break
+ }
+ }
+ }
+
+ if err == nil {
+ // Remove from caSet all fragments referenced by open BlobReaders and
+ // BlobWriters. Advertise to new readers and writers which blobs are
+ // about to be deleted.
+ fscabs.mu.Lock()
+ for _, desc := range fscabs.activeDesc {
+ for i := range desc.fragment {
+ delete(caSet, desc.fragment[i].fileName)
+ }
+ }
+ fscabs.toDelete = append(fscabs.toDelete, &caSet)
+ fscabs.mu.Unlock()
+
+ // Delete the things that still remain in caSet; they are no longer
+ // referenced.
+ for caName := range caSet {
+ os.Remove(filepath.Join(fscabs.rootName, caName))
+ }
+
+ // Stop advertising what's been deleted.
+ fscabs.mu.Lock()
+ n := len(fscabs.toDelete)
+ var i int
+ // We require that &caSet still be in the list.
+ for i = 0; fscabs.toDelete[i] != &caSet; i++ {
+ }
+ fscabs.toDelete[i] = fscabs.toDelete[n-1]
+ fscabs.toDelete = fscabs.toDelete[0 : n-1]
+ fscabs.mu.Unlock()
+ }
+ return err
+}
diff --git a/services/syncbase/localblobstore/fs_cablobstore/fs_cablobstore_test.go b/services/syncbase/localblobstore/fs_cablobstore/fs_cablobstore_test.go
new file mode 100644
index 0000000..0d964c3
--- /dev/null
+++ b/services/syncbase/localblobstore/fs_cablobstore/fs_cablobstore_test.go
@@ -0,0 +1,97 @@
+// Copyright 2015 The Vanadium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// A test for fs_cablobstore
+package fs_cablobstore_test
+
+import "io/ioutil"
+import "os"
+import "testing"
+
+import "v.io/syncbase/x/ref/services/syncbase/localblobstore"
+import "v.io/syncbase/x/ref/services/syncbase/localblobstore/fs_cablobstore"
+import "v.io/syncbase/x/ref/services/syncbase/localblobstore/localblobstore_testlib"
+import "v.io/x/ref/test"
+import _ "v.io/x/ref/runtime/factories/generic"
+
+// This test case tests adding files, retrieving them and deleting them. One
+// can't retrieve or delete something that hasn't been created, so it's all one
+// test case.
+func TestAddRetrieveAndDelete(t *testing.T) {
+ ctx, shutdown := test.V23Init()
+ defer shutdown()
+
+ // Make a temporary directory.
+ var err error
+ var testDirName string
+ testDirName, err = ioutil.TempDir("", "localblobstore_test")
+ if err != nil {
+ t.Fatalf("localblobstore_test: can't make tmp directory: %v\n", err)
+ }
+ defer os.RemoveAll(testDirName)
+
+ // Create an fs_cablobstore.
+ var bs localblobstore.BlobStore
+ bs, err = fs_cablobstore.Create(ctx, testDirName)
+ if err != nil {
+ t.Fatalf("fs_cablobstore.Create failed: %v", err)
+ }
+
+ // Test it.
+ localblobstore_testlib.AddRetrieveAndDelete(t, ctx, bs, testDirName)
+}
+
+// This test case tests the incremental transfer of blobs via chunks.
+func TestWritingViaChunks(t *testing.T) {
+ ctx, shutdown := test.V23Init()
+ defer shutdown()
+
+ var err error
+
+ // Make a pair of blobstores, each in its own temporary directory.
+ const nBlobStores = 2
+ var testDirName [nBlobStores]string
+ var bs [nBlobStores]localblobstore.BlobStore
+ for i := 0; i != nBlobStores; i++ {
+ testDirName[i], err = ioutil.TempDir("", "localblobstore_test")
+ if err != nil {
+ t.Fatalf("localblobstore_test: can't make tmp directory: %v\n", err)
+ }
+ defer os.RemoveAll(testDirName[i])
+
+ bs[i], err = fs_cablobstore.Create(ctx, testDirName[i])
+ if err != nil {
+ t.Fatalf("fs_cablobstore.Create failed: %v", err)
+ }
+ }
+
+ // Test it.
+ localblobstore_testlib.WriteViaChunks(t, ctx, bs)
+}
+
+// This test case checks that empty blobs can be created, then extended via
+// ResumeBlobWriter.
+func TestCreateAndResume(t *testing.T) {
+ ctx, shutdown := test.V23Init()
+ defer shutdown()
+
+ // Make a temporary directory.
+ var err error
+ var testDirName string
+ testDirName, err = ioutil.TempDir("", "localblobstore_test")
+ if err != nil {
+ t.Fatalf("localblobstore_test: can't make tmp directory: %v\n", err)
+ }
+ defer os.RemoveAll(testDirName)
+
+ // Create an fs_cablobstore.
+ var bs localblobstore.BlobStore
+ bs, err = fs_cablobstore.Create(ctx, testDirName)
+ if err != nil {
+ t.Fatalf("fs_cablobstore.Create failed: %v", err)
+ }
+
+ // Test it.
+ localblobstore_testlib.CreateAndResume(t, ctx, bs)
+}
diff --git a/services/syncbase/localblobstore/localblobstore_test.go b/services/syncbase/localblobstore/localblobstore_test.go
new file mode 100644
index 0000000..a258c0f
--- /dev/null
+++ b/services/syncbase/localblobstore/localblobstore_test.go
@@ -0,0 +1,97 @@
+// Copyright 2015 The Vanadium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// A test for localblobstore
+package localblobstore_test
+
+import "io/ioutil"
+import "os"
+import "testing"
+
+import "v.io/syncbase/x/ref/services/syncbase/localblobstore"
+import "v.io/syncbase/x/ref/services/syncbase/localblobstore/fs_cablobstore"
+import "v.io/syncbase/x/ref/services/syncbase/localblobstore/localblobstore_testlib"
+import "v.io/x/ref/test"
+import _ "v.io/x/ref/runtime/factories/generic"
+
+// This test case tests adding files, retrieving them and deleting them. One
+// can't retrieve or delete something that hasn't been created, so it's all one
+// test case.
+func TestAddRetrieveAndDelete(t *testing.T) {
+ ctx, shutdown := test.V23Init()
+ defer shutdown()
+
+ // Make a temporary directory.
+ var err error
+ var testDirName string
+ testDirName, err = ioutil.TempDir("", "localblobstore_test")
+ if err != nil {
+ t.Fatalf("localblobstore_test: can't make tmp directory: %v\n", err)
+ }
+ defer os.RemoveAll(testDirName)
+
+ // Create an fs_cablobstore.
+ var bs localblobstore.BlobStore
+ bs, err = fs_cablobstore.Create(ctx, testDirName)
+ if err != nil {
+ t.Fatalf("fs_cablobstore.Create failed: %v", err)
+ }
+
+ // Test it.
+ localblobstore_testlib.AddRetrieveAndDelete(t, ctx, bs, testDirName)
+}
+
+// This test case tests the incremental transfer of blobs via chunks.
+func TestWritingViaChunks(t *testing.T) {
+ ctx, shutdown := test.V23Init()
+ defer shutdown()
+
+ var err error
+
+ // Make a pair of blobstores, each in its own temporary directory.
+ const nBlobStores = 2
+ var testDirName [nBlobStores]string
+ var bs [nBlobStores]localblobstore.BlobStore
+ for i := 0; i != nBlobStores; i++ {
+ testDirName[i], err = ioutil.TempDir("", "localblobstore_test")
+ if err != nil {
+ t.Fatalf("localblobstore_test: can't make tmp directory: %v\n", err)
+ }
+ defer os.RemoveAll(testDirName[i])
+
+ bs[i], err = fs_cablobstore.Create(ctx, testDirName[i])
+ if err != nil {
+ t.Fatalf("fs_cablobstore.Create failed: %v", err)
+ }
+ }
+
+ // Test it.
+ localblobstore_testlib.WriteViaChunks(t, ctx, bs)
+}
+
+// This test case checks that empty blobs can be created, then extended via
+// ResumeBlobWriter.
+func TestCreateAndResume(t *testing.T) {
+ ctx, shutdown := test.V23Init()
+ defer shutdown()
+
+ // Make a temporary directory.
+ var err error
+ var testDirName string
+ testDirName, err = ioutil.TempDir("", "localblobstore_test")
+ if err != nil {
+ t.Fatalf("localblobstore_test: can't make tmp directory: %v\n", err)
+ }
+ defer os.RemoveAll(testDirName)
+
+ // Create an fs_cablobstore.
+ var bs localblobstore.BlobStore
+ bs, err = fs_cablobstore.Create(ctx, testDirName)
+ if err != nil {
+ t.Fatalf("fs_cablobstore.Create failed: %v", err)
+ }
+
+ // Test it.
+ localblobstore_testlib.CreateAndResume(t, ctx, bs)
+}
diff --git a/services/syncbase/localblobstore/localblobstore_testlib/localblobstore_testlib.go b/services/syncbase/localblobstore/localblobstore_testlib/localblobstore_testlib.go
new file mode 100644
index 0000000..d6c1d9d
--- /dev/null
+++ b/services/syncbase/localblobstore/localblobstore_testlib/localblobstore_testlib.go
@@ -0,0 +1,889 @@
+// Copyright 2015 The Vanadium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// A test library for localblobstores.
+package localblobstore_testlib
+
+import "bytes"
+import "crypto/md5"
+import "fmt"
+import "io"
+import "io/ioutil"
+import "path/filepath"
+import "testing"
+
+import "v.io/syncbase/x/ref/services/syncbase/localblobstore"
+import "v.io/syncbase/x/ref/services/syncbase/localblobstore/chunker"
+import "v.io/v23/context"
+import "v.io/v23/verror"
+
+// A blobOrBlockOrFile represents some bytes that may be contained in a named
+// blob, a named file, or in an explicit slice of bytes.
+type blobOrBlockOrFile struct {
+ blob string // If non-empty, the name of the blob containing the bytes.
+ file string // If non-empty and blob is empty, the name of the file containing the bytes.
+ size int64 // Size of part of file or blob, or -1 for "everything until EOF".
+ offset int64 // Offset within file or blob.
+ block []byte // If both blob and file are empty, a slice containing the bytes.
+}
+
+// A testBlob records that some specified content has been stored with a given
+// blob name in the blob store.
+type testBlob struct {
+ content []byte // content that has been stored.
+ blobName string // the name of the blob.
+}
+
+// removeBlobFromBlobVector() removes the entry named blobName from
+// blobVector[], returning the new vector.
+func removeBlobFromBlobVector(blobVector []testBlob, blobName string) []testBlob {
+ n := len(blobVector)
+ i := 0
+ for i = 0; i != n && blobName != blobVector[i].blobName; i++ {
+ }
+ if i != n {
+ blobVector[i] = blobVector[n-1]
+ blobVector = blobVector[0 : n-1]
+ }
+ return blobVector
+}
+
+// writeBlob() writes a new blob to bs and appends a description of it to
+// blobVector, returning the updated vector. The new blob's content is
+// described by the elements of data[]. Any error messages
+// generated include the index of the blob in blobVector and its content; the
+// latter is assumed to be printable. The expected content of the blob is
+// "content", so that this routine can check it. If useResume is true and data[]
+// has more than one element, the function deliberately uses ResumeBlobWriter()
+// to exercise it.
+func writeBlob(t *testing.T, ctx *context.T, bs localblobstore.BlobStore, blobVector []testBlob,
+ content []byte, useResume bool, data ...blobOrBlockOrFile) []testBlob {
+ var bw localblobstore.BlobWriter
+ var err error
+ bw, err = bs.NewBlobWriter(ctx, "")
+ if err != nil {
+ t.Errorf("localblobstore.NewBlobWriter blob %d:%s failed: %v", len(blobVector), string(content), err)
+ }
+ blobName := bw.Name()
+
+ // Construct the blob from the pieces.
+ // There is a loop within the loop to exercise the possibility of
+ // passing multiple fragments to AppendFragment().
+ for i := 0; i != len(data) && err == nil; {
+ if len(data[i].blob) != 0 {
+ err = bw.AppendBlob(data[i].blob, data[i].size, data[i].offset)
+ if err != nil {
+ t.Errorf("localblobstore.AppendBlob %d:%s blob %s failed: %v", len(blobVector), string(content), data[i].blob, err)
+ }
+ i++
+ } else {
+ var pieces []localblobstore.BlockOrFile
+ for ; i != len(data) && len(data[i].blob) == 0; i++ {
+ if len(data[i].file) != 0 {
+ pieces = append(pieces, localblobstore.BlockOrFile{
+ FileName: data[i].file,
+ Size: data[i].size,
+ Offset: data[i].offset})
+ } else {
+ pieces = append(pieces, localblobstore.BlockOrFile{Block: data[i].block})
+ }
+ }
+ err = bw.AppendFragment(pieces...)
+ if err != nil {
+ t.Errorf("localblobstore.AppendFragment %d:%s failed on %v: %v", len(blobVector), string(content), pieces, err)
+ }
+ }
+ if useResume && i < len(data)-1 && err == nil {
+ err = bw.CloseWithoutFinalize()
+ if err == nil {
+ bw, err = bs.ResumeBlobWriter(ctx, blobName)
+ }
+ }
+ }
+
+ if bw != nil {
+ if bw.Size() != int64(len(content)) {
+ t.Errorf("localblobstore.Size before finalization %d:%s got %d, expected %d", len(blobVector), string(content), bw.Size(), len(content))
+ }
+ if bw.IsFinalized() {
+ t.Errorf("localblobstore.IsFinalized %d:%s got true, expected false", len(blobVector), string(content))
+ }
+ err = bw.Close()
+ if err != nil {
+ t.Errorf("localblobstore.Close %d:%s failed: %v", len(blobVector), string(content), err)
+ }
+ if !bw.IsFinalized() {
+ t.Errorf("localblobstore.IsFinalized %d:%s got true, expected false", len(blobVector), string(content))
+ }
+ if bw.Size() != int64(len(content)) {
+ t.Errorf("localblobstore.Size %d:%s after finalization got %d, expected %d", len(blobVector), string(content), bw.Size(), len(content))
+ }
+ if bw.Name() != blobName {
+ t.Errorf("localblobstore %d:%s name changed when finalized was %s now %s", len(blobVector), string(content), blobName, bw.Name())
+ }
+ hasher := md5.New()
+ hasher.Write(content)
+ if bytes.Compare(bw.Hash(), hasher.Sum(nil)) != 0 {
+ t.Errorf("localblobstore %d:%s BlobWriter.Hash got %v, expected %v", len(blobVector), string(content), bw.Hash(), hasher.Sum(nil))
+ }
+ }
+
+ return append(blobVector,
+ testBlob{
+ content: content,
+ blobName: blobName,
+ })
+}
+
+// readBlob() returns a substring of the content of the blob named blobName in bs.
+// The return values are:
+// - the "size" bytes from the content, starting at the given "offset",
+// measured from "whence" (as defined by io.Seeker.Seek).
+// - the position to which BlobBeader seeks to,
+// - the md5 hash of the bytes read, and
+// - the md5 hash of the bytes of the blob, as returned by BlobReader.Hash(),
+// - and error.
+func readBlob(ctx *context.T, bs localblobstore.BlobStore, blobName string,
+ size int64, offset int64, whence int) (content []byte, pos int64, hash []byte, fullHash []byte, err error) {
+
+ var br localblobstore.BlobReader
+ hasher := md5.New()
+ br, err = bs.NewBlobReader(ctx, blobName)
+ if err == nil {
+ buf := make([]byte, 8192, 8192)
+ fullHash = br.Hash()
+ pos, err = br.Seek(offset, whence)
+ if err == nil {
+ var n int
+ first := true // Read at least once, to test reading zero bytes.
+ for err == nil && (size == -1 || int64(len(content)) < size || first) {
+ // Read just what was asked for.
+ var toRead []byte = buf
+ if size >= 0 && int(size)-len(content) < len(buf) {
+ toRead = buf[0 : int(size)-len(content)]
+ }
+ n, err = br.Read(toRead)
+ hasher.Write(toRead[0:n])
+ if size >= 0 && int64(len(content)+n) > size {
+ n = int(size) - len(content)
+ }
+ content = append(content, toRead[0:n]...)
+ first = false
+ }
+ }
+ br.Close()
+ }
+ return content, pos, hasher.Sum(nil), fullHash, err
+}
+
+// checkWrittenBlobsAreReadable() checks that the blobs in blobVector[] can be
+// read, and that they contain the appropriate data.
+func checkWrittenBlobsAreReadable(t *testing.T, ctx *context.T, bs localblobstore.BlobStore, blobVector []testBlob) {
+ for i := range blobVector {
+ var size int64
+ data := blobVector[i].content
+ dataLen := int64(len(data))
+ blobName := blobVector[i].blobName
+ for size = -1; size != dataLen+1; size++ {
+ var offset int64
+ for offset = -dataLen - 1; offset != dataLen+1; offset++ {
+ for whence := -1; whence != 4; whence++ {
+ content, pos, hash, fullHash, err := readBlob(ctx, bs, blobName, size, offset, whence)
+
+ // Compute expected seek position.
+ expectedPos := offset
+ if whence == 2 {
+ expectedPos += dataLen
+ }
+
+ // Compute the expected size.
+ expectedSize := size
+ if expectedSize == -1 || expectedPos+expectedSize > dataLen {
+ expectedSize = dataLen - expectedPos
+ }
+
+ // Check that reads behave as expected.
+ if (whence == -1 || whence == 3) &&
+ verror.ErrorID(err) == "v.io/syncbase/x/ref/services/syncbase/localblobstore/fs_cablobstore.errBadSeekWhence" {
+ // Expected error from bad "whence" value.
+ } else if expectedPos < 0 &&
+ verror.ErrorID(err) == "v.io/syncbase/x/ref/services/syncbase/localblobstore/fs_cablobstore.errNegativeSeekPosition" {
+ // Expected error from negative Seek position.
+ } else if expectedPos > dataLen &&
+ verror.ErrorID(err) == "v.io/syncbase/x/ref/services/syncbase/localblobstore/fs_cablobstore.errIllegalPositionForRead" {
+ // Expected error from too high a Seek position.
+ } else if 0 <= expectedPos && expectedPos+expectedSize <= int64(len(data)) &&
+ bytes.Compare(data[expectedPos:expectedPos+expectedSize], content) == 0 && err == io.EOF &&
+ pos == expectedPos && expectedPos+expectedSize == dataLen {
+ // Expected success with EOF.
+ } else if 0 <= expectedPos && expectedPos+expectedSize <= int64(len(data)) &&
+ bytes.Compare(data[expectedPos:expectedPos+expectedSize], content) == 0 && err == nil &&
+ pos == expectedPos && expectedPos+expectedSize != dataLen {
+ if pos == 0 && size == -1 && bytes.Compare(hash, fullHash) != 0 {
+ t.Errorf("localblobstore read test on %q size %d offset %d whence %d; got hash %v, expected %v (blob is %q)",
+ string(data), size, offset, whence,
+ hash, fullHash, blobName)
+ } // Else expected success without EOF.
+ } else {
+ t.Errorf("localblobstore read test on %q size %d offset %d whence %d yields %q pos %d %v (blob is %q)",
+ string(data), size, offset, whence,
+ content, pos, err, blobName)
+ }
+ }
+ }
+ }
+ }
+}
+
+// checkAllBlobs() checks all the blobs in bs to ensure they correspond to
+// those in blobVector[].
+func checkAllBlobs(t *testing.T, ctx *context.T, bs localblobstore.BlobStore, blobVector []testBlob, testDirName string) {
+ blobCount := 0
+ iterator := bs.ListBlobIds(ctx)
+ for iterator.Advance() {
+ fileName := iterator.Value()
+ i := 0
+ for ; i != len(blobVector) && fileName != blobVector[i].blobName; i++ {
+ }
+ if i == len(blobVector) {
+ t.Errorf("localblobstore.ListBlobIds found unexpected file %s", fileName)
+ } else {
+ content, pos, hash, fullHash, err := readBlob(ctx, bs, fileName, -1, 0, 0)
+ if err != nil && err != io.EOF {
+ t.Errorf("localblobstore.ListCAIds can't read %q: %v", filepath.Join(testDirName, fileName), err)
+ } else if bytes.Compare(blobVector[i].content, content) != 0 {
+ t.Errorf("localblobstore.ListCAIds found unexpected blob content: %q, contains %q, expected %q",
+ filepath.Join(testDirName, fileName), content, string(blobVector[i].content))
+ } else if pos != 0 {
+ t.Errorf("localblobstore.ListCAIds Seek on %q returned %d instead of 0",
+ filepath.Join(testDirName, fileName), pos)
+ }
+ if bytes.Compare(hash, fullHash) != 0 {
+ t.Errorf("localblobstore.ListCAIds read on %q; got hash %v, expected %v",
+ fileName, hash, fullHash)
+ }
+ }
+ blobCount++
+ }
+ if iterator.Err() != nil {
+ t.Errorf("localblobstore.ListBlobIds iteration failed: %v", iterator.Err())
+ }
+ if blobCount != len(blobVector) {
+ t.Errorf("localblobstore.ListBlobIds iteration expected 4 files, got %d", blobCount)
+ }
+}
+
+// checkFragments() checks all the fragments in bs to ensure they
+// correspond to those in fragmentMap[], iff testDirName is non-empty.
+func checkFragments(t *testing.T, ctx *context.T, bs localblobstore.BlobStore, fragmentMap map[string]bool, testDirName string) {
+ if testDirName != "" {
+ caCount := 0
+ iterator := bs.ListCAIds(ctx)
+ for iterator.Advance() {
+ fileName := iterator.Value()
+ content, err := ioutil.ReadFile(filepath.Join(testDirName, fileName))
+ if err != nil && err != io.EOF {
+ t.Errorf("localblobstore.ListCAIds can't read %q: %v", filepath.Join(testDirName, fileName), err)
+ } else if !fragmentMap[string(content)] {
+ t.Errorf("localblobstore.ListCAIds found unexpected fragment entry: %q, contains %q", filepath.Join(testDirName, fileName), content)
+ } else {
+ hasher := md5.New()
+ hasher.Write(content)
+ hash := hasher.Sum(nil)
+ nameFromContent := filepath.Join("cas",
+ fmt.Sprintf("%02x", hash[0]),
+ fmt.Sprintf("%02x", hash[1]),
+ fmt.Sprintf("%02x", hash[2]),
+ fmt.Sprintf("%02x%02x%02x%02x%02x%02x%02x%02x%02x%02x%02x%02x%02x",
+ hash[3],
+ hash[4], hash[5], hash[6], hash[7],
+ hash[8], hash[9], hash[10], hash[11],
+ hash[12], hash[13], hash[14], hash[15]))
+ if nameFromContent != fileName {
+ t.Errorf("localblobstore.ListCAIds hash of fragment: got %q, expected %q (content=%s)", nameFromContent, fileName, string(content))
+ }
+ }
+ caCount++
+ }
+ if iterator.Err() != nil {
+ t.Errorf("localblobstore.ListCAIds iteration failed: %v", iterator.Err())
+ }
+ if caCount != len(fragmentMap) {
+ t.Errorf("localblobstore.ListCAIds iteration expected %d files, got %d", len(fragmentMap), caCount)
+ }
+ }
+}
+
+// AddRetrieveAndDelete() tests adding, retrieving, and deleting blobs from a
+// blobstore bs. One can't retrieve or delete something that hasn't been
+// created, so it's all done in one routine. If testDirName is non-empty,
+// the blobstore is assumed to be accessible in the file system, and its
+// files are checked.
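+//
+// A hypothetical caller, for a store backed by fs_cablobstore (a sketch only,
+// with error handling omitted; the test name and temp-dir prefix are
+// illustrative, not part of this package):
+//   func TestAddRetrieveAndDelete(t *testing.T) {
+//     ctx, shutdown := test.V23Init()
+//     defer shutdown()
+//     testDirName, _ := ioutil.TempDir("", "localblobstore_test")
+//     defer os.RemoveAll(testDirName)
+//     bs, _ := fs_cablobstore.Create(ctx, testDirName)
+//     defer bs.Close()
+//     localblobstore_testlib.AddRetrieveAndDelete(t, ctx, bs, testDirName)
+//   }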
+func AddRetrieveAndDelete(t *testing.T, ctx *context.T, bs localblobstore.BlobStore, testDirName string) {
+ var err error
+
+ // Check that there are no files in the blobstore we were given.
+ iterator := bs.ListBlobIds(ctx)
+ for iterator.Advance() {
+ fileName := iterator.Value()
+ t.Errorf("unexpected file %q\n", fileName)
+ }
+ if iterator.Err() != nil {
+ t.Errorf("localblobstore.ListBlobIds iteration failed: %v", iterator.Err())
+ }
+
+ // Create the strings: "wom", "bat", "wombat", "batwom", "atwo", "atwoatwoombatatwo".
+ womData := []byte("wom")
+ batData := []byte("bat")
+ wombatData := []byte("wombat")
+ batwomData := []byte("batwom")
+ atwoData := []byte("atwo")
+ atwoatwoombatatwoData := []byte("atwoatwoombatatwo")
+
+ // fragmentMap will have an entry per content-addressed fragment.
+ fragmentMap := make(map[string]bool)
+
+ // Create the blobs, by various means.
+
+ var blobVector []testBlob // Accumulate the blobs we create here.
+
+ blobVector = writeBlob(t, ctx, bs, blobVector,
+ womData, false,
+ blobOrBlockOrFile{block: womData})
+ womName := blobVector[len(blobVector)-1].blobName
+ fragmentMap[string(womData)] = true
+
+ blobVector = writeBlob(t, ctx, bs, blobVector,
+ batData, false,
+ blobOrBlockOrFile{block: batData})
+ batName := blobVector[len(blobVector)-1].blobName
+ fragmentMap[string(batData)] = true
+
+ blobVector = writeBlob(t, ctx, bs, blobVector,
+ wombatData, false,
+ blobOrBlockOrFile{block: wombatData})
+ firstWombatName := blobVector[len(blobVector)-1].blobName
+ fragmentMap[string(wombatData)] = true
+
+ blobVector = writeBlob(t, ctx, bs, blobVector,
+ wombatData, true,
+ blobOrBlockOrFile{block: womData},
+ blobOrBlockOrFile{block: batData})
+
+ blobVector = writeBlob(t, ctx, bs, blobVector,
+ wombatData, false,
+ blobOrBlockOrFile{
+ blob: firstWombatName,
+ size: -1,
+ offset: 0})
+
+ blobVector = writeBlob(t, ctx, bs, blobVector,
+ wombatData, false,
+ blobOrBlockOrFile{
+ blob: firstWombatName,
+ size: 6,
+ offset: 0})
+
+ blobVector = writeBlob(t, ctx, bs, blobVector,
+ batwomData, false,
+ blobOrBlockOrFile{
+ blob: firstWombatName,
+ size: 3,
+ offset: 3},
+ blobOrBlockOrFile{
+ blob: firstWombatName,
+ size: 3,
+ offset: 0})
+ batwomName := blobVector[len(blobVector)-1].blobName
+
+ blobVector = writeBlob(t, ctx, bs, blobVector,
+ atwoData, false,
+ blobOrBlockOrFile{
+ blob: batwomName,
+ size: 4,
+ offset: 1})
+ atwoName := blobVector[len(blobVector)-1].blobName
+
+ blobVector = writeBlob(t, ctx, bs, blobVector,
+ atwoatwoombatatwoData, true,
+ blobOrBlockOrFile{
+ blob: atwoName,
+ size: -1,
+ offset: 0},
+ blobOrBlockOrFile{
+ blob: atwoName,
+ size: 4,
+ offset: 0},
+ blobOrBlockOrFile{
+ blob: firstWombatName,
+ size: -1,
+ offset: 1},
+ blobOrBlockOrFile{
+ blob: batName,
+ size: -1,
+ offset: 1},
+ blobOrBlockOrFile{
+ blob: womName,
+ size: 2,
+ offset: 0})
+ atwoatwoombatatwoName := blobVector[len(blobVector)-1].blobName
+
+ // -------------------------------------------------
+ // Check that the state is as we expect.
+ checkWrittenBlobsAreReadable(t, ctx, bs, blobVector)
+ checkAllBlobs(t, ctx, bs, blobVector, testDirName)
+ checkFragments(t, ctx, bs, fragmentMap, testDirName)
+
+ // -------------------------------------------------
+ // Nothing should change if we garbage collect.
+ bs.GC(ctx)
+ checkWrittenBlobsAreReadable(t, ctx, bs, blobVector)
+ checkAllBlobs(t, ctx, bs, blobVector, testDirName)
+ checkFragments(t, ctx, bs, fragmentMap, testDirName)
+
+ // -------------------------------------------------
+ // Ensure that deleting non-existent blobs fails.
+ err = bs.DeleteBlob(ctx, "../../../../etc/passwd")
+ if verror.ErrorID(err) != "v.io/syncbase/x/ref/services/syncbase/localblobstore/fs_cablobstore.errInvalidBlobName" {
+		t.Errorf("DeleteBlob did not reject a bogus blob name")
+ }
+ err = bs.DeleteBlob(ctx, "foo/00/00/00/00000000000000000000000000")
+ if verror.ErrorID(err) != "v.io/syncbase/x/ref/services/syncbase/localblobstore/fs_cablobstore.errInvalidBlobName" {
+		t.Errorf("DeleteBlob did not reject a bogus blob name")
+ }
+
+ // -------------------------------------------------
+ // Delete a blob.
+ err = bs.DeleteBlob(ctx, batName)
+ if err != nil {
+ t.Errorf("DeleteBlob failed to delete blob %q: %v", batName, err)
+ }
+ blobVector = removeBlobFromBlobVector(blobVector, batName)
+
+ // -------------------------------------------------
+ // Check that the state is as we expect.
+ checkWrittenBlobsAreReadable(t, ctx, bs, blobVector)
+ checkAllBlobs(t, ctx, bs, blobVector, testDirName)
+ checkFragments(t, ctx, bs, fragmentMap, testDirName)
+
+ // -------------------------------------------------
+ // Nothing should change if we garbage collect.
+ bs.GC(ctx)
+ checkWrittenBlobsAreReadable(t, ctx, bs, blobVector)
+ checkAllBlobs(t, ctx, bs, blobVector, testDirName)
+ checkFragments(t, ctx, bs, fragmentMap, testDirName)
+
+ // -------------------------------------------------
+ // Open a BlobReader on a blob we're about to delete,
+ // so its fragments won't be garbage collected.
+
+ var br localblobstore.BlobReader
+ br, err = bs.NewBlobReader(ctx, atwoatwoombatatwoName)
+ if err != nil {
+ t.Errorf("NewBlobReader failed in blob %q: %v", atwoatwoombatatwoName, err)
+ }
+
+ // -------------------------------------------------
+ // Delete a blob. This should be the last on-disc reference to the
+ // content-addressed fragment "bat", but the fragment won't be deleted
+	// until we close the reader and garbage collect.
+ err = bs.DeleteBlob(ctx, atwoatwoombatatwoName)
+ if err != nil {
+ t.Errorf("DeleteBlob failed to delete blob %q: %v", atwoatwoombatatwoName, err)
+ }
+ blobVector = removeBlobFromBlobVector(blobVector, atwoatwoombatatwoName)
+
+ // -------------------------------------------------
+ // Check that the state is as we expect.
+ checkWrittenBlobsAreReadable(t, ctx, bs, blobVector)
+ checkAllBlobs(t, ctx, bs, blobVector, testDirName)
+ checkFragments(t, ctx, bs, fragmentMap, testDirName)
+
+ // -------------------------------------------------
+ // Garbage collection should change nothing; the fragment involved
+ // is still referenced from the open reader *br.
+ bs.GC(ctx)
+ checkWrittenBlobsAreReadable(t, ctx, bs, blobVector)
+ checkAllBlobs(t, ctx, bs, blobVector, testDirName)
+ checkFragments(t, ctx, bs, fragmentMap, testDirName)
+
+ // -------------------------------------------------
+
+ // Close the open BlobReader and garbage collect.
+ err = br.Close()
+ if err != nil {
+ t.Errorf("BlobReader.Close failed on blob %q: %v", atwoatwoombatatwoName, err)
+ }
+ delete(fragmentMap, string(batData))
+
+ bs.GC(ctx)
+ checkWrittenBlobsAreReadable(t, ctx, bs, blobVector)
+ checkAllBlobs(t, ctx, bs, blobVector, testDirName)
+ checkFragments(t, ctx, bs, fragmentMap, testDirName)
+
+ // -------------------------------------------------
+ // Delete all blobs.
+ for len(blobVector) != 0 {
+ err = bs.DeleteBlob(ctx, blobVector[0].blobName)
+ if err != nil {
+ t.Errorf("DeleteBlob failed to delete blob %q: %v", blobVector[0].blobName, err)
+ }
+ blobVector = removeBlobFromBlobVector(blobVector, blobVector[0].blobName)
+ }
+
+ // -------------------------------------------------
+ // Check that the state is as we expect.
+ checkWrittenBlobsAreReadable(t, ctx, bs, blobVector)
+ checkAllBlobs(t, ctx, bs, blobVector, testDirName)
+ checkFragments(t, ctx, bs, fragmentMap, testDirName)
+
+ // -------------------------------------------------
+ // The remaining fragments should be removed when we garbage collect.
+ for frag := range fragmentMap {
+ delete(fragmentMap, frag)
+ }
+ bs.GC(ctx)
+ checkWrittenBlobsAreReadable(t, ctx, bs, blobVector)
+ checkAllBlobs(t, ctx, bs, blobVector, testDirName)
+ checkFragments(t, ctx, bs, fragmentMap, testDirName)
+}
+
+// writeBlobFromReader() writes the contents of rd to blobstore bs, as blob
+// "name", or picks a name name if "name" is empty. It returns the name of the
+// blob. Errors cause the test to terminate. Error messages contain the
+// "callSite" value to allow the test to tell which call site is which.
+func writeBlobFromReader(t *testing.T, ctx *context.T, bs localblobstore.BlobStore, name string, rd io.Reader, callSite int) string {
+ var err error
+ var bw localblobstore.BlobWriter
+ if bw, err = bs.NewBlobWriter(ctx, name); err != nil {
+ t.Fatalf("callSite %d: NewBlobWriter failed: %v", callSite, err)
+ }
+ blobName := bw.Name()
+ buf := make([]byte, 8192) // buffer for data read from rd.
+ for i := 0; err == nil; i++ {
+ var n int
+ if n, err = rd.Read(buf); err != nil && err != io.EOF {
+ t.Fatalf("callSite %d: unexpected error from reader: %v", callSite, err)
+ }
+ if n > 0 {
+ if err = bw.AppendFragment(localblobstore.BlockOrFile{Block: buf[:n]}); err != nil {
+ t.Fatalf("callSite %d: BlobWriter.AppendFragment failed: %v", callSite, err)
+ }
+ // Every so often, close without finalizing, and reopen.
+ if (i % 7) == 0 {
+ if err = bw.CloseWithoutFinalize(); err != nil {
+ t.Fatalf("callSite %d: BlobWriter.CloseWithoutFinalize failed: %v", callSite, err)
+ }
+ if bw, err = bs.ResumeBlobWriter(ctx, blobName); err != nil {
+ t.Fatalf("callSite %d: ResumeBlobWriter %q failed: %v", callSite, blobName, err)
+ }
+ }
+ }
+ }
+ if err = bw.Close(); err != nil {
+ t.Fatalf("callSite %d: BlobWriter.Close failed: %v", callSite, err)
+ }
+ return blobName
+}
+
+// checkBlobAgainstReader() verifies that the blob blobName has the same bytes as the reader rd.
+// Errors cause the test to terminate. Error messages contain the
+// "callSite" value to allow the test to tell which call site is which.
+func checkBlobAgainstReader(t *testing.T, ctx *context.T, bs localblobstore.BlobStore, blobName string, rd io.Reader, callSite int) {
+ // Open a reader on the blob.
+ var blob_rd io.Reader
+ var blob_err error
+ if blob_rd, blob_err = bs.NewBlobReader(ctx, blobName); blob_err != nil {
+ t.Fatalf("callSite %d: NewBlobReader on %q failed: %v", callSite, blobName, blob_err)
+ }
+
+ // Variables for reading the two streams, indexed by "reader" and "blob".
+ type stream struct {
+ name string
+ rd io.Reader // Reader for this stream
+ buf []byte // buffer for data
+ i int // bytes processed within current buffer
+ n int // valid bytes in current buffer
+ err error // error, or nil
+ }
+
+ s := [2]stream{
+ {name: "reader", rd: rd, buf: make([]byte, 8192)},
+ {name: blobName, rd: blob_rd, buf: make([]byte, 8192)},
+ }
+
+ // Descriptive names for the two elements of s, when we aren't treating them the same.
+ reader := &s[0]
+ blob := &s[1]
+
+ var pos int // position within file, for error reporting.
+
+ for x := 0; x != 2; x++ {
+ s[x].n, s[x].err = s[x].rd.Read(s[x].buf)
+ s[x].i = 0
+ }
+ for blob.n != 0 && reader.n != 0 {
+ for reader.i != reader.n && blob.i != blob.n && reader.buf[reader.i] == blob.buf[blob.i] {
+ pos++
+ blob.i++
+ reader.i++
+ }
+ if reader.i != reader.n && blob.i != blob.n {
+ t.Fatalf("callSite %d: BlobStore %q: BlobReader on blob %q and rd reader generated different bytes at position %d: 0x%x vs 0x%x",
+ callSite, bs.Root(), blobName, pos, reader.buf[reader.i], blob.buf[blob.i])
+ }
+ for x := 0; x != 2; x++ { // read more data from each reader, if needed
+ if s[x].i == s[x].n {
+ s[x].i = 0
+ s[x].n = 0
+ if s[x].err == nil {
+ s[x].n, s[x].err = s[x].rd.Read(s[x].buf)
+ }
+ }
+ }
+ }
+ for x := 0; x != 2; x++ {
+ if s[x].err != io.EOF {
+ t.Fatalf("callSite %d: %s got error %v", callSite, s[x].name, s[x].err)
+ }
+ if s[x].n != 0 {
+ t.Fatalf("callSite %d: %s is longer than %s", callSite, s[x].name, s[1-x].name)
+ }
+ }
+}
+
+// checkBlobChunksAgainstReader() verifies that the blob blobName has the same chunks
+// (according to BlobChunkStream) as a chunker applied to the reader rd.
+// Errors cause the test to terminate. Error messages contain the
+// "callSite" value to allow the test to tell which call site is which.
+func checkBlobChunksAgainstReader(t *testing.T, ctx *context.T, bs localblobstore.BlobStore, blobName string, rd io.Reader, callSite int) {
+	buf := make([]byte, 8192) // buffer passed to cs.Value() to hold chunk hashes from the blob's chunk stream.
+ rawChunks := chunker.NewStream(ctx, &chunker.DefaultParam, rd)
+ cs := bs.BlobChunkStream(ctx, blobName)
+	pos := 0 // byte position within the blob, to be reported in error messages
+ i := 0 // chunk index, to be reported in error messages
+ rawMore, more := rawChunks.Advance(), cs.Advance()
+ for rawMore && more {
+ c := rawChunks.Value()
+ rawChunk := md5.Sum(rawChunks.Value())
+ chunk := cs.Value(buf)
+ if bytes.Compare(rawChunk[:], chunk) != 0 {
+ t.Errorf("raw random stream and chunk record for blob %q have different chunk %d:\n\t%v\nvs\n\t%v\n\tpos %d\n\tlen %d\n\tc %v",
+ blobName, i, rawChunk, chunk, pos, len(c), c)
+ }
+ pos += len(c)
+ i++
+ rawMore, more = rawChunks.Advance(), cs.Advance()
+ }
+ if rawMore {
+ t.Fatalf("callSite %d: blob %q has fewer chunks than raw stream", callSite, blobName)
+ }
+ if more {
+ t.Fatalf("callSite %d: blob %q has more chunks than raw stream", callSite, blobName)
+ }
+ if rawChunks.Err() != nil {
+ t.Fatalf("callSite %d: error reading raw chunk stream: %v", callSite, rawChunks.Err())
+ }
+ if cs.Err() != nil {
+ t.Fatalf("callSite %d: error reading chunk stream for blob %q; %v", callSite, blobName, cs.Err())
+ }
+}
+
+// WriteViaChunks() tests that a large blob in one blob store can be transmitted
+// to another incrementally, without transferring chunks already in the other blob store.
+func WriteViaChunks(t *testing.T, ctx *context.T, bs [2]localblobstore.BlobStore) {
+ // The original blob will be a megabyte.
+ totalLength := 1024 * 1024
+
+ // Write a random blob to bs[0], using seed 1, then check that the
+ // bytes and chunk we get from the blob just written are the same as
+ // those obtained from an identical byte stream.
+ blob0 := writeBlobFromReader(t, ctx, bs[0], "", NewRandReader(1, totalLength, 0, io.EOF), 0)
+ checkBlobAgainstReader(t, ctx, bs[0], blob0, NewRandReader(1, totalLength, 0, io.EOF), 1)
+ checkBlobChunksAgainstReader(t, ctx, bs[0], blob0, NewRandReader(1, totalLength, 0, io.EOF), 2)
+
+ // ---------------------------------------------------------------------
+ // Write into bs[1] a blob that is similar to blob0, but not identical, and check it as above.
+ insertionInterval := 20 * 1024
+ blob1 := writeBlobFromReader(t, ctx, bs[1], "", NewRandReader(1, totalLength, insertionInterval, io.EOF), 3)
+ checkBlobAgainstReader(t, ctx, bs[1], blob1, NewRandReader(1, totalLength, insertionInterval, io.EOF), 4)
+ checkBlobChunksAgainstReader(t, ctx, bs[1], blob1, NewRandReader(1, totalLength, insertionInterval, io.EOF), 5)
+
+ // ---------------------------------------------------------------------
+ // Count the number of chunks, and the number of steps in the recipe
+	// for copying blob0 from bs[0] to bs[1]. We expect the former to be
+	// significantly larger than the latter, because the
+ // insertionInterval is significantly larger than the expected chunk
+ // size.
+ cs := bs[0].BlobChunkStream(ctx, blob0) // Stream of chunks in blob0
+ rs := bs[1].RecipeStreamFromChunkStream(ctx, cs) // Recipe from bs[1]
+
+ recipeLen := 0
+ chunkCount := 0
+ for rs.Advance() {
+ step := rs.Value()
+ if step.Chunk != nil {
+ chunkCount++
+ }
+ recipeLen++
+ }
+ if rs.Err() != nil {
+ t.Fatalf("RecipeStream got error: %v", rs.Err())
+ }
+
+ cs = bs[0].BlobChunkStream(ctx, blob0) // Get the original chunk count.
+ origChunkCount := 0
+ for cs.Advance() {
+ origChunkCount++
+ }
+ if cs.Err() != nil {
+ t.Fatalf("ChunkStream got error: %v", cs.Err())
+ }
+ if origChunkCount < chunkCount*5 {
+		t.Errorf("expected fewer chunks in recipe: recipeLen %d chunkCount %d origChunkCount %d\n",
+ recipeLen, chunkCount, origChunkCount)
+ }
+
+ // Copy blob0 from bs[0] to bs[1], using chunks from blob1 (already in bs[1]) where possible.
+ cs = bs[0].BlobChunkStream(ctx, blob0) // Stream of chunks in blob0
+ // In a real application, at this point the stream cs would be sent to the device with bs[1].
+ rs = bs[1].RecipeStreamFromChunkStream(ctx, cs) // Recipe from bs[1]
+ // Write blob with known blob name.
+ var bw localblobstore.BlobWriter
+ var err error
+ if bw, err = bs[1].NewBlobWriter(ctx, blob0); err != nil {
+ t.Fatalf("bs[1].NewBlobWriter yields error: %v", err)
+ }
+ var br localblobstore.BlobReader
+ const maxFragment = 1024 * 1024
+ blocks := make([]localblobstore.BlockOrFile, maxFragment/chunker.DefaultParam.MinChunk)
+ for gotStep := rs.Advance(); gotStep; {
+ step := rs.Value()
+ if step.Chunk == nil {
+ // This part of the blob can be read from an existing blob locally (at bs[1]).
+ if err = bw.AppendBlob(step.Blob, step.Size, step.Offset); err != nil {
+ t.Fatalf("AppendBlob(%v) yields error: %v", step, err)
+ }
+ gotStep = rs.Advance()
+ } else {
+ var fragmentSize int64
+ // In a real application, the sequence of chunk hashes
+ // in recipe steps would be communicated back to bs[0],
+ // which then finds the associated chunks.
+ var b int
+ for b = 0; gotStep && step.Chunk != nil && fragmentSize+chunker.DefaultParam.MaxChunk < maxFragment; b++ {
+ var loc localblobstore.Location
+ if loc, err = bs[0].LookupChunk(ctx, step.Chunk); err != nil {
+ t.Fatalf("bs[0] unexpectedly does not have chunk %v", step.Chunk)
+ }
+ if br != nil && br.Name() != loc.BlobName { // Close blob if we need a different one.
+ if err = br.Close(); err != nil {
+ t.Fatalf("unexpected error in BlobReader.Close(): %v", err)
+ }
+ br = nil
+ }
+ if br == nil { // Open blob if needed.
+ if br, err = bs[0].NewBlobReader(ctx, loc.BlobName); err != nil {
+ t.Fatalf("unexpected failure to create BlobReader on %q: %v", loc.BlobName, err)
+ }
+ }
+ if loc.Size > chunker.DefaultParam.MaxChunk {
+ t.Fatalf("chunk exceeds max chunk size: %d vs %d", loc.Size, chunker.DefaultParam.MaxChunk)
+ }
+ fragmentSize += loc.Size
+ if blocks[b].Block == nil {
+ blocks[b].Block = make([]byte, chunker.DefaultParam.MaxChunk)
+ }
+ blocks[b].Block = blocks[b].Block[:loc.Size]
+ var i int
+ var n int64
+ for n = int64(0); n != loc.Size; n += int64(i) {
+ if i, err = br.ReadAt(blocks[b].Block[n:loc.Size], n+loc.Offset); err != nil && err != io.EOF {
+ t.Fatalf("ReadAt on %q failed: %v", br.Name(), err)
+ }
+ }
+ if gotStep = rs.Advance(); gotStep {
+ step = rs.Value()
+ }
+ }
+ if err = bw.AppendFragment(blocks[:b]...); err != nil {
+ t.Fatalf("AppendFragment on %q failed: %v", bw.Name(), err)
+ }
+ }
+ }
+ if err = bw.Close(); err != nil {
+ t.Fatalf("BlobWriter.Close on %q failed: %v", bw.Name(), err)
+ }
+
+ // Check that the transferred blob in bs[1] is the same as the original
+ // stream used to make the blob in bs[0].
+ checkBlobAgainstReader(t, ctx, bs[1], blob0, NewRandReader(1, totalLength, 0, io.EOF), 6)
+ checkBlobChunksAgainstReader(t, ctx, bs[1], blob0, NewRandReader(1, totalLength, 0, io.EOF), 7)
+}
+
+// checkBlobContent() checks that the named blob has the specified content.
+func checkBlobContent(t *testing.T, ctx *context.T, bs localblobstore.BlobStore, blobName string, content []byte) {
+ var err error
+ var br localblobstore.BlobReader
+ var data []byte
+ if br, err = bs.NewBlobReader(ctx, blobName); err != nil {
+ t.Fatalf("localblobstore.NewBlobReader failed: %v\n", err)
+ }
+ if data, err = ioutil.ReadAll(br); err != nil && err != io.EOF {
+ t.Fatalf("Read on br failed: %v\n", err)
+ }
+ if !bytes.Equal(data, content) {
+ t.Fatalf("Read on %q got %q, wanted %v\n", blobName, data, content)
+ }
+ if err = br.Close(); err != nil {
+ t.Fatalf("br.Close failed: %v\n", err)
+ }
+}
+
+// CreateAndResume() tests that it's possible to create a blob with
+// NewBlobWriter(), immediately close it, and then resume writing with
+// ResumeBlobWriter. This test is called out because syncbase does this, and
+// it exposed a bug in the reader code, which could not cope with a request to
+// read starting at the very end of a file, thus returning no bytes.
+func CreateAndResume(t *testing.T, ctx *context.T, bs localblobstore.BlobStore) {
+ var err error
+
+ // Create an empty, unfinalized blob.
+ var bw localblobstore.BlobWriter
+ if bw, err = bs.NewBlobWriter(ctx, ""); err != nil {
+ t.Fatalf("localblobstore.NewBlobWriter failed: %v\n", err)
+ }
+ blobName := bw.Name()
+ if err = bw.CloseWithoutFinalize(); err != nil {
+ t.Fatalf("bw.CloseWithoutFinalize failed: %v\n", verror.DebugString(err))
+ }
+
+ checkBlobContent(t, ctx, bs, blobName, nil)
+
+ // Reopen the blob, but append no bytes (an empty byte vector).
+ if bw, err = bs.ResumeBlobWriter(ctx, blobName); err != nil {
+ t.Fatalf("localblobstore.ResumeBlobWriter failed: %v\n", err)
+ }
+ if err = bw.AppendFragment(localblobstore.BlockOrFile{Block: []byte("")}); err != nil {
+ t.Fatalf("bw.AppendFragment failed: %v", err)
+ }
+ if err = bw.CloseWithoutFinalize(); err != nil {
+		t.Fatalf("bw.CloseWithoutFinalize failed: %v\n", err)
+ }
+
+ checkBlobContent(t, ctx, bs, blobName, nil)
+
+ // Reopen the blob, and append a non-empty sequence of bytes.
+ content := []byte("some content")
+ if bw, err = bs.ResumeBlobWriter(ctx, blobName); err != nil {
+		t.Fatalf("localblobstore.ResumeBlobWriter failed: %v\n", err)
+ }
+ if err = bw.AppendFragment(localblobstore.BlockOrFile{Block: content}); err != nil {
+ t.Fatalf("bw.AppendFragment failed: %v", err)
+ }
+ if err = bw.Close(); err != nil {
+ t.Fatalf("bw.Close failed: %v\n", err)
+ }
+
+ checkBlobContent(t, ctx, bs, blobName, content)
+}
diff --git a/services/syncbase/localblobstore/localblobstore_testlib/randreader.go b/services/syncbase/localblobstore/localblobstore_testlib/randreader.go
new file mode 100644
index 0000000..85e32d3
--- /dev/null
+++ b/services/syncbase/localblobstore/localblobstore_testlib/randreader.go
@@ -0,0 +1,53 @@
+// Copyright 2015 The Vanadium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package localblobstore_testlib
+
+import "math/rand"
+
+// A RandReader contains a pointer to a rand.Rand, and a size limit. A
+// *RandReader implements the Read() method from io.Reader, which yields bytes
+// obtained from the random number generator.
+type RandReader struct {
+ rand *rand.Rand // Source of random bytes.
+ pos int // Number of bytes read.
+ limit int // Max number of bytes that may be read.
+ insertInterval int // If non-zero, number of bytes between insertions of zero bytes.
+ eofErr error // error to be returned at the end of the stream
+}
+
+// NewRandReader() returns a new RandReader with the specified seed and size limit.
+// It yields eofErr when the end of the stream is reached.
+// If insertInterval is non-zero, a zero byte is inserted into the stream every
+// insertInterval bytes, before the stream resumes drawing bytes from the
+// random number generator.
+func NewRandReader(seed int64, limit int, insertInterval int, eofErr error) *RandReader {
+ r := new(RandReader)
+ r.rand = rand.New(rand.NewSource(seed))
+ r.limit = limit
+ r.insertInterval = insertInterval
+ r.eofErr = eofErr
+ return r
+}
+
+// Read() implements the io.Reader Read() method for *RandReader.
+func (r *RandReader) Read(buf []byte) (n int, err error) {
+ // Generate bytes up to the end of the stream, or the end of the buffer.
+ max := r.limit - r.pos
+ if len(buf) < max {
+ max = len(buf)
+ }
+ for ; n != max; n++ {
+ if r.insertInterval == 0 || (r.pos%r.insertInterval) != 0 {
+ buf[n] = byte(r.rand.Int31n(256))
+ } else {
+ buf[n] = 0
+ }
+ r.pos++
+ }
+ if r.pos == r.limit {
+ err = r.eofErr
+ }
+ return n, err
+}
diff --git a/services/syncbase/localblobstore/localblobstore_transfer_test.go b/services/syncbase/localblobstore/localblobstore_transfer_test.go
new file mode 100644
index 0000000..b5378ec
--- /dev/null
+++ b/services/syncbase/localblobstore/localblobstore_transfer_test.go
@@ -0,0 +1,368 @@
+// Copyright 2015 The Vanadium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// Example code for transferring a blob from one device to another.
+// See the simulateResumption constant to choose whether to simulate a full
+// transfer or a resumed one.
+package localblobstore_test
+
+import "bytes"
+import "fmt"
+import "io"
+import "io/ioutil"
+import "math/rand"
+import "os"
+
+import "v.io/syncbase/x/ref/services/syncbase/localblobstore"
+import "v.io/syncbase/x/ref/services/syncbase/localblobstore/fs_cablobstore"
+import "v.io/v23/context"
+import "v.io/x/ref/test"
+import _ "v.io/x/ref/runtime/factories/generic"
+
+// simulateResumption tells the receiver whether to simulate having a partial
+// blob before blob transfer.
+const simulateResumption = true
+
+// createBlobStore() returns a new BlobStore, and the name of the directory
+// used to implement it.
+func createBlobStore(ctx *context.T) (bs localblobstore.BlobStore, dirName string) {
+ var err error
+ if dirName, err = ioutil.TempDir("", "localblobstore_transfer_test"); err != nil {
+ panic(err)
+ }
+ if bs, err = fs_cablobstore.Create(ctx, dirName); err != nil {
+ panic(err)
+ }
+ return bs, dirName
+}
+
+// createBlob writes a blob to bs consisting of count 32kByte blocks drawn from a
+// deterministic but arbitrary random stream, starting at block offset within that stream.
+// It returns the blob's name, which is "blob" if that argument is non-empty, and chosen arbitrarily otherwise.
+// The blob is finalized iff "complete" is true.
+func createBlob(ctx *context.T, bs localblobstore.BlobStore, blob string, complete bool, offset int, count int) string {
+ var bw localblobstore.BlobWriter
+ var err error
+ if bw, err = bs.NewBlobWriter(ctx, blob); err != nil {
+ panic(err)
+ }
+ blob = bw.Name()
+ var buffer [32 * 1024]byte
+ block := localblobstore.BlockOrFile{Block: buffer[:]}
+ r := rand.New(rand.NewSource(1)) // Always seed with 1 for repeatability.
+ for i := 0; i != offset+count; i++ {
+ for b := 0; b != len(buffer); b++ {
+ buffer[b] = byte(r.Int31n(256))
+ }
+ if i >= offset {
+ if err = bw.AppendFragment(block); err != nil {
+ panic(err)
+ }
+ }
+ }
+ if complete {
+ err = bw.Close()
+ } else {
+ err = bw.CloseWithoutFinalize()
+ }
+ if err != nil {
+ panic(err)
+ }
+ return blob
+}
+
+// A channelChunkStream turns a channel of chunk hashes into a ChunkStream.
+type channelChunkStream struct {
+ channel <-chan []byte
+ ok bool
+ value []byte
+}
+
+// newChannelChunkStream returns a ChunkStream, given a channel containing the
+// relevant chunk hashes.
+func newChannelChunkStream(ch <-chan []byte) localblobstore.ChunkStream {
+ return &channelChunkStream{channel: ch, ok: true}
+}
+
+// The following are the standard ChunkStream methods.
+func (cs *channelChunkStream) Advance() bool {
+ if cs.ok {
+ cs.value, cs.ok = <-cs.channel
+ }
+ return cs.ok
+}
+func (cs *channelChunkStream) Value(buf []byte) []byte { return cs.value }
+func (cs *channelChunkStream) Err() error { return nil }
+func (cs *channelChunkStream) Cancel() {}
+
+// Example_blobTransfer() demonstrates how to transfer a blob incrementally
+// from one device's blob store to another. In this code, the communication
+// between sender and receiver is modelled with Go channels.
+func Example_blobTransfer() {
+ ctx, shutdown := test.V23Init()
+ defer shutdown()
+
+ // ----------------------------------------------
+	// Channel names end in ToSender or ToReceiver to indicate the
+	// direction in which data flows.
+ type blobData struct {
+ name string
+ size int64
+ checksum []byte
+ }
+ blobDataToReceiver := make(chan blobData) // indicate basic data for blob
+ needChunksToSender := make(chan bool) // indicate receiver does not have entire blob
+	chunkHashesToReceiver := make(chan []byte) // for initial transfer of chunk hashes
+ chunkHashesToSender := make(chan []byte) // to report which chunks receiver needs
+	chunksToReceiver := make(chan []byte) // to send the needed chunk data to receiver
+
+ sDone := make(chan bool) // closed when sender done
+ rDone := make(chan bool) // closed when receiver done
+
+ // ----------------------------------------------
+ // The sender.
+ go func(ctx *context.T,
+ blobDataToReceiver chan<- blobData,
+ needChunksToSender <-chan bool,
+ chunkHashesToReceiver chan<- []byte,
+ chunkHashesToSender <-chan []byte,
+ chunksToReceiver chan<- []byte,
+ done chan<- bool) {
+
+ defer close(done)
+ var err error
+
+ bsS, bsSDir := createBlobStore(ctx)
+ defer os.RemoveAll(bsSDir)
+
+ blob := createBlob(ctx, bsS, "", true, 0, 32) // Create a 1M blob at the sender.
+
+ // 1. Send basic blob data to receiver.
+ var br localblobstore.BlobReader
+ if br, err = bsS.NewBlobReader(ctx, blob); err != nil {
+ panic(err)
+ }
+ blobDataToReceiver <- blobData{name: blob, size: br.Size(), checksum: br.Hash()}
+ br.Close()
+ close(blobDataToReceiver)
+
+ // 3. Get indication from receiver of whether it needs blob.
+ needChunks := <-needChunksToSender
+
+ if !needChunks { // Receiver has blob; done.
+ return
+ }
+
+ // 4. Send the chunk hashes to the receiver. This proceeds concurrently
+ // with the step below.
+ go func(ctx *context.T, blob string, chunkHashesToReceiver chan<- []byte) {
+ cs := bsS.BlobChunkStream(ctx, blob)
+ for cs.Advance() {
+ chunkHashesToReceiver <- cs.Value(nil)
+ }
+ if cs.Err() != nil {
+ panic(cs.Err())
+ }
+ close(chunkHashesToReceiver)
+ }(ctx, blob, chunkHashesToReceiver)
+
+ // 7. Get needed chunk hashes from receiver, find the relevant
+ // data, and send it back to the receiver.
+ var cbr localblobstore.BlobReader // Cached read handle on most-recent-read blob, or nil
+ // Given chunk hash h from chunkHashesToSender, send chunk to chunksToReceiver.
+ for h := range chunkHashesToSender {
+ loc, err := bsS.LookupChunk(ctx, h)
+ for err == nil && (cbr == nil || cbr.Name() != loc.BlobName) {
+ if cbr != nil && cbr.Name() != loc.BlobName {
+ cbr.Close()
+ cbr = nil
+ }
+ if cbr == nil {
+ if cbr, err = bsS.NewBlobReader(ctx, loc.BlobName); err != nil {
+ bsS.GC(ctx) // A partially-deleted blob may be confusing things.
+ loc, err = bsS.LookupChunk(ctx, h)
+ }
+ }
+ }
+ var i int = 1
+ var n int64
+ buffer := make([]byte, loc.Size) // buffer for current chunk
+ for n = int64(0); n != loc.Size && i != 0 && err == nil; n += int64(i) {
+ if i, err = cbr.ReadAt(buffer[n:loc.Size], n+loc.Offset); err == io.EOF {
+ err = nil // EOF is expected
+ }
+ }
+ if n == loc.Size { // Got chunk.
+ chunksToReceiver <- buffer[:loc.Size]
+ }
+ if err != nil {
+ break
+ }
+ }
+ close(chunksToReceiver)
+ if cbr != nil {
+ cbr.Close()
+ }
+
+ }(ctx, blobDataToReceiver, needChunksToSender, chunkHashesToReceiver, chunkHashesToSender, chunksToReceiver, sDone)
+
+ // ----------------------------------------------
+ // The receiver.
+ go func(ctx *context.T,
+ blobDataToReceiver <-chan blobData,
+ needChunksToSender chan<- bool,
+ chunkHashesToReceiver <-chan []byte,
+ chunkHashesToSender chan<- []byte,
+ chunksToReceiver <-chan []byte,
+ done chan<- bool) {
+
+ defer close(done)
+ var err error
+
+ bsR, bsRDir := createBlobStore(ctx)
+ defer os.RemoveAll(bsRDir)
+
+ // 2. Receive basic blob data from sender.
+ blobInfo := <-blobDataToReceiver
+
+ if simulateResumption {
+ // Write a fraction of the (unfinalized) blob on the receiving side
+ // to check that the transfer process can resume a partial blob.
+ createBlob(ctx, bsR, blobInfo.name, false, 0, 10)
+ }
+
+		// 3. Tell sender whether the receiver already has the complete
+		// blob.
+ needChunks := true
+ var br localblobstore.BlobReader
+ if br, err = bsR.NewBlobReader(ctx, blobInfo.name); err == nil {
+ if br.IsFinalized() {
+ if len(br.Hash()) == len(blobInfo.checksum) && bytes.Compare(br.Hash(), blobInfo.checksum) != 0 {
+ panic("receiver has a finalized blob with same name but different hash")
+ }
+ needChunks = false // The receiver already has the blob.
+ }
+ br.Close()
+ }
+ needChunksToSender <- needChunks
+ close(needChunksToSender)
+
+ if !needChunks { // Receiver has blob; done.
+ return
+ }
+
+ // 5. Receive the chunk hashes from the sender, and turn them
+ // into a recipe.
+ cs := newChannelChunkStream(chunkHashesToReceiver)
+ rs := bsR.RecipeStreamFromChunkStream(ctx, cs)
+
+		// 6. The following thread sends to the sender the chunk hashes
+		// that the receiver does not have. It also makes
+ // a duplicate of the stream on the channel rsCopy. The
+ // buffering in rsCopy allows the receiver to put several
+ // chunks into a fragment.
+ rsCopy := make(chan localblobstore.RecipeStep, 100) // A buffered copy of the rs stream.
+ go func(ctx *context.T, rs localblobstore.RecipeStream, rsCopy chan<- localblobstore.RecipeStep, chunkHashesToSender chan<- []byte) {
+ for rs.Advance() {
+
+ step := rs.Value()
+ if step.Chunk != nil { // Data must be fetched from sender.
+ chunkHashesToSender <- step.Chunk
+ }
+ rsCopy <- step
+ }
+ close(chunkHashesToSender)
+ close(rsCopy)
+ }(ctx, rs, rsCopy, chunkHashesToSender)
+
+ // 8. The following thread splices the chunks from the sender
+ // (on chunksToReceiver) into the recipe stream copy
+ // (rsCopy) to generate a full recipe stream (rsFull) in
+ // which chunks are actual data, rather than just hashes.
+ rsFull := make(chan localblobstore.RecipeStep) // A recipe stream containing chunk data, not just hashes.
+ go func(ctx *context.T, rsCopy <-chan localblobstore.RecipeStep, chunksToReceiver <-chan []byte, rsFull chan<- localblobstore.RecipeStep) {
+ var ok bool
+ for step := range rsCopy {
+ if step.Chunk != nil { // Data must be fetched from sender.
+ if step.Chunk, ok = <-chunksToReceiver; !ok {
+ break
+ }
+ }
+ rsFull <- step
+ }
+ close(rsFull)
+ }(ctx, rsCopy, chunksToReceiver, rsFull)
+
+ // 9. Write the blob using the recipe.
+ var chunksTransferred int
+		const fragmentThreshold = 1024 * 1024 // Try to write on-disc fragments at least this big.
+ var ignoreBytes int64
+ var bw localblobstore.BlobWriter
+ if bw, err = bsR.ResumeBlobWriter(ctx, blobInfo.name); err != nil {
+ bw, err = bsR.NewBlobWriter(ctx, blobInfo.name)
+ } else {
+ ignoreBytes = bw.Size()
+ }
+ if err == nil {
+ var fragment []localblobstore.BlockOrFile
+ var fragmentSize int64
+ for step := range rsFull {
+ if step.Chunk == nil { // Data can be obtained from local blob.
+ if ignoreBytes >= step.Size { // Ignore chunks we already have.
+ ignoreBytes -= step.Size
+ } else {
+ err = bw.AppendBlob(step.Blob, step.Size-ignoreBytes, step.Offset+ignoreBytes)
+ ignoreBytes = 0
+ }
+				} else if ignoreBytes >= int64(len(step.Chunk)) { // Ignore chunks we already have.
+ ignoreBytes -= int64(len(step.Chunk))
+				} else { // Data is from a chunk sent by the sender.
+ chunksTransferred++
+ fragment = append(fragment, localblobstore.BlockOrFile{Block: step.Chunk[ignoreBytes:]})
+ fragmentSize += int64(len(step.Chunk)) - ignoreBytes
+ ignoreBytes = 0
+ if fragmentSize > fragmentThreshold {
+ err = bw.AppendFragment(fragment...)
+ fragment = fragment[:0]
+ fragmentSize = 0
+ }
+ }
+ if err != nil {
+ break
+ }
+ }
+ if err == nil && len(fragment) != 0 {
+ err = bw.AppendFragment(fragment...)
+ }
+ if err2 := bw.Close(); err == nil {
+ err = err2
+ }
+ if err != nil {
+ panic(err)
+ }
+ }
+
+ // 10. Verify that the blob was written correctly.
+ if br, err = bsR.NewBlobReader(ctx, blobInfo.name); err != nil {
+ panic(err)
+ }
+ if br.Size() != blobInfo.size {
+ panic("transferred blob has wrong size")
+ }
+ if len(br.Hash()) != len(blobInfo.checksum) || bytes.Compare(br.Hash(), blobInfo.checksum) != 0 {
+ panic("transferred blob has wrong checksum")
+ }
+ if err = br.Close(); err != nil {
+ panic(err)
+ }
+ fmt.Printf("%d chunks transferred\n", chunksTransferred)
+ }(ctx, blobDataToReceiver, needChunksToSender, chunkHashesToReceiver, chunkHashesToSender, chunksToReceiver, rDone)
+
+ // ----------------------------------------------
+ // Wait for sender and receiver to finish.
+ _ = <-sDone
+ _ = <-rDone
+
+ // Output: 635 chunks transferred
+}
diff --git a/services/syncbase/localblobstore/model.go b/services/syncbase/localblobstore/model.go
new file mode 100644
index 0000000..f51f455
--- /dev/null
+++ b/services/syncbase/localblobstore/model.go
@@ -0,0 +1,303 @@
+// Copyright 2015 The Vanadium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// Package localblobstore is the interface to a local blob store.
+// Implementations include fs_cablobstore.
+//
+// Expected use
+// ============
+// These examples assume that bs, bsS (sender) and bsR (receiver) are blobstores.
+//
+// Writing blobs
+// bw, err := bs.NewBlobWriter(ctx, "") // For a new blob, implementation picks blob name.
+// if err == nil {
+// blobName := bw.Name() // Get name the implementation picked.
+// ... use bw.AppendFragment() to append data to the blob...
+// ... and/or bw.AppendBlob() to append data that's in another existing blob...
+// err = bw.Close()
+// }
+//
+// Resume writing a blob that was partially written due to a crash (not yet finalized).
+// bw, err := bs.ResumeBlobWriter(ctx, name)
+// if err == nil {
+// size := bw.Size() // The store has this many bytes from the blob.
+// ... write the remaining data using bw.AppendFragment() and/or bw.AppendBlob()...
+// err = bw.Close()
+// }
+//
+// Reading blobs
+// br, err := bs.NewBlobReader(ctx, name)
+// if err == nil {
+// ... read bytes with br.ReadAt() or br.Read(), perhaps with br.Seek()...
+// err = br.Close()
+// }
+//
+// Transferring blobs from one store to another:
+// See example in localblobstore_transfer_test.go
+// Summary:
+// - The sender sends the checksum of the blob from BlobReader's Hash().
+// - The receiver checks whether it already has the blob, with the same
+// checksum.
+// - If the receiver does not have the blob, the sender sends the list of chunk
+// hashes in the blob using BlobChunkStream().
+// - The receiver uses RecipeStreamFromChunkStream() with the chunk hash stream
+// from the sender, and tells the sender the chunk hashes of the chunks it
+// needs.
+// - The sender uses LookupChunk() to find the data for each chunk the receiver
+// needs, and sends it to the receiver.
+// - The receiver applies the recipe steps, with the actual chunk data from
+// the sender and its own local data.
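+//
+// A minimal receiver-side sketch of the above exchange (error handling omitted;
+// cs is a ChunkStream of hashes received from the sender, and fetchChunk() is a
+// hypothetical call that asks the sender for the data of one chunk):
+//   bw, _ := bsR.NewBlobWriter(ctx, blobName)
+//   rs := bsR.RecipeStreamFromChunkStream(ctx, cs)
+//   for rs.Advance() {
+//     step := rs.Value()
+//     if step.Chunk == nil { // Bytes the receiver already has locally.
+//       bw.AppendBlob(step.Blob, step.Size, step.Offset)
+//     } else { // Bytes that must be fetched from the sender.
+//       bw.AppendFragment(BlockOrFile{Block: fetchChunk(step.Chunk)}) // fetchChunk() is hypothetical.
+//     }
+//   }
+//   err := bw.Close()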
+package localblobstore
+
+import "v.io/v23/context"
+
+// A BlobStore represents a simple, content-addressable store.
+type BlobStore interface {
+ // NewBlobReader() returns a pointer to a newly allocated BlobReader on
+ // the specified blobName. BlobReaders should not be used concurrently
+ // by multiple threads. Returned handles should be closed with
+ // Close().
+ NewBlobReader(ctx *context.T, blobName string) (br BlobReader, err error)
+
+ // NewBlobWriter() returns a pointer to a newly allocated BlobWriter on
+	// a newly created blob. If "name" is non-empty, it is used to name
+ // the blob, and it must be in the format of a name returned by this
+ // interface (probably by another instance on another device).
+	// Otherwise, a new name is created, which can be found using
+ // the Name() method. It is an error to attempt to overwrite a blob
+ // that already exists in this blob store. BlobWriters should not be
+ // used concurrently by multiple threads. The returned handle should
+ // be closed with either the Close() or CloseWithoutFinalize() method
+ // to avoid leaking file handles.
+ NewBlobWriter(ctx *context.T, name string) (bw BlobWriter, err error)
+
+ // ResumeBlobWriter() returns a pointer to a newly allocated BlobWriter on
+ // an old, but unfinalized blob name.
+ ResumeBlobWriter(ctx *context.T, blobName string) (bw BlobWriter, err error)
+
+ // DeleteBlob() deletes the named blob from the BlobStore.
+ DeleteBlob(ctx *context.T, blobName string) (err error)
+
+ // GC() removes old temp files and content-addressed blocks that are no
+ // longer referenced by any blob. It may be called concurrently with
+ // other calls to GC(), and with uses of BlobReaders and BlobWriters.
+ GC(ctx *context.T) error
+
+ // BlobChunkStream() returns a ChunkStream that can be used to read the
+ // ordered list of content hashes of chunks in blob blobName. It is
+ // expected that this list will be presented to
+ // RecipeStreamFromChunkStream() on another device, to create a recipe
+ // for transmitting the blob efficiently to that other device.
+ BlobChunkStream(ctx *context.T, blobName string) ChunkStream
+
+ // RecipeStreamFromChunkStream() returns a pointer to a RecipeStream
+ // that allows the client to iterate over each RecipeStep needed to
+ // create the blob formed by the chunks in chunkStream. It is expected
+ // that this will be called on a receiving device, and be given a
+ // ChunkStream from a sending device, to yield a recipe for efficient
+ // chunk transfer. RecipeStep values with non-nil Chunk fields need
+ // the chunk from the sender; once the data is returned it can be
+ // written with BlobWriter.AppendFragment(). Those with blob
+ // references can be written locally with BlobWriter.AppendBlob().
+ RecipeStreamFromChunkStream(ctx *context.T, chunkStream ChunkStream) RecipeStream
+
+ // LookupChunk() returns the location of a chunk with the specified chunk
+ // hash within the store. It is expected that chunk hashes from
+ // RecipeStep entries from RecipeStreamFromChunkStream() will be mapped
+ // to blob Location values on the sender for transmission to the
+ // receiver.
+ LookupChunk(ctx *context.T, chunkHash []byte) (loc Location, err error)
+
+ // ListBlobIds() returns an iterator that can be used to enumerate the
+ // blobs in a BlobStore. Expected use is:
+ //
+ // iter := bs.ListBlobIds(ctx)
+ // for iter.Advance() {
+ // // Process iter.Value() here.
+ // }
+ // if iter.Err() != nil {
+ // // The loop terminated early due to an error.
+ // }
+ ListBlobIds(ctx *context.T) (iter Stream)
+
+ // ListCAIds() returns an iterator that can be used to enumerate the
+ // content-addressable fragments in a BlobStore. Expected use is:
+ //
+ // iter := bs.ListCAIds(ctx)
+ // for iter.Advance() {
+ // // Process iter.Value() here.
+ // }
+ // if iter.Err() != nil {
+ // // The loop terminated early due to an error.
+ // }
+ ListCAIds(ctx *context.T) (iter Stream)
+
+ // Root() returns the name of the root directory where the BlobStore is stored.
+ Root() string
+
+ // Close() closes the BlobStore.
+ Close() error
+}
+
+// A Location describes a chunk's location within a blob. It is returned by
+// BlobStore.LookupChunk().
+type Location struct {
+ BlobName string // name of blob
+ Offset int64 // byte offset of chunk within blob
+ Size int64 // size of chunk
+}
+
+// A BlobReader allows a blob to be read using the standard ReadAt(), Read(),
+// and Seek() calls. A BlobReader can be created with NewBlobReader(), and
+// should be closed with the Close() method to avoid leaking file handles.
+type BlobReader interface {
+ // ReadAt() fills b[] with up to len(b) bytes of data starting at
+ // position "at" within the blob that the BlobReader indicates, and
+ // returns the number of bytes read.
+ ReadAt(b []byte, at int64) (n int, err error)
+
+ // Read() fills b[] with up to len(b) bytes of data starting at the
+ // current seek position of the BlobReader within the blob that the
+ // BlobReader indicates, and then both returns the number of bytes read
+ // and advances the BlobReader's seek position by that amount.
+ Read(b []byte) (n int, err error)
+
+ // Seek() sets the seek position of the BlobReader to offset if
+ // whence==0, offset+current_seek_position if whence==1, and
+ // offset+end_of_blob if whence==2, and then returns the current seek
+ // position.
+ Seek(offset int64, whence int) (result int64, err error)
+
+ // Close() indicates that the client will perform no further operations
+ // on the BlobReader. It releases any resources held by the
+ // BlobReader.
+ Close() error
+
+ // Name() returns the BlobReader's name.
+ Name() string
+
+ // Size() returns the BlobReader's size.
+ Size() int64
+
+ // IsFinalized() returns whether the BlobReader has been finalized.
+ IsFinalized() bool
+
+ // Hash() returns the BlobReader's hash. It may be nil if the blob is
+ // not finalized.
+ Hash() []byte
+}
+
+// A BlockOrFile represents a vector of bytes, and contains either a data
+// block (as a []byte), or a (file name, size, offset) triple.
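+// For example (illustrative values only), each of the following describes a
+// vector of bytes that AppendFragment() will copy into the store:
+//   BlockOrFile{Block: []byte("some bytes")}                 // bytes supplied directly
+//   BlockOrFile{FileName: "/tmp/data", Size: -1, Offset: 0}  // all bytes of an existing file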
+type BlockOrFile struct {
+ Block []byte // If FileName is empty, the bytes represented.
+ FileName string // If non-empty, the name of the file containing the bytes.
+ Size int64 // If FileName is non-empty, the number of bytes (or -1 for "all")
+ Offset int64 // If FileName is non-empty, the offset of the relevant bytes within the file.
+}
+
+// A BlobWriter allows a blob to be written. If a blob has not yet been
+// finalized, it also allows that blob to be extended. A BlobWriter may be
+// created with NewBlobWriter(), and should be closed with Close() or
+// CloseWithoutFinalize().
+type BlobWriter interface {
+ // AppendBlob() adds a (substring of a) pre-existing blob to the blob
+ // being written by the BlobWriter. The fragments of the pre-existing
+ // blob are not physically copied; they are referenced by both blobs.
+ AppendBlob(blobName string, size int64, offset int64) (err error)
+
+ // AppendFragment() appends a fragment to the blob being written by the
+ // BlobWriter, where the fragment is composed of the byte vectors
+ // described by the elements of item[]. The fragment is copied into
+ // the blob store.
+ AppendFragment(item ...BlockOrFile) (err error)
+
+ // Close() finalizes the BlobWriter, and indicates that the client will
+ // perform no further append operations on the BlobWriter. Any
+ // internal open file handles are closed.
+ Close() (err error)
+
+ // CloseWithoutFinalize() indicates that the client will perform no
+ // further append operations on the BlobWriter, but does not finalize
+ // the blob. Any internal open file handles are closed. Clients are
+ // expected to need this operation infrequently.
+ CloseWithoutFinalize() (err error)
+
+ // Name() returns the BlobWriter's name.
+ Name() string
+
+ // Size() returns the BlobWriter's size.
+ Size() int64
+
+ // IsFinalized() returns whether the BlobWriter has been finalized.
+ IsFinalized() bool
+
+ // Hash() returns the BlobWriter's hash, reflecting the bytes written so far.
+ Hash() []byte
+}
+
+// A Stream represents an iterator that allows the client to enumerate
+// all the blobs or fragments in a BlobStore.
+//
+// The interfaces Stream, ChunkStream, RecipeStream all have four calls,
+// and differ only in the Value() call.
+type Stream interface {
+ // Advance() stages an item so that it may be retrieved via Value().
+ // Returns true iff there is an item to retrieve. Advance() must be
+ // called before Value() is called. The caller is expected to read
+ // until Advance() returns false, or to call Cancel().
+ Advance() bool
+
+ // Value() returns the item that was staged by Advance(). May panic if
+ // Advance() returned false or was not called. Never blocks.
+ Value() (name string)
+
+ // Err() returns any error encountered by Advance. Never blocks.
+ Err() error
+
+ // Cancel() indicates that the client wishes to cease reading from the stream.
+ // It causes the next call to Advance() to return false. Never blocks.
+ // It may be called concurrently with other calls on the stream.
+ Cancel()
+}
+
+// A ChunkStream represents an iterator that allows the client to enumerate
+// the chunks in a blob. See the comments for Stream for usage.
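+// For example (a sketch that mirrors the test library's usage; error handling
+// elided):
+//   cs := bs.BlobChunkStream(ctx, blobName)
+//   buf := make([]byte, 8192)
+//   for cs.Advance() {
+//     chunkHash := cs.Value(buf)
+//     // ... use chunkHash ...
+//   }
+//   if cs.Err() != nil {
+//     // The stream terminated early due to an error.
+//   }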
+type ChunkStream interface {
+ Advance() bool
+
+ // Value() returns the chunkHash that was staged by Advance(). May
+ // panic if Advance() returned false or was not called. Never blocks.
+ // The result may share storage with buf[] if it is large enough;
+ // otherwise, a new buffer is allocated. It is legal to call with
+ // buf==nil.
+ Value(buf []byte) (chunkHash []byte)
+
+ Err() error
+ Cancel()
+}
+
+// A RecipeStep describes one piece of a recipe for making a blob.
+// The step consists either of appending the chunk with content hash Chunk and size Size,
+// or (if Chunk==nil) the Size bytes from Blob, starting at Offset.
+type RecipeStep struct {
+ Chunk []byte
+ Blob string
+ Size int64
+ Offset int64
+}
+
+// A RecipeStream represents an iterator that allows the client to obtain the
+// steps needed to construct a blob with a given ChunkStream, attempting to
+// reuse data in existing blobs. See the comments for Stream for usage.
+type RecipeStream interface {
+ Advance() bool
+
+ // Value() returns the RecipeStep that was staged by Advance(). May panic if
+ // Advance() returned false or was not called. Never blocks.
+ Value() RecipeStep
+
+ Err() error
+ Cancel()
+}
diff --git a/services/syncbase/server/app.go b/services/syncbase/server/app.go
new file mode 100644
index 0000000..c404f77
--- /dev/null
+++ b/services/syncbase/server/app.go
@@ -0,0 +1,292 @@
+// Copyright 2015 The Vanadium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package server
+
+import (
+ "path"
+ "sync"
+
+ wire "v.io/syncbase/v23/services/syncbase"
+ nosqlwire "v.io/syncbase/v23/services/syncbase/nosql"
+ "v.io/syncbase/x/ref/services/syncbase/server/interfaces"
+ "v.io/syncbase/x/ref/services/syncbase/server/nosql"
+ "v.io/syncbase/x/ref/services/syncbase/server/util"
+ "v.io/syncbase/x/ref/services/syncbase/store"
+ "v.io/v23/context"
+ "v.io/v23/glob"
+ "v.io/v23/rpc"
+ "v.io/v23/security/access"
+ "v.io/v23/verror"
+ "v.io/x/lib/vlog"
+)
+
+// app is a per-app singleton (i.e. not per-request) that handles App RPCs.
+type app struct {
+ name string
+ s *service
+ // The fields below are initialized iff this app exists.
+ exists bool
+ // Guards the fields below. Held during database Create, Delete, and
+ // SetPermissions.
+ mu sync.Mutex
+ dbs map[string]interfaces.Database
+}
+
+var (
+ _ wire.AppServerMethods = (*app)(nil)
+ _ interfaces.App = (*app)(nil)
+)
+
+////////////////////////////////////////
+// RPC methods
+
+// TODO(sadovsky): Require the app name to match the client's blessing name.
+// I.e. reserve names at the app level of the hierarchy.
+func (a *app) Create(ctx *context.T, call rpc.ServerCall, perms access.Permissions) error {
+ if a.exists {
+ return verror.New(verror.ErrExist, ctx, a.name)
+ }
+ // This app does not yet exist; a is just an ephemeral handle that holds
+ // {name string, s *service}. a.s.createApp will create a new app handle and
+ // store it in a.s.apps[a.name].
+ return a.s.createApp(ctx, call, a.name, perms)
+}
+
+func (a *app) Delete(ctx *context.T, call rpc.ServerCall) error {
+ return a.s.deleteApp(ctx, call, a.name)
+}
+
+func (a *app) Exists(ctx *context.T, call rpc.ServerCall) (bool, error) {
+ if !a.exists {
+ return false, nil
+ }
+ return util.ErrorToExists(util.GetWithAuth(ctx, call, a.s.st, a.stKey(), &appData{}))
+}
+
+func (a *app) SetPermissions(ctx *context.T, call rpc.ServerCall, perms access.Permissions, version string) error {
+ if !a.exists {
+ return verror.New(verror.ErrNoExist, ctx, a.name)
+ }
+ return a.s.setAppPerms(ctx, call, a.name, perms, version)
+}
+
+func (a *app) GetPermissions(ctx *context.T, call rpc.ServerCall) (perms access.Permissions, version string, err error) {
+ if !a.exists {
+ return nil, "", verror.New(verror.ErrNoExist, ctx, a.name)
+ }
+ data := &appData{}
+ if err := util.GetWithAuth(ctx, call, a.s.st, a.stKey(), data); err != nil {
+ return nil, "", err
+ }
+ return data.Perms, util.FormatVersion(data.Version), nil
+}
+
+func (a *app) GlobChildren__(ctx *context.T, call rpc.GlobChildrenServerCall, matcher *glob.Element) error {
+ if !a.exists {
+ return verror.New(verror.ErrNoExist, ctx, a.name)
+ }
+ // Check perms.
+ sn := a.s.st.NewSnapshot()
+ if err := util.GetWithAuth(ctx, call, sn, a.stKey(), &appData{}); err != nil {
+ sn.Abort()
+ return err
+ }
+ return util.Glob(ctx, call, matcher, sn, sn.Abort, util.JoinKeyParts(util.DbInfoPrefix, a.name))
+}
+
+////////////////////////////////////////
+// interfaces.App methods
+
+func (a *app) Service() interfaces.Service {
+ return a.s
+}
+
+func (a *app) NoSQLDatabase(ctx *context.T, call rpc.ServerCall, dbName string) (interfaces.Database, error) {
+ if !a.exists {
+ vlog.Fatalf("app %q does not exist", a.name)
+ }
+ a.mu.Lock()
+ defer a.mu.Unlock()
+ d, ok := a.dbs[dbName]
+ if !ok {
+ return nil, verror.New(verror.ErrNoExist, ctx, dbName)
+ }
+ return d, nil
+}
+
+func (a *app) NoSQLDatabaseNames(ctx *context.T, call rpc.ServerCall) ([]string, error) {
+ if !a.exists {
+ vlog.Fatalf("app %q does not exist", a.name)
+ }
+ // In the future this API will likely be replaced by one that streams the
+ // database names.
+ a.mu.Lock()
+ defer a.mu.Unlock()
+ dbNames := make([]string, 0, len(a.dbs))
+ for n := range a.dbs {
+ dbNames = append(dbNames, n)
+ }
+ return dbNames, nil
+}
+
+func (a *app) CreateNoSQLDatabase(ctx *context.T, call rpc.ServerCall, dbName string, perms access.Permissions, metadata *nosqlwire.SchemaMetadata) error {
+ if !a.exists {
+ vlog.Fatalf("app %q does not exist", a.name)
+ }
+ // TODO(sadovsky): Crash if any step fails, and use WAL to ensure that if we
+ // crash, upon restart we execute any remaining steps before we start handling
+ // client requests.
+ //
+ // Steps:
+ // 1. Check appData perms, create dbInfo record.
+ // 2. Initialize database.
+ // 3. Flip dbInfo.Initialized to true. <===== CHANGE BECOMES VISIBLE
+ a.mu.Lock()
+ defer a.mu.Unlock()
+ if _, ok := a.dbs[dbName]; ok {
+ // TODO(sadovsky): Should this be ErrExistOrNoAccess, for privacy?
+ return verror.New(verror.ErrExist, ctx, dbName)
+ }
+
+ // 1. Check appData perms, create dbInfo record.
+ rootDir, engine := a.rootDirForDb(dbName), a.s.opts.Engine
+ aData := &appData{}
+ if err := store.RunInTransaction(a.s.st, func(tx store.Transaction) error {
+ // Check appData perms.
+ if err := util.GetWithAuth(ctx, call, tx, a.stKey(), aData); err != nil {
+ return err
+ }
+ // Check for "database already exists".
+ if _, err := a.getDbInfo(ctx, tx, dbName); verror.ErrorID(err) != verror.ErrNoExist.ID {
+ if err != nil {
+ return err
+ }
+ // TODO(sadovsky): Should this be ErrExistOrNoAccess, for privacy?
+ return verror.New(verror.ErrExist, ctx, dbName)
+ }
+ // Write new dbInfo.
+ info := &dbInfo{
+ Name: dbName,
+ RootDir: rootDir,
+ Engine: engine,
+ }
+ return a.putDbInfo(ctx, tx, dbName, info)
+ }); err != nil {
+ return err
+ }
+
+ // 2. Initialize database.
+ if perms == nil {
+ perms = aData.Perms
+ }
+ d, err := nosql.NewDatabase(ctx, a, dbName, metadata, nosql.DatabaseOptions{
+ Perms: perms,
+ RootDir: rootDir,
+ Engine: engine,
+ })
+ if err != nil {
+ return err
+ }
+
+ // 3. Flip dbInfo.Initialized to true.
+ if err := store.RunInTransaction(a.s.st, func(tx store.Transaction) error {
+ return a.updateDbInfo(ctx, tx, dbName, func(info *dbInfo) error {
+ info.Initialized = true
+ return nil
+ })
+ }); err != nil {
+ return err
+ }
+
+ a.dbs[dbName] = d
+ return nil
+}
+
+func (a *app) DeleteNoSQLDatabase(ctx *context.T, call rpc.ServerCall, dbName string) error {
+ if !a.exists {
+ vlog.Fatalf("app %q does not exist", a.name)
+ }
+ // TODO(sadovsky): Crash if any step fails, and use WAL to ensure that if we
+ // crash, upon restart we execute any remaining steps before we start handling
+ // client requests.
+ //
+ // Steps:
+ // 1. Check databaseData perms.
+ // 2. Flip dbInfo.Deleted to true. <===== CHANGE BECOMES VISIBLE
+ // 3. Delete database.
+ // 4. Delete dbInfo record.
+ a.mu.Lock()
+ defer a.mu.Unlock()
+ d, ok := a.dbs[dbName]
+ if !ok {
+ return nil // delete is idempotent
+ }
+
+ // 1. Check databaseData perms.
+ if err := d.CheckPermsInternal(ctx, call, d.St()); err != nil {
+ if verror.ErrorID(err) == verror.ErrNoExist.ID {
+ return nil // delete is idempotent
+ }
+ return err
+ }
+
+ // 2. Flip dbInfo.Deleted to true.
+ if err := store.RunInTransaction(a.s.st, func(tx store.Transaction) error {
+ return a.updateDbInfo(ctx, tx, dbName, func(info *dbInfo) error {
+ info.Deleted = true
+ return nil
+ })
+ }); err != nil {
+ return err
+ }
+
+ // 3. Delete database.
+ if err := d.St().Close(); err != nil {
+ return err
+ }
+ if err := util.DestroyStore(a.s.opts.Engine, a.rootDirForDb(dbName)); err != nil {
+ return err
+ }
+
+ // 4. Delete dbInfo record.
+ if err := a.delDbInfo(ctx, a.s.st, dbName); err != nil {
+ return err
+ }
+
+ delete(a.dbs, dbName)
+ return nil
+}
+
+func (a *app) SetDatabasePerms(ctx *context.T, call rpc.ServerCall, dbName string, perms access.Permissions, version string) error {
+ if !a.exists {
+ vlog.Fatalf("app %q does not exist", a.name)
+ }
+ a.mu.Lock()
+ defer a.mu.Unlock()
+ d, ok := a.dbs[dbName]
+ if !ok {
+ return verror.New(verror.ErrNoExist, ctx, dbName)
+ }
+ return d.SetPermsInternal(ctx, call, perms, version)
+}
+
+func (a *app) Name() string {
+ return a.name
+}
+
+////////////////////////////////////////
+// Internal helpers
+
+func (a *app) stKey() string {
+ return util.JoinKeyParts(util.AppPrefix, a.stKeyPart())
+}
+
+func (a *app) stKeyPart() string {
+ return a.name
+}
+
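+// rootDirForDb returns the root directory for the database's storage engine,
+// i.e. <RootDir>/apps/<appName>/<dbName>.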
+func (a *app) rootDirForDb(dbName string) string {
+ return path.Join(a.s.opts.RootDir, "apps", a.name, dbName)
+}
diff --git a/services/syncbase/server/db_info.go b/services/syncbase/server/db_info.go
new file mode 100644
index 0000000..f750d57
--- /dev/null
+++ b/services/syncbase/server/db_info.go
@@ -0,0 +1,55 @@
+// Copyright 2015 The Vanadium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package server
+
+// This file defines internal app methods for manipulating dbInfo.
+// None of these methods perform authorization checks.
+//
+// These methods are needed because information about a database is spread
+// across two storage engines: the source of truth for the database's
+// existence (and for things like the database type) is the service-level
+// storage engine, while database permissions are tracked in the database's
+// own storage engine.
+
+import (
+ "v.io/syncbase/x/ref/services/syncbase/server/util"
+ "v.io/syncbase/x/ref/services/syncbase/store"
+ "v.io/v23/context"
+)
+
+func dbInfoStKey(a *app, dbName string) string {
+ return util.JoinKeyParts(util.DbInfoPrefix, a.stKeyPart(), dbName)
+}
+
+// getDbInfo reads data from the storage engine.
+func (a *app) getDbInfo(ctx *context.T, sntx store.SnapshotOrTransaction, dbName string) (*dbInfo, error) {
+ info := &dbInfo{}
+ if err := util.Get(ctx, sntx, dbInfoStKey(a, dbName), info); err != nil {
+ return nil, err
+ }
+ return info, nil
+}
+
+// putDbInfo writes data to the storage engine.
+func (a *app) putDbInfo(ctx *context.T, tx store.Transaction, dbName string, info *dbInfo) error {
+ return util.Put(ctx, tx, dbInfoStKey(a, dbName), info)
+}
+
+// delDbInfo deletes data from the storage engine.
+func (a *app) delDbInfo(ctx *context.T, stw store.StoreWriter, dbName string) error {
+ return util.Delete(ctx, stw, dbInfoStKey(a, dbName))
+}
+
+// updateDbInfo performs a read-modify-write. fn should modify the given info in place.
+func (a *app) updateDbInfo(ctx *context.T, tx store.Transaction, dbName string, fn func(info *dbInfo) error) error {
+ info, err := a.getDbInfo(ctx, tx, dbName)
+ if err != nil {
+ return err
+ }
+ if err := fn(info); err != nil {
+ return err
+ }
+ return a.putDbInfo(ctx, tx, dbName, info)
+}
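+
+// Hedged usage sketch (not referenced elsewhere in this package): flipping a
+// dbInfo flag inside a service-level transaction, mirroring the calls made
+// from app.go. The function name is illustrative.
+func exampleMarkInitialized(ctx *context.T, a *app, dbName string) error {
+ return store.RunInTransaction(a.s.st, func(tx store.Transaction) error {
+ return a.updateDbInfo(ctx, tx, dbName, func(info *dbInfo) error {
+ info.Initialized = true
+ return nil
+ })
+ })
+}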
diff --git a/services/syncbase/server/db_info_test.go b/services/syncbase/server/db_info_test.go
new file mode 100644
index 0000000..7bc0870
--- /dev/null
+++ b/services/syncbase/server/db_info_test.go
@@ -0,0 +1,25 @@
+// Copyright 2015 The Vanadium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package server
+
+import (
+ "testing"
+)
+
+func TestStKey(t *testing.T) {
+ tests := []struct {
+ appName string
+ dbName string
+ stKey string
+ }{
+ {"app1", "db1", "$dbInfo:app1:db1"},
+ }
+ for _, test := range tests {
+ got, want := dbInfoStKey(&app{name: test.appName}, test.dbName), test.stKey
+ if got != want {
+ t.Errorf("wrong stKey: got %q, want %q", got, want)
+ }
+ }
+}
diff --git a/services/syncbase/server/dispatcher.go b/services/syncbase/server/dispatcher.go
new file mode 100644
index 0000000..4b51a00
--- /dev/null
+++ b/services/syncbase/server/dispatcher.go
@@ -0,0 +1,85 @@
+// Copyright 2015 The Vanadium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package server
+
+import (
+ "strings"
+
+ wire "v.io/syncbase/v23/services/syncbase"
+ pubutil "v.io/syncbase/v23/syncbase/util"
+ "v.io/syncbase/x/ref/services/syncbase/server/interfaces"
+ "v.io/syncbase/x/ref/services/syncbase/server/nosql"
+ "v.io/syncbase/x/ref/services/syncbase/server/util"
+ "v.io/v23/context"
+ "v.io/v23/rpc"
+ "v.io/v23/security"
+ "v.io/v23/verror"
+)
+
+type dispatcher struct {
+ s *service
+}
+
+var _ rpc.Dispatcher = (*dispatcher)(nil)
+
+func NewDispatcher(s *service) *dispatcher {
+ return &dispatcher{s: s}
+}
+
+// We always return an AllowEveryone authorizer from Lookup(), and rely on our
+// RPC method implementations to perform proper authorization.
+var auth security.Authorizer = security.AllowEveryone()
+
+func (disp *dispatcher) Lookup(ctx *context.T, suffix string) (interface{}, security.Authorizer, error) {
+ suffix = strings.TrimPrefix(suffix, "/")
+ parts := strings.SplitN(suffix, "/", 2)
+
+ if len(suffix) == 0 {
+ return wire.ServiceServer(disp.s), auth, nil
+ }
+
+ if parts[0] == util.SyncbaseSuffix {
+ return interfaces.SyncServer(disp.s.sync), auth, nil
+ }
+
+ // Validate all key atoms up front, so that we can avoid doing so in all our
+ // method implementations.
+ appName := parts[0]
+ if !pubutil.ValidName(appName) {
+ return nil, nil, wire.NewErrInvalidName(nil, suffix)
+ }
+
+ aExists := false
+ var a *app
+ if aInt, err := disp.s.App(nil, nil, appName); err == nil {
+ a = aInt.(*app) // panics on failure, as desired
+ aExists = true
+ } else {
+ if verror.ErrorID(err) != verror.ErrNoExist.ID {
+ return nil, nil, err
+ } else {
+ a = &app{
+ name: appName,
+ s: disp.s,
+ }
+ }
+ }
+
+ if len(parts) == 1 {
+ return wire.AppServer(a), auth, nil
+ }
+
+ // All database, table, and row methods require the app to exist. If it
+ // doesn't, abort early.
+ if !aExists {
+ return nil, nil, verror.New(verror.ErrNoExist, nil, a.name)
+ }
+
+ // Note that the app may be deleted concurrently with downstream handling of
+ // this request. Depending on the order in which things execute, the client
+ // may not get an error, but in any case the store will ultimately end up in
+ // a consistent state.
+ return nosql.NewDispatcher(a).Lookup(ctx, parts[1])
+}
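+
+// Routing summary for Lookup. The suffixes "myapp" and "myapp/mydb/..." below
+// are hypothetical examples, not names defined in this package:
+//
+//   ""                  -> wire.ServiceServer(disp.s)
+//   util.SyncbaseSuffix -> interfaces.SyncServer(disp.s.sync)
+//   "myapp"             -> wire.AppServer(a), even if the app does not yet exist
+//   "myapp/mydb/..."    -> nosql.NewDispatcher(a).Lookup; the app must exist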
diff --git a/services/syncbase/server/interfaces/app.go b/services/syncbase/server/interfaces/app.go
new file mode 100644
index 0000000..e990b29
--- /dev/null
+++ b/services/syncbase/server/interfaces/app.go
@@ -0,0 +1,36 @@
+// Copyright 2015 The Vanadium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package interfaces
+
+import (
+ wire "v.io/syncbase/v23/services/syncbase/nosql"
+ "v.io/v23/context"
+ "v.io/v23/rpc"
+ "v.io/v23/security/access"
+)
+
+// App is an internal interface to the app layer.
+type App interface {
+ // Service returns the service handle for this app.
+ Service() Service
+
+ // NoSQLDatabase returns the Database for the specified NoSQL database.
+ NoSQLDatabase(ctx *context.T, call rpc.ServerCall, dbName string) (Database, error)
+
+ // NoSQLDatabaseNames returns the names of the NoSQL databases within the App.
+ NoSQLDatabaseNames(ctx *context.T, call rpc.ServerCall) ([]string, error)
+
+ // CreateNoSQLDatabase creates the specified NoSQL database.
+ CreateNoSQLDatabase(ctx *context.T, call rpc.ServerCall, dbName string, perms access.Permissions, metadata *wire.SchemaMetadata) error
+
+ // DeleteNoSQLDatabase deletes the specified NoSQL database.
+ DeleteNoSQLDatabase(ctx *context.T, call rpc.ServerCall, dbName string) error
+
+ // SetDatabasePerms sets the perms for the specified database.
+ SetDatabasePerms(ctx *context.T, call rpc.ServerCall, dbName string, perms access.Permissions, version string) error
+
+ // Name returns the name of this app.
+ Name() string
+}
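+
+// A minimal usage sketch of the App interface. The function name and the use
+// of nil schema metadata are illustrative only; nothing in this package
+// prescribes this exact sequence.
+func exampleDatabaseLifecycle(ctx *context.T, call rpc.ServerCall, a App, perms access.Permissions) error {
+ // Create a database, passing nil schema metadata for brevity.
+ if err := a.CreateNoSQLDatabase(ctx, call, "db1", perms, nil); err != nil {
+ return err
+ }
+ // List database names; "db1" should now be included.
+ if _, err := a.NoSQLDatabaseNames(ctx, call); err != nil {
+ return err
+ }
+ // Delete the database; the server implementation treats delete as idempotent.
+ return a.DeleteNoSQLDatabase(ctx, call, "db1")
+}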
diff --git a/services/syncbase/server/interfaces/database.go b/services/syncbase/server/interfaces/database.go
new file mode 100644
index 0000000..8be30d8
--- /dev/null
+++ b/services/syncbase/server/interfaces/database.go
@@ -0,0 +1,33 @@
+// Copyright 2015 The Vanadium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package interfaces
+
+import (
+ "v.io/syncbase/x/ref/services/syncbase/store"
+ "v.io/v23/context"
+ "v.io/v23/rpc"
+ "v.io/v23/security/access"
+)
+
+// Database is an internal interface to the database layer.
+type Database interface {
+ // St returns the storage engine instance for this database.
+ St() store.Store
+
+ // App returns the app handle for this database.
+ App() App
+
+ // CheckPermsInternal checks whether the given RPC (ctx, call) is allowed per
+ // the database perms.
+ // Designed for use from within App.DeleteNoSQLDatabase.
+ CheckPermsInternal(ctx *context.T, call rpc.ServerCall, st store.StoreReader) error
+
+ // SetPermsInternal updates the database perms.
+ // Designed for use from within App.SetDatabasePerms.
+ SetPermsInternal(ctx *context.T, call rpc.ServerCall, perms access.Permissions, version string) error
+
+ // Name returns the name of this database.
+ Name() string
+}
diff --git a/services/syncbase/server/interfaces/doc.go b/services/syncbase/server/interfaces/doc.go
new file mode 100644
index 0000000..384f2f7
--- /dev/null
+++ b/services/syncbase/server/interfaces/doc.go
@@ -0,0 +1,10 @@
+// Copyright 2015 The Vanadium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// Package interfaces defines internal interfaces for various objects in the
+// Syncbase server implementation. Defining these interfaces in a separate
+// package helps prevent import cycles: all other packages can import the
+// interfaces package, and individual modules can pass each other interfaces to
+// enable bidirectional cross-package communication.
+package interfaces
diff --git a/services/syncbase/server/interfaces/service.go b/services/syncbase/server/interfaces/service.go
new file mode 100644
index 0000000..ce665e2
--- /dev/null
+++ b/services/syncbase/server/interfaces/service.go
@@ -0,0 +1,26 @@
+// Copyright 2015 The Vanadium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package interfaces
+
+import (
+ "v.io/syncbase/x/ref/services/syncbase/store"
+ "v.io/v23/context"
+ "v.io/v23/rpc"
+)
+
+// Service is an internal interface to the service layer.
+type Service interface {
+ // St returns the storage engine instance for this service.
+ St() store.Store
+
+ // Sync returns the sync instance for this service.
+ Sync() SyncServerMethods
+
+ // App returns the App with the specified name.
+ App(ctx *context.T, call rpc.ServerCall, appName string) (App, error)
+
+ // AppNames returns the names of the Apps within the service.
+ AppNames(ctx *context.T, call rpc.ServerCall) ([]string, error)
+}
diff --git a/services/syncbase/server/interfaces/sync.vdl b/services/syncbase/server/interfaces/sync.vdl
new file mode 100644
index 0000000..b97e845
--- /dev/null
+++ b/services/syncbase/server/interfaces/sync.vdl
@@ -0,0 +1,59 @@
+// Copyright 2015 The Vanadium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package interfaces
+
+import (
+ wire "v.io/syncbase/v23/services/syncbase/nosql"
+ "v.io/v23/security/access"
+)
+
+// Sync defines methods for data exchange between Syncbases.
+// TODO(hpucha): Flesh this out further.
+type Sync interface {
+ // GetDeltas returns the responder's current generation vector and all
+ // the missing log records when compared to the initiator's generation
+ // vector. This process happens one Database at a time encompassing all
+ // the SyncGroups common to the initiator and the responder. For each
+ // Database, the initiator sends a DeltaReq. In response, the
+ // responder sends a "Start" DeltaResp record, all the missing log
+ // records, the responder's genvector, and a "Finish" DeltaResp
+ // record. The initiator parses the stream between a Start and a Finish
+ // record as the response to its DeltaReq, and then moves on to the
+ // next Database in common with this responder.
+ GetDeltas(initiator string) stream<DeltaReq, DeltaResp> error {access.Read}
+
+ // SyncGroup-related methods.
+
+ // PublishSyncGroup is typically invoked on a "central" peer to publish
+ // the SyncGroup.
+ PublishSyncGroup(sg SyncGroup) error {access.Write}
+
+ // JoinSyncGroupAtAdmin is invoked by a prospective SyncGroup member's
+ // Syncbase on a SyncGroup admin. It checks whether the requestor is
+ // allowed to join the named SyncGroup, and if so, adds the requestor to
+ // the SyncGroup.
+ JoinSyncGroupAtAdmin(sgName, joinerName string, myInfo wire.SyncGroupMemberInfo) (SyncGroup | error) {access.Read}
+
+ // BlobSync methods.
+
+ // HaveBlob verifies that the peer has the requested blob, and if
+ // present, returns its size.
+ HaveBlob(br wire.BlobRef) (int64 | error)
+
+ // FetchBlob fetches the requested blob.
+ FetchBlob(br wire.BlobRef) stream<_, []byte> error
+
+ // Methods for incremental blob transfer. The transfer starts with the
+ // receiver making a FetchBlobRecipe call to the sender for a given
+ // BlobRef. The sender, in turn, sends the chunk hashes of all the
+ // chunks that make up the requested blob (blob recipe). The receiver
+ // looks up the chunk hashes in its local blob store, and identifies the
+ // missing ones. The receiver then fetches the missing chunks using a
+ // FetchChunks call from the sender. Finally, the receiver finishes the
+ // blob fetch by combining the chunks obtained over the network with the
+ // already available local chunks as per the blob recipe.
+ FetchBlobRecipe(br wire.BlobRef) stream<_, ChunkHash> error
+ FetchChunks() stream<ChunkHash, ChunkData> error
+}
diff --git a/services/syncbase/server/interfaces/sync.vdl.go b/services/syncbase/server/interfaces/sync.vdl.go
new file mode 100644
index 0000000..9cd1383
--- /dev/null
+++ b/services/syncbase/server/interfaces/sync.vdl.go
@@ -0,0 +1,946 @@
+// Copyright 2015 The Vanadium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// This file was auto-generated by the vanadium vdl tool.
+// Source: sync.vdl
+
+package interfaces
+
+import (
+ // VDL system imports
+ "io"
+ "v.io/v23"
+ "v.io/v23/context"
+ "v.io/v23/rpc"
+ "v.io/v23/vdl"
+
+ // VDL user imports
+ "v.io/syncbase/v23/services/syncbase/nosql"
+ "v.io/v23/security/access"
+)
+
+// SyncClientMethods is the client interface
+// containing Sync methods.
+//
+// Sync defines methods for data exchange between Syncbases.
+// TODO(hpucha): Flesh this out further.
+type SyncClientMethods interface {
+ // GetDeltas returns the responder's current generation vector and all
+ // the missing log records when compared to the initiator's generation
+ // vector. This process happens one Database at a time encompassing all
+ // the SyncGroups common to the initiator and the responder. For each
+ // Database, the initiator sends a DeltaReq. In response, the
+ // responder sends a "Start" DeltaResp record, all the missing log
+ // records, the responder's genvector, and a "Finish" DeltaResp
+ // record. The initiator parses the stream between a Start and a Finish
+ // record as the response to its DeltaReq, and then moves on to the
+ // next Database in common with this responder.
+ GetDeltas(ctx *context.T, initiator string, opts ...rpc.CallOpt) (SyncGetDeltasClientCall, error)
+ // PublishSyncGroup is typically invoked on a "central" peer to publish
+ // the SyncGroup.
+ PublishSyncGroup(ctx *context.T, sg SyncGroup, opts ...rpc.CallOpt) error
+ // JoinSyncGroupAtAdmin is invoked by a prospective SyncGroup member's
+ // Syncbase on a SyncGroup admin. It checks whether the requestor is
+ // allowed to join the named SyncGroup, and if so, adds the requestor to
+ // the SyncGroup.
+ JoinSyncGroupAtAdmin(ctx *context.T, sgName string, joinerName string, myInfo nosql.SyncGroupMemberInfo, opts ...rpc.CallOpt) (SyncGroup, error)
+ // HaveBlob verifies that the peer has the requested blob, and if
+ // present, returns its size.
+ HaveBlob(ctx *context.T, br nosql.BlobRef, opts ...rpc.CallOpt) (int64, error)
+ // FetchBlob fetches the requested blob.
+ FetchBlob(ctx *context.T, br nosql.BlobRef, opts ...rpc.CallOpt) (SyncFetchBlobClientCall, error)
+ // Methods for incremental blob transfer. The transfer starts with the
+ // receiver making a FetchBlobRecipe call to the sender for a given
+ // BlobRef. The sender, in turn, sends the chunk hashes of all the
+ // chunks that make up the requested blob (blob recipe). The receiver
+ // looks up the chunk hashes in its local blob store, and identifies the
+ // missing ones. The receiver then fetches the missing chunks using a
+ // FetchChunks call from the sender. Finally, the receiver finishes the
+ // blob fetch by combining the chunks obtained over the network with the
+ // already available local chunks as per the blob recipe.
+ FetchBlobRecipe(ctx *context.T, br nosql.BlobRef, opts ...rpc.CallOpt) (SyncFetchBlobRecipeClientCall, error)
+ FetchChunks(*context.T, ...rpc.CallOpt) (SyncFetchChunksClientCall, error)
+}
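+
+// Hedged client-side sketch of the GetDeltas exchange described above. The
+// function name, the way DeltaReq values are obtained, and the handling of
+// DeltaResp values are all illustrative.
+func exampleGetDeltas(ctx *context.T, name, initiator string, reqs []DeltaReq) error {
+ call, err := SyncClient(name).GetDeltas(ctx, initiator)
+ if err != nil {
+ return err
+ }
+ // Send one DeltaReq per Database of interest, then close the send side.
+ for _, req := range reqs {
+ if err := call.SendStream().Send(req); err != nil {
+ return err
+ }
+ }
+ if err := call.SendStream().Close(); err != nil {
+ return err
+ }
+ // Drain the response stream. A real initiator would interpret the "Start"
+ // and "Finish" DeltaResp records per Database; that handling is elided.
+ for call.RecvStream().Advance() {
+ _ = call.RecvStream().Value()
+ }
+ if err := call.RecvStream().Err(); err != nil {
+ return err
+ }
+ return call.Finish()
+}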
+
+// SyncClientStub adds universal methods to SyncClientMethods.
+type SyncClientStub interface {
+ SyncClientMethods
+ rpc.UniversalServiceMethods
+}
+
+// SyncClient returns a client stub for Sync.
+func SyncClient(name string) SyncClientStub {
+ return implSyncClientStub{name}
+}
+
+type implSyncClientStub struct {
+ name string
+}
+
+func (c implSyncClientStub) GetDeltas(ctx *context.T, i0 string, opts ...rpc.CallOpt) (ocall SyncGetDeltasClientCall, err error) {
+ var call rpc.ClientCall
+ if call, err = v23.GetClient(ctx).StartCall(ctx, c.name, "GetDeltas", []interface{}{i0}, opts...); err != nil {
+ return
+ }
+ ocall = &implSyncGetDeltasClientCall{ClientCall: call}
+ return
+}
+
+func (c implSyncClientStub) PublishSyncGroup(ctx *context.T, i0 SyncGroup, opts ...rpc.CallOpt) (err error) {
+ err = v23.GetClient(ctx).Call(ctx, c.name, "PublishSyncGroup", []interface{}{i0}, nil, opts...)
+ return
+}
+
+func (c implSyncClientStub) JoinSyncGroupAtAdmin(ctx *context.T, i0 string, i1 string, i2 nosql.SyncGroupMemberInfo, opts ...rpc.CallOpt) (o0 SyncGroup, err error) {
+ err = v23.GetClient(ctx).Call(ctx, c.name, "JoinSyncGroupAtAdmin", []interface{}{i0, i1, i2}, []interface{}{&o0}, opts...)
+ return
+}
+
+func (c implSyncClientStub) HaveBlob(ctx *context.T, i0 nosql.BlobRef, opts ...rpc.CallOpt) (o0 int64, err error) {
+ err = v23.GetClient(ctx).Call(ctx, c.name, "HaveBlob", []interface{}{i0}, []interface{}{&o0}, opts...)
+ return
+}
+
+func (c implSyncClientStub) FetchBlob(ctx *context.T, i0 nosql.BlobRef, opts ...rpc.CallOpt) (ocall SyncFetchBlobClientCall, err error) {
+ var call rpc.ClientCall
+ if call, err = v23.GetClient(ctx).StartCall(ctx, c.name, "FetchBlob", []interface{}{i0}, opts...); err != nil {
+ return
+ }
+ ocall = &implSyncFetchBlobClientCall{ClientCall: call}
+ return
+}
+
+func (c implSyncClientStub) FetchBlobRecipe(ctx *context.T, i0 nosql.BlobRef, opts ...rpc.CallOpt) (ocall SyncFetchBlobRecipeClientCall, err error) {
+ var call rpc.ClientCall
+ if call, err = v23.GetClient(ctx).StartCall(ctx, c.name, "FetchBlobRecipe", []interface{}{i0}, opts...); err != nil {
+ return
+ }
+ ocall = &implSyncFetchBlobRecipeClientCall{ClientCall: call}
+ return
+}
+
+func (c implSyncClientStub) FetchChunks(ctx *context.T, opts ...rpc.CallOpt) (ocall SyncFetchChunksClientCall, err error) {
+ var call rpc.ClientCall
+ if call, err = v23.GetClient(ctx).StartCall(ctx, c.name, "FetchChunks", nil, opts...); err != nil {
+ return
+ }
+ ocall = &implSyncFetchChunksClientCall{ClientCall: call}
+ return
+}
+
+// SyncGetDeltasClientStream is the client stream for Sync.GetDeltas.
+type SyncGetDeltasClientStream interface {
+ // RecvStream returns the receiver side of the Sync.GetDeltas client stream.
+ RecvStream() interface {
+ // Advance stages an item so that it may be retrieved via Value. Returns
+ // true iff there is an item to retrieve. Advance must be called before
+ // Value is called. May block if an item is not available.
+ Advance() bool
+ // Value returns the item that was staged by Advance. May panic if Advance
+ // returned false or was not called. Never blocks.
+ Value() DeltaResp
+ // Err returns any error encountered by Advance. Never blocks.
+ Err() error
+ }
+ // SendStream returns the send side of the Sync.GetDeltas client stream.
+ SendStream() interface {
+ // Send places the item onto the output stream. Returns errors
+ // encountered while sending, or if Send is called after Close or
+ // the stream has been canceled. Blocks if there is no buffer
+ // space; will unblock when buffer space is available or after
+ // the stream has been canceled.
+ Send(item DeltaReq) error
+ // Close indicates to the server that no more items will be sent;
+ // server Recv calls will receive io.EOF after all sent items.
+ // This is an optional call - e.g. a client might call Close if it
+ // needs to continue receiving items from the server after it's
+ // done sending. Returns errors encountered while closing, or if
+ // Close is called after the stream has been canceled. Like Send,
+ // blocks if there is no buffer space available.
+ Close() error
+ }
+}
+
+// SyncGetDeltasClientCall represents the call returned from Sync.GetDeltas.
+type SyncGetDeltasClientCall interface {
+ SyncGetDeltasClientStream
+ // Finish performs the equivalent of SendStream().Close, then blocks until
+ // the server is done, and returns the positional return values for the call.
+ //
+ // Finish returns immediately if the call has been canceled; depending on the
+ // timing the output could either be an error signaling cancelation, or the
+ // valid positional return values from the server.
+ //
+ // Calling Finish is mandatory for releasing stream resources, unless the call
+ // has been canceled or any of the other methods return an error. Finish should
+ // be called at most once.
+ Finish() error
+}
+
+type implSyncGetDeltasClientCall struct {
+ rpc.ClientCall
+ valRecv DeltaResp
+ errRecv error
+}
+
+func (c *implSyncGetDeltasClientCall) RecvStream() interface {
+ Advance() bool
+ Value() DeltaResp
+ Err() error
+} {
+ return implSyncGetDeltasClientCallRecv{c}
+}
+
+type implSyncGetDeltasClientCallRecv struct {
+ c *implSyncGetDeltasClientCall
+}
+
+func (c implSyncGetDeltasClientCallRecv) Advance() bool {
+ c.c.errRecv = c.c.Recv(&c.c.valRecv)
+ return c.c.errRecv == nil
+}
+func (c implSyncGetDeltasClientCallRecv) Value() DeltaResp {
+ return c.c.valRecv
+}
+func (c implSyncGetDeltasClientCallRecv) Err() error {
+ if c.c.errRecv == io.EOF {
+ return nil
+ }
+ return c.c.errRecv
+}
+func (c *implSyncGetDeltasClientCall) SendStream() interface {
+ Send(item DeltaReq) error
+ Close() error
+} {
+ return implSyncGetDeltasClientCallSend{c}
+}
+
+type implSyncGetDeltasClientCallSend struct {
+ c *implSyncGetDeltasClientCall
+}
+
+func (c implSyncGetDeltasClientCallSend) Send(item DeltaReq) error {
+ return c.c.Send(item)
+}
+func (c implSyncGetDeltasClientCallSend) Close() error {
+ return c.c.CloseSend()
+}
+func (c *implSyncGetDeltasClientCall) Finish() (err error) {
+ err = c.ClientCall.Finish()
+ return
+}
+
+// SyncFetchBlobClientStream is the client stream for Sync.FetchBlob.
+type SyncFetchBlobClientStream interface {
+ // RecvStream returns the receiver side of the Sync.FetchBlob client stream.
+ RecvStream() interface {
+ // Advance stages an item so that it may be retrieved via Value. Returns
+ // true iff there is an item to retrieve. Advance must be called before
+ // Value is called. May block if an item is not available.
+ Advance() bool
+ // Value returns the item that was staged by Advance. May panic if Advance
+ // returned false or was not called. Never blocks.
+ Value() []byte
+ // Err returns any error encountered by Advance. Never blocks.
+ Err() error
+ }
+}
+
+// SyncFetchBlobClientCall represents the call returned from Sync.FetchBlob.
+type SyncFetchBlobClientCall interface {
+ SyncFetchBlobClientStream
+ // Finish blocks until the server is done, and returns the positional return
+ // values for call.
+ //
+ // Finish returns immediately if the call has been canceled; depending on the
+ // timing the output could either be an error signaling cancelation, or the
+ // valid positional return values from the server.
+ //
+ // Calling Finish is mandatory for releasing stream resources, unless the call
+ // has been canceled or any of the other methods return an error. Finish should
+ // be called at most once.
+ Finish() error
+}
+
+type implSyncFetchBlobClientCall struct {
+ rpc.ClientCall
+ valRecv []byte
+ errRecv error
+}
+
+func (c *implSyncFetchBlobClientCall) RecvStream() interface {
+ Advance() bool
+ Value() []byte
+ Err() error
+} {
+ return implSyncFetchBlobClientCallRecv{c}
+}
+
+type implSyncFetchBlobClientCallRecv struct {
+ c *implSyncFetchBlobClientCall
+}
+
+func (c implSyncFetchBlobClientCallRecv) Advance() bool {
+ c.c.errRecv = c.c.Recv(&c.c.valRecv)
+ return c.c.errRecv == nil
+}
+func (c implSyncFetchBlobClientCallRecv) Value() []byte {
+ return c.c.valRecv
+}
+func (c implSyncFetchBlobClientCallRecv) Err() error {
+ if c.c.errRecv == io.EOF {
+ return nil
+ }
+ return c.c.errRecv
+}
+func (c *implSyncFetchBlobClientCall) Finish() (err error) {
+ err = c.ClientCall.Finish()
+ return
+}
+
+// SyncFetchBlobRecipeClientStream is the client stream for Sync.FetchBlobRecipe.
+type SyncFetchBlobRecipeClientStream interface {
+ // RecvStream returns the receiver side of the Sync.FetchBlobRecipe client stream.
+ RecvStream() interface {
+ // Advance stages an item so that it may be retrieved via Value. Returns
+ // true iff there is an item to retrieve. Advance must be called before
+ // Value is called. May block if an item is not available.
+ Advance() bool
+ // Value returns the item that was staged by Advance. May panic if Advance
+ // returned false or was not called. Never blocks.
+ Value() ChunkHash
+ // Err returns any error encountered by Advance. Never blocks.
+ Err() error
+ }
+}
+
+// SyncFetchBlobRecipeClientCall represents the call returned from Sync.FetchBlobRecipe.
+type SyncFetchBlobRecipeClientCall interface {
+ SyncFetchBlobRecipeClientStream
+ // Finish blocks until the server is done, and returns the positional return
+ // values for call.
+ //
+ // Finish returns immediately if the call has been canceled; depending on the
+ // timing the output could either be an error signaling cancelation, or the
+ // valid positional return values from the server.
+ //
+ // Calling Finish is mandatory for releasing stream resources, unless the call
+ // has been canceled or any of the other methods return an error. Finish should
+ // be called at most once.
+ Finish() error
+}
+
+type implSyncFetchBlobRecipeClientCall struct {
+ rpc.ClientCall
+ valRecv ChunkHash
+ errRecv error
+}
+
+func (c *implSyncFetchBlobRecipeClientCall) RecvStream() interface {
+ Advance() bool
+ Value() ChunkHash
+ Err() error
+} {
+ return implSyncFetchBlobRecipeClientCallRecv{c}
+}
+
+type implSyncFetchBlobRecipeClientCallRecv struct {
+ c *implSyncFetchBlobRecipeClientCall
+}
+
+func (c implSyncFetchBlobRecipeClientCallRecv) Advance() bool {
+ c.c.valRecv = ChunkHash{}
+ c.c.errRecv = c.c.Recv(&c.c.valRecv)
+ return c.c.errRecv == nil
+}
+func (c implSyncFetchBlobRecipeClientCallRecv) Value() ChunkHash {
+ return c.c.valRecv
+}
+func (c implSyncFetchBlobRecipeClientCallRecv) Err() error {
+ if c.c.errRecv == io.EOF {
+ return nil
+ }
+ return c.c.errRecv
+}
+func (c *implSyncFetchBlobRecipeClientCall) Finish() (err error) {
+ err = c.ClientCall.Finish()
+ return
+}
+
+// SyncFetchChunksClientStream is the client stream for Sync.FetchChunks.
+type SyncFetchChunksClientStream interface {
+ // RecvStream returns the receiver side of the Sync.FetchChunks client stream.
+ RecvStream() interface {
+ // Advance stages an item so that it may be retrieved via Value. Returns
+ // true iff there is an item to retrieve. Advance must be called before
+ // Value is called. May block if an item is not available.
+ Advance() bool
+ // Value returns the item that was staged by Advance. May panic if Advance
+ // returned false or was not called. Never blocks.
+ Value() ChunkData
+ // Err returns any error encountered by Advance. Never blocks.
+ Err() error
+ }
+ // SendStream returns the send side of the Sync.FetchChunks client stream.
+ SendStream() interface {
+ // Send places the item onto the output stream. Returns errors
+ // encountered while sending, or if Send is called after Close or
+ // the stream has been canceled. Blocks if there is no buffer
+ // space; will unblock when buffer space is available or after
+ // the stream has been canceled.
+ Send(item ChunkHash) error
+ // Close indicates to the server that no more items will be sent;
+ // server Recv calls will receive io.EOF after all sent items.
+ // This is an optional call - e.g. a client might call Close if it
+ // needs to continue receiving items from the server after it's
+ // done sending. Returns errors encountered while closing, or if
+ // Close is called after the stream has been canceled. Like Send,
+ // blocks if there is no buffer space available.
+ Close() error
+ }
+}
+
+// SyncFetchChunksClientCall represents the call returned from Sync.FetchChunks.
+type SyncFetchChunksClientCall interface {
+ SyncFetchChunksClientStream
+ // Finish performs the equivalent of SendStream().Close, then blocks until
+ // the server is done, and returns the positional return values for the call.
+ //
+ // Finish returns immediately if the call has been canceled; depending on the
+ // timing the output could either be an error signaling cancelation, or the
+ // valid positional return values from the server.
+ //
+ // Calling Finish is mandatory for releasing stream resources, unless the call
+ // has been canceled or any of the other methods return an error. Finish should
+ // be called at most once.
+ Finish() error
+}
+
+type implSyncFetchChunksClientCall struct {
+ rpc.ClientCall
+ valRecv ChunkData
+ errRecv error
+}
+
+func (c *implSyncFetchChunksClientCall) RecvStream() interface {
+ Advance() bool
+ Value() ChunkData
+ Err() error
+} {
+ return implSyncFetchChunksClientCallRecv{c}
+}
+
+type implSyncFetchChunksClientCallRecv struct {
+ c *implSyncFetchChunksClientCall
+}
+
+func (c implSyncFetchChunksClientCallRecv) Advance() bool {
+ c.c.valRecv = ChunkData{}
+ c.c.errRecv = c.c.Recv(&c.c.valRecv)
+ return c.c.errRecv == nil
+}
+func (c implSyncFetchChunksClientCallRecv) Value() ChunkData {
+ return c.c.valRecv
+}
+func (c implSyncFetchChunksClientCallRecv) Err() error {
+ if c.c.errRecv == io.EOF {
+ return nil
+ }
+ return c.c.errRecv
+}
+func (c *implSyncFetchChunksClientCall) SendStream() interface {
+ Send(item ChunkHash) error
+ Close() error
+} {
+ return implSyncFetchChunksClientCallSend{c}
+}
+
+type implSyncFetchChunksClientCallSend struct {
+ c *implSyncFetchChunksClientCall
+}
+
+func (c implSyncFetchChunksClientCallSend) Send(item ChunkHash) error {
+ return c.c.Send(item)
+}
+func (c implSyncFetchChunksClientCallSend) Close() error {
+ return c.c.CloseSend()
+}
+func (c *implSyncFetchChunksClientCall) Finish() (err error) {
+ err = c.ClientCall.Finish()
+ return
+}
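+
+// Hedged receiver-side sketch of the incremental blob transfer described in
+// the FetchBlobRecipe and FetchChunks comments. The function name and the
+// local chunk lookup are illustrative; error handling is abbreviated.
+func exampleBlobTransfer(ctx *context.T, name string, br nosql.BlobRef) error {
+ // 1. Ask the sender for the blob recipe (the chunk hashes making up the blob).
+ recipeCall, err := SyncClient(name).FetchBlobRecipe(ctx, br)
+ if err != nil {
+ return err
+ }
+ var missing []ChunkHash
+ for recipeCall.RecvStream().Advance() {
+ // A real implementation would consult the local blob store here and
+ // only record the chunks that are not already available.
+ missing = append(missing, recipeCall.RecvStream().Value())
+ }
+ if err := recipeCall.RecvStream().Err(); err != nil {
+ return err
+ }
+ if err := recipeCall.Finish(); err != nil {
+ return err
+ }
+ // 2. Fetch the missing chunks and combine them with local chunks per the recipe.
+ chunksCall, err := SyncClient(name).FetchChunks(ctx)
+ if err != nil {
+ return err
+ }
+ for _, h := range missing {
+ if err := chunksCall.SendStream().Send(h); err != nil {
+ return err
+ }
+ }
+ if err := chunksCall.SendStream().Close(); err != nil {
+ return err
+ }
+ for chunksCall.RecvStream().Advance() {
+ _ = chunksCall.RecvStream().Value() // storing ChunkData locally is elided
+ }
+ if err := chunksCall.RecvStream().Err(); err != nil {
+ return err
+ }
+ return chunksCall.Finish()
+}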
+
+// SyncServerMethods is the interface a server writer
+// implements for Sync.
+//
+// Sync defines methods for data exchange between Syncbases.
+// TODO(hpucha): Flesh this out further.
+type SyncServerMethods interface {
+ // GetDeltas returns the responder's current generation vector and all
+ // the missing log records when compared to the initiator's generation
+ // vector. This process happens one Database at a time encompassing all
+ // the SyncGroups common to the initiator and the responder. For each
+ // Database, the initiator sends a DeltaReq. In response, the
+ // responder sends a "Start" DeltaResp record, all the missing log
+ // records, the responder's genvector, and a "Finish" DeltaResp
+ // record. The initiator parses the stream between a Start and a Finish
+ // record as the response to its DeltaReq, and then moves on to the
+ // next Database in common with this responder.
+ GetDeltas(ctx *context.T, call SyncGetDeltasServerCall, initiator string) error
+ // PublishSyncGroup is typically invoked on a "central" peer to publish
+ // the SyncGroup.
+ PublishSyncGroup(ctx *context.T, call rpc.ServerCall, sg SyncGroup) error
+ // JoinSyncGroupAtAdmin is invoked by a prospective SyncGroup member's
+ // Syncbase on a SyncGroup admin. It checks whether the requestor is
+ // allowed to join the named SyncGroup, and if so, adds the requestor to
+ // the SyncGroup.
+ JoinSyncGroupAtAdmin(ctx *context.T, call rpc.ServerCall, sgName string, joinerName string, myInfo nosql.SyncGroupMemberInfo) (SyncGroup, error)
+ // HaveBlob verifies that the peer has the requested blob, and if
+ // present, returns its size.
+ HaveBlob(ctx *context.T, call rpc.ServerCall, br nosql.BlobRef) (int64, error)
+ // FetchBlob fetches the requested blob.
+ FetchBlob(ctx *context.T, call SyncFetchBlobServerCall, br nosql.BlobRef) error
+ // Methods for incremental blob transfer. The transfer starts with the
+ // receiver making a FetchBlobRecipe call to the sender for a given
+ // BlobRef. The sender, in turn, sends the chunk hashes of all the
+ // chunks that make up the requested blob (blob recipe). The receiver
+ // looks up the chunk hashes in its local blob store, and identifies the
+ // missing ones. The receiver then fetches the missing chunks using a
+ // FetchChunks call from the sender. Finally, the receiver finishes the
+ // blob fetch by combining the chunks obtained over the network with the
+ // already available local chunks as per the blob recipe.
+ FetchBlobRecipe(ctx *context.T, call SyncFetchBlobRecipeServerCall, br nosql.BlobRef) error
+ FetchChunks(*context.T, SyncFetchChunksServerCall) error
+}
+
+// SyncServerStubMethods is the server interface containing
+// Sync methods, as expected by rpc.Server.
+// The only difference between this interface and SyncServerMethods
+// is the streaming methods.
+type SyncServerStubMethods interface {
+ // GetDeltas returns the responder's current generation vector and all
+ // the missing log records when compared to the initiator's generation
+ // vector. This process happens one Database at a time encompassing all
+ // the SyncGroups common to the initiator and the responder. For each
+ // Database, the initiator sends a DeltaReq. In response, the
+ // responder sends a "Start" DeltaResp record, all the missing log
+ // records, the responder's genvector, and a "Finish" DeltaResp
+ // record. The initiator parses the stream between a Start and a Finish
+ // record as the response to its DeltaReq, and then moves on to the
+ // next Database in common with this responder.
+ GetDeltas(ctx *context.T, call *SyncGetDeltasServerCallStub, initiator string) error
+ // PublishSyncGroup is typically invoked on a "central" peer to publish
+ // the SyncGroup.
+ PublishSyncGroup(ctx *context.T, call rpc.ServerCall, sg SyncGroup) error
+ // JoinSyncGroupAtAdmin is invoked by a prospective SyncGroup member's
+ // Syncbase on a SyncGroup admin. It checks whether the requestor is
+ // allowed to join the named SyncGroup, and if so, adds the requestor to
+ // the SyncGroup.
+ JoinSyncGroupAtAdmin(ctx *context.T, call rpc.ServerCall, sgName string, joinerName string, myInfo nosql.SyncGroupMemberInfo) (SyncGroup, error)
+ // HaveBlob verifies that the peer has the requested blob, and if
+ // present, returns its size.
+ HaveBlob(ctx *context.T, call rpc.ServerCall, br nosql.BlobRef) (int64, error)
+ // FetchBlob fetches the requested blob.
+ FetchBlob(ctx *context.T, call *SyncFetchBlobServerCallStub, br nosql.BlobRef) error
+ // Methods for incremental blob transfer. The transfer starts with the
+ // receiver making a FetchBlobRecipe call to the sender for a given
+ // BlobRef. The sender, in turn, sends the chunk hashes of all the
+ // chunks that make up the requested blob (blob recipe). The receiver
+ // looks up the chunk hashes in its local blob store, and identifies the
+ // missing ones. The receiver then fetches the missing chunks using a
+ // FetchChunks call from the sender. Finally, the receiver finishes the
+ // blob fetch by combining the chunks obtained over the network with the
+ // already available local chunks as per the blob recipe.
+ FetchBlobRecipe(ctx *context.T, call *SyncFetchBlobRecipeServerCallStub, br nosql.BlobRef) error
+ FetchChunks(*context.T, *SyncFetchChunksServerCallStub) error
+}
+
+// SyncServerStub adds universal methods to SyncServerStubMethods.
+type SyncServerStub interface {
+ SyncServerStubMethods
+ // Describe the Sync interfaces.
+ Describe__() []rpc.InterfaceDesc
+}
+
+// SyncServer returns a server stub for Sync.
+// It converts an implementation of SyncServerMethods into
+// an object that may be used by rpc.Server.
+func SyncServer(impl SyncServerMethods) SyncServerStub {
+ stub := implSyncServerStub{
+ impl: impl,
+ }
+ // Initialize GlobState; always check the stub itself first, to handle the
+ // case where the user has the Glob method defined in their VDL source.
+ if gs := rpc.NewGlobState(stub); gs != nil {
+ stub.gs = gs
+ } else if gs := rpc.NewGlobState(impl); gs != nil {
+ stub.gs = gs
+ }
+ return stub
+}
+
+type implSyncServerStub struct {
+ impl SyncServerMethods
+ gs *rpc.GlobState
+}
+
+func (s implSyncServerStub) GetDeltas(ctx *context.T, call *SyncGetDeltasServerCallStub, i0 string) error {
+ return s.impl.GetDeltas(ctx, call, i0)
+}
+
+func (s implSyncServerStub) PublishSyncGroup(ctx *context.T, call rpc.ServerCall, i0 SyncGroup) error {
+ return s.impl.PublishSyncGroup(ctx, call, i0)
+}
+
+func (s implSyncServerStub) JoinSyncGroupAtAdmin(ctx *context.T, call rpc.ServerCall, i0 string, i1 string, i2 nosql.SyncGroupMemberInfo) (SyncGroup, error) {
+ return s.impl.JoinSyncGroupAtAdmin(ctx, call, i0, i1, i2)
+}
+
+func (s implSyncServerStub) HaveBlob(ctx *context.T, call rpc.ServerCall, i0 nosql.BlobRef) (int64, error) {
+ return s.impl.HaveBlob(ctx, call, i0)
+}
+
+func (s implSyncServerStub) FetchBlob(ctx *context.T, call *SyncFetchBlobServerCallStub, i0 nosql.BlobRef) error {
+ return s.impl.FetchBlob(ctx, call, i0)
+}
+
+func (s implSyncServerStub) FetchBlobRecipe(ctx *context.T, call *SyncFetchBlobRecipeServerCallStub, i0 nosql.BlobRef) error {
+ return s.impl.FetchBlobRecipe(ctx, call, i0)
+}
+
+func (s implSyncServerStub) FetchChunks(ctx *context.T, call *SyncFetchChunksServerCallStub) error {
+ return s.impl.FetchChunks(ctx, call)
+}
+
+func (s implSyncServerStub) Globber() *rpc.GlobState {
+ return s.gs
+}
+
+func (s implSyncServerStub) Describe__() []rpc.InterfaceDesc {
+ return []rpc.InterfaceDesc{SyncDesc}
+}
+
+// SyncDesc describes the Sync interface.
+var SyncDesc rpc.InterfaceDesc = descSync
+
+// descSync hides the desc to keep godoc clean.
+var descSync = rpc.InterfaceDesc{
+ Name: "Sync",
+ PkgPath: "v.io/syncbase/x/ref/services/syncbase/server/interfaces",
+ Doc: "// Sync defines methods for data exchange between Syncbases.\n// TODO(hpucha): Flesh this out further.",
+ Methods: []rpc.MethodDesc{
+ {
+ Name: "GetDeltas",
+ Doc: "// GetDeltas returns the responder's current generation vector and all\n// the missing log records when compared to the initiator's generation\n// vector. This process happens one Database at a time encompassing all\n// the SyncGroups common to the initiator and the responder. For each\n// Database, the initiator sends a DeltaReq. In response, the\n// responder sends a \"Start\" DeltaResp record, all the missing log\n// records, the responder's genvector, and a \"Finish\" DeltaResp\n// record. The initiator parses the stream between a Start and a Finish\n// record as the response to its DeltaReq, and then moves on to the\n// next Database in common with this responder.",
+ InArgs: []rpc.ArgDesc{
+ {"initiator", ``}, // string
+ },
+ Tags: []*vdl.Value{vdl.ValueOf(access.Tag("Read"))},
+ },
+ {
+ Name: "PublishSyncGroup",
+ Doc: "// PublishSyncGroup is typically invoked on a \"central\" peer to publish\n// the SyncGroup.",
+ InArgs: []rpc.ArgDesc{
+ {"sg", ``}, // SyncGroup
+ },
+ Tags: []*vdl.Value{vdl.ValueOf(access.Tag("Write"))},
+ },
+ {
+ Name: "JoinSyncGroupAtAdmin",
+ Doc: "// JoinSyncGroupAtAdmin is invoked by a prospective SyncGroup member's\n// Syncbase on a SyncGroup admin. It checks whether the requestor is\n// allowed to join the named SyncGroup, and if so, adds the requestor to\n// the SyncGroup.",
+ InArgs: []rpc.ArgDesc{
+ {"sgName", ``}, // string
+ {"joinerName", ``}, // string
+ {"myInfo", ``}, // nosql.SyncGroupMemberInfo
+ },
+ OutArgs: []rpc.ArgDesc{
+ {"", ``}, // SyncGroup
+ },
+ Tags: []*vdl.Value{vdl.ValueOf(access.Tag("Read"))},
+ },
+ {
+ Name: "HaveBlob",
+ Doc: "// HaveBlob verifies that the peer has the requested blob, and if\n// present, returns its size.",
+ InArgs: []rpc.ArgDesc{
+ {"br", ``}, // nosql.BlobRef
+ },
+ OutArgs: []rpc.ArgDesc{
+ {"", ``}, // int64
+ },
+ },
+ {
+ Name: "FetchBlob",
+ Doc: "// FetchBlob fetches the requested blob.",
+ InArgs: []rpc.ArgDesc{
+ {"br", ``}, // nosql.BlobRef
+ },
+ },
+ {
+ Name: "FetchBlobRecipe",
+ Doc: "// Methods for incremental blob transfer. The transfer starts with the\n// receiver making a FetchBlobRecipe call to the sender for a given\n// BlobRef. The sender, in turn, sends the chunk hashes of all the\n// chunks that make up the requested blob (blob recipe). The receiver\n// looks up the chunk hashes in its local blob store, and identifies the\n// missing ones. The receiver then fetches the missing chunks using a\n// FetchChunks call from the sender. Finally, the receiver finishes the\n// blob fetch by combining the chunks obtained over the network with the\n// already available local chunks as per the blob recipe.",
+ InArgs: []rpc.ArgDesc{
+ {"br", ``}, // nosql.BlobRef
+ },
+ },
+ {
+ Name: "FetchChunks",
+ },
+ },
+}
+
+// SyncGetDeltasServerStream is the server stream for Sync.GetDeltas.
+type SyncGetDeltasServerStream interface {
+ // RecvStream returns the receiver side of the Sync.GetDeltas server stream.
+ RecvStream() interface {
+ // Advance stages an item so that it may be retrieved via Value. Returns
+ // true iff there is an item to retrieve. Advance must be called before
+ // Value is called. May block if an item is not available.
+ Advance() bool
+ // Value returns the item that was staged by Advance. May panic if Advance
+ // returned false or was not called. Never blocks.
+ Value() DeltaReq
+ // Err returns any error encountered by Advance. Never blocks.
+ Err() error
+ }
+ // SendStream returns the send side of the Sync.GetDeltas server stream.
+ SendStream() interface {
+ // Send places the item onto the output stream. Returns errors encountered
+ // while sending. Blocks if there is no buffer space; will unblock when
+ // buffer space is available.
+ Send(item DeltaResp) error
+ }
+}
+
+// SyncGetDeltasServerCall represents the context passed to Sync.GetDeltas.
+type SyncGetDeltasServerCall interface {
+ rpc.ServerCall
+ SyncGetDeltasServerStream
+}
+
+// SyncGetDeltasServerCallStub is a wrapper that converts rpc.StreamServerCall into
+// a typesafe stub that implements SyncGetDeltasServerCall.
+type SyncGetDeltasServerCallStub struct {
+ rpc.StreamServerCall
+ valRecv DeltaReq
+ errRecv error
+}
+
+// Init initializes SyncGetDeltasServerCallStub from rpc.StreamServerCall.
+func (s *SyncGetDeltasServerCallStub) Init(call rpc.StreamServerCall) {
+ s.StreamServerCall = call
+}
+
+// RecvStream returns the receiver side of the Sync.GetDeltas server stream.
+func (s *SyncGetDeltasServerCallStub) RecvStream() interface {
+ Advance() bool
+ Value() DeltaReq
+ Err() error
+} {
+ return implSyncGetDeltasServerCallRecv{s}
+}
+
+type implSyncGetDeltasServerCallRecv struct {
+ s *SyncGetDeltasServerCallStub
+}
+
+func (s implSyncGetDeltasServerCallRecv) Advance() bool {
+ s.s.valRecv = DeltaReq{}
+ s.s.errRecv = s.s.Recv(&s.s.valRecv)
+ return s.s.errRecv == nil
+}
+func (s implSyncGetDeltasServerCallRecv) Value() DeltaReq {
+ return s.s.valRecv
+}
+func (s implSyncGetDeltasServerCallRecv) Err() error {
+ if s.s.errRecv == io.EOF {
+ return nil
+ }
+ return s.s.errRecv
+}
+
+// SendStream returns the send side of the Sync.GetDeltas server stream.
+func (s *SyncGetDeltasServerCallStub) SendStream() interface {
+ Send(item DeltaResp) error
+} {
+ return implSyncGetDeltasServerCallSend{s}
+}
+
+type implSyncGetDeltasServerCallSend struct {
+ s *SyncGetDeltasServerCallStub
+}
+
+func (s implSyncGetDeltasServerCallSend) Send(item DeltaResp) error {
+ return s.s.Send(item)
+}
+
+// SyncFetchBlobServerStream is the server stream for Sync.FetchBlob.
+type SyncFetchBlobServerStream interface {
+ // SendStream returns the send side of the Sync.FetchBlob server stream.
+ SendStream() interface {
+ // Send places the item onto the output stream. Returns errors encountered
+ // while sending. Blocks if there is no buffer space; will unblock when
+ // buffer space is available.
+ Send(item []byte) error
+ }
+}
+
+// SyncFetchBlobServerCall represents the context passed to Sync.FetchBlob.
+type SyncFetchBlobServerCall interface {
+ rpc.ServerCall
+ SyncFetchBlobServerStream
+}
+
+// SyncFetchBlobServerCallStub is a wrapper that converts rpc.StreamServerCall into
+// a typesafe stub that implements SyncFetchBlobServerCall.
+type SyncFetchBlobServerCallStub struct {
+ rpc.StreamServerCall
+}
+
+// Init initializes SyncFetchBlobServerCallStub from rpc.StreamServerCall.
+func (s *SyncFetchBlobServerCallStub) Init(call rpc.StreamServerCall) {
+ s.StreamServerCall = call
+}
+
+// SendStream returns the send side of the Sync.FetchBlob server stream.
+func (s *SyncFetchBlobServerCallStub) SendStream() interface {
+ Send(item []byte) error
+} {
+ return implSyncFetchBlobServerCallSend{s}
+}
+
+type implSyncFetchBlobServerCallSend struct {
+ s *SyncFetchBlobServerCallStub
+}
+
+func (s implSyncFetchBlobServerCallSend) Send(item []byte) error {
+ return s.s.Send(item)
+}
+
+// SyncFetchBlobRecipeServerStream is the server stream for Sync.FetchBlobRecipe.
+type SyncFetchBlobRecipeServerStream interface {
+ // SendStream returns the send side of the Sync.FetchBlobRecipe server stream.
+ SendStream() interface {
+ // Send places the item onto the output stream. Returns errors encountered
+ // while sending. Blocks if there is no buffer space; will unblock when
+ // buffer space is available.
+ Send(item ChunkHash) error
+ }
+}
+
+// SyncFetchBlobRecipeServerCall represents the context passed to Sync.FetchBlobRecipe.
+type SyncFetchBlobRecipeServerCall interface {
+ rpc.ServerCall
+ SyncFetchBlobRecipeServerStream
+}
+
+// SyncFetchBlobRecipeServerCallStub is a wrapper that converts rpc.StreamServerCall into
+// a typesafe stub that implements SyncFetchBlobRecipeServerCall.
+type SyncFetchBlobRecipeServerCallStub struct {
+ rpc.StreamServerCall
+}
+
+// Init initializes SyncFetchBlobRecipeServerCallStub from rpc.StreamServerCall.
+func (s *SyncFetchBlobRecipeServerCallStub) Init(call rpc.StreamServerCall) {
+ s.StreamServerCall = call
+}
+
+// SendStream returns the send side of the Sync.FetchBlobRecipe server stream.
+func (s *SyncFetchBlobRecipeServerCallStub) SendStream() interface {
+ Send(item ChunkHash) error
+} {
+ return implSyncFetchBlobRecipeServerCallSend{s}
+}
+
+type implSyncFetchBlobRecipeServerCallSend struct {
+ s *SyncFetchBlobRecipeServerCallStub
+}
+
+func (s implSyncFetchBlobRecipeServerCallSend) Send(item ChunkHash) error {
+ return s.s.Send(item)
+}
+
+// SyncFetchChunksServerStream is the server stream for Sync.FetchChunks.
+type SyncFetchChunksServerStream interface {
+ // RecvStream returns the receiver side of the Sync.FetchChunks server stream.
+ RecvStream() interface {
+ // Advance stages an item so that it may be retrieved via Value. Returns
+ // true iff there is an item to retrieve. Advance must be called before
+ // Value is called. May block if an item is not available.
+ Advance() bool
+ // Value returns the item that was staged by Advance. May panic if Advance
+ // returned false or was not called. Never blocks.
+ Value() ChunkHash
+ // Err returns any error encountered by Advance. Never blocks.
+ Err() error
+ }
+ // SendStream returns the send side of the Sync.FetchChunks server stream.
+ SendStream() interface {
+ // Send places the item onto the output stream. Returns errors encountered
+ // while sending. Blocks if there is no buffer space; will unblock when
+ // buffer space is available.
+ Send(item ChunkData) error
+ }
+}
+
+// SyncFetchChunksServerCall represents the context passed to Sync.FetchChunks.
+type SyncFetchChunksServerCall interface {
+ rpc.ServerCall
+ SyncFetchChunksServerStream
+}
+
+// SyncFetchChunksServerCallStub is a wrapper that converts rpc.StreamServerCall into
+// a typesafe stub that implements SyncFetchChunksServerCall.
+type SyncFetchChunksServerCallStub struct {
+ rpc.StreamServerCall
+ valRecv ChunkHash
+ errRecv error
+}
+
+// Init initializes SyncFetchChunksServerCallStub from rpc.StreamServerCall.
+func (s *SyncFetchChunksServerCallStub) Init(call rpc.StreamServerCall) {
+ s.StreamServerCall = call
+}
+
+// RecvStream returns the receiver side of the Sync.FetchChunks server stream.
+func (s *SyncFetchChunksServerCallStub) RecvStream() interface {
+ Advance() bool
+ Value() ChunkHash
+ Err() error
+} {
+ return implSyncFetchChunksServerCallRecv{s}
+}
+
+type implSyncFetchChunksServerCallRecv struct {
+ s *SyncFetchChunksServerCallStub
+}
+
+func (s implSyncFetchChunksServerCallRecv) Advance() bool {
+ s.s.valRecv = ChunkHash{}
+ s.s.errRecv = s.s.Recv(&s.s.valRecv)
+ return s.s.errRecv == nil
+}
+func (s implSyncFetchChunksServerCallRecv) Value() ChunkHash {
+ return s.s.valRecv
+}
+func (s implSyncFetchChunksServerCallRecv) Err() error {
+ if s.s.errRecv == io.EOF {
+ return nil
+ }
+ return s.s.errRecv
+}
+
+// SendStream returns the send side of the Sync.FetchChunks server stream.
+func (s *SyncFetchChunksServerCallStub) SendStream() interface {
+ Send(item ChunkData) error
+} {
+ return implSyncFetchChunksServerCallSend{s}
+}
+
+type implSyncFetchChunksServerCallSend struct {
+ s *SyncFetchChunksServerCallStub
+}
+
+func (s implSyncFetchChunksServerCallSend) Send(item ChunkData) error {
+ return s.s.Send(item)
+}
diff --git a/services/syncbase/server/interfaces/sync_types.vdl b/services/syncbase/server/interfaces/sync_types.vdl
new file mode 100644
index 0000000..324928b
--- /dev/null
+++ b/services/syncbase/server/interfaces/sync_types.vdl
@@ -0,0 +1,129 @@
+// Copyright 2015 The Vanadium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package interfaces
+
+import (
+ "time"
+
+ wire "v.io/syncbase/v23/services/syncbase/nosql"
+)
+
+const (
+ NoGroupId = GroupId(0)
+)
+
+// TODO(hpucha): These are not final yet. This is an intermediate step.
+
+const (
+ // NodeRec type log record adds a new node in the dag.
+ NodeRec = byte(0)
+
+ // LinkRec type log record adds a new link in the dag. Link records are
+ // added when a conflict is resolved by picking the local or the remote
+ // version as the resolution, instead of creating a new version.
+ LinkRec = byte(1)
+)
+
+// PrefixGenVector is the generation vector for a data prefix, which maps each
+// device id to its last locally known generation in the scope of that prefix.
+type PrefixGenVector map[uint64]uint64
+
+// GenVector is the generation vector for a Database, and maps prefixes to their
+// generation vectors. Note that the prefixes in a GenVector are relative to the
+// Application and Database names.
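+//
+// For example (illustrative values only), a GenVector of
+//   {"foo": {10: 38, 11: 5}}
+// says that, within prefix "foo", device 10 is known up to generation 38 and
+// device 11 up to generation 5.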
+type GenVector map[string]PrefixGenVector
+
+// LogRecMetadata represents the metadata of a single log record that is
+// exchanged between two peers. Each log record represents a change made to an
+// object in the store.
+//
+// TODO(hpucha): Add readset/scanset. Look into sending tx metadata only once
+// per transaction.
+type LogRecMetadata struct {
+ // Log related information.
+ Id uint64 // device id that created the log record.
+ Gen uint64 // generation number for the log record.
+ RecType byte // type of log record.
+
+ // Object related information.
+
+ // Id of the object that was updated. This id is relative to Application
+ // and Database names and is the store key for a particular row in a
+ // table.
+ ObjId string
+ CurVers string // current version number of the object.
+ Parents []string // 0, 1 or 2 parent versions that the current version is derived from.
+ UpdTime time.Time // timestamp when the update is generated.
+ Delete bool // indicates whether the update resulted in the object being deleted from the store.
+ BatchId uint64 // unique id of the Batch this update belongs to.
+ BatchCount uint64 // number of objects in the Batch.
+}
+
+// LogRec represents the on-wire representation of an entire log record: its
+// metadata and data. Value is the actual value of a store object.
+type LogRec struct {
+ Metadata LogRecMetadata
+ Value []byte
+}
+
+// GroupId is a globally unique SyncGroup ID.
+type GroupId uint64
+
+// Possible states for a SyncGroup.
+type SyncGroupStatus enum {
+ // Indicates that a SyncGroup is operational, but publishing to the
+ // remote server is pending.
+ PublishPending
+
+ // Indicates that the SyncGroup is operational, but the publishing
+ // failed.
+ PublishRejected
+
+ // Indicates that the SyncGroup is operational and published.
+ Running
+}
+
+// SyncGroup contains the state of a SyncGroup object.
+type SyncGroup struct {
+ Id GroupId // globally unique identifier generated by Syncbase
+ Name string // globally unique Vanadium name chosen by app
+ SpecVersion string // version of the SyncGroup spec, used for concurrency control
+ Spec wire.SyncGroupSpec // app-given specification
+ Creator string // Creator's Vanadium name
+ AppName string // Globally unique App name
+ DbName string // Database name within the App
+ Status SyncGroupStatus // Status of the SyncGroup
+ Joiners map[string]wire.SyncGroupMemberInfo // map of joiners to their metadata
+}
+
+// DeltaReq contains the initiator's genvector and the set of SyncGroups it is
+// interested in within a Database (specified by the AppName/DbName) when
+// requesting deltas for that Database.
+type DeltaReq struct {
+ AppName string
+ DbName string
+ SgIds set[GroupId]
+ InitVec GenVector
+}
+
+// DeltaResp contains the responder's genvector or the missing log records
+// returned in response to an initiator's request for deltas for a Database.
+type DeltaResp union {
+ Start bool
+ Finish bool
+ Rec LogRec
+ RespVec GenVector
+}
+
+// ChunkHash contains the hash of a chunk that is part of a blob's recipe.
+type ChunkHash struct {
+ Hash []byte
+}
+
+// ChunkData contains the data of a chunk.
+type ChunkData struct {
+ Data []byte
+}
diff --git a/services/syncbase/server/interfaces/sync_types.vdl.go b/services/syncbase/server/interfaces/sync_types.vdl.go
new file mode 100644
index 0000000..8ef80a8
--- /dev/null
+++ b/services/syncbase/server/interfaces/sync_types.vdl.go
@@ -0,0 +1,274 @@
+// Copyright 2015 The Vanadium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// This file was auto-generated by the vanadium vdl tool.
+// Source: sync_types.vdl
+
+package interfaces
+
+import (
+ // VDL system imports
+ "fmt"
+ "v.io/v23/vdl"
+
+ // VDL user imports
+ "time"
+ "v.io/syncbase/v23/services/syncbase/nosql"
+ _ "v.io/v23/vdlroot/time"
+)
+
+// PrefixGenVector is the generation vector for a data prefix, which maps each
+// device id to its last locally known generation in the scope of that prefix.
+type PrefixGenVector map[uint64]uint64
+
+func (PrefixGenVector) __VDLReflect(struct {
+ Name string `vdl:"v.io/syncbase/x/ref/services/syncbase/server/interfaces.PrefixGenVector"`
+}) {
+}
+
+// GenVector is the generation vector for a Database, and maps prefixes to their
+// generation vectors. Note that the prefixes in a GenVector are relative to
+// the Application and Database name.
+type GenVector map[string]PrefixGenVector
+
+func (GenVector) __VDLReflect(struct {
+ Name string `vdl:"v.io/syncbase/x/ref/services/syncbase/server/interfaces.GenVector"`
+}) {
+}
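+
+// Example (hypothetical values): a GenVector for a Database might record, for
+// the prefix "foo", that device 10 is known locally up to generation 38 and
+// device 26 up to generation 5:
+//
+//   gv := GenVector{"foo": PrefixGenVector{10: 38, 26: 5}}
+//   _ = gv["foo"][10] // 38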
+
+// LogRecMetadata represents the metadata of a single log record that is
+// exchanged between two peers. Each log record represents a change made to an
+// object in the store.
+//
+// TODO(hpucha): Add readset/scanset. Look into sending tx metadata only once
+// per transaction.
+type LogRecMetadata struct {
+ // Log related information.
+ Id uint64 // device id that created the log record.
+ Gen uint64 // generation number for the log record.
+ RecType byte // type of log record.
+ // Id of the object that was updated. This id is relative to Application
+ // and Database names and is the store key for a particular row in a
+ // table.
+ ObjId string
+ CurVers string // current version number of the object.
+ Parents []string // 0, 1 or 2 parent versions that the current version is derived from.
+ UpdTime time.Time // timestamp when the update is generated.
+ Delete bool // indicates whether the update resulted in object being deleted from the store.
+ BatchId uint64 // unique id of the Batch this update belongs to.
+ BatchCount uint64 // number of objects in the Batch.
+}
+
+func (LogRecMetadata) __VDLReflect(struct {
+ Name string `vdl:"v.io/syncbase/x/ref/services/syncbase/server/interfaces.LogRecMetadata"`
+}) {
+}
+
+// LogRec represents the on-wire representation of an entire log record: its
+// metadata and data. Value is the actual value of a store object.
+type LogRec struct {
+ Metadata LogRecMetadata
+ Value []byte
+}
+
+func (LogRec) __VDLReflect(struct {
+ Name string `vdl:"v.io/syncbase/x/ref/services/syncbase/server/interfaces.LogRec"`
+}) {
+}
+
+// GroupId is a globally unique SyncGroup ID.
+type GroupId uint64
+
+func (GroupId) __VDLReflect(struct {
+ Name string `vdl:"v.io/syncbase/x/ref/services/syncbase/server/interfaces.GroupId"`
+}) {
+}
+
+// Possible states for a SyncGroup.
+type SyncGroupStatus int
+
+const (
+ SyncGroupStatusPublishPending SyncGroupStatus = iota
+ SyncGroupStatusPublishRejected
+ SyncGroupStatusRunning
+)
+
+// SyncGroupStatusAll holds all labels for SyncGroupStatus.
+var SyncGroupStatusAll = [...]SyncGroupStatus{SyncGroupStatusPublishPending, SyncGroupStatusPublishRejected, SyncGroupStatusRunning}
+
+// SyncGroupStatusFromString creates a SyncGroupStatus from a string label.
+func SyncGroupStatusFromString(label string) (x SyncGroupStatus, err error) {
+ err = x.Set(label)
+ return
+}
+
+// Set assigns label to x.
+func (x *SyncGroupStatus) Set(label string) error {
+ switch label {
+ case "PublishPending", "publishpending":
+ *x = SyncGroupStatusPublishPending
+ return nil
+ case "PublishRejected", "publishrejected":
+ *x = SyncGroupStatusPublishRejected
+ return nil
+ case "Running", "running":
+ *x = SyncGroupStatusRunning
+ return nil
+ }
+ *x = -1
+ return fmt.Errorf("unknown label %q in interfaces.SyncGroupStatus", label)
+}
+
+// String returns the string label of x.
+func (x SyncGroupStatus) String() string {
+ switch x {
+ case SyncGroupStatusPublishPending:
+ return "PublishPending"
+ case SyncGroupStatusPublishRejected:
+ return "PublishRejected"
+ case SyncGroupStatusRunning:
+ return "Running"
+ }
+ return ""
+}
+
+func (SyncGroupStatus) __VDLReflect(struct {
+ Name string `vdl:"v.io/syncbase/x/ref/services/syncbase/server/interfaces.SyncGroupStatus"`
+ Enum struct{ PublishPending, PublishRejected, Running string }
+}) {
+}
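+
+// Example (illustrative usage): labels round-trip through
+// SyncGroupStatusFromString and String; unknown labels yield an error.
+//
+//   st, err := SyncGroupStatusFromString("Running") // st == SyncGroupStatusRunning, err == nil
+//   _ = st.String()                                 // "Running"
+//   _, err = SyncGroupStatusFromString("Paused")    // err != nil: unknown label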
+
+// SyncGroup contains the state of a SyncGroup object.
+type SyncGroup struct {
+ Id GroupId // globally unique identifier generated by Syncbase
+ Name string // globally unique Vanadium name chosen by app
+ SpecVersion string // version of SyncGroup spec for concurrency control
+ Spec nosql.SyncGroupSpec // app-given specification
+ Creator string // Creator's Vanadium name
+ AppName string // Globally unique App name
+ DbName string // Database name within the App
+ Status SyncGroupStatus // Status of the SyncGroup
+ Joiners map[string]nosql.SyncGroupMemberInfo // map of joiners to their metadata
+}
+
+func (SyncGroup) __VDLReflect(struct {
+ Name string `vdl:"v.io/syncbase/x/ref/services/syncbase/server/interfaces.SyncGroup"`
+}) {
+}
+
+// DeltaReq contains the initiator's genvector and the set of SyncGroups it is
+// interested in within a Database (specified by the AppName/DbName) when
+// requesting deltas for that Database.
+type DeltaReq struct {
+ AppName string
+ DbName string
+ SgIds map[GroupId]struct{}
+ InitVec GenVector
+}
+
+func (DeltaReq) __VDLReflect(struct {
+ Name string `vdl:"v.io/syncbase/x/ref/services/syncbase/server/interfaces.DeltaReq"`
+}) {
+}
+
+type (
+ // DeltaResp represents any single field of the DeltaResp union type.
+ //
+ // DeltaResp contains the responder's genvector or the missing log records
+ // returned in response to an initiator's request for deltas for a Database.
+ DeltaResp interface {
+ // Index returns the field index.
+ Index() int
+ // Interface returns the field value as an interface.
+ Interface() interface{}
+ // Name returns the field name.
+ Name() string
+ // __VDLReflect describes the DeltaResp union type.
+ __VDLReflect(__DeltaRespReflect)
+ }
+ // DeltaRespStart represents field Start of the DeltaResp union type.
+ DeltaRespStart struct{ Value bool }
+ // DeltaRespFinish represents field Finish of the DeltaResp union type.
+ DeltaRespFinish struct{ Value bool }
+ // DeltaRespRec represents field Rec of the DeltaResp union type.
+ DeltaRespRec struct{ Value LogRec }
+ // DeltaRespRespVec represents field RespVec of the DeltaResp union type.
+ DeltaRespRespVec struct{ Value GenVector }
+ // __DeltaRespReflect describes the DeltaResp union type.
+ __DeltaRespReflect struct {
+ Name string `vdl:"v.io/syncbase/x/ref/services/syncbase/server/interfaces.DeltaResp"`
+ Type DeltaResp
+ Union struct {
+ Start DeltaRespStart
+ Finish DeltaRespFinish
+ Rec DeltaRespRec
+ RespVec DeltaRespRespVec
+ }
+ }
+)
+
+func (x DeltaRespStart) Index() int { return 0 }
+func (x DeltaRespStart) Interface() interface{} { return x.Value }
+func (x DeltaRespStart) Name() string { return "Start" }
+func (x DeltaRespStart) __VDLReflect(__DeltaRespReflect) {}
+
+func (x DeltaRespFinish) Index() int { return 1 }
+func (x DeltaRespFinish) Interface() interface{} { return x.Value }
+func (x DeltaRespFinish) Name() string { return "Finish" }
+func (x DeltaRespFinish) __VDLReflect(__DeltaRespReflect) {}
+
+func (x DeltaRespRec) Index() int { return 2 }
+func (x DeltaRespRec) Interface() interface{} { return x.Value }
+func (x DeltaRespRec) Name() string { return "Rec" }
+func (x DeltaRespRec) __VDLReflect(__DeltaRespReflect) {}
+
+func (x DeltaRespRespVec) Index() int { return 3 }
+func (x DeltaRespRespVec) Interface() interface{} { return x.Value }
+func (x DeltaRespRespVec) Name() string { return "RespVec" }
+func (x DeltaRespRespVec) __VDLReflect(__DeltaRespReflect) {}
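+
+// Example (illustrative sketch, assuming resp was received from a responder's
+// delta stream): callers typically dispatch on the concrete field type of the
+// DeltaResp union.
+//
+//   switch r := resp.(type) {
+//   case DeltaRespRec:
+//       _ = r.Value // a LogRec to process locally
+//   case DeltaRespRespVec:
+//       _ = r.Value // the responder's GenVector
+//   case DeltaRespStart, DeltaRespFinish:
+//       // stream delimiters
+//   }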
+
+// ChunkHash contains the hash of a chunk that is part of a blob's recipe.
+type ChunkHash struct {
+ Hash []byte
+}
+
+func (ChunkHash) __VDLReflect(struct {
+ Name string `vdl:"v.io/syncbase/x/ref/services/syncbase/server/interfaces.ChunkHash"`
+}) {
+}
+
+// ChunkData contains the data of a chunk.
+type ChunkData struct {
+ Data []byte
+}
+
+func (ChunkData) __VDLReflect(struct {
+ Name string `vdl:"v.io/syncbase/x/ref/services/syncbase/server/interfaces.ChunkData"`
+}) {
+}
+
+func init() {
+ vdl.Register((*PrefixGenVector)(nil))
+ vdl.Register((*GenVector)(nil))
+ vdl.Register((*LogRecMetadata)(nil))
+ vdl.Register((*LogRec)(nil))
+ vdl.Register((*GroupId)(nil))
+ vdl.Register((*SyncGroupStatus)(nil))
+ vdl.Register((*SyncGroup)(nil))
+ vdl.Register((*DeltaReq)(nil))
+ vdl.Register((*DeltaResp)(nil))
+ vdl.Register((*ChunkHash)(nil))
+ vdl.Register((*ChunkData)(nil))
+}
+
+const NoGroupId = GroupId(0)
+
+// NodeRec type log record adds a new node in the dag.
+const NodeRec = byte(0)
+
+// LinkRec type log record adds a new link in the dag. Link records are
+// added when a conflict is resolved by picking the local or the remote
+// version as the resolution of a conflict, instead of creating a new
+// version.
+const LinkRec = byte(1)
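+
+// Example (hypothetical snippet, assuming rec is a LogRec): consumers can
+// branch on the record kind using these constants.
+//
+//   switch rec.Metadata.RecType {
+//   case NodeRec:
+//       // a new version (node) was added to the dag
+//   case LinkRec:
+//       // an existing version was linked during conflict resolution
+//   }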
diff --git a/services/syncbase/server/mojo_call.go b/services/syncbase/server/mojo_call.go
new file mode 100644
index 0000000..d0b9dac
--- /dev/null
+++ b/services/syncbase/server/mojo_call.go
@@ -0,0 +1,68 @@
+// Copyright 2015 The Vanadium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// +build mojo
+
+package server
+
+import (
+ "v.io/v23"
+ "v.io/v23/context"
+ "v.io/v23/naming"
+ "v.io/v23/rpc"
+ "v.io/v23/security"
+)
+
+type mojoServerCall struct {
+ sec security.Call
+ srv rpc.Server
+ suffix string
+}
+
+// TODO(sadovsky): Synthesize endpoints and discharges as needed.
+func newMojoServerCall(ctx *context.T, srv rpc.Server, suffix string, method rpc.MethodDesc) rpc.ServerCall {
+ p := v23.GetPrincipal(ctx)
+ // HACK: For now, we set the remote (client, i.e. Mojo app) blessing to be the
+ // same as the local (server, i.e. Syncbase Mojo service) blessing.
+ // TODO(sadovsky): Eliminate this hack.
+ blessings := p.BlessingStore().Default()
+ return &mojoServerCall{
+ sec: security.NewCall(&security.CallParams{
+ Method: method.Name,
+ MethodTags: method.Tags,
+ Suffix: suffix,
+ LocalPrincipal: p,
+ LocalBlessings: blessings,
+ RemoteBlessings: blessings,
+ }),
+ srv: srv,
+ suffix: suffix,
+ }
+}
+
+var _ rpc.ServerCall = (*mojoServerCall)(nil)
+
+func (call *mojoServerCall) Security() security.Call {
+ return call.sec
+}
+
+func (call *mojoServerCall) Suffix() string {
+ return call.suffix
+}
+
+func (call *mojoServerCall) LocalEndpoint() naming.Endpoint {
+ return call.sec.LocalEndpoint()
+}
+
+func (call *mojoServerCall) RemoteEndpoint() naming.Endpoint {
+ return call.sec.RemoteEndpoint()
+}
+
+func (call *mojoServerCall) GrantedBlessings() security.Blessings {
+ return security.Blessings{}
+}
+
+func (call *mojoServerCall) Server() rpc.Server {
+ return call.srv
+}
diff --git a/services/syncbase/server/mojo_impl.go b/services/syncbase/server/mojo_impl.go
new file mode 100644
index 0000000..973bf7d
--- /dev/null
+++ b/services/syncbase/server/mojo_impl.go
@@ -0,0 +1,535 @@
+// Copyright 2015 The Vanadium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// +build mojo
+
+// Implementation of Syncbase Mojo stubs. Our strategy is to translate Mojo stub
+// requests into Vanadium stub requests, and Vanadium stub responses into Mojo
+// stub responses. As part of this procedure, we synthesize "fake" ctx and call
+// objects to pass to the Vanadium stubs.
+
+package server
+
+import (
+ "bytes"
+ "fmt"
+ "strings"
+
+ "mojo/public/go/bindings"
+
+ mojom "mojom/syncbase"
+ wire "v.io/syncbase/v23/services/syncbase"
+ nosqlwire "v.io/syncbase/v23/services/syncbase/nosql"
+ "v.io/v23/context"
+ "v.io/v23/rpc"
+ "v.io/v23/security/access"
+ "v.io/v23/services/permissions"
+ "v.io/v23/verror"
+ "v.io/v23/vtrace"
+)
+
+const NoSchema int32 = -1
+
+type mojoImpl struct {
+ ctx *context.T
+ srv rpc.Server
+ disp rpc.Dispatcher
+}
+
+func NewMojoImpl(ctx *context.T, srv rpc.Server, disp rpc.Dispatcher) *mojoImpl {
+ return &mojoImpl{ctx: ctx, srv: srv, disp: disp}
+}
+
+func methodDesc(desc rpc.InterfaceDesc, name string) rpc.MethodDesc {
+ for _, method := range desc.Methods {
+ if method.Name == name {
+ return method
+ }
+ }
+ panic(fmt.Sprintf("unknown method: %s.%s", desc.Name, name))
+}
+
+func (m *mojoImpl) newCtxCall(suffix string, method rpc.MethodDesc) (*context.T, rpc.ServerCall) {
+ ctx, _ := vtrace.WithNewTrace(m.ctx)
+ return ctx, newMojoServerCall(ctx, m.srv, suffix, method)
+}
+
+////////////////////////////////////////
+// Struct converters
+
+func toMojoError(err error) mojom.Error {
+ if err == nil {
+ return mojom.Error{}
+ }
+ return mojom.Error{
+ Id: string(verror.ErrorID(err)),
+ ActionCode: uint32(verror.Action(err)),
+ Msg: err.Error(),
+ }
+}
+
+func toV23Perms(mPerms mojom.Perms) (access.Permissions, error) {
+ return access.ReadPermissions(strings.NewReader(mPerms.Json))
+}
+
+func toMojoPerms(vPerms access.Permissions) (mojom.Perms, error) {
+ b := new(bytes.Buffer)
+ if err := access.WritePermissions(b, vPerms); err != nil {
+ return mojom.Perms{}, err
+ }
+ return mojom.Perms{Json: b.String()}, nil
+}
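+
+// Illustrative round trip (the exact JSON shape is defined by the access
+// package; the blessing pattern below is made up):
+//
+//   mPerms := mojom.Perms{Json: `{"Read": {"In": ["root:alice"]}}`}
+//   vPerms, _ := toV23Perms(mPerms)   // access.Permissions
+//   mPerms2, _ := toMojoPerms(vPerms) // back to mojom.Perms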
+
+////////////////////////////////////////
+// Stub getters
+
+func (m *mojoImpl) lookupAndAuthorize(ctx *context.T, call rpc.ServerCall, suffix string) (interface{}, error) {
+ resInt, auth, err := m.disp.Lookup(ctx, suffix)
+ if err != nil {
+ return nil, err
+ }
+ if err := auth.Authorize(ctx, call.Security()); err != nil {
+ return nil, verror.New(verror.ErrNoAccess, ctx, err)
+ }
+ return resInt, nil
+}
+
+func (m *mojoImpl) getService(ctx *context.T, call rpc.ServerCall) (wire.ServiceServerStubMethods, error) {
+ resInt, err := m.lookupAndAuthorize(ctx, call, "")
+ if err != nil {
+ return nil, err
+ }
+ if res, ok := resInt.(wire.ServiceServerStubMethods); !ok {
+ return nil, verror.NewErrInternal(ctx)
+ } else {
+ return res, nil
+ }
+}
+
+func (m *mojoImpl) getApp(ctx *context.T, call rpc.ServerCall, name string) (wire.AppServerStubMethods, error) {
+ resInt, err := m.lookupAndAuthorize(ctx, call, name)
+ if err != nil {
+ return nil, err
+ }
+ if res, ok := resInt.(wire.AppServerStubMethods); !ok {
+ return nil, verror.NewErrInternal(ctx)
+ } else {
+ return res, nil
+ }
+}
+
+func (m *mojoImpl) getDb(ctx *context.T, call rpc.ServerCall, name string) (nosqlwire.DatabaseServerStubMethods, error) {
+ resInt, err := m.lookupAndAuthorize(ctx, call, name)
+ if err != nil {
+ return nil, err
+ }
+ if res, ok := resInt.(nosqlwire.DatabaseServerStubMethods); !ok {
+ return nil, verror.NewErrInternal(ctx)
+ } else {
+ return res, nil
+ }
+}
+
+func (m *mojoImpl) getTable(ctx *context.T, call rpc.ServerCall, name string) (nosqlwire.TableServerStubMethods, error) {
+ resInt, err := m.lookupAndAuthorize(ctx, call, name)
+ if err != nil {
+ return nil, err
+ }
+ if res, ok := resInt.(nosqlwire.TableServerStubMethods); !ok {
+ return nil, verror.NewErrInternal(ctx)
+ } else {
+ return res, nil
+ }
+}
+
+func (m *mojoImpl) getRow(ctx *context.T, call rpc.ServerCall, name string) (nosqlwire.RowServerStubMethods, error) {
+ resInt, err := m.lookupAndAuthorize(ctx, call, name)
+ if err != nil {
+ return nil, err
+ }
+ if res, ok := resInt.(nosqlwire.RowServerStubMethods); !ok {
+ return nil, verror.NewErrInternal(ctx)
+ } else {
+ return res, nil
+ }
+}
+
+////////////////////////////////////////
+// Service
+
+// TODO(sadovsky): All stub implementations return a nil error (the last return
+// value), since that error doesn't make it back to the IPC client. Chat with
+// rogulenko@ about whether we should change the Go Mojo stub generator to drop
+// these errors.
+func (m *mojoImpl) ServiceGetPermissions() (mojom.Error, mojom.Perms, string, error) {
+ ctx, call := m.newCtxCall("", methodDesc(permissions.ObjectDesc, "GetPermissions"))
+ stub, err := m.getService(ctx, call)
+ if err != nil {
+ return toMojoError(err), mojom.Perms{}, "", nil
+ }
+ vPerms, version, err := stub.GetPermissions(ctx, call)
+ if err != nil {
+ return toMojoError(err), mojom.Perms{}, "", nil
+ }
+ mPerms, err := toMojoPerms(vPerms)
+ if err != nil {
+ return toMojoError(err), mojom.Perms{}, "", nil
+ }
+ return toMojoError(err), mPerms, version, nil
+}
+
+func (m *mojoImpl) ServiceSetPermissions(mPerms mojom.Perms, version string) (mojom.Error, error) {
+ ctx, call := m.newCtxCall("", methodDesc(permissions.ObjectDesc, "SetPermissions"))
+ stub, err := m.getService(ctx, call)
+ if err != nil {
+ return toMojoError(err), nil
+ }
+ vPerms, err := toV23Perms(mPerms)
+ if err != nil {
+ return toMojoError(err), nil
+ }
+ err = stub.SetPermissions(ctx, call, vPerms, version)
+ return toMojoError(err), nil
+}
+
+////////////////////////////////////////
+// App
+
+func (m *mojoImpl) AppCreate(name string, mPerms mojom.Perms) (mojom.Error, error) {
+ ctx, call := m.newCtxCall(name, methodDesc(wire.AppDesc, "Create"))
+ stub, err := m.getApp(ctx, call, name)
+ if err != nil {
+ return toMojoError(err), nil
+ }
+ vPerms, err := toV23Perms(mPerms)
+ if err != nil {
+ return toMojoError(err), nil
+ }
+ err = stub.Create(ctx, call, vPerms)
+ return toMojoError(err), nil
+}
+
+func (m *mojoImpl) AppDelete(name string) (mojom.Error, error) {
+ ctx, call := m.newCtxCall(name, methodDesc(wire.AppDesc, "Delete"))
+ stub, err := m.getApp(ctx, call, name)
+ if err != nil {
+ return toMojoError(err), nil
+ }
+ err = stub.Delete(ctx, call)
+ return toMojoError(err), nil
+}
+
+func (m *mojoImpl) AppExists(name string) (mojom.Error, bool, error) {
+ ctx, call := m.newCtxCall(name, methodDesc(wire.AppDesc, "Exists"))
+ stub, err := m.getApp(ctx, call, name)
+ if err != nil {
+ return toMojoError(err), false, nil
+ }
+ exists, err := stub.Exists(ctx, call)
+ return toMojoError(err), exists, nil
+}
+
+func (m *mojoImpl) AppGetPermissions(name string) (mojom.Error, mojom.Perms, string, error) {
+ ctx, call := m.newCtxCall(name, methodDesc(permissions.ObjectDesc, "GetPermissions"))
+ stub, err := m.getApp(ctx, call, name)
+ if err != nil {
+ return toMojoError(err), mojom.Perms{}, "", nil
+ }
+ vPerms, version, err := stub.GetPermissions(ctx, call)
+ if err != nil {
+ return toMojoError(err), mojom.Perms{}, "", nil
+ }
+ mPerms, err := toMojoPerms(vPerms)
+ if err != nil {
+ return toMojoError(err), mojom.Perms{}, "", nil
+ }
+ return toMojoError(err), mPerms, version, nil
+}
+
+func (m *mojoImpl) AppSetPermissions(name string, mPerms mojom.Perms, version string) (mojom.Error, error) {
+ ctx, call := m.newCtxCall(name, methodDesc(permissions.ObjectDesc, "SetPermissions"))
+ stub, err := m.getApp(ctx, call, name)
+ if err != nil {
+ return toMojoError(err), nil
+ }
+ vPerms, err := toV23Perms(mPerms)
+ if err != nil {
+ return toMojoError(err), nil
+ }
+ err = stub.SetPermissions(ctx, call, vPerms, version)
+ return toMojoError(err), nil
+}
+
+////////////////////////////////////////
+// nosql.Database
+
+func (m *mojoImpl) DbCreate(name string, mPerms mojom.Perms) (mojom.Error, error) {
+ ctx, call := m.newCtxCall(name, methodDesc(nosqlwire.DatabaseDesc, "Create"))
+ stub, err := m.getDb(ctx, call, name)
+ if err != nil {
+ return toMojoError(err), nil
+ }
+ vPerms, err := toV23Perms(mPerms)
+ if err != nil {
+ return toMojoError(err), nil
+ }
+ err = stub.Create(ctx, call, nil, vPerms)
+ return toMojoError(err), nil
+}
+
+func (m *mojoImpl) DbDelete(name string) (mojom.Error, error) {
+ ctx, call := m.newCtxCall(name, methodDesc(nosqlwire.DatabaseDesc, "Delete"))
+ stub, err := m.getDb(ctx, call, name)
+ if err != nil {
+ return toMojoError(err), nil
+ }
+ err = stub.Delete(ctx, call, NoSchema)
+ return toMojoError(err), nil
+}
+
+func (m *mojoImpl) DbExists(name string) (mojom.Error, bool, error) {
+ ctx, call := m.newCtxCall(name, methodDesc(nosqlwire.DatabaseDesc, "Exists"))
+ stub, err := m.getDb(ctx, call, name)
+ if err != nil {
+ return toMojoError(err), false, nil
+ }
+ exists, err := stub.Exists(ctx, call, NoSchema)
+ return toMojoError(err), exists, nil
+}
+
+func (m *mojoImpl) DbExec(name string, query string, stream mojom.ExecStream_Pointer) (mojom.Error, error) {
+ return mojom.Error{}, nil
+}
+
+func (m *mojoImpl) DbBeginBatch(name string, bo *mojom.BatchOptions) (mojom.Error, string, error) {
+ return mojom.Error{}, "", nil
+}
+
+func (m *mojoImpl) DbCommit(name string) (mojom.Error, error) {
+ return mojom.Error{}, nil
+}
+
+func (m *mojoImpl) DbAbort(name string) (mojom.Error, error) {
+ return mojom.Error{}, nil
+}
+
+func (m *mojoImpl) DbGetPermissions(name string) (mojom.Error, mojom.Perms, string, error) {
+ ctx, call := m.newCtxCall(name, methodDesc(permissions.ObjectDesc, "GetPermissions"))
+ stub, err := m.getDb(ctx, call, name)
+ if err != nil {
+ return toMojoError(err), mojom.Perms{}, "", nil
+ }
+ vPerms, version, err := stub.GetPermissions(ctx, call)
+ if err != nil {
+ return toMojoError(err), mojom.Perms{}, "", nil
+ }
+ mPerms, err := toMojoPerms(vPerms)
+ if err != nil {
+ return toMojoError(err), mojom.Perms{}, "", nil
+ }
+ return toMojoError(err), mPerms, version, nil
+}
+
+func (m *mojoImpl) DbSetPermissions(name string, mPerms mojom.Perms, version string) (mojom.Error, error) {
+ ctx, call := m.newCtxCall(name, methodDesc(permissions.ObjectDesc, "SetPermissions"))
+ stub, err := m.getDb(ctx, call, name)
+ if err != nil {
+ return toMojoError(err), nil
+ }
+ vPerms, err := toV23Perms(mPerms)
+ if err != nil {
+ return toMojoError(err), nil
+ }
+ err = stub.SetPermissions(ctx, call, vPerms, version)
+ return toMojoError(err), nil
+}
+
+////////////////////////////////////////
+// nosql.Database:SyncGroupManager
+
+func (m *mojoImpl) DbGetSyncGroupNames(name string) (mojom.Error, []string, error) {
+ return mojom.Error{}, nil, nil
+}
+
+func (m *mojoImpl) DbCreateSyncGroup(name, sgName string, spec mojom.SyncGroupSpec, myInfo mojom.SyncGroupMemberInfo) (mojom.Error, error) {
+ return mojom.Error{}, nil
+}
+
+func (m *mojoImpl) DbJoinSyncGroup(name, sgName string, myInfo mojom.SyncGroupMemberInfo) (mojom.Error, error) {
+ return mojom.Error{}, nil
+}
+
+func (m *mojoImpl) DbLeaveSyncGroup(name, sgName string) (mojom.Error, error) {
+ return mojom.Error{}, nil
+}
+
+func (m *mojoImpl) DbDestroySyncGroup(name, sgName string) (mojom.Error, error) {
+ return mojom.Error{}, nil
+}
+
+func (m *mojoImpl) DbEjectFromSyncGroup(name, sgName string, member string) (mojom.Error, error) {
+ return mojom.Error{}, nil
+}
+
+func (m *mojoImpl) DbGetSyncGroupSpec(name, sgName string) (mojom.Error, mojom.SyncGroupSpec, string, error) {
+ return mojom.Error{}, mojom.SyncGroupSpec{}, "", nil
+}
+
+func (m *mojoImpl) DbSetSyncGroupSpec(name, sgName string, spec mojom.SyncGroupSpec, version string) (mojom.Error, error) {
+ return mojom.Error{}, nil
+}
+
+func (m *mojoImpl) DbGetSyncGroupMembers(name, sgName string) (mojom.Error, map[string]mojom.SyncGroupMemberInfo, error) {
+ return mojom.Error{}, nil, nil
+}
+
+////////////////////////////////////////
+// nosql.Table
+
+func (m *mojoImpl) TableCreate(name string, mPerms mojom.Perms) (mojom.Error, error) {
+ ctx, call := m.newCtxCall(name, methodDesc(nosqlwire.TableDesc, "Create"))
+ stub, err := m.getTable(ctx, call, name)
+ if err != nil {
+ return toMojoError(err), nil
+ }
+ vPerms, err := toV23Perms(mPerms)
+ if err != nil {
+ return toMojoError(err), nil
+ }
+ err = stub.Create(ctx, call, NoSchema, vPerms)
+ return toMojoError(err), nil
+}
+
+func (m *mojoImpl) TableDelete(name string) (mojom.Error, error) {
+ ctx, call := m.newCtxCall(name, methodDesc(nosqlwire.TableDesc, "Delete"))
+ stub, err := m.getTable(ctx, call, name)
+ if err != nil {
+ return toMojoError(err), nil
+ }
+ err = stub.Delete(ctx, call, NoSchema)
+ return toMojoError(err), nil
+}
+
+func (m *mojoImpl) TableExists(name string) (mojom.Error, bool, error) {
+ ctx, call := m.newCtxCall(name, methodDesc(nosqlwire.TableDesc, "Exists"))
+ stub, err := m.getTable(ctx, call, name)
+ if err != nil {
+ return toMojoError(err), false, nil
+ }
+ exists, err := stub.Exists(ctx, call, NoSchema)
+ return toMojoError(err), exists, nil
+}
+
+func (m *mojoImpl) TableDeleteRowRange(name string, start, limit []byte) (mojom.Error, error) {
+ return mojom.Error{}, nil
+}
+
+type scanStreamImpl struct {
+ ctx *context.T
+ proxy *mojom.ScanStream_Proxy
+}
+
+func (s *scanStreamImpl) Send(item interface{}) error {
+ kv, ok := item.(nosqlwire.KeyValue)
+ if !ok {
+ return verror.NewErrInternal(s.ctx)
+ }
+
+ return s.proxy.OnKeyValue(mojom.KeyValue{
+ Key: kv.Key,
+ Value: kv.Value,
+ })
+}
+
+func (s *scanStreamImpl) Recv(_ interface{}) error {
+ // This should never be called.
+ return verror.NewErrInternal(s.ctx)
+}
+
+var _ rpc.Stream = (*scanStreamImpl)(nil)
+
+// TODO(nlacasse): Provide some way for the client to cancel the stream.
+func (m *mojoImpl) TableScan(name string, start, limit []byte, ptr mojom.ScanStream_Pointer) (mojom.Error, error) {
+ ctx, call := m.newCtxCall(name, methodDesc(nosqlwire.TableDesc, "Scan"))
+ stub, err := m.getTable(ctx, call, name)
+ if err != nil {
+ return toMojoError(err), nil
+ }
+
+ proxy := mojom.NewScanStreamProxy(ptr, bindings.GetAsyncWaiter())
+
+ tableScanServerCallStub := &nosqlwire.TableScanServerCallStub{struct {
+ rpc.Stream
+ rpc.ServerCall
+ }{
+ &scanStreamImpl{
+ ctx: ctx,
+ proxy: proxy,
+ },
+ call,
+ }}
+
+ err = stub.Scan(ctx, tableScanServerCallStub, NoSchema, start, limit)
+
+ // NOTE(nlacasse): Since we are already streaming, we send any error back
+ // to the client on the stream. The TableScan function itself should not
+ // return an error at this point.
+ proxy.OnDone(toMojoError(err))
+ return mojom.Error{}, nil
+}
+
+func (m *mojoImpl) TableGetPermissions(name, key string) (mojom.Error, []mojom.PrefixPerms, error) {
+ return mojom.Error{}, nil, nil
+}
+
+func (m *mojoImpl) TableSetPermissions(name, prefix string, mPerms mojom.Perms) (mojom.Error, error) {
+ return mojom.Error{}, nil
+}
+
+func (m *mojoImpl) TableDeletePermissions(name, prefix string) (mojom.Error, error) {
+ return mojom.Error{}, nil
+}
+
+////////////////////////////////////////
+// nosql.Row
+
+func (m *mojoImpl) RowExists(name string) (mojom.Error, bool, error) {
+ ctx, call := m.newCtxCall(name, methodDesc(nosqlwire.RowDesc, "Exists"))
+ stub, err := m.getRow(ctx, call, name)
+ if err != nil {
+ return toMojoError(err), false, nil
+ }
+ exists, err := stub.Exists(ctx, call, NoSchema)
+ return toMojoError(err), exists, nil
+}
+
+func (m *mojoImpl) RowGet(name string) (mojom.Error, []byte, error) {
+ ctx, call := m.newCtxCall(name, methodDesc(nosqlwire.RowDesc, "Get"))
+ stub, err := m.getRow(ctx, call, name)
+ if err != nil {
+ return toMojoError(err), nil, nil
+ }
+ value, err := stub.Get(ctx, call, NoSchema)
+ return toMojoError(err), value, nil
+}
+
+func (m *mojoImpl) RowPut(name string, value []byte) (mojom.Error, error) {
+ ctx, call := m.newCtxCall(name, methodDesc(nosqlwire.RowDesc, "Put"))
+ stub, err := m.getRow(ctx, call, name)
+ if err != nil {
+ return toMojoError(err), nil
+ }
+ err = stub.Put(ctx, call, NoSchema, value)
+ return toMojoError(err), nil
+}
+
+func (m *mojoImpl) RowDelete(name string) (mojom.Error, error) {
+ ctx, call := m.newCtxCall(name, methodDesc(nosqlwire.RowDesc, "Delete"))
+ stub, err := m.getRow(ctx, call, name)
+ if err != nil {
+ return toMojoError(err), nil
+ }
+ err = stub.Delete(ctx, call, NoSchema)
+ return toMojoError(err), nil
+}
diff --git a/services/syncbase/server/nosql/database.go b/services/syncbase/server/nosql/database.go
new file mode 100644
index 0000000..a9f503f
--- /dev/null
+++ b/services/syncbase/server/nosql/database.go
@@ -0,0 +1,577 @@
+// Copyright 2015 The Vanadium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package nosql
+
+import (
+ "math/rand"
+ "path"
+ "strconv"
+ "strings"
+ "sync"
+ "time"
+
+ wire "v.io/syncbase/v23/services/syncbase/nosql"
+ "v.io/syncbase/v23/syncbase/nosql/query_db"
+ "v.io/syncbase/v23/syncbase/nosql/query_exec"
+ "v.io/syncbase/x/ref/services/syncbase/clock"
+ "v.io/syncbase/x/ref/services/syncbase/server/interfaces"
+ "v.io/syncbase/x/ref/services/syncbase/server/util"
+ "v.io/syncbase/x/ref/services/syncbase/server/watchable"
+ "v.io/syncbase/x/ref/services/syncbase/store"
+ "v.io/v23/context"
+ "v.io/v23/glob"
+ "v.io/v23/rpc"
+ "v.io/v23/security/access"
+ "v.io/v23/vdl"
+ "v.io/v23/verror"
+ "v.io/v23/vom"
+ "v.io/x/lib/vlog"
+)
+
+// database is a per-database singleton (i.e. not per-request). It does not
+// directly handle RPCs.
+// Note: If a database does not exist at the time of a database RPC, the
+// dispatcher creates a short-lived database object to service that particular
+// request.
+type database struct {
+ name string
+ a interfaces.App
+ // The fields below are initialized iff this database exists.
+ exists bool
+ // TODO(sadovsky): Make st point to a store.Store wrapper that handles paging,
+ // and do not actually open the store in NewDatabase.
+ st store.Store // stores all data for a single database
+
+ // Active snapshots and transactions corresponding to client batches.
+ // TODO(sadovsky): Add timeouts and GC.
+ mu sync.Mutex // protects the fields below
+ sns map[uint64]store.Snapshot
+ txs map[uint64]store.Transaction
+
+ // Active ConflictResolver connection from the app to this database.
+ // NOTE: For now, we assume there's only one open conflict resolution stream
+ // per database (typically, from the app that owns the database).
+ resolver wire.ConflictManagerStartConflictResolverServerCall
+}
+
+// databaseReq is a per-request object that handles Database RPCs.
+// It embeds database and tracks request-specific batch state.
+type databaseReq struct {
+ *database
+ // If batchId is non-nil, exactly one of sn or tx will be non-nil.
+ batchId *uint64
+ sn store.Snapshot
+ tx store.Transaction
+}
+
+var (
+ _ wire.DatabaseServerMethods = (*databaseReq)(nil)
+ _ interfaces.Database = (*database)(nil)
+)
+
+// DatabaseOptions configures a database.
+type DatabaseOptions struct {
+ // Database-level permissions.
+ Perms access.Permissions
+ // Root dir for data storage.
+ RootDir string
+ // Storage engine to use.
+ Engine string
+}
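+
+// Example (hypothetical values; supported engine names are determined by the
+// util.OpenStore helper):
+//
+//   opts := DatabaseOptions{
+//       Perms:   perms, // an access.Permissions value
+//       RootDir: "/tmp/syncbase",
+//       Engine:  "leveldb",
+//   }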
+
+// OpenDatabase opens a database and returns a *database for it. Designed for
+// use from within NewDatabase and server.NewService.
+func OpenDatabase(ctx *context.T, a interfaces.App, name string, opts DatabaseOptions, openOpts util.OpenOptions) (*database, error) {
+ st, err := util.OpenStore(opts.Engine, path.Join(opts.RootDir, opts.Engine), openOpts)
+ if err != nil {
+ return nil, err
+ }
+ vclock := clock.NewVClock(a.Service().St())
+ st, err = watchable.Wrap(st, vclock, &watchable.Options{
+ ManagedPrefixes: []string{util.RowPrefix, util.PermsPrefix},
+ })
+ if err != nil {
+ return nil, err
+ }
+ return &database{
+ name: name,
+ a: a,
+ exists: true,
+ st: st,
+ sns: make(map[uint64]store.Snapshot),
+ txs: make(map[uint64]store.Transaction),
+ }, nil
+}
+
+// NewDatabase creates a new database instance and returns it.
+// Designed for use from within App.CreateNoSQLDatabase.
+func NewDatabase(ctx *context.T, a interfaces.App, name string, metadata *wire.SchemaMetadata, opts DatabaseOptions) (*database, error) {
+ if opts.Perms == nil {
+ return nil, verror.New(verror.ErrInternal, ctx, "perms must be specified")
+ }
+ d, err := OpenDatabase(ctx, a, name, opts, util.OpenOptions{CreateIfMissing: true, ErrorIfExists: true})
+ if err != nil {
+ return nil, err
+ }
+ data := &databaseData{
+ Name: d.name,
+ Perms: opts.Perms,
+ SchemaMetadata: metadata,
+ }
+ if err := util.Put(ctx, d.st, d.stKey(), data); err != nil {
+ return nil, err
+ }
+ return d, nil
+}
+
+////////////////////////////////////////
+// RPC methods
+
+func (d *databaseReq) Create(ctx *context.T, call rpc.ServerCall, metadata *wire.SchemaMetadata, perms access.Permissions) error {
+ if d.exists {
+ return verror.New(verror.ErrExist, ctx, d.name)
+ }
+ if d.batchId != nil {
+ return wire.NewErrBoundToBatch(ctx)
+ }
+ // This database does not yet exist; d is just an ephemeral handle that holds
+ // {name string, a *app}. d.a.CreateNoSQLDatabase will create a new database
+ // handle and store it in d.a.dbs[d.name].
+ return d.a.CreateNoSQLDatabase(ctx, call, d.name, perms, metadata)
+}
+
+func (d *databaseReq) Delete(ctx *context.T, call rpc.ServerCall, schemaVersion int32) error {
+ if d.batchId != nil {
+ return wire.NewErrBoundToBatch(ctx)
+ }
+ if err := d.checkSchemaVersion(ctx, schemaVersion); err != nil {
+ return err
+ }
+ return d.a.DeleteNoSQLDatabase(ctx, call, d.name)
+}
+
+func (d *databaseReq) Exists(ctx *context.T, call rpc.ServerCall, schemaVersion int32) (bool, error) {
+ if !d.exists {
+ return false, nil
+ }
+ if err := d.checkSchemaVersion(ctx, schemaVersion); err != nil {
+ return false, err
+ }
+ return util.ErrorToExists(util.GetWithAuth(ctx, call, d.st, d.stKey(), &databaseData{}))
+}
+
+var rng *rand.Rand = rand.New(rand.NewSource(time.Now().UTC().UnixNano()))
+
+func (d *databaseReq) BeginBatch(ctx *context.T, call rpc.ServerCall, schemaVersion int32, bo wire.BatchOptions) (string, error) {
+ if !d.exists {
+ return "", verror.New(verror.ErrNoExist, ctx, d.name)
+ }
+ if d.batchId != nil {
+ return "", wire.NewErrBoundToBatch(ctx)
+ }
+ if err := d.checkSchemaVersion(ctx, schemaVersion); err != nil {
+ return "", err
+ }
+
+ d.mu.Lock()
+ defer d.mu.Unlock()
+ var id uint64
+ var batchType string
+ for {
+ id = uint64(rng.Int63())
+ if bo.ReadOnly {
+ if _, ok := d.sns[id]; !ok {
+ d.sns[id] = d.st.NewSnapshot()
+ batchType = "sn"
+ break
+ }
+ } else {
+ if _, ok := d.txs[id]; !ok {
+ d.txs[id] = d.st.NewTransaction()
+ batchType = "tx"
+ break
+ }
+ }
+ }
+ return strings.Join([]string{d.name, batchType, strconv.FormatUint(id, 10)}, util.BatchSep), nil
+}
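+
+// Illustrative note: the handle returned above is the database name, the batch
+// type ("sn" or "tx"), and the random id joined with util.BatchSep, roughly:
+//
+//   handle := strings.Join([]string{"mydb", "tx", "4203974523"}, util.BatchSep)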
+
+func (d *databaseReq) Commit(ctx *context.T, call rpc.ServerCall, schemaVersion int32) error {
+ if !d.exists {
+ return verror.New(verror.ErrNoExist, ctx, d.name)
+ }
+ if d.batchId == nil {
+ return wire.NewErrNotBoundToBatch(ctx)
+ }
+ if d.tx == nil {
+ return wire.NewErrReadOnlyBatch(ctx)
+ }
+ if err := d.checkSchemaVersion(ctx, schemaVersion); err != nil {
+ return err
+ }
+ var err error
+ if err = d.tx.Commit(); err == nil {
+ d.mu.Lock()
+ delete(d.txs, *d.batchId)
+ d.mu.Unlock()
+ }
+ if verror.ErrorID(err) == store.ErrConcurrentTransaction.ID {
+ return verror.New(wire.ErrConcurrentBatch, ctx, err)
+ }
+ return err
+}
+
+func (d *databaseReq) Abort(ctx *context.T, call rpc.ServerCall, schemaVersion int32) error {
+ if !d.exists {
+ return verror.New(verror.ErrNoExist, ctx, d.name)
+ }
+ if d.batchId == nil {
+ return wire.NewErrNotBoundToBatch(ctx)
+ }
+ if err := d.checkSchemaVersion(ctx, schemaVersion); err != nil {
+ return err
+ }
+ var err error
+ if d.tx != nil {
+ if err = d.tx.Abort(); err == nil {
+ d.mu.Lock()
+ delete(d.txs, *d.batchId)
+ d.mu.Unlock()
+ }
+ } else {
+ if err = d.sn.Abort(); err == nil {
+ d.mu.Lock()
+ delete(d.sns, *d.batchId)
+ d.mu.Unlock()
+ }
+ }
+ return err
+}
+
+func (d *databaseReq) Exec(ctx *context.T, call wire.DatabaseExecServerCall, schemaVersion int32, q string) error {
+ if err := d.checkSchemaVersion(ctx, schemaVersion); err != nil {
+ return err
+ }
+ impl := func(headers []string, rs ResultStream, err error) error {
+ if err != nil {
+ return err
+ }
+ sender := call.SendStream()
+ // Push the headers first -- the client will retrieve them and return
+ // them separately from the results.
+ var resultHeaders []*vdl.Value
+ for _, header := range headers {
+ resultHeaders = append(resultHeaders, vdl.ValueOf(header))
+ }
+ sender.Send(resultHeaders)
+ for rs.Advance() {
+ result := rs.Result()
+ if err := sender.Send(result); err != nil {
+ rs.Cancel()
+ return err
+ }
+ }
+ return rs.Err()
+ }
+ var sntx store.SnapshotOrTransaction
+ if d.batchId != nil {
+ sntx = d.batchReader()
+ } else {
+ sntx = d.st.NewSnapshot()
+ defer sntx.Abort()
+ }
+ // queryDb implements query_db.Database, which is needed by the query
+ // package's Exec function.
+ db := &queryDb{
+ ctx: ctx,
+ call: call,
+ req: d,
+ sntx: sntx,
+ }
+
+ return impl(query_exec.Exec(db, q))
+}
+
+func (d *databaseReq) SetPermissions(ctx *context.T, call rpc.ServerCall, perms access.Permissions, version string) error {
+ if !d.exists {
+ return verror.New(verror.ErrNoExist, ctx, d.name)
+ }
+ if d.batchId != nil {
+ return wire.NewErrBoundToBatch(ctx)
+ }
+ return d.a.SetDatabasePerms(ctx, call, d.name, perms, version)
+}
+
+func (d *databaseReq) GetPermissions(ctx *context.T, call rpc.ServerCall) (perms access.Permissions, version string, err error) {
+ if !d.exists {
+ return nil, "", verror.New(verror.ErrNoExist, ctx, d.name)
+ }
+ if d.batchId != nil {
+ return nil, "", wire.NewErrBoundToBatch(ctx)
+ }
+ data := &databaseData{}
+ if err := util.GetWithAuth(ctx, call, d.st, d.stKey(), data); err != nil {
+ return nil, "", err
+ }
+ return data.Perms, util.FormatVersion(data.Version), nil
+}
+
+func (d *databaseReq) GlobChildren__(ctx *context.T, call rpc.GlobChildrenServerCall, matcher *glob.Element) error {
+ if !d.exists {
+ return verror.New(verror.ErrNoExist, ctx, d.name)
+ }
+ if d.batchId != nil {
+ return wire.NewErrBoundToBatch(ctx)
+ }
+ // Check perms.
+ sn := d.st.NewSnapshot()
+ if err := util.GetWithAuth(ctx, call, sn, d.stKey(), &databaseData{}); err != nil {
+ sn.Abort()
+ return err
+ }
+ return util.Glob(ctx, call, matcher, sn, sn.Abort, util.TablePrefix)
+}
+
+////////////////////////////////////////
+// ResultStream interface
+
+// ResultStream is an interface for iterating through the results (i.e., rows)
+// returned from a query. Each result row is an array of vdl objects.
+type ResultStream interface {
+ // Advance stages an element so the client can retrieve it with Result.
+ // Advance returns true iff there is a result to retrieve. The client must
+ // call Advance before calling Result. The client must call Cancel if it
+ // does not iterate through all elements (i.e. until Advance returns false).
+ // Advance may block if an element is not immediately available.
+ Advance() bool
+
+ // Result returns the row (i.e., array of vdl Values) that was staged by Advance.
+ // Result may panic if Advance returned false or was not called at all.
+ // Result does not block.
+ Result() []*vdl.Value
+
+ // Err returns a non-nil error iff the stream encountered any errors. Err does
+ // not block.
+ Err() error
+
+ // Cancel notifies the ResultStream provider that it can stop producing results.
+ // The client must call Cancel if it does not iterate through all results
+ // (i.e. until Advance returns false). Cancel is idempotent and can be called
+ // concurrently with a goroutine that is iterating via Advance/Result.
+ // Cancel causes Advance to subsequently return false. Cancel does not block.
+ Cancel()
+}
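+
+// Example (illustrative sketch, assuming rs is a ResultStream produced by
+// query execution): the canonical iteration pattern is
+//
+//   for rs.Advance() {
+//       row := rs.Result() // []*vdl.Value
+//       _ = row
+//   }
+//   if err := rs.Err(); err != nil {
+//       // handle the error; Cancel is only needed when stopping early
+//   }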
+
+////////////////////////////////////////
+// interfaces.Database methods
+
+func (d *database) St() store.Store {
+ if !d.exists {
+ vlog.Fatalf("database %q does not exist", d.name)
+ }
+ return d.st
+}
+
+func (d *database) App() interfaces.App {
+ return d.a
+}
+
+func (d *database) CheckPermsInternal(ctx *context.T, call rpc.ServerCall, st store.StoreReader) error {
+ if !d.exists {
+ vlog.Fatalf("database %q does not exist", d.name)
+ }
+ return util.GetWithAuth(ctx, call, st, d.stKey(), &databaseData{})
+}
+
+func (d *database) SetPermsInternal(ctx *context.T, call rpc.ServerCall, perms access.Permissions, version string) error {
+ if !d.exists {
+ vlog.Fatalf("database %q does not exist", d.name)
+ }
+ return store.RunInTransaction(d.st, func(tx store.Transaction) error {
+ data := &databaseData{}
+ return util.UpdateWithAuth(ctx, call, tx, d.stKey(), data, func() error {
+ if err := util.CheckVersion(ctx, version, data.Version); err != nil {
+ return err
+ }
+ data.Perms = perms
+ data.Version++
+ return nil
+ })
+ })
+}
+
+func (d *database) Name() string {
+ return d.name
+}
+
+////////////////////////////////////////
+// query_db implementation
+
+// Implement query_db's Database, Table and KeyValueStream interfaces.
+type queryDb struct {
+ ctx *context.T
+ call wire.DatabaseExecServerCall
+ req *databaseReq
+ sntx store.SnapshotOrTransaction
+}
+
+func (db *queryDb) GetContext() *context.T {
+ return db.ctx
+}
+
+func (db *queryDb) GetTable(name string) (query_db.Table, error) {
+ tDb := &tableDb{
+ qdb: db,
+ req: &tableReq{
+ name: name,
+ d: db.req,
+ },
+ }
+ // Now that we have a table, we need to check permissions.
+ if err := util.GetWithAuth(db.ctx, db.call, db.sntx, tDb.req.stKey(), &tableData{}); err != nil {
+ return nil, err
+ }
+ return tDb, nil
+}
+
+type tableDb struct {
+ qdb *queryDb
+ req *tableReq
+}
+
+func (t *tableDb) Scan(keyRanges query_db.KeyRanges) (query_db.KeyValueStream, error) {
+ streams := []store.Stream{}
+ for _, keyRange := range keyRanges {
+ // TODO(jkline): For now, acquire all of the streams at once to minimize the race condition.
+ // Need a way to Scan multiple ranges at the same state of uncommitted changes.
+ streams = append(streams, t.qdb.sntx.Scan(util.ScanRangeArgs(util.JoinKeyParts(util.RowPrefix, t.req.name), keyRange.Start, keyRange.Limit)))
+ }
+ return &kvs{
+ t: t,
+ curr: 0,
+ validRow: false,
+ it: streams,
+ err: nil,
+ }, nil
+}
+
+type kvs struct {
+ t *tableDb
+ curr int
+ validRow bool
+ currKey string
+ currValue *vdl.Value
+ it []store.Stream // array of store.Streams
+ err error
+}
+
+func (s *kvs) Advance() bool {
+ if s.err != nil {
+ return false
+ }
+ for s.curr < len(s.it) {
+ if s.it[s.curr].Advance() {
+ // key
+ keyBytes := s.it[s.curr].Key(nil)
+ parts := util.SplitKeyParts(string(keyBytes))
+ // TODO(rogulenko): Check access for the key.
+ s.currKey = parts[len(parts)-1]
+ // value
+ valueBytes := s.it[s.curr].Value(nil)
+ var currValue *vdl.Value
+ if err := vom.Decode(valueBytes, &currValue); err != nil {
+ s.validRow = false
+ s.err = err
+ return false
+ }
+ s.currValue = currValue
+ s.validRow = true
+ return true
+ }
+ // Advance returned false. It could be an error, or we could
+ // have reached the end.
+ if err := s.it[s.curr].Err(); err != nil {
+ s.validRow = false
+ s.err = err
+ return false
+ }
+ // We've reached the end of the iterator for this keyRange.
+ // Jump to the next one.
+ s.it[s.curr] = nil
+ s.curr++
+ s.validRow = false
+ }
+ // There are no more prefixes to scan.
+ return false
+}
+
+func (s *kvs) KeyValue() (string, *vdl.Value) {
+ if !s.validRow {
+ return "", nil
+ }
+ return s.currKey, s.currValue
+}
+
+func (s *kvs) Err() error {
+ return s.err
+}
+
+func (s *kvs) Cancel() {
+ if s.it != nil {
+ for i := s.curr; i < len(s.it); i++ {
+ s.it[i].Cancel()
+ }
+ // Advance curr past the end of the streams and drop them so that
+ // subsequent calls to Advance return false.
+ s.curr = len(s.it)
+ s.it = nil
+ }
+}
+
+////////////////////////////////////////
+// Internal helpers
+
+func (d *database) stKey() string {
+ return util.DatabasePrefix
+}
+
+func (d *databaseReq) batchReader() store.SnapshotOrTransaction {
+ if d.batchId == nil {
+ return nil
+ } else if d.sn != nil {
+ return d.sn
+ } else {
+ return d.tx
+ }
+}
+
+func (d *databaseReq) batchTransaction() (store.Transaction, error) {
+ if d.batchId == nil {
+ return nil, nil
+ } else if d.tx != nil {
+ return d.tx, nil
+ } else {
+ return nil, wire.NewErrReadOnlyBatch(nil)
+ }
+}
+
+// TODO(jlodhia): Schema check should happen within a transaction for each
+// operation in database, table and row. Do schema check along with permissions
+// check when fully-specified permission model is implemented.
+func (d *databaseReq) checkSchemaVersion(ctx *context.T, schemaVersion int32) error {
+ if !d.exists {
+ // database does not exist yet and hence there is no schema to check.
+ // This can happen if delete is called twice on the same database.
+ return nil
+ }
+ schemaMetadata, err := d.getSchemaMetadataWithoutAuth(ctx)
+ if err != nil {
+ return err
+ }
+ if (schemaMetadata == nil) || (schemaMetadata.Version == schemaVersion) {
+ return nil
+ }
+ return wire.NewErrSchemaVersionMismatch(ctx)
+}
diff --git a/services/syncbase/server/nosql/database_bm.go b/services/syncbase/server/nosql/database_bm.go
new file mode 100644
index 0000000..e646c42
--- /dev/null
+++ b/services/syncbase/server/nosql/database_bm.go
@@ -0,0 +1,95 @@
+// Copyright 2015 The Vanadium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package nosql
+
+import (
+ wire "v.io/syncbase/v23/services/syncbase/nosql"
+ "v.io/syncbase/x/ref/services/syncbase/vsync"
+ "v.io/v23/context"
+ "v.io/v23/rpc"
+)
+
+////////////////////////////////////////////////////////////////////////////////
+// RPCs for managing blobs between Syncbase and its clients.
+
+func (d *databaseReq) CreateBlob(ctx *context.T, call rpc.ServerCall) (wire.BlobRef, error) {
+ if d.batchId != nil {
+ return wire.NullBlobRef, wire.NewErrBoundToBatch(ctx)
+ }
+ sd := vsync.NewSyncDatabase(d)
+ return sd.CreateBlob(ctx, call)
+}
+
+func (d *databaseReq) PutBlob(ctx *context.T, call wire.BlobManagerPutBlobServerCall, br wire.BlobRef) error {
+ if d.batchId != nil {
+ return wire.NewErrBoundToBatch(ctx)
+ }
+ sd := vsync.NewSyncDatabase(d)
+ return sd.PutBlob(ctx, call, br)
+}
+
+func (d *databaseReq) CommitBlob(ctx *context.T, call rpc.ServerCall, br wire.BlobRef) error {
+ if d.batchId != nil {
+ return wire.NewErrBoundToBatch(ctx)
+ }
+ sd := vsync.NewSyncDatabase(d)
+ return sd.CommitBlob(ctx, call, br)
+}
+
+func (d *databaseReq) GetBlobSize(ctx *context.T, call rpc.ServerCall, br wire.BlobRef) (int64, error) {
+ if d.batchId != nil {
+ return 0, wire.NewErrBoundToBatch(ctx)
+ }
+ sd := vsync.NewSyncDatabase(d)
+ return sd.GetBlobSize(ctx, call, br)
+}
+
+func (d *databaseReq) DeleteBlob(ctx *context.T, call rpc.ServerCall, br wire.BlobRef) error {
+ if d.batchId != nil {
+ return wire.NewErrBoundToBatch(ctx)
+ }
+ sd := vsync.NewSyncDatabase(d)
+ return sd.DeleteBlob(ctx, call, br)
+}
+
+func (d *databaseReq) GetBlob(ctx *context.T, call wire.BlobManagerGetBlobServerCall, br wire.BlobRef, offset int64) error {
+ if d.batchId != nil {
+ return wire.NewErrBoundToBatch(ctx)
+ }
+ sd := vsync.NewSyncDatabase(d)
+ return sd.GetBlob(ctx, call, br, offset)
+}
+
+func (d *databaseReq) FetchBlob(ctx *context.T, call wire.BlobManagerFetchBlobServerCall, br wire.BlobRef, priority uint64) error {
+ if d.batchId != nil {
+ return wire.NewErrBoundToBatch(ctx)
+ }
+ sd := vsync.NewSyncDatabase(d)
+ return sd.FetchBlob(ctx, call, br, priority)
+}
+
+func (d *databaseReq) PinBlob(ctx *context.T, call rpc.ServerCall, br wire.BlobRef) error {
+ if d.batchId != nil {
+ return wire.NewErrBoundToBatch(ctx)
+ }
+ sd := vsync.NewSyncDatabase(d)
+ return sd.PinBlob(ctx, call, br)
+}
+
+func (d *databaseReq) UnpinBlob(ctx *context.T, call rpc.ServerCall, br wire.BlobRef) error {
+ if d.batchId != nil {
+ return wire.NewErrBoundToBatch(ctx)
+ }
+ sd := vsync.NewSyncDatabase(d)
+ return sd.UnpinBlob(ctx, call, br)
+}
+
+func (d *databaseReq) KeepBlob(ctx *context.T, call rpc.ServerCall, br wire.BlobRef, rank uint64) error {
+ if d.batchId != nil {
+ return wire.NewErrBoundToBatch(ctx)
+ }
+ sd := vsync.NewSyncDatabase(d)
+ return sd.KeepBlob(ctx, call, br, rank)
+}
diff --git a/services/syncbase/server/nosql/database_crm.go b/services/syncbase/server/nosql/database_crm.go
new file mode 100644
index 0000000..e1135a9
--- /dev/null
+++ b/services/syncbase/server/nosql/database_crm.go
@@ -0,0 +1,20 @@
+// Copyright 2015 The Vanadium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package nosql
+
+import (
+ wire "v.io/syncbase/v23/services/syncbase/nosql"
+ "v.io/v23/context"
+)
+
+////////////////////////////////////////
+// ConflictManager RPC methods
+
+func (d *databaseReq) StartConflictResolver(ctx *context.T, call wire.ConflictManagerStartConflictResolverServerCall) error {
+ // Store the conflict resolver connection in the per-app, per-database
+ // singleton so that sync can access it.
+ d.database.resolver = call
+ return nil
+}
diff --git a/services/syncbase/server/nosql/database_sgm.go b/services/syncbase/server/nosql/database_sgm.go
new file mode 100644
index 0000000..cc1a73d
--- /dev/null
+++ b/services/syncbase/server/nosql/database_sgm.go
@@ -0,0 +1,85 @@
+// Copyright 2015 The Vanadium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package nosql
+
+import (
+ wire "v.io/syncbase/v23/services/syncbase/nosql"
+ "v.io/syncbase/x/ref/services/syncbase/vsync"
+ "v.io/v23/context"
+ "v.io/v23/rpc"
+ "v.io/v23/verror"
+)
+
+////////////////////////////////////////
+// SyncGroup RPC methods
+
+func (d *databaseReq) GetSyncGroupNames(ctx *context.T, call rpc.ServerCall) ([]string, error) {
+ if d.batchId != nil {
+ return nil, wire.NewErrBoundToBatch(ctx)
+ }
+ sd := vsync.NewSyncDatabase(d)
+ return sd.GetSyncGroupNames(ctx, call)
+}
+
+func (d *databaseReq) CreateSyncGroup(ctx *context.T, call rpc.ServerCall, sgName string, spec wire.SyncGroupSpec, myInfo wire.SyncGroupMemberInfo) error {
+ if d.batchId != nil {
+ return wire.NewErrBoundToBatch(ctx)
+ }
+ sd := vsync.NewSyncDatabase(d)
+ return sd.CreateSyncGroup(ctx, call, sgName, spec, myInfo)
+}
+
+func (d *databaseReq) JoinSyncGroup(ctx *context.T, call rpc.ServerCall, sgName string, myInfo wire.SyncGroupMemberInfo) (wire.SyncGroupSpec, error) {
+ if d.batchId != nil {
+ return wire.SyncGroupSpec{}, wire.NewErrBoundToBatch(ctx)
+ }
+ sd := vsync.NewSyncDatabase(d)
+ return sd.JoinSyncGroup(ctx, call, sgName, myInfo)
+}
+
+func (d *databaseReq) LeaveSyncGroup(ctx *context.T, call rpc.ServerCall, sgName string) error {
+ if d.batchId != nil {
+ return wire.NewErrBoundToBatch(ctx)
+ }
+ return verror.NewErrNotImplemented(ctx)
+}
+
+func (d *databaseReq) DestroySyncGroup(ctx *context.T, call rpc.ServerCall, sgName string) error {
+ if d.batchId != nil {
+ return wire.NewErrBoundToBatch(ctx)
+ }
+ return verror.NewErrNotImplemented(ctx)
+}
+
+func (d *databaseReq) EjectFromSyncGroup(ctx *context.T, call rpc.ServerCall, sgName, member string) error {
+ if d.batchId != nil {
+ return wire.NewErrBoundToBatch(ctx)
+ }
+ return verror.NewErrNotImplemented(ctx)
+}
+
+func (d *databaseReq) GetSyncGroupSpec(ctx *context.T, call rpc.ServerCall, sgName string) (wire.SyncGroupSpec, string, error) {
+ if d.batchId != nil {
+ return wire.SyncGroupSpec{}, "", wire.NewErrBoundToBatch(ctx)
+ }
+ sd := vsync.NewSyncDatabase(d)
+ return sd.GetSyncGroupSpec(ctx, call, sgName)
+}
+
+func (d *databaseReq) SetSyncGroupSpec(ctx *context.T, call rpc.ServerCall, sgName string, spec wire.SyncGroupSpec, version string) error {
+ if d.batchId != nil {
+ return wire.NewErrBoundToBatch(ctx)
+ }
+ sd := vsync.NewSyncDatabase(d)
+ return sd.SetSyncGroupSpec(ctx, call, sgName, spec, version)
+}
+
+func (d *databaseReq) GetSyncGroupMembers(ctx *context.T, call rpc.ServerCall, sgName string) (map[string]wire.SyncGroupMemberInfo, error) {
+ if d.batchId != nil {
+ return nil, wire.NewErrBoundToBatch(ctx)
+ }
+ sd := vsync.NewSyncDatabase(d)
+ return sd.GetSyncGroupMembers(ctx, call, sgName)
+}
diff --git a/services/syncbase/server/nosql/database_sm.go b/services/syncbase/server/nosql/database_sm.go
new file mode 100644
index 0000000..3c87a6b
--- /dev/null
+++ b/services/syncbase/server/nosql/database_sm.go
@@ -0,0 +1,64 @@
+// Copyright 2015 The Vanadium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package nosql
+
+import (
+ wire "v.io/syncbase/v23/services/syncbase/nosql"
+ "v.io/syncbase/x/ref/services/syncbase/server/util"
+ "v.io/syncbase/x/ref/services/syncbase/store"
+ "v.io/v23/context"
+ "v.io/v23/rpc"
+ "v.io/v23/verror"
+)
+
+////////////////////////////////////////
+// SchemaManager RPC methods
+
+func (d *databaseReq) GetSchemaMetadata(ctx *context.T, call rpc.ServerCall) (wire.SchemaMetadata, error) {
+ metadata := wire.SchemaMetadata{}
+
+ if !d.exists {
+ return metadata, verror.New(verror.ErrNoExist, ctx, d.Name())
+ }
+
+ // Check permissions on Database and retrieve schema metadata.
+ dbData := databaseData{}
+ if err := util.GetWithAuth(ctx, call, d.st, d.stKey(), &dbData); err != nil {
+ return metadata, err
+ }
+ if dbData.SchemaMetadata == nil {
+ return metadata, verror.New(verror.ErrNoExist, ctx, "Schema does not exist for the db")
+ }
+ return *dbData.SchemaMetadata, nil
+}
+
+func (d *databaseReq) SetSchemaMetadata(ctx *context.T, call rpc.ServerCall, metadata wire.SchemaMetadata) error {
+ // Check if database exists
+ if !d.exists {
+ return verror.New(verror.ErrNoExist, ctx, d.Name())
+ }
+
+ // Check permissions on Database and store schema metadata.
+ return store.RunInTransaction(d.st, func(tx store.Transaction) error {
+ dbData := databaseData{}
+ return util.UpdateWithAuth(ctx, call, tx, d.stKey(), &dbData, func() error {
+ // NOTE: For now we expect the client to not issue multiple
+ // concurrent SetSchemaMetadata calls.
+ dbData.SchemaMetadata = &metadata
+ return nil
+ })
+ })
+}
+
+func (d *databaseReq) getSchemaMetadataWithoutAuth(ctx *context.T) (*wire.SchemaMetadata, error) {
+ if !d.exists {
+ return nil, verror.New(verror.ErrInternal, ctx, "field store in database cannot be nil")
+ }
+ dbData := databaseData{}
+ if err := util.Get(ctx, d.st, d.stKey(), &dbData); err != nil {
+ return nil, err
+ }
+ return dbData.SchemaMetadata, nil
+}
diff --git a/services/syncbase/server/nosql/database_watch.go b/services/syncbase/server/nosql/database_watch.go
new file mode 100644
index 0000000..35228e6
--- /dev/null
+++ b/services/syncbase/server/nosql/database_watch.go
@@ -0,0 +1,217 @@
+// Copyright 2015 The Vanadium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package nosql
+
+import (
+ "bytes"
+ "strings"
+
+ wire "v.io/syncbase/v23/services/syncbase/nosql"
+ pubutil "v.io/syncbase/v23/syncbase/util"
+ "v.io/syncbase/x/ref/services/syncbase/server/util"
+ "v.io/syncbase/x/ref/services/syncbase/server/watchable"
+ "v.io/v23/context"
+ "v.io/v23/naming"
+ "v.io/v23/rpc"
+ "v.io/v23/services/watch"
+ "v.io/v23/vdl"
+ "v.io/v23/verror"
+)
+
+// GetResumeMarker implements the wire.DatabaseWatcher interface.
+func (d *databaseReq) GetResumeMarker(ctx *context.T, call rpc.ServerCall) (watch.ResumeMarker, error) {
+ if !d.exists {
+ return nil, verror.New(verror.ErrNoExist, ctx, d.name)
+ }
+ if d.batchId != nil {
+ return watchable.GetResumeMarker(d.batchReader())
+ } else {
+ return watchable.GetResumeMarker(d.st)
+ }
+}
+
+// WatchGlob implements the wire.DatabaseWatcher interface.
+func (d *databaseReq) WatchGlob(ctx *context.T, call watch.GlobWatcherWatchGlobServerCall, req watch.GlobRequest) error {
+ // TODO(rogulenko): Check permissions here and in other methods.
+ if !d.exists {
+ return verror.New(verror.ErrNoExist, ctx, d.name)
+ }
+ if d.batchId != nil {
+ return wire.NewErrBoundToBatch(ctx)
+ }
+ // Parse the pattern.
+ if !strings.HasSuffix(req.Pattern, "*") {
+ return verror.New(verror.ErrBadArg, ctx, req.Pattern)
+ }
+ table, prefix, err := pubutil.ParseTableRowPair(ctx, strings.TrimSuffix(req.Pattern, "*"))
+ if err != nil {
+ return err
+ }
+ // Get the resume marker and fetch the initial state if necessary.
+ resumeMarker := req.ResumeMarker
+ if bytes.Equal(resumeMarker, []byte("now")) || len(resumeMarker) == 0 {
+ var err error
+ if resumeMarker, err = watchable.GetResumeMarker(d.st); err != nil {
+ return err
+ }
+ if len(req.ResumeMarker) == 0 {
+ // TODO(rogulenko): Fetch the initial state.
+ return verror.NewErrNotImplemented(ctx)
+ }
+ }
+ t := tableReq{
+ name: table,
+ d: d,
+ }
+ return t.watchUpdates(ctx, call, prefix, resumeMarker)
+}
+
+// watchUpdates waits for database updates and sends them to the client.
+// This function repeats two steps in a loop:
+// - scan through the watch log until the end, sending all updates to the client;
+// - wait for one of two signals: new updates are available, or the call is canceled.
+// The 'new updates' signal is sent by a worker goroutine that translates a
+// condition variable signal to a Go channel. The worker goroutine waits on the
+// condition variable for changes. Whenever the state changes, the worker sends
+// a signal through the Go channel.
+func (t *tableReq) watchUpdates(ctx *context.T, call watch.GlobWatcherWatchGlobServerCall, prefix string, resumeMarker watch.ResumeMarker) error {
+ // The Go channel to send notifications from the worker to the main
+ // goroutine.
+ hasUpdates := make(chan struct{})
+ // The Go channel to signal the worker to stop. The worker might block
+ // on the condition variable, but we don't want the main goroutine
+ // to wait for the worker to stop, so we create a buffered channel.
+ cancelWorker := make(chan struct{}, 1)
+ defer close(cancelWorker)
+ go func() {
+ waitForChange := watchable.WatchUpdates(t.d.st)
+ var state, newState uint64 = 1, 1
+ for {
+ // Wait until the state changes or the main function returns.
+ for newState == state {
+ select {
+ case <-cancelWorker:
+ return
+ default:
+ }
+ newState = waitForChange(state)
+ }
+ // Update the current state to the new value and send a signal to
+ // the main goroutine.
+ state = newState
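+ // If the state drops to zero, stop watching: closing hasUpdates
+ // causes the main loop below to abort the call.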
+ if state == 0 {
+ close(hasUpdates)
+ return
+ }
+ // cancelWorker is closed as soon as the main function returns.
+ select {
+ case hasUpdates <- struct{}{}:
+ case <-cancelWorker:
+ return
+ }
+ }
+ }()
+
+ sender := call.SendStream()
+ for {
+ // Drain the log queue.
+ for {
+ logs, nextResumeMarker, err := watchable.ReadBatchFromLog(t.d.st, resumeMarker)
+ if err != nil {
+ return err
+ }
+ if logs == nil {
+ // No new log records available now.
+ break
+ }
+ resumeMarker = nextResumeMarker
+ changes, err := t.processLogBatch(ctx, call, prefix, logs)
+ if err != nil {
+ return err
+ }
+ if changes == nil {
+ // All batch changes are filtered out.
+ continue
+ }
+ changes[len(changes)-1].ResumeMarker = resumeMarker
+ for _, change := range changes {
+ if err := sender.Send(change); err != nil {
+ return err
+ }
+ }
+ }
+ // Wait for new updates or cancel.
+ select {
+ case _, ok := <-hasUpdates:
+ if !ok {
+ return verror.NewErrAborted(ctx)
+ }
+ case <-ctx.Done():
+ return ctx.Err()
+ }
+ }
+}
+
+// processLogBatch converts []*watchable.LogEntry to []watch.Change, filtering
+// out unnecessary or inaccessible log records.
+func (t *tableReq) processLogBatch(ctx *context.T, call rpc.ServerCall, prefix string, logs []*watchable.LogEntry) ([]watch.Change, error) {
+ sn := t.d.st.NewSnapshot()
+ defer sn.Abort()
+ var changes []watch.Change
+ for _, logEntry := range logs {
+ var opKey string
+ switch op := logEntry.Op.(type) {
+ case watchable.OpPut:
+ opKey = string(op.Value.Key)
+ case watchable.OpDelete:
+ opKey = string(op.Value.Key)
+ default:
+ continue
+ }
+ parts := util.SplitKeyParts(opKey)
+ // TODO(rogulenko): Currently we process only rows, i.e. keys of the form
+ // $row:xxx:yyy. Consider processing other keys.
+ if len(parts) != 3 || parts[0] != util.RowPrefix {
+ continue
+ }
+ table, row := parts[1], parts[2]
+ // Filter out unnecessary rows and rows that we can't access.
+ if table != t.name || !strings.HasPrefix(row, prefix) {
+ continue
+ }
+ if err := t.checkAccess(ctx, call, sn, row); err != nil {
+ if verror.ErrorID(err) != verror.ErrNoAccess.ID {
+ return nil, err
+ }
+ continue
+ }
+ change := watch.Change{
+ Name: naming.Join(table, row),
+ Continued: true,
+ }
+ switch op := logEntry.Op.(type) {
+ case watchable.OpPut:
+ rowValue, err := watchable.GetAtVersion(ctx, sn, op.Value.Key, nil, op.Value.Version)
+ if err != nil {
+ return nil, err
+ }
+ change.State = watch.Exists
+ change.Value = vdl.ValueOf(wire.StoreChange{
+ Value: rowValue,
+ FromSync: logEntry.FromSync,
+ })
+ case watchable.OpDelete:
+ change.State = watch.DoesNotExist
+ change.Value = vdl.ValueOf(wire.StoreChange{
+ FromSync: logEntry.FromSync,
+ })
+ }
+ changes = append(changes, change)
+ }
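+ // Clearing Continued on the last change marks the end of this atomic batch
+ // for the client.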
+ if len(changes) > 0 {
+ changes[len(changes)-1].Continued = false
+ }
+ return changes, nil
+}
diff --git a/services/syncbase/server/nosql/dispatcher.go b/services/syncbase/server/nosql/dispatcher.go
new file mode 100644
index 0000000..284f939
--- /dev/null
+++ b/services/syncbase/server/nosql/dispatcher.go
@@ -0,0 +1,141 @@
+// Copyright 2015 The Vanadium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package nosql
+
+import (
+ "strconv"
+ "strings"
+
+ wire "v.io/syncbase/v23/services/syncbase"
+ nosqlWire "v.io/syncbase/v23/services/syncbase/nosql"
+ pubutil "v.io/syncbase/v23/syncbase/util"
+ "v.io/syncbase/x/ref/services/syncbase/server/interfaces"
+ "v.io/syncbase/x/ref/services/syncbase/server/util"
+ "v.io/v23/context"
+ "v.io/v23/rpc"
+ "v.io/v23/security"
+ "v.io/v23/verror"
+ "v.io/x/lib/vlog"
+)
+
+type dispatcher struct {
+ a interfaces.App
+}
+
+var _ rpc.Dispatcher = (*dispatcher)(nil)
+
+func NewDispatcher(a interfaces.App) *dispatcher {
+ return &dispatcher{a: a}
+}
+
+// We always return an AllowEveryone authorizer from Lookup(), and rely on our
+// RPC method implementations to perform proper authorization.
+var auth security.Authorizer = security.AllowEveryone()
+
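+// Lookup resolves an object name suffix of the form
+// <database>[:<sn|tx>:<batchId>][/<table>[/<row>]] into the corresponding
+// server stub, e.g. "db", "db/tb" or "db:tx:42/tb/row".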
+func (disp *dispatcher) Lookup(_ *context.T, suffix string) (interface{}, security.Authorizer, error) {
+ suffix = strings.TrimPrefix(suffix, "/")
+ parts := strings.Split(suffix, "/")
+
+ if len(parts) == 0 {
+ vlog.Fatal("invalid nosql.dispatcher Lookup")
+ }
+
+ dParts := strings.Split(parts[0], util.BatchSep)
+ dName := dParts[0]
+
+ // Validate all key atoms up front, so that we can avoid doing so in all our
+ // method implementations.
+ if !pubutil.ValidName(dName) {
+ return nil, nil, wire.NewErrInvalidName(nil, suffix)
+ }
+ for _, s := range parts[1:] {
+ if !pubutil.ValidName(s) {
+ return nil, nil, wire.NewErrInvalidName(nil, suffix)
+ }
+ }
+
+ dExists := false
+ var d *database
+ if dInt, err := disp.a.NoSQLDatabase(nil, nil, dName); err == nil {
+ d = dInt.(*database) // panics on failure, as desired
+ dExists = true
+ } else {
+ if verror.ErrorID(err) != verror.ErrNoExist.ID {
+ return nil, nil, err
+ } else {
+ // Database does not exist. Create a short-lived database object to
+ // service this request.
+ d = &database{
+ name: dName,
+ a: disp.a,
+ }
+ }
+ }
+
+ dReq := &databaseReq{database: d}
+ if !setBatchFields(dReq, dParts) {
+ return nil, nil, wire.NewErrInvalidName(nil, suffix)
+ }
+ if len(parts) == 1 {
+ return nosqlWire.DatabaseServer(dReq), auth, nil
+ }
+
+ // All table and row methods require the database to exist. If it doesn't,
+ // abort early.
+ if !dExists {
+ return nil, nil, verror.New(verror.ErrNoExist, nil, d.name)
+ }
+
+ // Note, it's possible for the database to be deleted concurrently with
+ // downstream handling of this request. Depending on the order in which things
+ // execute, the client may not get an error, but in any case ultimately the
+ // store will end up in a consistent state.
+ tReq := &tableReq{
+ name: parts[1],
+ d: dReq,
+ }
+ if len(parts) == 2 {
+ return nosqlWire.TableServer(tReq), auth, nil
+ }
+
+ rReq := &rowReq{
+ key: parts[2],
+ t: tReq,
+ }
+ if len(parts) == 3 {
+ return nosqlWire.RowServer(rReq), auth, nil
+ }
+
+ return nil, nil, verror.NewErrNoExist(nil)
+}
+
+// setBatchFields sets the batch-related fields in databaseReq based on the
+// value of dParts, the parts of the database name component. It returns false
+// if dParts is malformed.
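+// For example, "db" carries no batch fields, while "db:sn:1" and "db:tx:1"
+// refer to snapshot 1 and transaction 1 of database "db", respectively.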
+func setBatchFields(d *databaseReq, dParts []string) bool {
+ if len(dParts) == 1 {
+ return true
+ }
+ if len(dParts) != 3 {
+ return false
+ }
+ batchId, err := strconv.ParseUint(dParts[2], 0, 64)
+ if err != nil {
+ return false
+ }
+ d.batchId = &batchId
+ d.mu.Lock()
+ defer d.mu.Unlock()
+ var ok bool
+ switch dParts[1] {
+ case "sn":
+ d.sn, ok = d.sns[batchId]
+ case "tx":
+ d.tx, ok = d.txs[batchId]
+ default:
+ return false
+ }
+ return ok
+}
diff --git a/services/syncbase/server/nosql/row.go b/services/syncbase/server/nosql/row.go
new file mode 100644
index 0000000..a6fbf9d
--- /dev/null
+++ b/services/syncbase/server/nosql/row.go
@@ -0,0 +1,141 @@
+// Copyright 2015 The Vanadium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package nosql
+
+import (
+ wire "v.io/syncbase/v23/services/syncbase/nosql"
+ "v.io/syncbase/x/ref/services/syncbase/server/util"
+ "v.io/syncbase/x/ref/services/syncbase/store"
+ "v.io/v23/context"
+ "v.io/v23/rpc"
+ "v.io/v23/verror"
+)
+
+// rowReq is a per-request object that handles Row RPCs.
+type rowReq struct {
+ key string
+ t *tableReq
+}
+
+var (
+ _ wire.RowServerMethods = (*rowReq)(nil)
+)
+
+////////////////////////////////////////
+// RPC methods
+
+func (r *rowReq) Exists(ctx *context.T, call rpc.ServerCall, schemaVersion int32) (bool, error) {
+ _, err := r.Get(ctx, call, schemaVersion)
+ return util.ErrorToExists(err)
+}
+
+func (r *rowReq) Get(ctx *context.T, call rpc.ServerCall, schemaVersion int32) ([]byte, error) {
+ impl := func(sntx store.SnapshotOrTransaction) ([]byte, error) {
+ if err := r.t.d.checkSchemaVersion(ctx, schemaVersion); err != nil {
+ return []byte{}, err
+ }
+ return r.get(ctx, call, sntx)
+ }
+ if r.t.d.batchId != nil {
+ return impl(r.t.d.batchReader())
+ } else {
+ sn := r.t.d.st.NewSnapshot()
+ defer sn.Abort()
+ return impl(sn)
+ }
+}
+
+func (r *rowReq) Put(ctx *context.T, call rpc.ServerCall, schemaVersion int32, value []byte) error {
+ impl := func(tx store.Transaction) error {
+ if err := r.t.d.checkSchemaVersion(ctx, schemaVersion); err != nil {
+ return err
+ }
+ return r.put(ctx, call, tx, value)
+ }
+ if r.t.d.batchId != nil {
+ if tx, err := r.t.d.batchTransaction(); err != nil {
+ return err
+ } else {
+ return impl(tx)
+ }
+ } else {
+ return store.RunInTransaction(r.t.d.st, impl)
+ }
+}
+
+func (r *rowReq) Delete(ctx *context.T, call rpc.ServerCall, schemaVersion int32) error {
+ impl := func(tx store.Transaction) error {
+ if err := r.t.d.checkSchemaVersion(ctx, schemaVersion); err != nil {
+ return err
+ }
+ return r.delete(ctx, call, tx)
+ }
+ if r.t.d.batchId != nil {
+ if tx, err := r.t.d.batchTransaction(); err != nil {
+ return err
+ } else {
+ return impl(tx)
+ }
+ } else {
+ return store.RunInTransaction(r.t.d.st, impl)
+ }
+}
+
+////////////////////////////////////////
+// Internal helpers
+
+func (r *rowReq) stKey() string {
+ return util.JoinKeyParts(util.RowPrefix, r.stKeyPart())
+}
+
+func (r *rowReq) stKeyPart() string {
+ return util.JoinKeyParts(r.t.stKeyPart(), r.key)
+}
+
+// checkAccess checks that this row's table exists in the database, and performs
+// an authorization check.
+func (r *rowReq) checkAccess(ctx *context.T, call rpc.ServerCall, sntx store.SnapshotOrTransaction) error {
+ return r.t.checkAccess(ctx, call, sntx, r.key)
+}
+
+// get reads data from the storage engine.
+// Performs authorization check.
+func (r *rowReq) get(ctx *context.T, call rpc.ServerCall, sntx store.SnapshotOrTransaction) ([]byte, error) {
+ if err := r.checkAccess(ctx, call, sntx); err != nil {
+ return nil, err
+ }
+ value, err := sntx.Get([]byte(r.stKey()), nil)
+ if err != nil {
+ if verror.ErrorID(err) == store.ErrUnknownKey.ID {
+ return nil, verror.New(verror.ErrNoExist, ctx, r.stKey())
+ }
+ return nil, verror.New(verror.ErrInternal, ctx, err)
+ }
+ return value, nil
+}
+
+// put writes data to the storage engine.
+// Performs authorization check.
+func (r *rowReq) put(ctx *context.T, call rpc.ServerCall, tx store.Transaction, value []byte) error {
+ if err := r.checkAccess(ctx, call, tx); err != nil {
+ return err
+ }
+ if err := tx.Put([]byte(r.stKey()), value); err != nil {
+ return verror.New(verror.ErrInternal, ctx, err)
+ }
+ return nil
+}
+
+// delete deletes data from the storage engine.
+// Performs authorization check.
+func (r *rowReq) delete(ctx *context.T, call rpc.ServerCall, tx store.Transaction) error {
+ if err := r.checkAccess(ctx, call, tx); err != nil {
+ return err
+ }
+ if err := tx.Delete([]byte(r.stKey())); err != nil {
+ return verror.New(verror.ErrInternal, ctx, err)
+ }
+ return nil
+}
diff --git a/services/syncbase/server/nosql/table.go b/services/syncbase/server/nosql/table.go
new file mode 100644
index 0000000..8cf744e
--- /dev/null
+++ b/services/syncbase/server/nosql/table.go
@@ -0,0 +1,486 @@
+// Copyright 2015 The Vanadium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package nosql
+
+import (
+ "strings"
+
+ wire "v.io/syncbase/v23/services/syncbase/nosql"
+ "v.io/syncbase/x/ref/services/syncbase/server/util"
+ "v.io/syncbase/x/ref/services/syncbase/store"
+ "v.io/v23/context"
+ "v.io/v23/glob"
+ "v.io/v23/rpc"
+ "v.io/v23/security/access"
+ "v.io/v23/verror"
+ "v.io/v23/vom"
+)
+
+// tableReq is a per-request object that handles Table RPCs.
+type tableReq struct {
+ name string
+ d *databaseReq
+}
+
+var (
+ _ wire.TableServerMethods = (*tableReq)(nil)
+)
+
+////////////////////////////////////////
+// RPC methods
+
+func (t *tableReq) Create(ctx *context.T, call rpc.ServerCall, schemaVersion int32, perms access.Permissions) error {
+ if t.d.batchId != nil {
+ return wire.NewErrBoundToBatch(ctx)
+ }
+ if err := t.d.checkSchemaVersion(ctx, schemaVersion); err != nil {
+ return err
+ }
+ return store.RunInTransaction(t.d.st, func(tx store.Transaction) error {
+ // Check databaseData perms.
+ dData := &databaseData{}
+ if err := util.GetWithAuth(ctx, call, tx, t.d.stKey(), dData); err != nil {
+ return err
+ }
+ // Check for "table already exists".
+ if err := util.Get(ctx, tx, t.stKey(), &tableData{}); verror.ErrorID(err) != verror.ErrNoExist.ID {
+ if err != nil {
+ return err
+ }
+ // TODO(sadovsky): Should this be ErrExistOrNoAccess, for privacy?
+ return verror.New(verror.ErrExist, ctx, t.name)
+ }
+ // Write new tableData.
+ if perms == nil {
+ perms = dData.Perms
+ }
+ data := &tableData{
+ Name: t.name,
+ Perms: perms,
+ }
+ return util.Put(ctx, tx, t.stKey(), data)
+ })
+}
+
+func (t *tableReq) Delete(ctx *context.T, call rpc.ServerCall, schemaVersion int32) error {
+ if t.d.batchId != nil {
+ return wire.NewErrBoundToBatch(ctx)
+ }
+ if err := t.d.checkSchemaVersion(ctx, schemaVersion); err != nil {
+ return err
+ }
+ return store.RunInTransaction(t.d.st, func(tx store.Transaction) error {
+ // Read-check-delete tableData.
+ if err := util.GetWithAuth(ctx, call, tx, t.stKey(), &tableData{}); err != nil {
+ if verror.ErrorID(err) == verror.ErrNoExist.ID {
+ return nil // delete is idempotent
+ }
+ return err
+ }
+ // TODO(sadovsky): Delete all rows in this table.
+ return util.Delete(ctx, tx, t.stKey())
+ })
+}
+
+func (t *tableReq) Exists(ctx *context.T, call rpc.ServerCall, schemaVersion int32) (bool, error) {
+ if err := t.d.checkSchemaVersion(ctx, schemaVersion); err != nil {
+ return false, err
+ }
+ return util.ErrorToExists(util.GetWithAuth(ctx, call, t.d.st, t.stKey(), &tableData{}))
+}
+
+func (t *tableReq) DeleteRowRange(ctx *context.T, call rpc.ServerCall, schemaVersion int32, start, limit []byte) error {
+ impl := func(tx store.Transaction) error {
+ // Check for table-level access before doing a scan.
+ if err := t.checkAccess(ctx, call, tx, ""); err != nil {
+ return err
+ }
+ // Check that the db schema version and the version provided by the
+ // client match.
+ if err := t.d.checkSchemaVersion(ctx, schemaVersion); err != nil {
+ return err
+ }
+ it := tx.Scan(util.ScanRangeArgs(util.JoinKeyParts(util.RowPrefix, t.name), string(start), string(limit)))
+ key := []byte{}
+ for it.Advance() {
+ key = it.Key(key)
+ // Check perms.
+ parts := util.SplitKeyParts(string(key))
+ externalKey := parts[len(parts)-1]
+ if err := t.checkAccess(ctx, call, tx, externalKey); err != nil {
+ // TODO(rogulenko): Revisit this behavior. Probably we should
+ // delete all rows that we have access to.
+ it.Cancel()
+ return err
+ }
+ // Delete the key-value pair.
+ if err := tx.Delete(key); err != nil {
+ return verror.New(verror.ErrInternal, ctx, err)
+ }
+ }
+ if err := it.Err(); err != nil {
+ return verror.New(verror.ErrInternal, ctx, err)
+ }
+ return nil
+ }
+ if t.d.batchId != nil {
+ if tx, err := t.d.batchTransaction(); err != nil {
+ return err
+ } else {
+ return impl(tx)
+ }
+ } else {
+ return store.RunInTransaction(t.d.st, impl)
+ }
+}
+
+func (t *tableReq) Scan(ctx *context.T, call wire.TableScanServerCall, schemaVersion int32, start, limit []byte) error {
+ impl := func(sntx store.SnapshotOrTransaction) error {
+ // Check for table-level access before doing a scan.
+ if err := t.checkAccess(ctx, call, sntx, ""); err != nil {
+ return err
+ }
+ if err := t.d.checkSchemaVersion(ctx, schemaVersion); err != nil {
+ return err
+ }
+ it := sntx.Scan(util.ScanRangeArgs(util.JoinKeyParts(util.RowPrefix, t.name), string(start), string(limit)))
+ sender := call.SendStream()
+ key, value := []byte{}, []byte{}
+ for it.Advance() {
+ key, value = it.Key(key), it.Value(value)
+ // Check perms.
+ parts := util.SplitKeyParts(string(key))
+ externalKey := parts[len(parts)-1]
+ if err := t.checkAccess(ctx, call, sntx, externalKey); err != nil {
+ it.Cancel()
+ return err
+ }
+ if err := sender.Send(wire.KeyValue{Key: externalKey, Value: value}); err != nil {
+ it.Cancel()
+ return err
+ }
+ }
+ if err := it.Err(); err != nil {
+ return verror.New(verror.ErrInternal, ctx, err)
+ }
+ return nil
+ }
+ if t.d.batchId != nil {
+ return impl(t.d.batchReader())
+ } else {
+ sntx := t.d.st.NewSnapshot()
+ defer sntx.Abort()
+ return impl(sntx)
+ }
+}
+
+func (t *tableReq) GetPermissions(ctx *context.T, call rpc.ServerCall, schemaVersion int32, key string) ([]wire.PrefixPermissions, error) {
+ impl := func(sntx store.SnapshotOrTransaction) ([]wire.PrefixPermissions, error) {
+ // Check permissions only at table level.
+ if err := t.checkAccess(ctx, call, sntx, ""); err != nil {
+ return nil, err
+ }
+ if err := t.d.checkSchemaVersion(ctx, schemaVersion); err != nil {
+ return nil, err
+ }
+ // Get the most specific permissions object.
+ prefix, prefixPerms, err := t.permsForKey(ctx, sntx, key)
+ if err != nil {
+ return nil, err
+ }
+ result := []wire.PrefixPermissions{{Prefix: prefix, Perms: prefixPerms.Perms}}
+ // Collect all parent permissions objects all the way up to the table level.
+ for prefix != "" {
+ prefix = prefixPerms.Parent
+ if prefixPerms, err = t.permsForPrefix(ctx, sntx, prefixPerms.Parent); err != nil {
+ return nil, err
+ }
+ result = append(result, wire.PrefixPermissions{Prefix: prefix, Perms: prefixPerms.Perms})
+ }
+ return result, nil
+ }
+ if t.d.batchId != nil {
+ return impl(t.d.batchReader())
+ } else {
+ sntx := t.d.st.NewSnapshot()
+ defer sntx.Abort()
+ return impl(sntx)
+ }
+}
+
+func (t *tableReq) SetPermissions(ctx *context.T, call rpc.ServerCall, schemaVersion int32, prefix string, perms access.Permissions) error {
+ impl := func(tx store.Transaction) error {
+ if err := t.checkAccess(ctx, call, tx, prefix); err != nil {
+ return err
+ }
+ if err := t.d.checkSchemaVersion(ctx, schemaVersion); err != nil {
+ return err
+ }
+ // Concurrent transactions that touch this table should fail with
+ // ErrConcurrentTransaction when this transaction commits.
+ if err := t.lock(ctx, tx); err != nil {
+ return err
+ }
+ if prefix == "" {
+ data := &tableData{}
+ return util.UpdateWithAuth(ctx, call, tx, t.stKey(), data, func() error {
+ data.Perms = perms
+ return nil
+ })
+ }
+ // Get the most specific permissions object.
+ parent, prefixPerms, err := t.permsForKey(ctx, tx, prefix)
+ if err != nil {
+ return err
+ }
+ // If there is no permissions object for the given prefix, we need to
+ // add a new node to the prefix permissions tree. We do this by updating
+ // the parent of every child of the prefix to point to the node
+ // corresponding to the prefix.
+ if parent != prefix {
+ if err := t.updateParentRefs(ctx, tx, prefix, prefix); err != nil {
+ return err
+ }
+ } else {
+ parent = prefixPerms.Parent
+ }
+ stPrefix := t.prefixPermsKey(prefix)
+ stPrefixLimit := stPrefix + util.PrefixRangeLimitSuffix
+ prefixPerms = stPrefixPerms{Parent: parent, Perms: perms}
+ // Put the (prefix, perms) pair to the database.
+ if err := util.Put(ctx, tx, stPrefix, prefixPerms); err != nil {
+ return err
+ }
+ return util.Put(ctx, tx, stPrefixLimit, prefixPerms)
+ }
+ if t.d.batchId != nil {
+ if tx, err := t.d.batchTransaction(); err != nil {
+ return err
+ } else {
+ return impl(tx)
+ }
+ } else {
+ return store.RunInTransaction(t.d.st, impl)
+ }
+}
+
+func (t *tableReq) DeletePermissions(ctx *context.T, call rpc.ServerCall, schemaVersion int32, prefix string) error {
+ if prefix == "" {
+ return verror.New(verror.ErrBadArg, ctx, prefix)
+ }
+ impl := func(tx store.Transaction) error {
+ if err := t.checkAccess(ctx, call, tx, prefix); err != nil {
+ return err
+ }
+ if err := t.d.checkSchemaVersion(ctx, schemaVersion); err != nil {
+ return err
+ }
+ // Concurrent transactions that touch this table should fail with
+ // ErrConcurrentTransaction when this transaction commits.
+ if err := t.lock(ctx, tx); err != nil {
+ return err
+ }
+ // Get the most specific permissions object.
+ parent, prefixPerms, err := t.permsForKey(ctx, tx, prefix)
+ if err != nil {
+ return err
+ }
+ if parent != prefix {
+ // This can happen only if there is no permissions object for the
+ // given prefix. Since DeletePermissions is idempotent, return nil.
+ return nil
+ }
+ // We need to delete the node corresponding to the prefix from the
+ // prefix permissions tree. We do this by updating the parent of every
+ // child of the prefix to the parent of the node corresponding to the
+ // prefix.
+ if err := t.updateParentRefs(ctx, tx, prefix, prefixPerms.Parent); err != nil {
+ return err
+ }
+ stPrefix := []byte(t.prefixPermsKey(prefix))
+ stPrefixLimit := append(stPrefix, util.PrefixRangeLimitSuffix...)
+ if err := tx.Delete(stPrefix); err != nil {
+ return err
+ }
+ return tx.Delete(stPrefixLimit)
+ }
+ if t.d.batchId != nil {
+ if tx, err := t.d.batchTransaction(); err != nil {
+ return err
+ } else {
+ return impl(tx)
+ }
+ } else {
+ return store.RunInTransaction(t.d.st, impl)
+ }
+}
+
+func (t *tableReq) GlobChildren__(ctx *context.T, call rpc.GlobChildrenServerCall, matcher *glob.Element) error {
+ impl := func(sntx store.SnapshotOrTransaction, closeSntx func() error) error {
+ // Check perms.
+ if err := t.checkAccess(ctx, call, sntx, ""); err != nil {
+ closeSntx()
+ return err
+ }
+ // TODO(rogulenko): Check prefix permissions for children.
+ return util.Glob(ctx, call, matcher, sntx, closeSntx, util.JoinKeyParts(util.RowPrefix, t.name))
+ }
+ if t.d.batchId != nil {
+ return impl(t.d.batchReader(), func() error {
+ return nil
+ })
+ } else {
+ sn := t.d.st.NewSnapshot()
+ return impl(sn, sn.Abort)
+ }
+}
+
+////////////////////////////////////////
+// Internal helpers
+
+func (t *tableReq) stKey() string {
+ return util.JoinKeyParts(util.TablePrefix, t.stKeyPart())
+}
+
+func (t *tableReq) stKeyPart() string {
+ return t.name
+}
+
+// updateParentRefs sets the parent of every child of the given prefix to
+// newParent.
+func (t *tableReq) updateParentRefs(ctx *context.T, tx store.Transaction, prefix, newParent string) error {
+ stPrefix := []byte(t.prefixPermsKey(prefix))
+ stPrefixStart := append(stPrefix, 0)
+ stPrefixLimit := append(stPrefix, util.PrefixRangeLimitSuffix...)
+ it := tx.Scan(stPrefixStart, stPrefixLimit)
+ var key, value []byte
+ for it.Advance() {
+ key, value = it.Key(key), it.Value(value)
+ var prefixPerms stPrefixPerms
+ if err := vom.Decode(value, &prefixPerms); err != nil {
+ it.Cancel()
+ return verror.New(verror.ErrInternal, ctx, err)
+ }
+ prefixPerms.Parent = newParent
+ if err := util.Put(ctx, tx, string(key), prefixPerms); err != nil {
+ it.Cancel()
+ return err
+ }
+ }
+ if err := it.Err(); err != nil {
+ return verror.New(verror.ErrInternal, ctx, err)
+ }
+ return nil
+}
+
+// lock invalidates all in-flight transactions that have touched this table,
+// such that any subsequent tx.Commit() will return ErrConcurrentTransaction.
+//
+// It is necessary to call lock() every time prefix permissions are updated so
+// that snapshots inside all transactions reflect up-to-date permissions. Since
+// every public function that touches this table has to read the table-level
+// permissions object, it suffices to add the key of this object to the write
+// set of the current transaction.
+//
+// TODO(rogulenko): Revisit this behavior to provide more granularity.
+// One option is to add a prefix and its parent to the write set of the current
+// transaction when the permissions object for that prefix is updated.
+func (t *tableReq) lock(ctx *context.T, tx store.Transaction) error {
+ var data tableData
+ if err := util.Get(ctx, tx, t.stKey(), &data); err != nil {
+ return err
+ }
+ return util.Put(ctx, tx, t.stKey(), data)
+}
+
+// checkAccess checks that this table exists in the database, and performs
+// an authorization check. Access is checked at the table level and at the
+// level of the most specific prefix for the given key.
+// TODO(rogulenko): Revisit this behavior. Eventually we'll want the table-level
+// access check to be a check for "Resolve", i.e. also check access to
+// service, app and database.
+func (t *tableReq) checkAccess(ctx *context.T, call rpc.ServerCall, sntx store.SnapshotOrTransaction, key string) error {
+ prefix, prefixPerms, err := t.permsForKey(ctx, sntx, key)
+ if err != nil {
+ return err
+ }
+ if prefix != "" {
+ if err := util.GetWithAuth(ctx, call, sntx, t.stKey(), &tableData{}); err != nil {
+ return err
+ }
+ }
+ auth, _ := access.PermissionsAuthorizer(prefixPerms.Perms, access.TypicalTagType())
+ if err := auth.Authorize(ctx, call.Security()); err != nil {
+ return verror.New(verror.ErrNoAccess, ctx, prefix)
+ }
+ return nil
+}
+
+// permsForKey returns the longest prefix of the given key that has
+// associated permissions, along with its permissions object.
+// permsForKey doesn't perform an authorization check.
+//
+// Effectively, we represent all prefixes as a forest T, where each vertex maps
+// to a prefix. A parent for a string is the maximum proper prefix of it that
+// belongs to T. Each prefix P from T is represented as a pair of entries with
+// keys P and P~ with values of type stPrefixPerms (parent + perms). A
+// high-level explanation of how this function works:
+// 1. iter = db.Scan(K, "")
+//    Here the last character of iter.Key() is removed automatically if it
+//    is '~'.
+// 2. if hasPrefix(K, iter.Key()) return iter.Value()
+// 3. return parent(iter.Key())
+// Short proof:
+// The iter returned in step 1 points to one of the following:
+// - a string t that is equal to K;
+// - a string t~: if t is not a prefix of K, then K < t < t~, which
+//   contradicts the property of the iterator returned in step 1, so t is a
+//   prefix of K; moreover, t is the largest prefix of K, as all larger
+//   prefixes of K are less than t~; in this case step 2 returns the correct
+//   result;
+// - a string t that doesn't end with '~': it can't be a prefix of K, as all
+//   proper prefixes of K are less than K; parent(t) is a prefix of K,
+//   otherwise K < parent(t) < t; parent(t) is the largest prefix of K,
+//   otherwise t would be a prefix of K; in this case step 3 returns the
+//   correct result.
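+// An illustrative example: suppose permissions are set for prefixes "a" and
+// "ab". For key "abc" the scan starting at "abc" first hits the entry "ab~",
+// so the result is prefix "ab"; for key "ax" it first hits "a~", so the
+// result is "a"; for key "b" the scan finds no prefix entry, so the
+// table-level ("") permissions are returned.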
+func (t *tableReq) permsForKey(ctx *context.T, sntx store.SnapshotOrTransaction, key string) (string, stPrefixPerms, error) {
+ it := sntx.Scan(util.ScanRangeArgs(util.JoinKeyParts(util.PermsPrefix, t.name), key, ""))
+ if !it.Advance() {
+ prefixPerms, err := t.permsForPrefix(ctx, sntx, "")
+ return "", prefixPerms, err
+ }
+ defer it.Cancel()
+ parts := util.SplitKeyParts(string(it.Key(nil)))
+ prefix := strings.TrimSuffix(parts[len(parts)-1], util.PrefixRangeLimitSuffix)
+ value := it.Value(nil)
+ var prefixPerms stPrefixPerms
+ if err := vom.Decode(value, &prefixPerms); err != nil {
+ return "", stPrefixPerms{}, verror.New(verror.ErrInternal, ctx, err)
+ }
+ if strings.HasPrefix(key, prefix) {
+ return prefix, prefixPerms, nil
+ }
+ prefixPerms, err := t.permsForPrefix(ctx, sntx, prefixPerms.Parent)
+ return prefixPerms.Parent, prefixPerms, err
+}
+
+// permsForPrefix returns the permissions object associated with the
+// provided prefix.
+func (t *tableReq) permsForPrefix(ctx *context.T, sntx store.SnapshotOrTransaction, prefix string) (stPrefixPerms, error) {
+ if prefix == "" {
+ var data tableData
+ if err := util.Get(ctx, sntx, t.stKey(), &data); err != nil {
+ return stPrefixPerms{}, err
+ }
+ return stPrefixPerms{Perms: data.Perms}, nil
+ }
+ var prefixPerms stPrefixPerms
+ if err := util.Get(ctx, sntx, t.prefixPermsKey(prefix), &prefixPerms); err != nil {
+ return stPrefixPerms{}, verror.New(verror.ErrInternal, ctx, err)
+ }
+ return prefixPerms, nil
+}
+
+// prefixPermsKey returns the key used for storing permissions for the given
+// prefix in the table.
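+// For example, for table "tb" and prefix "foo", the key is "$perms:tb:foo".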
+func (t *tableReq) prefixPermsKey(prefix string) string {
+ return util.JoinKeyParts(util.PermsPrefix, t.name, prefix)
+}
diff --git a/services/syncbase/server/nosql/types.go b/services/syncbase/server/nosql/types.go
new file mode 100644
index 0000000..7a87916
--- /dev/null
+++ b/services/syncbase/server/nosql/types.go
@@ -0,0 +1,23 @@
+// Copyright 2015 The Vanadium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package nosql
+
+import (
+ "v.io/syncbase/x/ref/services/syncbase/server/util"
+ "v.io/v23/security/access"
+)
+
+var (
+ _ util.Permser = (*databaseData)(nil)
+ _ util.Permser = (*tableData)(nil)
+)
+
+func (data *databaseData) GetPerms() access.Permissions {
+ return data.Perms
+}
+
+func (data *tableData) GetPerms() access.Permissions {
+ return data.Perms
+}
diff --git a/services/syncbase/server/nosql/types.vdl b/services/syncbase/server/nosql/types.vdl
new file mode 100644
index 0000000..8ede239
--- /dev/null
+++ b/services/syncbase/server/nosql/types.vdl
@@ -0,0 +1,39 @@
+// Copyright 2015 The Vanadium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package nosql
+
+import (
+ "v.io/v23/security/access"
+ "v.io/syncbase/v23/services/syncbase/nosql"
+)
+
+// databaseData represents the persistent state of a Database.
+type databaseData struct {
+ Name string
+ Version uint64 // covers the Perms field below
+ Perms access.Permissions
+ SchemaMetadata ?nosql.SchemaMetadata
+}
+
+// tableData represents the persistent state of a Table.
+// TODO(sadovsky): Decide whether to track "empty-prefix" perms here.
+type tableData struct {
+ Name string
+ Perms access.Permissions
+}
+
+// stPrefixPerms describes the internal representation of prefix permissions
+// in the store.
+//
+// Each (key, perms) pair is stored as two key-value pairs:
+// "$perms:%table:key" - stPrefixPerms{parent, perms}
+// "$perms:%table:key~" - stPrefixPerms{parent, perms}
+// where "~" is a reserved char that's lexicographically greater than all
+// chars allowed by clients, %table is the name of the table, and parent is
+// the longest proper prefix of the key that has an associated permissions
+// object.
+type stPrefixPerms struct {
+ Parent string
+ Perms access.Permissions
+}
diff --git a/services/syncbase/server/nosql/types.vdl.go b/services/syncbase/server/nosql/types.vdl.go
new file mode 100644
index 0000000..bf5f346
--- /dev/null
+++ b/services/syncbase/server/nosql/types.vdl.go
@@ -0,0 +1,67 @@
+// Copyright 2015 The Vanadium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// This file was auto-generated by the vanadium vdl tool.
+// Source: types.vdl
+
+package nosql
+
+import (
+ // VDL system imports
+ "v.io/v23/vdl"
+
+ // VDL user imports
+ "v.io/syncbase/v23/services/syncbase/nosql"
+ "v.io/v23/security/access"
+)
+
+// databaseData represents the persistent state of a Database.
+type databaseData struct {
+ Name string
+ Version uint64 // covers the Perms field below
+ Perms access.Permissions
+ SchemaMetadata *nosql.SchemaMetadata
+}
+
+func (databaseData) __VDLReflect(struct {
+ Name string `vdl:"v.io/syncbase/x/ref/services/syncbase/server/nosql.databaseData"`
+}) {
+}
+
+// tableData represents the persistent state of a Table.
+// TODO(sadovsky): Decide whether to track "empty-prefix" perms here.
+type tableData struct {
+ Name string
+ Perms access.Permissions
+}
+
+func (tableData) __VDLReflect(struct {
+ Name string `vdl:"v.io/syncbase/x/ref/services/syncbase/server/nosql.tableData"`
+}) {
+}
+
+// stPrefixPerms describes the internal representation of prefix permissions
+// in the store.
+//
+// Each (key, perms) pair is stored as two key-value pairs:
+// "$perms:%table:key" - stPrefixPerms{parent, perms}
+// "$perms:%table:key~" - stPrefixPerms{parent, perms}
+// where "~" is a reserved char that's lexicographically greater than all
+// chars allowed by clients, %table is the name of the table, and parent is
+// the longest proper prefix of the key that has an associated permissions
+// object.
+type stPrefixPerms struct {
+ Parent string
+ Perms access.Permissions
+}
+
+func (stPrefixPerms) __VDLReflect(struct {
+ Name string `vdl:"v.io/syncbase/x/ref/services/syncbase/server/nosql.stPrefixPerms"`
+}) {
+}
+
+func init() {
+ vdl.Register((*databaseData)(nil))
+ vdl.Register((*tableData)(nil))
+ vdl.Register((*stPrefixPerms)(nil))
+}
diff --git a/services/syncbase/server/server_test.go b/services/syncbase/server/server_test.go
new file mode 100644
index 0000000..d6b51ad
--- /dev/null
+++ b/services/syncbase/server/server_test.go
@@ -0,0 +1,25 @@
+// Copyright 2015 The Vanadium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package server_test
+
+// Note: Most of our unit tests are client-side and cover end-to-end behavior.
+// Tests of the "server" package (and below) specifically target aspects of the
+// implementation that are difficult to test from the client side.
+
+import (
+ "testing"
+
+ tu "v.io/syncbase/v23/syncbase/testutil"
+ _ "v.io/x/ref/runtime/factories/generic"
+)
+
+////////////////////////////////////////
+// Test cases
+
+// TODO(sadovsky): Write some tests.
+func TestSomething(t *testing.T) {
+ _, _, cleanup := tu.SetupOrDie(nil)
+ defer cleanup()
+}
diff --git a/services/syncbase/server/service.go b/services/syncbase/server/service.go
new file mode 100644
index 0000000..180692f
--- /dev/null
+++ b/services/syncbase/server/service.go
@@ -0,0 +1,307 @@
+// Copyright 2015 The Vanadium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package server
+
+// TODO(sadovsky): Check Resolve access on parent where applicable. Relatedly,
+// convert ErrNoExist and ErrNoAccess to ErrNoExistOrNoAccess where needed to
+// preserve privacy.
+
+import (
+ "path"
+ "sync"
+
+ wire "v.io/syncbase/v23/services/syncbase"
+ "v.io/syncbase/x/ref/services/syncbase/server/interfaces"
+ "v.io/syncbase/x/ref/services/syncbase/server/nosql"
+ "v.io/syncbase/x/ref/services/syncbase/server/util"
+ "v.io/syncbase/x/ref/services/syncbase/store"
+ "v.io/syncbase/x/ref/services/syncbase/vsync"
+ "v.io/v23/context"
+ "v.io/v23/glob"
+ "v.io/v23/rpc"
+ "v.io/v23/security/access"
+ "v.io/v23/verror"
+ "v.io/v23/vom"
+)
+
+// service is a singleton (i.e. not per-request) that handles Service RPCs.
+type service struct {
+ st store.Store // keeps track of which apps and databases exist, etc.
+ sync interfaces.SyncServerMethods
+ opts ServiceOptions
+ // Guards the fields below. Held during app Create, Delete, and
+ // SetPermissions.
+ mu sync.Mutex
+ apps map[string]*app
+}
+
+var (
+ _ wire.ServiceServerMethods = (*service)(nil)
+ _ interfaces.Service = (*service)(nil)
+)
+
+// ServiceOptions configures a service.
+type ServiceOptions struct {
+ // Service-level permissions.
+ Perms access.Permissions
+ // Root dir for data storage.
+ RootDir string
+ // Storage engine to use (for service and per-database engines).
+ Engine string
+ // RPC server for this service. Needed to advertise this service in
+ // mount tables attached to SyncGroups.
+ Server rpc.Server
+}
+
+// NewService creates a new service instance and returns it.
+// TODO(sadovsky): If possible, close all stores when the server is stopped.
+func NewService(ctx *context.T, call rpc.ServerCall, opts ServiceOptions) (*service, error) {
+ if opts.Perms == nil {
+ return nil, verror.New(verror.ErrInternal, ctx, "perms must be specified")
+ }
+ st, err := util.OpenStore(opts.Engine, path.Join(opts.RootDir, opts.Engine), util.OpenOptions{CreateIfMissing: true, ErrorIfExists: false})
+ if err != nil {
+ return nil, err
+ }
+ s := &service{
+ st: st,
+ opts: opts,
+ apps: map[string]*app{},
+ }
+ data := &serviceData{
+ Perms: opts.Perms,
+ }
+ if err := util.Get(ctx, st, s.stKey(), &serviceData{}); verror.ErrorID(err) != verror.ErrNoExist.ID {
+ if err != nil {
+ return nil, err
+ }
+ // Service exists. Initialize in-memory data structures.
+ // Read all apps, populate apps map.
+ aIt := st.Scan(util.ScanPrefixArgs(util.AppPrefix, ""))
+ aBytes := []byte{}
+ for aIt.Advance() {
+ aBytes = aIt.Value(aBytes)
+ aData := &appData{}
+ if err := vom.Decode(aBytes, aData); err != nil {
+ return nil, verror.New(verror.ErrInternal, ctx, err)
+ }
+ a := &app{
+ name: aData.Name,
+ s: s,
+ exists: true,
+ dbs: make(map[string]interfaces.Database),
+ }
+ s.apps[a.name] = a
+ // Read all dbs for this app, populate dbs map.
+ dIt := st.Scan(util.ScanPrefixArgs(util.JoinKeyParts(util.DbInfoPrefix, aData.Name), ""))
+ dBytes := []byte{}
+ for dIt.Advance() {
+ dBytes = dIt.Value(dBytes)
+ info := &dbInfo{}
+ if err := vom.Decode(dBytes, info); err != nil {
+ return nil, verror.New(verror.ErrInternal, ctx, err)
+ }
+ d, err := nosql.OpenDatabase(ctx, a, info.Name, nosql.DatabaseOptions{
+ RootDir: info.RootDir,
+ Engine: info.Engine,
+ }, util.OpenOptions{
+ CreateIfMissing: false,
+ ErrorIfExists: false,
+ })
+ if err != nil {
+ return nil, verror.New(verror.ErrInternal, ctx, err)
+ }
+ a.dbs[info.Name] = d
+ }
+ if err := dIt.Err(); err != nil {
+ return nil, verror.New(verror.ErrInternal, ctx, err)
+ }
+ }
+ if err := aIt.Err(); err != nil {
+ return nil, verror.New(verror.ErrInternal, ctx, err)
+ }
+ } else {
+ // Service does not exist.
+ if err := util.Put(ctx, st, s.stKey(), data); err != nil {
+ return nil, err
+ }
+ }
+ // Note, vsync.New internally handles both first-time and subsequent
+ // invocations.
+ if s.sync, err = vsync.New(ctx, call, s, opts.Server, opts.RootDir); err != nil {
+ return nil, err
+ }
+ return s, nil
+}
+
+////////////////////////////////////////
+// RPC methods
+
+func (s *service) SetPermissions(ctx *context.T, call rpc.ServerCall, perms access.Permissions, version string) error {
+ return store.RunInTransaction(s.st, func(tx store.Transaction) error {
+ data := &serviceData{}
+ return util.UpdateWithAuth(ctx, call, tx, s.stKey(), data, func() error {
+ if err := util.CheckVersion(ctx, version, data.Version); err != nil {
+ return err
+ }
+ data.Perms = perms
+ data.Version++
+ return nil
+ })
+ })
+}
+
+func (s *service) GetPermissions(ctx *context.T, call rpc.ServerCall) (perms access.Permissions, version string, err error) {
+ data := &serviceData{}
+ if err := util.GetWithAuth(ctx, call, s.st, s.stKey(), data); err != nil {
+ return nil, "", err
+ }
+ return data.Perms, util.FormatVersion(data.Version), nil
+}
+
+func (s *service) GlobChildren__(ctx *context.T, call rpc.GlobChildrenServerCall, matcher *glob.Element) error {
+ // Check perms.
+ sn := s.st.NewSnapshot()
+ if err := util.GetWithAuth(ctx, call, sn, s.stKey(), &serviceData{}); err != nil {
+ sn.Abort()
+ return err
+ }
+ return util.Glob(ctx, call, matcher, sn, sn.Abort, util.AppPrefix)
+}
+
+////////////////////////////////////////
+// interfaces.Service methods
+
+func (s *service) St() store.Store {
+ return s.st
+}
+
+func (s *service) Sync() interfaces.SyncServerMethods {
+ return s.sync
+}
+
+func (s *service) App(ctx *context.T, call rpc.ServerCall, appName string) (interfaces.App, error) {
+ s.mu.Lock()
+ defer s.mu.Unlock()
+ // Note, currently the service's apps map as well as per-app dbs maps are
+ // populated at startup.
+ a, ok := s.apps[appName]
+ if !ok {
+ return nil, verror.New(verror.ErrNoExist, ctx, appName)
+ }
+ return a, nil
+}
+
+func (s *service) AppNames(ctx *context.T, call rpc.ServerCall) ([]string, error) {
+ // In the future this API will likely be replaced by one that streams the app
+ // names.
+ s.mu.Lock()
+ defer s.mu.Unlock()
+ appNames := make([]string, 0, len(s.apps))
+ for n := range s.apps {
+ appNames = append(appNames, n)
+ }
+ return appNames, nil
+}
+
+////////////////////////////////////////
+// App management methods
+
+func (s *service) createApp(ctx *context.T, call rpc.ServerCall, appName string, perms access.Permissions) error {
+ s.mu.Lock()
+ defer s.mu.Unlock()
+ if _, ok := s.apps[appName]; ok {
+ return verror.New(verror.ErrExist, ctx, appName)
+ }
+
+ a := &app{
+ name: appName,
+ s: s,
+ exists: true,
+ dbs: make(map[string]interfaces.Database),
+ }
+
+ if err := store.RunInTransaction(s.st, func(tx store.Transaction) error {
+ // Check serviceData perms.
+ sData := &serviceData{}
+ if err := util.GetWithAuth(ctx, call, tx, s.stKey(), sData); err != nil {
+ return err
+ }
+ // Check for "app already exists".
+ if err := util.Get(ctx, tx, a.stKey(), &appData{}); verror.ErrorID(err) != verror.ErrNoExist.ID {
+ if err != nil {
+ return err
+ }
+ return verror.New(verror.ErrExist, ctx, appName)
+ }
+ // Write new appData.
+ if perms == nil {
+ perms = sData.Perms
+ }
+ data := &appData{
+ Name: appName,
+ Perms: perms,
+ }
+ return util.Put(ctx, tx, a.stKey(), data)
+ }); err != nil {
+ return err
+ }
+
+ s.apps[appName] = a
+ return nil
+}
+
+func (s *service) deleteApp(ctx *context.T, call rpc.ServerCall, appName string) error {
+ s.mu.Lock()
+ defer s.mu.Unlock()
+ a, ok := s.apps[appName]
+ if !ok {
+ return nil // delete is idempotent
+ }
+
+ if err := store.RunInTransaction(s.st, func(tx store.Transaction) error {
+ // Read-check-delete appData.
+ if err := util.GetWithAuth(ctx, call, tx, a.stKey(), &appData{}); err != nil {
+ if verror.ErrorID(err) == verror.ErrNoExist.ID {
+ return nil // delete is idempotent
+ }
+ return err
+ }
+ // TODO(sadovsky): Delete all databases in this app.
+ return util.Delete(ctx, tx, a.stKey())
+ }); err != nil {
+ return err
+ }
+
+ delete(s.apps, appName)
+ return nil
+}
+
+func (s *service) setAppPerms(ctx *context.T, call rpc.ServerCall, appName string, perms access.Permissions, version string) error {
+ s.mu.Lock()
+ defer s.mu.Unlock()
+ a, ok := s.apps[appName]
+ if !ok {
+ return verror.New(verror.ErrNoExist, ctx, appName)
+ }
+ return store.RunInTransaction(s.st, func(tx store.Transaction) error {
+ data := &appData{}
+ return util.UpdateWithAuth(ctx, call, tx, a.stKey(), data, func() error {
+ if err := util.CheckVersion(ctx, version, data.Version); err != nil {
+ return err
+ }
+ data.Perms = perms
+ data.Version++
+ return nil
+ })
+ })
+}
+
+////////////////////////////////////////
+// Other internal helpers
+
+func (s *service) stKey() string {
+ return util.ServicePrefix
+}
diff --git a/services/syncbase/server/types.go b/services/syncbase/server/types.go
new file mode 100644
index 0000000..2879d56
--- /dev/null
+++ b/services/syncbase/server/types.go
@@ -0,0 +1,23 @@
+// Copyright 2015 The Vanadium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package server
+
+import (
+ "v.io/syncbase/x/ref/services/syncbase/server/util"
+ "v.io/v23/security/access"
+)
+
+var (
+ _ util.Permser = (*serviceData)(nil)
+ _ util.Permser = (*appData)(nil)
+)
+
+func (data *serviceData) GetPerms() access.Permissions {
+ return data.Perms
+}
+
+func (data *appData) GetPerms() access.Permissions {
+ return data.Perms
+}
diff --git a/services/syncbase/server/types.vdl b/services/syncbase/server/types.vdl
new file mode 100644
index 0000000..4999f77
--- /dev/null
+++ b/services/syncbase/server/types.vdl
@@ -0,0 +1,34 @@
+// Copyright 2015 The Vanadium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package server
+
+import (
+ "v.io/v23/security/access"
+)
+
+// serviceData represents the persistent state of a Service.
+type serviceData struct {
+ Version uint64 // covers the fields below
+ Perms access.Permissions
+}
+
+// appData represents the persistent state of an App.
+type appData struct {
+ Name string
+ Version uint64 // covers the fields below
+ Perms access.Permissions
+}
+
+// dbInfo contains information about one database for an App.
+// TODO(sadovsky): Track NoSQL vs. SQL.
+type dbInfo struct {
+ Name string
+ Initialized bool
+ Deleted bool
+ // Select fields from nosql.DatabaseOptions, needed in order to open storage
+ // engine on restart.
+ RootDir string // interpreted by storage engine
+ Engine string // name of storage engine, e.g. "leveldb"
+}
diff --git a/services/syncbase/server/types.vdl.go b/services/syncbase/server/types.vdl.go
new file mode 100644
index 0000000..aec38cd
--- /dev/null
+++ b/services/syncbase/server/types.vdl.go
@@ -0,0 +1,62 @@
+// Copyright 2015 The Vanadium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// This file was auto-generated by the vanadium vdl tool.
+// Source: types.vdl
+
+package server
+
+import (
+ // VDL system imports
+ "v.io/v23/vdl"
+
+ // VDL user imports
+ "v.io/v23/security/access"
+)
+
+// serviceData represents the persistent state of a Service.
+type serviceData struct {
+ Version uint64 // covers the fields below
+ Perms access.Permissions
+}
+
+func (serviceData) __VDLReflect(struct {
+ Name string `vdl:"v.io/syncbase/x/ref/services/syncbase/server.serviceData"`
+}) {
+}
+
+// appData represents the persistent state of an App.
+type appData struct {
+ Name string
+ Version uint64 // covers the fields below
+ Perms access.Permissions
+}
+
+func (appData) __VDLReflect(struct {
+ Name string `vdl:"v.io/syncbase/x/ref/services/syncbase/server.appData"`
+}) {
+}
+
+// dbInfo contains information about one database for an App.
+// TODO(sadovsky): Track NoSQL vs. SQL.
+type dbInfo struct {
+ Name string
+ Initialized bool
+ Deleted bool
+ // Select fields from nosql.DatabaseOptions, needed in order to open storage
+ // engine on restart.
+ RootDir string // interpreted by storage engine
+ Engine string // name of storage engine, e.g. "leveldb"
+}
+
+func (dbInfo) __VDLReflect(struct {
+ Name string `vdl:"v.io/syncbase/x/ref/services/syncbase/server.dbInfo"`
+}) {
+}
+
+func init() {
+ vdl.Register((*serviceData)(nil))
+ vdl.Register((*appData)(nil))
+ vdl.Register((*dbInfo)(nil))
+}
diff --git a/services/syncbase/server/util/constants.go b/services/syncbase/server/util/constants.go
new file mode 100644
index 0000000..ab2e401
--- /dev/null
+++ b/services/syncbase/server/util/constants.go
@@ -0,0 +1,51 @@
+// Copyright 2015 The Vanadium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package util
+
+import (
+ "time"
+)
+
+// TODO(sadovsky): Consider using shorter strings.
+
+// Constants related to storage engine keys.
+const (
+ AppPrefix = "$app"
+ ClockPrefix = "$clock"
+ DatabasePrefix = "$database"
+ DbInfoPrefix = "$dbInfo"
+ LogPrefix = "$log"
+ PermsPrefix = "$perms"
+ RowPrefix = "$row"
+ ServicePrefix = "$service"
+ SyncPrefix = "$sync"
+ TablePrefix = "$table"
+ VersionPrefix = "$version"
+)
+
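+// For example, a row with key "foo" in table "tb" is stored under the storage
+// engine key "$row:tb:foo", and the permissions for prefix "ba" of that table
+// under "$perms:tb:ba"; key parts are joined with KeyPartSep.
+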
+// Constants related to object names.
+const (
+ // Service object name suffix for Syncbase-to-Syncbase RPCs.
+ SyncbaseSuffix = "$sync"
+ // Separator for batch info in database names.
+ BatchSep = ":"
+ // Separator for parts of storage engine keys.
+ KeyPartSep = ":"
+ // PrefixRangeLimitSuffix is the suffix of a key that indicates the end of
+ // a prefix range. It must sort after any regular key in the store.
+ // TODO(rogulenko): Change this constant to something out of the UTF8 space.
+ PrefixRangeLimitSuffix = "~"
+)
+
+// Constants related to syncbase clock.
+const (
+ // The pool.ntp.org project is a big virtual cluster of timeservers
+ // providing a reliable, easy-to-use NTP service for millions of clients.
+ // See http://www.pool.ntp.org/en/ for more.
+ NtpServerPool = "pool.ntp.org"
+ NtpSampleCount = 15
+ LocalClockDriftThreshold = float64(time.Second)
+ NtpDiffThreshold = float64(2 * time.Second)
+)
diff --git a/services/syncbase/server/util/glob.go b/services/syncbase/server/util/glob.go
new file mode 100644
index 0000000..4a73870
--- /dev/null
+++ b/services/syncbase/server/util/glob.go
@@ -0,0 +1,40 @@
+// Copyright 2015 The Vanadium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package util
+
+import (
+ "v.io/syncbase/x/ref/services/syncbase/store"
+ "v.io/v23/context"
+ "v.io/v23/glob"
+ "v.io/v23/naming"
+ "v.io/v23/rpc"
+ "v.io/x/lib/vlog"
+)
+
+// NOTE(nlacasse): Syncbase handles Glob requests by implementing
+// GlobChildren__ at each level (service, app, database, table).
+
+// Glob performs a glob. It calls closeSntx to close sntx.
+func Glob(ctx *context.T, call rpc.GlobChildrenServerCall, matcher *glob.Element, sntx store.SnapshotOrTransaction, closeSntx func() error, stKeyPrefix string) error {
+ prefix, _ := matcher.FixedPrefix()
+ it := sntx.Scan(ScanPrefixArgs(stKeyPrefix, prefix))
+ defer closeSntx()
+ key := []byte{}
+ for it.Advance() {
+ key = it.Key(key)
+ parts := SplitKeyParts(string(key))
+ name := parts[len(parts)-1]
+ if matcher.Match(name) {
+ if err := call.SendStream().Send(naming.GlobChildrenReplyName{Value: name}); err != nil {
+ return err
+ }
+ }
+ }
+ if err := it.Err(); err != nil {
+ vlog.VI(1).Infof("Glob() failed: %v", err)
+ call.SendStream().Send(naming.GlobChildrenReplyError{Value: naming.GlobError{Error: err}})
+ }
+ return nil
+}
diff --git a/services/syncbase/server/util/key_util.go b/services/syncbase/server/util/key_util.go
new file mode 100644
index 0000000..80a8a6d
--- /dev/null
+++ b/services/syncbase/server/util/key_util.go
@@ -0,0 +1,37 @@
+// Copyright 2015 The Vanadium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package util
+
+import (
+ "strings"
+
+ "v.io/syncbase/v23/syncbase/util"
+)
+
+// JoinKeyParts builds keys for accessing data in the storage engine.
+func JoinKeyParts(parts ...string) string {
+ // TODO(sadovsky): Figure out which delimiter makes the most sense.
+ return strings.Join(parts, KeyPartSep)
+}
+
+// SplitKeyParts is the inverse of JoinKeyParts.
+func SplitKeyParts(key string) []string {
+ return strings.Split(key, KeyPartSep)
+}
+
+// ScanPrefixArgs returns args for sn.Scan() for the specified prefix.
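+// For example, ScanPrefixArgs("x", "a") returns ("x:a", "x:b").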
+func ScanPrefixArgs(stKeyPrefix, prefix string) ([]byte, []byte) {
+ return ScanRangeArgs(stKeyPrefix, util.PrefixRangeStart(prefix), util.PrefixRangeLimit(prefix))
+}
+
+// ScanRangeArgs returns args for sn.Scan() for the specified range.
+// If limit is "", all rows with keys >= start are included.
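+// For example, ScanRangeArgs("x", "a", "b") returns ("x:a", "x:b"), and
+// ScanRangeArgs("x", "a", "") returns ("x:a", "x;").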
+func ScanRangeArgs(stKeyPrefix, start, limit string) ([]byte, []byte) {
+ fullStart, fullLimit := JoinKeyParts(stKeyPrefix, start), JoinKeyParts(stKeyPrefix, limit)
+ if limit == "" {
+ fullLimit = util.PrefixRangeLimit(fullLimit)
+ }
+ return []byte(fullStart), []byte(fullLimit)
+}
diff --git a/services/syncbase/server/util/key_util_test.go b/services/syncbase/server/util/key_util_test.go
new file mode 100644
index 0000000..2240531
--- /dev/null
+++ b/services/syncbase/server/util/key_util_test.go
@@ -0,0 +1,83 @@
+// Copyright 2015 The Vanadium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package util_test
+
+import (
+ "reflect"
+ "testing"
+
+ "v.io/syncbase/x/ref/services/syncbase/server/util"
+)
+
+type kpt struct {
+ parts []string
+ key string
+}
+
+var keyPartTests []kpt = []kpt{
+ {[]string{"a", "b"}, "a:b"},
+ {[]string{"aa", "bb"}, "aa:bb"},
+ {[]string{"a", "b", "c"}, "a:b:c"},
+}
+
+func TestJoinKeyParts(t *testing.T) {
+ for _, test := range keyPartTests {
+ got, want := util.JoinKeyParts(test.parts...), test.key
+ if !reflect.DeepEqual(got, want) {
+ t.Errorf("%v: got %q, want %q", test.parts, got, want)
+ }
+ }
+}
+
+func TestSplitKeyParts(t *testing.T) {
+ for _, test := range keyPartTests {
+ got, want := util.SplitKeyParts(test.key), test.parts
+ if !reflect.DeepEqual(got, want) {
+ t.Errorf("%q: got %v, want %v", test.key, got, want)
+ }
+ }
+}
+
+func TestScanPrefixArgs(t *testing.T) {
+ tests := []struct {
+ stKeyPrefix, prefix, wantStart, wantLimit string
+ }{
+ {"x", "", "x:", "x;"},
+ {"x", "a", "x:a", "x:b"},
+ {"x", "a\xff", "x:a\xff", "x:b"},
+ }
+ for _, test := range tests {
+ start, limit := util.ScanPrefixArgs(test.stKeyPrefix, test.prefix)
+ gotStart, gotLimit := string(start), string(limit)
+ if gotStart != test.wantStart {
+ t.Errorf("{%q, %q} start: got %q, want %q", test.stKeyPrefix, test.prefix, gotStart, test.wantStart)
+ }
+ if gotLimit != test.wantLimit {
+ t.Errorf("{%q, %q} limit: got %q, want %q", test.stKeyPrefix, test.prefix, gotLimit, test.wantLimit)
+ }
+ }
+}
+
+func TestScanRangeArgs(t *testing.T) {
+ tests := []struct {
+ stKeyPrefix, start, limit, wantStart, wantLimit string
+ }{
+ {"x", "", "", "x:", "x;"}, // limit "" means "no limit"
+ {"x", "a", "", "x:a", "x;"}, // limit "" means "no limit"
+ {"x", "a", "b", "x:a", "x:b"},
+ {"x", "a", "a", "x:a", "x:a"}, // empty range
+ {"x", "b", "a", "x:b", "x:a"}, // empty range
+ }
+ for _, test := range tests {
+ start, limit := util.ScanRangeArgs(test.stKeyPrefix, test.start, test.limit)
+ gotStart, gotLimit := string(start), string(limit)
+ if gotStart != test.wantStart {
+ t.Errorf("{%q, %q, %q} start: got %q, want %q", test.stKeyPrefix, test.start, test.limit, gotStart, test.wantStart)
+ }
+ if gotLimit != test.wantLimit {
+ t.Errorf("{%q, %q, %q} limit: got %q, want %q", test.stKeyPrefix, test.start, test.limit, gotLimit, test.wantLimit)
+ }
+ }
+}
diff --git a/services/syncbase/server/util/store_util.go b/services/syncbase/server/util/store_util.go
new file mode 100644
index 0000000..b8f1905
--- /dev/null
+++ b/services/syncbase/server/util/store_util.go
@@ -0,0 +1,164 @@
+// Copyright 2015 The Vanadium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package util
+
+import (
+ "os"
+ "strconv"
+
+ "v.io/syncbase/x/ref/services/syncbase/store"
+ "v.io/syncbase/x/ref/services/syncbase/store/leveldb"
+ "v.io/syncbase/x/ref/services/syncbase/store/memstore"
+ "v.io/v23/context"
+ "v.io/v23/rpc"
+ "v.io/v23/security/access"
+ "v.io/v23/verror"
+ "v.io/v23/vom"
+)
+
+func FormatVersion(version uint64) string {
+ return strconv.FormatUint(version, 10)
+}
+
+func CheckVersion(ctx *context.T, presented string, actual uint64) error {
+ if presented != "" && presented != FormatVersion(actual) {
+ return verror.NewErrBadVersion(ctx)
+ }
+ return nil
+}
+
+// TODO(sadovsky): Perhaps these functions should strip key prefixes such as
+// "$table:" from the error messages they return.
+
+type Permser interface {
+ // GetPerms returns the Permissions for this Layer.
+ GetPerms() access.Permissions
+}
+
+// Get does st.Get(k, v) and wraps the returned error.
+func Get(ctx *context.T, st store.StoreReader, k string, v interface{}) error {
+ bytes, err := st.Get([]byte(k), nil)
+ if err != nil {
+ if verror.ErrorID(err) == store.ErrUnknownKey.ID {
+ return verror.New(verror.ErrNoExist, ctx, k)
+ }
+ return verror.New(verror.ErrInternal, ctx, err)
+ }
+ if err = vom.Decode(bytes, v); err != nil {
+ return verror.New(verror.ErrInternal, ctx, err)
+ }
+ return nil
+}
+
+// GetWithAuth does Get followed by an auth check.
+func GetWithAuth(ctx *context.T, call rpc.ServerCall, st store.StoreReader, k string, v Permser) error {
+ if err := Get(ctx, st, k, v); err != nil {
+ return err
+ }
+ auth, _ := access.PermissionsAuthorizer(v.GetPerms(), access.TypicalTagType())
+ if err := auth.Authorize(ctx, call.Security()); err != nil {
+ return verror.New(verror.ErrNoAccess, ctx, err)
+ }
+ return nil
+}
+
+// Put does stw.Put(k, v) and wraps the returned error.
+func Put(ctx *context.T, stw store.StoreWriter, k string, v interface{}) error {
+ bytes, err := vom.Encode(v)
+ if err != nil {
+ return verror.New(verror.ErrInternal, ctx, err)
+ }
+ if err = stw.Put([]byte(k), bytes); err != nil {
+ return verror.New(verror.ErrInternal, ctx, err)
+ }
+ return nil
+}
+
+// Delete does stw.Delete(k) and wraps the returned error.
+func Delete(ctx *context.T, stw store.StoreWriter, k string) error {
+ if err := stw.Delete([]byte(k)); err != nil {
+ return verror.New(verror.ErrInternal, ctx, err)
+ }
+ return nil
+}
+
+// UpdateWithAuth performs a read-modify-write.
+// Input v is populated by the "read" step. fn should "modify" v.
+// Performs an auth check as part of the "read" step.
+func UpdateWithAuth(ctx *context.T, call rpc.ServerCall, tx store.Transaction, k string, v Permser, fn func() error) error {
+ if err := GetWithAuth(ctx, call, tx, k, v); err != nil {
+ return err
+ }
+ if err := fn(); err != nil {
+ return err
+ }
+ return Put(ctx, tx, k, v)
+}
+
+// ErrorToExists wraps the error returned by a call to Get and reports whether
+// Get found the object, suppressing ErrNoExist. Access errors are suppressed
+// as well because they imply existence in some Get implementations.
+// TODO(ivanpi): Revisit once ACL specification is finalized.
+func ErrorToExists(err error) (bool, error) {
+ if err == nil {
+ return true, nil
+ }
+ switch verror.ErrorID(err) {
+ case verror.ErrNoExist.ID:
+ return false, nil
+ case verror.ErrNoAccess.ID, verror.ErrNoExistOrNoAccess.ID:
+ return false, nil
+ default:
+ return false, err
+ }
+}
+
+type OpenOptions struct {
+ CreateIfMissing bool
+ ErrorIfExists bool
+}
+
+// OpenStore opens the given store.Store. OpenOptions are respected to the
+// degree possible for the specified engine.
+func OpenStore(engine, path string, opts OpenOptions) (store.Store, error) {
+ switch engine {
+ case "memstore":
+ if !opts.CreateIfMissing {
+ return nil, verror.New(verror.ErrInternal, nil, "cannot open memstore")
+ }
+ // By definition, the memstore does not already exist.
+ return memstore.New(), nil
+ case "leveldb":
+ leveldbOpts := leveldb.OpenOptions{
+ CreateIfMissing: opts.CreateIfMissing,
+ ErrorIfExists: opts.ErrorIfExists,
+ }
+ if opts.CreateIfMissing {
+ // Note, os.MkdirAll is a noop if the path already exists. We rely on
+ // leveldb to enforce ErrorIfExists.
+ if err := os.MkdirAll(path, 0700); err != nil {
+ return nil, verror.New(verror.ErrInternal, nil, err)
+ }
+ }
+ return leveldb.Open(path, leveldbOpts)
+ default:
+ return nil, verror.New(verror.ErrBadArg, nil, engine)
+ }
+}
+
+func DestroyStore(engine, path string) error {
+ switch engine {
+ case "memstore":
+ // memstore does not persist any data to disk; there is nothing to destroy.
+ return nil
+ case "leveldb":
+ if err := os.RemoveAll(path); err != nil {
+ return verror.New(verror.ErrInternal, nil, err)
+ }
+ return nil
+ default:
+ return verror.New(verror.ErrBadArg, nil, engine)
+ }
+}
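
A minimal sketch (not part of this change) of how these helpers compose: open a store, write a VOM-encoded value, and read it back. The path, the key, the MyData type, and the example package are hypothetical.

package example

import (
	"v.io/syncbase/x/ref/services/syncbase/server/util"
	"v.io/v23/context"
)

type MyData struct {
	Name string
}

func roundTrip(ctx *context.T) error {
	st, err := util.OpenStore("leveldb", "/tmp/example-db", util.OpenOptions{CreateIfMissing: true})
	if err != nil {
		return err
	}
	defer st.Close()
	// Put VOM-encodes the value; Get decodes it back into the supplied pointer.
	if err := util.Put(ctx, st, "mykey", &MyData{Name: "Alice"}); err != nil {
		return err
	}
	var got MyData
	return util.Get(ctx, st, "mykey", &got)
}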
diff --git a/services/syncbase/server/watchable/snapshot.go b/services/syncbase/server/watchable/snapshot.go
new file mode 100644
index 0000000..37af4e1
--- /dev/null
+++ b/services/syncbase/server/watchable/snapshot.go
@@ -0,0 +1,45 @@
+// Copyright 2015 The Vanadium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package watchable
+
+import (
+ "v.io/syncbase/x/ref/services/syncbase/store"
+)
+
+type snapshot struct {
+ store.SnapshotSpecImpl
+ isn store.Snapshot
+ st *wstore
+}
+
+var _ store.Snapshot = (*snapshot)(nil)
+
+func newSnapshot(st *wstore) *snapshot {
+ return &snapshot{
+ isn: st.ist.NewSnapshot(),
+ st: st,
+ }
+}
+
+// Abort implements the store.Snapshot interface.
+func (s *snapshot) Abort() error {
+ return s.isn.Abort()
+}
+
+// Get implements the store.StoreReader interface.
+func (s *snapshot) Get(key, valbuf []byte) ([]byte, error) {
+ if !s.st.managesKey(key) {
+ return s.isn.Get(key, valbuf)
+ }
+ return getVersioned(s.isn, key, valbuf)
+}
+
+// Scan implements the store.StoreReader interface.
+func (s *snapshot) Scan(start, limit []byte) store.Stream {
+ if !s.st.managesRange(start, limit) {
+ return s.isn.Scan(start, limit)
+ }
+ return newStreamVersioned(s.isn, start, limit)
+}
diff --git a/services/syncbase/server/watchable/store.go b/services/syncbase/server/watchable/store.go
new file mode 100644
index 0000000..0b19678
--- /dev/null
+++ b/services/syncbase/server/watchable/store.go
@@ -0,0 +1,159 @@
+// Copyright 2015 The Vanadium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// Package watchable provides a Syncbase-specific store.Store wrapper that
+// provides versioned storage for specified prefixes and maintains a watchable
+// log of operations performed on versioned records. This log forms the basis
+// for the implementation of client-facing watch as well as the sync module's
+// internal watching of store updates.
+//
+// LogEntry records are stored chronologically, using keys of the form
+// "$log:<seq>". Sequence numbers are zero-padded to ensure that the
+// lexicographic order matches the numeric order.
+//
+// Version number records are stored using keys of the form "$version:<key>",
+// where <key> is the client-specified key.
+package watchable
+
+import (
+ "fmt"
+ "strings"
+ "sync"
+
+ pubutil "v.io/syncbase/v23/syncbase/util"
+ "v.io/syncbase/x/ref/services/syncbase/clock"
+ "v.io/syncbase/x/ref/services/syncbase/store"
+)
+
+// Store is a store.Store that provides versioned storage and a watchable oplog.
+// TODO(sadovsky): Extend interface.
+type Store interface {
+ store.Store
+}
+
+// Options configures a watchable.Store.
+type Options struct {
+ // Key prefixes to version and log. If nil, all keys are managed.
+ ManagedPrefixes []string
+}
+
+// Wrap returns a watchable.Store that wraps the given store.Store.
+func Wrap(st store.Store, vclock *clock.VClock, opts *Options) (Store, error) {
+ seq, err := getNextLogSeq(st)
+ if err != nil {
+ return nil, err
+ }
+ return &wstore{
+ ist: st,
+ watcher: newWatcher(),
+ opts: opts,
+ seq: seq,
+ clock: vclock,
+ }, nil
+}
+
+type wstore struct {
+ ist store.Store
+ watcher *watcher
+ opts *Options
+ mu sync.Mutex // held during transaction commits; protects seq
+ seq uint64 // the next sequence number to be used for a new commit
+ clock *clock.VClock // used to provide write timestamps
+}
+
+var _ Store = (*wstore)(nil)
+
+// Close implements the store.Store interface.
+func (st *wstore) Close() error {
+ st.watcher.close()
+ return st.ist.Close()
+}
+
+// Get implements the store.StoreReader interface.
+func (st *wstore) Get(key, valbuf []byte) ([]byte, error) {
+ if !st.managesKey(key) {
+ return st.ist.Get(key, valbuf)
+ }
+ sn := newSnapshot(st)
+ defer sn.Abort()
+ return sn.Get(key, valbuf)
+}
+
+// Scan implements the store.StoreReader interface.
+func (st *wstore) Scan(start, limit []byte) store.Stream {
+ if !st.managesRange(start, limit) {
+ return st.ist.Scan(start, limit)
+ }
+ // TODO(sadovsky): Close snapshot once stream is finished or canceled.
+ return newSnapshot(st).Scan(start, limit)
+}
+
+// Put implements the store.StoreWriter interface.
+func (st *wstore) Put(key, value []byte) error {
+ // Use watchable.Store transaction so this op gets logged.
+ return store.RunInTransaction(st, func(tx store.Transaction) error {
+ return tx.Put(key, value)
+ })
+}
+
+// Delete implements the store.StoreWriter interface.
+func (st *wstore) Delete(key []byte) error {
+ // Use watchable.Store transaction so this op gets logged.
+ return store.RunInTransaction(st, func(tx store.Transaction) error {
+ return tx.Delete(key)
+ })
+}
+
+// NewTransaction implements the store.Store interface.
+func (st *wstore) NewTransaction() store.Transaction {
+ return newTransaction(st)
+}
+
+// NewSnapshot implements the store.Store interface.
+func (st *wstore) NewSnapshot() store.Snapshot {
+ return newSnapshot(st)
+}
+
+// GetOptions returns the options configured on a watchable.Store.
+// TODO(rdaoud): expose watchable store through an interface and change this
+// function to be a method on the store.
+func GetOptions(st store.Store) (*Options, error) {
+ wst := st.(*wstore)
+ return wst.opts, nil
+}
+
+////////////////////////////////////////
+// Internal helpers
+
+func (st *wstore) managesKey(key []byte) bool {
+ if st.opts.ManagedPrefixes == nil {
+ return true
+ }
+ ikey := string(key)
+ // TODO(sadovsky): Optimize, e.g. use binary search (here and below).
+ for _, p := range st.opts.ManagedPrefixes {
+ if strings.HasPrefix(ikey, p) {
+ return true
+ }
+ }
+ return false
+}
+
+func (st *wstore) managesRange(start, limit []byte) bool {
+ if st.opts.ManagedPrefixes == nil {
+ return true
+ }
+ istart, ilimit := string(start), string(limit)
+ for _, p := range st.opts.ManagedPrefixes {
+ pstart, plimit := pubutil.PrefixRangeStart(p), pubutil.PrefixRangeLimit(p)
+ if pstart <= istart && ilimit <= plimit {
+ return true
+ }
+ if !(plimit <= istart || ilimit <= pstart) {
+ // If this happens, there's a bug in the Syncbase server implementation.
+ panic(fmt.Sprintf("partial overlap: %q %q %q", p, start, limit))
+ }
+ }
+ return false
+}
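
A minimal sketch (not part of this change) of wrapping an underlying store so that writes under selected prefixes are versioned and logged. The engine, path, and example package are hypothetical; the clock constructor shown is the mock helper used by the tests in this change.

package example

import (
	"v.io/syncbase/x/ref/services/syncbase/clock"
	"v.io/syncbase/x/ref/services/syncbase/server/util"
	"v.io/syncbase/x/ref/services/syncbase/server/watchable"
)

func openWatchable() (watchable.Store, error) {
	ist, err := util.OpenStore("leveldb", "/tmp/example-db", util.OpenOptions{CreateIfMissing: true})
	if err != nil {
		return nil, err
	}
	vclock := clock.NewVClockWithMockServices(clock.MockStorageAdapter(), nil, nil)
	// Only row and permission keys are versioned and logged; all other keys go
	// straight through to the underlying store.
	return watchable.Wrap(ist, vclock, &watchable.Options{
		ManagedPrefixes: []string{util.RowPrefix, util.PermsPrefix},
	})
}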
diff --git a/services/syncbase/server/watchable/store_test.go b/services/syncbase/server/watchable/store_test.go
new file mode 100644
index 0000000..8c1c370
--- /dev/null
+++ b/services/syncbase/server/watchable/store_test.go
@@ -0,0 +1,74 @@
+// Copyright 2015 The Vanadium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package watchable
+
+import (
+ "runtime"
+ "testing"
+
+ "v.io/syncbase/x/ref/services/syncbase/clock"
+ "v.io/syncbase/x/ref/services/syncbase/store"
+ "v.io/syncbase/x/ref/services/syncbase/store/test"
+)
+
+func init() {
+ runtime.GOMAXPROCS(10)
+}
+
+func TestStream(t *testing.T) {
+ runTest(t, []string{}, test.RunStreamTest)
+ runTest(t, nil, test.RunStreamTest)
+}
+
+func TestSnapshot(t *testing.T) {
+ runTest(t, []string{}, test.RunSnapshotTest)
+ runTest(t, nil, test.RunSnapshotTest)
+}
+
+func TestStoreState(t *testing.T) {
+ runTest(t, []string{}, test.RunStoreStateTest)
+ runTest(t, nil, test.RunStoreStateTest)
+}
+
+func TestClose(t *testing.T) {
+ runTest(t, []string{}, test.RunCloseTest)
+ runTest(t, nil, test.RunCloseTest)
+}
+
+func TestReadWriteBasic(t *testing.T) {
+ runTest(t, []string{}, test.RunReadWriteBasicTest)
+ runTest(t, nil, test.RunReadWriteBasicTest)
+}
+
+func TestReadWriteRandom(t *testing.T) {
+ runTest(t, []string{}, test.RunReadWriteRandomTest)
+ runTest(t, nil, test.RunReadWriteRandomTest)
+}
+
+func TestConcurrentTransactions(t *testing.T) {
+ runTest(t, []string{}, test.RunConcurrentTransactionsTest)
+ runTest(t, nil, test.RunConcurrentTransactionsTest)
+}
+
+func TestTransactionState(t *testing.T) {
+ runTest(t, []string{}, test.RunTransactionStateTest)
+ runTest(t, nil, test.RunTransactionStateTest)
+}
+
+func TestTransactionsWithGet(t *testing.T) {
+ runTest(t, []string{}, test.RunTransactionsWithGetTest)
+ runTest(t, nil, test.RunTransactionsWithGetTest)
+}
+
+func runTest(t *testing.T, mp []string, f func(t *testing.T, st store.Store)) {
+ st, destroy := createStore()
+ defer destroy()
+ vClock := clock.NewVClockWithMockServices(clock.MockStorageAdapter(), nil, nil)
+ st, err := Wrap(st, vClock, &Options{ManagedPrefixes: mp})
+ if err != nil {
+ t.Fatal(err)
+ }
+ f(t, st)
+}
diff --git a/services/syncbase/server/watchable/stream.go b/services/syncbase/server/watchable/stream.go
new file mode 100644
index 0000000..26502e1
--- /dev/null
+++ b/services/syncbase/server/watchable/stream.go
@@ -0,0 +1,94 @@
+// Copyright 2015 The Vanadium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package watchable
+
+import (
+ "sync"
+
+ "v.io/syncbase/x/ref/services/syncbase/store"
+)
+
+// stream streams keys and values for versioned records.
+type stream struct {
+ iit store.Stream
+ sntx store.SnapshotOrTransaction
+ mu sync.Mutex
+ err error
+ hasValue bool
+ key []byte
+ value []byte
+}
+
+var _ store.Stream = (*stream)(nil)
+
+// newStreamVersioned creates a new stream. It assumes all records in range
+// [start, limit) are managed, i.e. versioned.
+func newStreamVersioned(sntx store.SnapshotOrTransaction, start, limit []byte) *stream {
+ return &stream{
+ iit: sntx.Scan(makeVersionKey(start), makeVersionKey(limit)),
+ sntx: sntx,
+ }
+}
+
+// Advance implements the store.Stream interface.
+func (s *stream) Advance() bool {
+ s.mu.Lock()
+ defer s.mu.Unlock()
+ s.hasValue = false
+ if s.err != nil {
+ return false
+ }
+ if advanced := s.iit.Advance(); !advanced {
+ return false
+ }
+ versionKey, version := s.iit.Key(nil), s.iit.Value(nil)
+ s.key = []byte(join(split(string(versionKey))[1:]...)) // drop "$version" prefix
+ s.value, s.err = s.sntx.Get(makeAtVersionKey(s.key, version), nil)
+ if s.err != nil {
+ return false
+ }
+ s.hasValue = true
+ return true
+}
+
+// Key implements the store.Stream interface.
+func (s *stream) Key(keybuf []byte) []byte {
+ s.mu.Lock()
+ defer s.mu.Unlock()
+ if !s.hasValue {
+ panic("nothing staged")
+ }
+ return store.CopyBytes(keybuf, s.key)
+}
+
+// Value implements the store.Stream interface.
+func (s *stream) Value(valbuf []byte) []byte {
+ s.mu.Lock()
+ defer s.mu.Unlock()
+ if !s.hasValue {
+ panic("nothing staged")
+ }
+ return store.CopyBytes(valbuf, s.value)
+}
+
+// Err implements the store.Stream interface.
+func (s *stream) Err() error {
+ s.mu.Lock()
+ defer s.mu.Unlock()
+ if s.err != nil {
+ return convertError(s.err)
+ }
+ return s.iit.Err()
+}
+
+// Cancel implements the store.Stream interface.
+func (s *stream) Cancel() {
+ s.mu.Lock()
+ defer s.mu.Unlock()
+ if s.err != nil {
+ return
+ }
+ s.iit.Cancel()
+}
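
A minimal sketch (not part of this change) of consuming the stream returned by a watchable store's Scan; the key bounds and the example package are hypothetical, and error handling is reduced to the final Err check.

package example

import (
	"fmt"

	"v.io/syncbase/x/ref/services/syncbase/server/watchable"
)

func dumpRange(st watchable.Store) error {
	it := st.Scan([]byte("a"), []byte("z"))
	for it.Advance() {
		// Key and Value copy into the supplied buffer, or allocate one when nil.
		fmt.Printf("%s -> %s\n", it.Key(nil), it.Value(nil))
	}
	// Err must be checked once Advance returns false; Cancel would end the
	// stream early.
	return it.Err()
}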
diff --git a/services/syncbase/server/watchable/test_util.go b/services/syncbase/server/watchable/test_util.go
new file mode 100644
index 0000000..e14854a
--- /dev/null
+++ b/services/syncbase/server/watchable/test_util.go
@@ -0,0 +1,144 @@
+// Copyright 2015 The Vanadium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package watchable
+
+import (
+ "fmt"
+ "io/ioutil"
+ "math"
+ "time"
+
+ "v.io/syncbase/x/ref/services/syncbase/clock"
+ "v.io/syncbase/x/ref/services/syncbase/store"
+ "v.io/syncbase/x/ref/services/syncbase/store/leveldb"
+ "v.io/syncbase/x/ref/services/syncbase/store/memstore"
+ "v.io/v23/vom"
+)
+
+// This file provides utility methods for tests related to watchable store.
+
+////////////////////////////////////////////////////////////
+// Functions for store creation/cleanup
+
+// createStore returns a store along with a function to destroy the store
+// once it is no longer needed.
+func createStore() (store.Store, func()) {
+ var st store.Store
+ // With Memstore, TestReadWriteRandom is slow with ManagedPrefixes=nil since
+ // every watchable.Store.Get() takes a snapshot, and memstore snapshots are
+ // relatively expensive since the entire data map is copied. LevelDB snapshots
+ // are cheap, so with LevelDB ManagedPrefixes=nil is still reasonably fast.
+ if false {
+ st = memstore.New()
+ return st, func() {
+ st.Close()
+ }
+ } else {
+ path := getPath()
+ st = createLevelDB(path)
+ return st, func() {
+ destroyLevelDB(st, path)
+ }
+ }
+}
+
+func getPath() string {
+ path, err := ioutil.TempDir("", "syncbase_leveldb")
+ if err != nil {
+ panic(fmt.Sprintf("can't create temp dir: %v", err))
+ }
+ return path
+}
+
+func createLevelDB(path string) store.Store {
+ st, err := leveldb.Open(path, leveldb.OpenOptions{CreateIfMissing: true, ErrorIfExists: true})
+ if err != nil {
+ panic(fmt.Sprintf("can't open db at %v: %v", path, err))
+ }
+ return st
+}
+
+func destroyLevelDB(st store.Store, path string) {
+ st.Close()
+ if err := leveldb.Destroy(path); err != nil {
+ panic(fmt.Sprintf("can't destroy db at %v: %v", path, err))
+ }
+}
+
+////////////////////////////////////////////////////////////
+// Functions related to watchable store
+
+func getSeq(st Store) uint64 {
+ wst := st.(*wstore)
+ return wst.seq
+}
+
+// logEntryReader provides a stream-like interface to scan over the log entries
+// of a single batch, starting from a given sequence number. It opens a stream
+// that scans the log from the sequence number given. It stops after reading
+// the last entry in that batch (indicated by a false Continued flag).
+type logEntryReader struct {
+ stream store.Stream // scan stream on the store Database
+ done bool // true after reading the last batch entry
+ key string // key of most recent log entry read
+ entry LogEntry // most recent log entry read
+}
+
+func newLogEntryReader(st store.Store, seq uint64) *logEntryReader {
+ stream := st.Scan([]byte(logEntryKey(seq)), []byte(logEntryKey(math.MaxUint64)))
+ return &logEntryReader{stream: stream}
+}
+
+func (ler *logEntryReader) Advance() bool {
+ if ler.done {
+ return false
+ }
+
+ if ler.stream.Advance() {
+ ler.key = string(ler.stream.Key(nil))
+ if err := vom.Decode(ler.stream.Value(nil), &ler.entry); err != nil {
+ panic(fmt.Errorf("failed to decode LogEntry for key %q: %v", ler.key, err))
+ }
+ if !ler.entry.Continued {
+ ler.done = true
+ }
+ return true
+ }
+
+ ler.key = ""
+ ler.entry = LogEntry{}
+ return false
+}
+
+func (ler *logEntryReader) GetEntry() (string, LogEntry) {
+ return ler.key, ler.entry
+}
+
+////////////////////////////////////////////////////////////
+// Clock related utility code
+
+type mockSystemClock struct {
+ time time.Time // current time returned by call to Now()
+ increment time.Duration // how much to increment the clock by for subsequent calls to Now()
+}
+
+func newMockSystemClock(firstTimestamp time.Time, increment time.Duration) *mockSystemClock {
+ return &mockSystemClock{
+ time: firstTimestamp,
+ increment: increment,
+ }
+}
+
+func (sc *mockSystemClock) Now() time.Time {
+ now := sc.time
+ sc.time = sc.time.Add(sc.increment)
+ return now
+}
+
+func (sc *mockSystemClock) ElapsedTime() (time.Duration, error) {
+ return sc.increment, nil
+}
+
+var _ clock.SystemClock = (*mockSystemClock)(nil)
diff --git a/services/syncbase/server/watchable/transaction.go b/services/syncbase/server/watchable/transaction.go
new file mode 100644
index 0000000..8a67f8f
--- /dev/null
+++ b/services/syncbase/server/watchable/transaction.go
@@ -0,0 +1,304 @@
+// Copyright 2015 The Vanadium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package watchable
+
+import (
+ "fmt"
+ "math"
+ "sync"
+
+ "v.io/syncbase/x/ref/services/syncbase/server/util"
+ "v.io/syncbase/x/ref/services/syncbase/store"
+ "v.io/v23/context"
+ "v.io/v23/verror"
+)
+
+type transaction struct {
+ itx store.Transaction
+ st *wstore
+ mu sync.Mutex // protects the fields below
+ err error
+ ops []Op
+ // fromSync is true when a transaction is created by sync. This causes
+ // the log entries written at commit time to have their "FromSync" field
+ // set to true. That in turn causes the sync watcher to filter out such
+ // updates since sync already knows about them (echo suppression).
+ fromSync bool
+}
+
+var _ store.Transaction = (*transaction)(nil)
+
+// cp returns a defensive copy of a byte slice.
+func cp(src []byte) []byte {
+ dst := make([]byte, len(src))
+ copy(dst, src)
+ return dst
+}
+
+// cpStrings returns a defensive copy of a string slice.
+func cpStrings(src []string) []string {
+ dst := make([]string, len(src))
+ copy(dst, src)
+ return dst
+}
+
+func newTransaction(st *wstore) *transaction {
+ return &transaction{
+ itx: st.ist.NewTransaction(),
+ st: st,
+ }
+}
+
+// Get implements the store.StoreReader interface.
+func (tx *transaction) Get(key, valbuf []byte) ([]byte, error) {
+ tx.mu.Lock()
+ defer tx.mu.Unlock()
+ if tx.err != nil {
+ return valbuf, convertError(tx.err)
+ }
+ var err error
+ if !tx.st.managesKey(key) {
+ valbuf, err = tx.itx.Get(key, valbuf)
+ } else {
+ valbuf, err = getVersioned(tx.itx, key, valbuf)
+ tx.ops = append(tx.ops, &OpGet{GetOp{Key: cp(key)}})
+ }
+ return valbuf, err
+}
+
+// Scan implements the store.StoreReader interface.
+func (tx *transaction) Scan(start, limit []byte) store.Stream {
+ tx.mu.Lock()
+ defer tx.mu.Unlock()
+ if tx.err != nil {
+ return &store.InvalidStream{Error: tx.err}
+ }
+ var it store.Stream
+ if !tx.st.managesRange(start, limit) {
+ it = tx.itx.Scan(start, limit)
+ } else {
+ it = newStreamVersioned(tx.itx, start, limit)
+ tx.ops = append(tx.ops, &OpScan{ScanOp{Start: cp(start), Limit: cp(limit)}})
+ }
+ return it
+}
+
+// Put implements the store.StoreWriter interface.
+func (tx *transaction) Put(key, value []byte) error {
+ tx.mu.Lock()
+ defer tx.mu.Unlock()
+ if tx.err != nil {
+ return convertError(tx.err)
+ }
+ if !tx.st.managesKey(key) {
+ return tx.itx.Put(key, value)
+ }
+ version, err := putVersioned(tx.itx, key, value)
+ if err != nil {
+ return err
+ }
+ tx.ops = append(tx.ops, &OpPut{PutOp{Key: cp(key), Version: version}})
+ return nil
+}
+
+// Delete implements the store.StoreWriter interface.
+func (tx *transaction) Delete(key []byte) error {
+ tx.mu.Lock()
+ defer tx.mu.Unlock()
+ if tx.err != nil {
+ return convertError(tx.err)
+ }
+ if !tx.st.managesKey(key) {
+ return tx.itx.Delete(key)
+ }
+ if err := deleteVersioned(tx.itx, key); err != nil {
+ return err
+ }
+ tx.ops = append(tx.ops, &OpDelete{DeleteOp{Key: cp(key)}})
+ return nil
+}
+
+// Commit implements the store.Transaction interface.
+func (tx *transaction) Commit() error {
+ tx.mu.Lock()
+ defer tx.mu.Unlock()
+ if tx.err != nil {
+ return convertError(tx.err)
+ }
+ tx.err = verror.New(verror.ErrBadState, nil, store.ErrMsgCommittedTxn)
+ tx.st.mu.Lock()
+ defer tx.st.mu.Unlock()
+ // Check if there is enough space left in the sequence number.
+ if (math.MaxUint64 - tx.st.seq) < uint64(len(tx.ops)) {
+ return verror.New(verror.ErrInternal, nil, "seq maxed out")
+ }
+ // Write LogEntry records.
+ timestamp := tx.st.clock.Now(nil).UnixNano()
+ seq := tx.st.seq
+ for i, op := range tx.ops {
+ key := logEntryKey(seq)
+ value := &LogEntry{
+ Op: op,
+ CommitTimestamp: timestamp,
+ FromSync: tx.fromSync,
+ Continued: i < len(tx.ops)-1,
+ }
+ if err := util.Put(nil, tx.itx, key, value); err != nil {
+ return err
+ }
+ seq++
+ }
+ if err := tx.itx.Commit(); err != nil {
+ return err
+ }
+ tx.st.seq = seq
+ tx.st.watcher.broadcastUpdates()
+ return nil
+}
+
+// Abort implements the store.Transaction interface.
+func (tx *transaction) Abort() error {
+ tx.mu.Lock()
+ defer tx.mu.Unlock()
+ if tx.err != nil {
+ return convertError(tx.err)
+ }
+ tx.err = verror.New(verror.ErrCanceled, nil, store.ErrMsgAbortedTxn)
+ return tx.itx.Abort()
+}
+
+// AddSyncGroupOp injects a SyncGroup operation notification in the log entries
+// that the transaction writes when it is committed. It allows the SyncGroup
+// operations (create, join, leave, destroy) to notify the sync watcher of the
+// change at its proper position in the timeline (the transaction commit).
+// Note: this is an internal function used by sync, not part of the interface.
+func AddSyncGroupOp(ctx *context.T, tx store.Transaction, prefixes []string, remove bool) error {
+ wtx := tx.(*transaction)
+ wtx.mu.Lock()
+ defer wtx.mu.Unlock()
+ if wtx.err != nil {
+ return convertError(wtx.err)
+ }
+ // Make a defensive copy of prefixes slice.
+ wtx.ops = append(wtx.ops, &OpSyncGroup{SyncGroupOp{Prefixes: cpStrings(prefixes), Remove: remove}})
+ return nil
+}
+
+// AddSyncSnapshotOp injects a sync snapshot operation notification in the log
+// entries that the transaction writes when it is committed. It allows the
+// SyncGroup create or join operations to notify the sync watcher of the
+// current keys and their versions to use when initializing the sync metadata
+// at the point in the timeline when these keys become syncable (at commit).
+// Note: this is an internal function used by sync, not part of the interface.
+func AddSyncSnapshotOp(ctx *context.T, tx store.Transaction, key, version []byte) error {
+ wtx := tx.(*transaction)
+ wtx.mu.Lock()
+ defer wtx.mu.Unlock()
+ if wtx.err != nil {
+ return convertError(wtx.err)
+ }
+ if !wtx.st.managesKey(key) {
+ return verror.New(verror.ErrInternal, ctx, fmt.Sprintf("cannot create SyncSnapshotOp on unmanaged key: %s", string(key)))
+ }
+ wtx.ops = append(wtx.ops, &OpSyncSnapshot{SyncSnapshotOp{Key: cp(key), Version: cp(version)}})
+ return nil
+}
+
+// SetTransactionFromSync marks this transaction as created by sync as opposed
+// to one created by an application. The net effect is that, at commit time,
+// the log entries written are marked as made by sync. This allows the sync
+// Watcher to ignore them (echo suppression) because it made these updates.
+// Note: this is an internal function used by sync, not part of the interface.
+// TODO(rdaoud): support a generic echo-suppression mechanism for apps as well,
+// perhaps by having a creator ID in the transaction and log entries.
+// TODO(rdaoud): fold this flag (or creator ID) into Tx options when available.
+func SetTransactionFromSync(tx store.Transaction) {
+ wtx := tx.(*transaction)
+ wtx.mu.Lock()
+ defer wtx.mu.Unlock()
+ wtx.fromSync = true
+}
+
+// GetVersion returns the current version of a managed key. This method is used
+// by the Sync module when the initiator is attempting to add new versions of
+// objects. Reading the version key is used for optimistic concurrency
+// control. At minimum, an object implementing the Transaction interface is
+// required since this is a Get operation.
+func GetVersion(ctx *context.T, tx store.Transaction, key []byte) ([]byte, error) {
+ switch w := tx.(type) {
+ case *transaction:
+ w.mu.Lock()
+ defer w.mu.Unlock()
+ if w.err != nil {
+ return nil, convertError(w.err)
+ }
+ return getVersion(w.itx, key)
+ }
+ return nil, verror.New(verror.ErrInternal, ctx, "unsupported store type")
+}
+
+// GetAtVersion returns the value of a managed key at the requested
+// version. This method is used by the Sync module when the responder needs to
+// send objects over the wire. At minimum, an object implementing the
+// StoreReader interface is required since this is a Get operation.
+func GetAtVersion(ctx *context.T, st store.StoreReader, key, valbuf, version []byte) ([]byte, error) {
+ switch w := st.(type) {
+ case *snapshot:
+ return getAtVersion(w.isn, key, valbuf, version)
+ case *transaction:
+ w.mu.Lock()
+ defer w.mu.Unlock()
+ if w.err != nil {
+ return valbuf, convertError(w.err)
+ }
+ return getAtVersion(w.itx, key, valbuf, version)
+ case *wstore:
+ return getAtVersion(w.ist, key, valbuf, version)
+ }
+ return nil, verror.New(verror.ErrInternal, ctx, "unsupported store type")
+}
+
+// PutAtVersion puts a value for the managed key at the requested version. This
+// method is used by the Sync module exclusively when the initiator adds objects
+// with versions created on other Syncbases. At minimum, an object implementing
+// the Transaction interface is required since this is a Put operation.
+func PutAtVersion(ctx *context.T, tx store.Transaction, key, valbuf, version []byte) error {
+ wtx := tx.(*transaction)
+
+ wtx.mu.Lock()
+ defer wtx.mu.Unlock()
+ if wtx.err != nil {
+ return convertError(wtx.err)
+ }
+
+ // Note that we do not enqueue a PutOp in the log since this Put is not
+ // updating the current version of a key.
+ return wtx.itx.Put(makeAtVersionKey(key, version), valbuf)
+}
+
+// PutVersion updates the version of a managed key to the requested
+// version. This method is used by the Sync module exclusively when the
+// initiator selects which of the already stored versions (via PutAtVersion
+// calls) becomes the current version. At minimum, an object implementing
+// the Transaction interface is required since this is a Put operation.
+func PutVersion(ctx *context.T, tx store.Transaction, key, version []byte) error {
+ wtx := tx.(*transaction)
+
+ wtx.mu.Lock()
+ defer wtx.mu.Unlock()
+ if wtx.err != nil {
+ return convertError(wtx.err)
+ }
+
+ if err := wtx.itx.Put(makeVersionKey(key), version); err != nil {
+ return err
+ }
+ wtx.ops = append(wtx.ops, &OpPut{PutOp{Key: cp(key), Version: cp(version)}})
+ return nil
+}
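
A minimal sketch (not part of this change) of a client transaction against the wrapped store. Committing it appends one LogEntry per op under "$log:<seq>", with Continued=false on the last entry of the batch; the keys, values, and example package are hypothetical.

package example

import (
	"v.io/syncbase/x/ref/services/syncbase/server/watchable"
	"v.io/syncbase/x/ref/services/syncbase/store"
)

func writeRow(wst watchable.Store) error {
	return store.RunInTransaction(wst, func(tx store.Transaction) error {
		if err := tx.Put([]byte("foo"), []byte("bar")); err != nil {
			return err
		}
		return tx.Delete([]byte("obsolete"))
	})
}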
diff --git a/services/syncbase/server/watchable/transaction_test.go b/services/syncbase/server/watchable/transaction_test.go
new file mode 100644
index 0000000..5fcdf94
--- /dev/null
+++ b/services/syncbase/server/watchable/transaction_test.go
@@ -0,0 +1,225 @@
+// Copyright 2015 The Vanadium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package watchable
+
+import (
+ "bytes"
+ "fmt"
+ "reflect"
+ "runtime/debug"
+ "testing"
+ "time"
+
+ "v.io/syncbase/x/ref/services/syncbase/clock"
+ "v.io/syncbase/x/ref/services/syncbase/store"
+)
+
+type testData struct {
+ key string
+ createVal string
+ updateVal string
+}
+
+var data1 testData = testData{
+ key: "key-a",
+ createVal: "val-a1",
+ updateVal: "val-a2",
+}
+
+var data2 testData = testData{
+ key: "key-b",
+ createVal: "val-b1",
+ updateVal: "val-b2",
+}
+
+func checkAndUpdate(tx store.Transaction, data testData) error {
+ // Check the current value for data.key, then update it.
+ keyBytes := []byte(data.key)
+ val, err := tx.Get(keyBytes, nil)
+ if err != nil {
+ return fmt.Errorf("can't get key %q: %v", data.key, err)
+ }
+ if !bytes.Equal(val, []byte(data.createVal)) {
+ return fmt.Errorf("Unexpected value for key %q: %q", data.key, string(val))
+ }
+ if err := tx.Put(keyBytes, []byte(data.updateVal)); err != nil {
+ return fmt.Errorf("can't put {%q: %v}: %v", data.key, data.updateVal, err)
+ }
+ return nil
+}
+
+func verifyCommitLog(t *testing.T, st store.Store, seq uint64, wantNumEntries int, wantTimestamp time.Time) {
+ ler := newLogEntryReader(st, seq)
+ numEntries := 0
+ for ler.Advance() {
+ _, entry := ler.GetEntry()
+ numEntries++
+ if entry.CommitTimestamp != wantTimestamp.UnixNano() {
+ t.Errorf("Unexpected timestamp found for entry: got %v, want %v", entry.CommitTimestamp, wantTimestamp.UnixNano())
+ }
+ }
+ if numEntries != wantNumEntries {
+ t.Errorf("Unexpected number of log entries: got %v, want %v", numEntries, wantNumEntries)
+ }
+}
+
+func TestLogEntryTimestamps(t *testing.T) {
+ ist, destroy := createStore()
+ defer destroy()
+ t1 := time.Now()
+ inc := time.Duration(1) * time.Second
+ mockClock := newMockSystemClock(t1, inc)
+ var mockAdapter clock.StorageAdapter = clock.MockStorageAdapter()
+
+ vclock := clock.NewVClockWithMockServices(mockAdapter, mockClock, nil)
+ wst1, err := Wrap(ist, vclock, &Options{ManagedPrefixes: nil})
+ if err != nil {
+ t.Fatalf("Wrap failed: %v", err)
+ }
+ seqForCreate := getSeq(wst1)
+
+ // Create data in store
+ if err := store.RunInTransaction(wst1, func(tx store.Transaction) error {
+ // add data1
+ if err := tx.Put([]byte(data1.key), []byte(data1.createVal)); err != nil {
+ return fmt.Errorf("can't put {%q: %v}: %v", data1.key, data1.createVal, err)
+ }
+ // add data2
+ if err := tx.Put([]byte(data2.key), []byte(data2.createVal)); err != nil {
+ return fmt.Errorf("can't put {%q: %v}: %v", data2.key, data2.createVal, err)
+ }
+ return nil
+ }); err != nil {
+ panic(fmt.Errorf("can't commit transaction: %v", err))
+ }
+
+ // Read and verify the LogEntries written as part of the above transaction.
+ // We expect 2 entries in the log for the two puts; the commit timestamp
+ // from the mock clock should be t1.
+ verifyCommitLog(t, ist, seqForCreate, 2, t1)
+
+ // Update data already present in store with a new watchable store
+ wst2, err := Wrap(ist, vclock, &Options{ManagedPrefixes: nil})
+ if err != nil {
+ t.Fatalf("Wrap failed: %v", err)
+ }
+ seqForUpdate := getSeq(wst2)
+ // We expect the sequence number to have moved by +2 for the two puts.
+ if seqForUpdate != (seqForCreate + 2) {
+ t.Errorf("unexpected sequence number for update. seq for create: %d, seq for update: %d", seqForCreate, seqForUpdate)
+ }
+
+ if err := store.RunInTransaction(wst2, func(tx store.Transaction) error {
+ if err := checkAndUpdate(tx, data1); err != nil {
+ return err
+ }
+ if err := checkAndUpdate(tx, data2); err != nil {
+ return err
+ }
+ return nil
+ }); err != nil {
+ panic(fmt.Errorf("can't commit transaction: %v", err))
+ }
+
+ // Read and verify the LogEntries written as part of the above transaction.
+ // We expect 4 entries in the log for the two gets and two puts; the commit
+ // timestamp from the mock clock should be t1 + 1 sec.
+ t2 := t1.Add(inc)
+ verifyCommitLog(t, ist, seqForUpdate, 4, t2)
+}
+
+func eq(t *testing.T, got, want interface{}) {
+ if !reflect.DeepEqual(got, want) {
+ debug.PrintStack()
+ t.Fatalf("got %v, want %v", got, want)
+ }
+}
+
+func TestOpLogConsistency(t *testing.T) {
+ ist, destroy := createStore()
+ defer destroy()
+ t1 := time.Now()
+ inc := time.Duration(1) * time.Second
+ mockClock := newMockSystemClock(t1, inc)
+ var mockAdapter clock.StorageAdapter = clock.MockStorageAdapter()
+
+ vclock := clock.NewVClockWithMockServices(mockAdapter, mockClock, nil)
+ wst, err := Wrap(ist, vclock, &Options{ManagedPrefixes: nil})
+ if err != nil {
+ t.Fatalf("Wrap failed: %v", err)
+ }
+
+ if err := store.RunInTransaction(wst, func(tx store.Transaction) error {
+ putKey, putVal := []byte("foo"), []byte("bar")
+ if err := tx.Put(putKey, putVal); err != nil {
+ return err
+ }
+ getKey := []byte("foo")
+ if getVal, err := tx.Get(getKey, nil); err != nil {
+ return err
+ } else {
+ eq(t, getVal, putVal)
+ }
+ start, limit := []byte("aaa"), []byte("bbb")
+ tx.Scan(start, limit)
+ delKey := []byte("foo")
+ if err := tx.Delete(delKey); err != nil {
+ return err
+ }
+ sgPrefixes := []string{"sga", "sgb"}
+ if err := AddSyncGroupOp(nil, tx, sgPrefixes, false); err != nil {
+ return err
+ }
+ snKey, snVersion := []byte("aa"), []byte("123")
+ if err := AddSyncSnapshotOp(nil, tx, snKey, snVersion); err != nil {
+ return err
+ }
+ pvKey, pvVersion := []byte("pv"), []byte("456")
+ if err := PutVersion(nil, tx, pvKey, pvVersion); err != nil {
+ return err
+ }
+ for _, buf := range [][]byte{putKey, putVal, getKey, start, limit, delKey, snKey, snVersion, pvKey, pvVersion} {
+ buf[0] = '#'
+ }
+ sgPrefixes[0] = "zebra"
+ return nil
+ }); err != nil {
+ t.Fatalf("failed to commit txn: %v", err)
+ }
+
+ // Read first (and only) batch.
+ ler := newLogEntryReader(ist, 0)
+ numEntries, wantNumEntries := 0, 7
+ sawPut := false
+ for ler.Advance() {
+ _, entry := ler.GetEntry()
+ numEntries++
+ switch op := entry.Op.(type) {
+ case OpGet:
+ eq(t, string(op.Value.Key), "foo")
+ case OpScan:
+ eq(t, string(op.Value.Start), "aaa")
+ eq(t, string(op.Value.Limit), "bbb")
+ case OpPut:
+ if !sawPut {
+ eq(t, string(op.Value.Key), "foo")
+ sawPut = true
+ } else {
+ eq(t, string(op.Value.Key), "pv")
+ eq(t, string(op.Value.Version), "456")
+ }
+ case OpDelete:
+ eq(t, string(op.Value.Key), "foo")
+ case OpSyncGroup:
+ eq(t, op.Value.Prefixes, []string{"sga", "sgb"})
+ case OpSyncSnapshot:
+ eq(t, string(op.Value.Key), "aa")
+ eq(t, string(op.Value.Version), "123")
+ default:
+ t.Fatalf("Unexpected op type in entry: %v", entry)
+ }
+ }
+ eq(t, numEntries, wantNumEntries)
+}
diff --git a/services/syncbase/server/watchable/types.vdl b/services/syncbase/server/watchable/types.vdl
new file mode 100644
index 0000000..3f5181b
--- /dev/null
+++ b/services/syncbase/server/watchable/types.vdl
@@ -0,0 +1,77 @@
+// Copyright 2015 The Vanadium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package watchable
+
+// GetOp represents a store get operation.
+type GetOp struct {
+ Key []byte
+}
+
+// ScanOp represents a store scan operation.
+type ScanOp struct {
+ Start []byte
+ Limit []byte
+}
+
+// PutOp represents a store put operation. The new version is written instead
+// of the value to avoid duplicating the user data in the store. The version
+// is used to access the user data of that specific mutation.
+type PutOp struct {
+ Key []byte
+ Version []byte
+}
+
+// DeleteOp represents a store delete operation.
+type DeleteOp struct {
+ Key []byte
+}
+
+// SyncGroupOp represents a change in SyncGroup tracking, adding or removing
+// key prefixes to sync. SyncGroup prefixes cannot be changed; this is used
+// to track changes due to SyncGroup create/join/leave/destroy.
+type SyncGroupOp struct {
+ Prefixes []string
+ Remove bool
+}
+
+// SyncSnapshotOp represents a snapshot operation when creating and joining a
+// SyncGroup. The sync watcher needs to get a snapshot of the Database at the
+// point of creating/joining a SyncGroup. A SyncSnapshotOp entry is written to
+// the log for each Database key that falls within the SyncGroup prefixes. This
+// allows sync to initialize its metadata at the correct versions of the objects
+// when they become syncable. These log entries should be filtered by the
+// client-facing Watch interface because the user data did not actually change.
+type SyncSnapshotOp struct {
+ Key []byte
+ Version []byte
+}
+
+// Op represents a store operation.
+type Op union {
+ Get GetOp
+ Scan ScanOp
+ Put PutOp
+ Delete DeleteOp
+ SyncGroup SyncGroupOp
+ SyncSnapshot SyncSnapshotOp
+}
+
+// LogEntry represents a single store operation. This operation may have been
+// part of a transaction, as signified by the Continued boolean. Read-only
+// operations (and read-only transactions) are not logged.
+type LogEntry struct {
+ // The store operation that was performed.
+ Op Op
+
+ // Time when the operation was committed.
+ CommitTimestamp int64
+
+ // Operation came from sync (used for echo suppression).
+ FromSync bool
+
+ // If true, this entry is followed by more entries that belong to the same
+ // commit as this entry.
+ Continued bool
+}
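
A minimal sketch (not part of this change) of assembling one of these log records in Go and VOM-encoding it, mirroring what the transaction commit path does; the key, the version bytes, and the example package are hypothetical.

package example

import (
	"time"

	"v.io/syncbase/x/ref/services/syncbase/server/watchable"
	"v.io/v23/vom"
)

func encodePutEntry() ([]byte, error) {
	entry := &watchable.LogEntry{
		Op:              watchable.OpPut{Value: watchable.PutOp{Key: []byte("foo"), Version: []byte("1a2b")}},
		CommitTimestamp: time.Now().UnixNano(),
		FromSync:        false,
		Continued:       false, // last (here: only) entry of the commit
	}
	return vom.Encode(entry)
}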
diff --git a/services/syncbase/server/watchable/types.vdl.go b/services/syncbase/server/watchable/types.vdl.go
new file mode 100644
index 0000000..5fd2e04
--- /dev/null
+++ b/services/syncbase/server/watchable/types.vdl.go
@@ -0,0 +1,189 @@
+// Copyright 2015 The Vanadium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// This file was auto-generated by the vanadium vdl tool.
+// Source: types.vdl
+
+package watchable
+
+import (
+ // VDL system imports
+ "v.io/v23/vdl"
+)
+
+// GetOp represents a store get operation.
+type GetOp struct {
+ Key []byte
+}
+
+func (GetOp) __VDLReflect(struct {
+ Name string `vdl:"v.io/syncbase/x/ref/services/syncbase/server/watchable.GetOp"`
+}) {
+}
+
+// ScanOp represents a store scan operation.
+type ScanOp struct {
+ Start []byte
+ Limit []byte
+}
+
+func (ScanOp) __VDLReflect(struct {
+ Name string `vdl:"v.io/syncbase/x/ref/services/syncbase/server/watchable.ScanOp"`
+}) {
+}
+
+// PutOp represents a store put operation. The new version is written instead
+// of the value to avoid duplicating the user data in the store. The version
+// is used to access the user data of that specific mutation.
+type PutOp struct {
+ Key []byte
+ Version []byte
+}
+
+func (PutOp) __VDLReflect(struct {
+ Name string `vdl:"v.io/syncbase/x/ref/services/syncbase/server/watchable.PutOp"`
+}) {
+}
+
+// DeleteOp represents a store delete operation.
+type DeleteOp struct {
+ Key []byte
+}
+
+func (DeleteOp) __VDLReflect(struct {
+ Name string `vdl:"v.io/syncbase/x/ref/services/syncbase/server/watchable.DeleteOp"`
+}) {
+}
+
+// SyncGroupOp represents a change in SyncGroup tracking, adding or removing
+// key prefixes to sync. SyncGroup prefixes cannot be changed; this is used
+// to track changes due to SyncGroup create/join/leave/destroy.
+type SyncGroupOp struct {
+ Prefixes []string
+ Remove bool
+}
+
+func (SyncGroupOp) __VDLReflect(struct {
+ Name string `vdl:"v.io/syncbase/x/ref/services/syncbase/server/watchable.SyncGroupOp"`
+}) {
+}
+
+// SyncSnapshotOp represents a snapshot operation when creating and joining a
+// SyncGroup. The sync watcher needs to get a snapshot of the Database at the
+// point of creating/joining a SyncGroup. A SyncSnapshotOp entry is written to
+// the log for each Database key that falls within the SyncGroup prefixes. This
+// allows sync to initialize its metadata at the correct versions of the objects
+// when they become syncable. These log entries should be filtered by the
+// client-facing Watch interface because the user data did not actually change.
+type SyncSnapshotOp struct {
+ Key []byte
+ Version []byte
+}
+
+func (SyncSnapshotOp) __VDLReflect(struct {
+ Name string `vdl:"v.io/syncbase/x/ref/services/syncbase/server/watchable.SyncSnapshotOp"`
+}) {
+}
+
+type (
+ // Op represents any single field of the Op union type.
+ //
+ // Op represents a store operation.
+ Op interface {
+ // Index returns the field index.
+ Index() int
+ // Interface returns the field value as an interface.
+ Interface() interface{}
+ // Name returns the field name.
+ Name() string
+ // __VDLReflect describes the Op union type.
+ __VDLReflect(__OpReflect)
+ }
+ // OpGet represents field Get of the Op union type.
+ OpGet struct{ Value GetOp }
+ // OpScan represents field Scan of the Op union type.
+ OpScan struct{ Value ScanOp }
+ // OpPut represents field Put of the Op union type.
+ OpPut struct{ Value PutOp }
+ // OpDelete represents field Delete of the Op union type.
+ OpDelete struct{ Value DeleteOp }
+ // OpSyncGroup represents field SyncGroup of the Op union type.
+ OpSyncGroup struct{ Value SyncGroupOp }
+ // OpSyncSnapshot represents field SyncSnapshot of the Op union type.
+ OpSyncSnapshot struct{ Value SyncSnapshotOp }
+ // __OpReflect describes the Op union type.
+ __OpReflect struct {
+ Name string `vdl:"v.io/syncbase/x/ref/services/syncbase/server/watchable.Op"`
+ Type Op
+ Union struct {
+ Get OpGet
+ Scan OpScan
+ Put OpPut
+ Delete OpDelete
+ SyncGroup OpSyncGroup
+ SyncSnapshot OpSyncSnapshot
+ }
+ }
+)
+
+func (x OpGet) Index() int { return 0 }
+func (x OpGet) Interface() interface{} { return x.Value }
+func (x OpGet) Name() string { return "Get" }
+func (x OpGet) __VDLReflect(__OpReflect) {}
+
+func (x OpScan) Index() int { return 1 }
+func (x OpScan) Interface() interface{} { return x.Value }
+func (x OpScan) Name() string { return "Scan" }
+func (x OpScan) __VDLReflect(__OpReflect) {}
+
+func (x OpPut) Index() int { return 2 }
+func (x OpPut) Interface() interface{} { return x.Value }
+func (x OpPut) Name() string { return "Put" }
+func (x OpPut) __VDLReflect(__OpReflect) {}
+
+func (x OpDelete) Index() int { return 3 }
+func (x OpDelete) Interface() interface{} { return x.Value }
+func (x OpDelete) Name() string { return "Delete" }
+func (x OpDelete) __VDLReflect(__OpReflect) {}
+
+func (x OpSyncGroup) Index() int { return 4 }
+func (x OpSyncGroup) Interface() interface{} { return x.Value }
+func (x OpSyncGroup) Name() string { return "SyncGroup" }
+func (x OpSyncGroup) __VDLReflect(__OpReflect) {}
+
+func (x OpSyncSnapshot) Index() int { return 5 }
+func (x OpSyncSnapshot) Interface() interface{} { return x.Value }
+func (x OpSyncSnapshot) Name() string { return "SyncSnapshot" }
+func (x OpSyncSnapshot) __VDLReflect(__OpReflect) {}
+
+// LogEntry represents a single store operation. This operation may have been
+// part of a transaction, as signified by the Continued boolean. Read-only
+// operations (and read-only transactions) are not logged.
+type LogEntry struct {
+ // The store operation that was performed.
+ Op Op
+ // Time when the operation was committed.
+ CommitTimestamp int64
+ // Operation came from sync (used for echo suppression).
+ FromSync bool
+ // If true, this entry is followed by more entries that belong to the same
+ // commit as this entry.
+ Continued bool
+}
+
+func (LogEntry) __VDLReflect(struct {
+ Name string `vdl:"v.io/syncbase/x/ref/services/syncbase/server/watchable.LogEntry"`
+}) {
+}
+
+func init() {
+ vdl.Register((*GetOp)(nil))
+ vdl.Register((*ScanOp)(nil))
+ vdl.Register((*PutOp)(nil))
+ vdl.Register((*DeleteOp)(nil))
+ vdl.Register((*SyncGroupOp)(nil))
+ vdl.Register((*SyncSnapshotOp)(nil))
+ vdl.Register((*Op)(nil))
+ vdl.Register((*LogEntry)(nil))
+}
diff --git a/services/syncbase/server/watchable/util.go b/services/syncbase/server/watchable/util.go
new file mode 100644
index 0000000..8eb606e
--- /dev/null
+++ b/services/syncbase/server/watchable/util.go
@@ -0,0 +1,93 @@
+// Copyright 2015 The Vanadium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package watchable
+
+// TODO(sadovsky): Avoid copying back and forth between []byte's and strings.
+// We should probably convert incoming strings to []byte's as early as possible,
+// and deal exclusively in []byte's internally.
+// TODO(rdaoud): I propose we standardize on key and version being strings and
+// the value being []byte within Syncbase. We define invalid characters in the
+// key space (and reserve "$" and ":"). The lower storage engine layers are
+// free to map that to what they need internally ([]byte or string).
+
+import (
+ "fmt"
+ "math/rand"
+ "sync"
+ "time"
+
+ "v.io/syncbase/x/ref/services/syncbase/server/util"
+ "v.io/syncbase/x/ref/services/syncbase/store"
+ "v.io/v23/verror"
+)
+
+var (
+ rng *rand.Rand = rand.New(rand.NewSource(time.Now().UTC().UnixNano()))
+ rngLock sync.Mutex
+)
+
+// NewVersion returns a new version for a store entry mutation.
+func NewVersion() []byte {
+ // TODO(rdaoud): revisit the number of bits: should we use 128 bits?
+ // Note: the version has to be unique per object key, not on its own.
+ // TODO(rdaoud): move sync's rand64() to a general Syncbase spot and
+ // reuse it here.
+ rngLock.Lock()
+ num := rng.Int63()
+ rngLock.Unlock()
+
+ return []byte(fmt.Sprintf("%x", num))
+}
+
+func makeVersionKey(key []byte) []byte {
+ return []byte(join(util.VersionPrefix, string(key)))
+}
+
+func makeAtVersionKey(key, version []byte) []byte {
+ return []byte(join(string(key), string(version)))
+}
+
+func getVersion(sntx store.SnapshotOrTransaction, key []byte) ([]byte, error) {
+ return sntx.Get(makeVersionKey(key), nil)
+}
+
+func getAtVersion(st store.StoreReader, key, valbuf, version []byte) ([]byte, error) {
+ return st.Get(makeAtVersionKey(key, version), valbuf)
+}
+
+func getVersioned(sntx store.SnapshotOrTransaction, key, valbuf []byte) ([]byte, error) {
+ version, err := getVersion(sntx, key)
+ if err != nil {
+ return valbuf, err
+ }
+ return getAtVersion(sntx, key, valbuf, version)
+}
+
+func putVersioned(tx store.Transaction, key, value []byte) ([]byte, error) {
+ version := NewVersion()
+ if err := tx.Put(makeVersionKey(key), version); err != nil {
+ return nil, err
+ }
+ if err := tx.Put(makeAtVersionKey(key, version), value); err != nil {
+ return nil, err
+ }
+ return version, nil
+}
+
+func deleteVersioned(tx store.Transaction, key []byte) error {
+ return tx.Delete(makeVersionKey(key))
+}
+
+func join(parts ...string) string {
+ return util.JoinKeyParts(parts...)
+}
+
+func split(key string) []string {
+ return util.SplitKeyParts(key)
+}
+
+func convertError(err error) error {
+ return verror.Convert(verror.IDAction{}, nil, err)
+}
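
A minimal sketch (not part of this change), written as if it were inside this package, of the two records a single versioned put creates for a managed key: a version pointer at makeVersionKey(key) and the payload at makeAtVersionKey(key, version). The key and value are hypothetical.

// examplePutVersioned writes the version pointer and the payload record for
// one versioned put of a managed key.
func examplePutVersioned(tx store.Transaction) error {
	_, err := putVersioned(tx, []byte("foo"), []byte("bar"))
	return err
}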
diff --git a/services/syncbase/server/watchable/util_test.go b/services/syncbase/server/watchable/util_test.go
new file mode 100644
index 0000000..193c06d
--- /dev/null
+++ b/services/syncbase/server/watchable/util_test.go
@@ -0,0 +1,33 @@
+// Copyright 2015 The Vanadium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package watchable
+
+import (
+ "testing"
+
+ "v.io/syncbase/x/ref/services/syncbase/clock"
+)
+
+// TestGetNextLogSeq tests that the getNextLogSeq helper works on range 0..10.
+func TestGetNextLogSeq(t *testing.T) {
+ st, destroy := createStore()
+ defer destroy()
+ var mockAdapter clock.StorageAdapter = clock.MockStorageAdapter()
+ vclock := clock.NewVClockWithMockServices(mockAdapter, nil, nil)
+ st, err := Wrap(st, vclock, &Options{})
+ if err != nil {
+ t.Fatal(err)
+ }
+ for i := uint64(0); i <= uint64(10); i++ {
+ seq, err := getNextLogSeq(st)
+ if err != nil {
+ t.Fatalf("failed to get log seq: %v", err)
+ }
+ if got, want := seq, i; got != want {
+ t.Fatalf("unexpected log seq: got %v, want %v", got, want)
+ }
+ st.Put([]byte(logEntryKey(i)), nil)
+ }
+}
diff --git a/services/syncbase/server/watchable/watcher.go b/services/syncbase/server/watchable/watcher.go
new file mode 100644
index 0000000..fc0481a
--- /dev/null
+++ b/services/syncbase/server/watchable/watcher.go
@@ -0,0 +1,212 @@
+// Copyright 2015 The Vanadium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package watchable
+
+import (
+ "fmt"
+ "strconv"
+ "sync"
+
+ "v.io/syncbase/x/ref/services/syncbase/server/util"
+ "v.io/syncbase/x/ref/services/syncbase/store"
+ "v.io/v23/services/watch"
+ "v.io/v23/verror"
+ "v.io/v23/vom"
+ "v.io/x/lib/vlog"
+)
+
+// watcher maintains a state and a condition variable. The watcher sends
+// a broadcast signal every time the state changes. The state is increased
+// by 1 every time the store has new data. Initially the state equals 1.
+// If the state becomes 0, the watcher has been closed and the state will
+// never change again.
+// TODO(rogulenko): Broadcast a signal from time to time to unblock waiting
+// clients.
+type watcher struct {
+ mu *sync.RWMutex
+ cond *sync.Cond
+ state uint64
+}
+
+func newWatcher() *watcher {
+ mu := &sync.RWMutex{}
+ return &watcher{
+ mu: mu,
+ cond: sync.NewCond(mu.RLocker()),
+ state: 1,
+ }
+}
+
+// close closes the watcher.
+func (w *watcher) close() {
+ w.mu.Lock()
+ w.state = 0
+ w.cond.Broadcast()
+ w.mu.Unlock()
+}
+
+// broadcastUpdates broadcasts the update notification to watch clients.
+func (w *watcher) broadcastUpdates() {
+ w.mu.Lock()
+ if w.state != 0 {
+ w.state++
+ w.cond.Broadcast()
+ } else {
+ vlog.Error("broadcastUpdates() called on a closed watcher")
+ }
+ w.mu.Unlock()
+}
+
+// WatchUpdates returns a function that can be used to watch for changes to
+// the database. The store maintains a state (initially 1) that is increased
+// by 1 every time the store has new data. The waitForChange function takes
+// the last returned state and blocks until the state changes, returning the
+// new state. A state equal to 0 means the store is closed and no further
+// updates will come. If the waitForChange function is passed a state different
+// from the current state of the store, or the store is closed, it returns
+// immediately. It may also return a non-zero state equal to the state passed
+// as the argument; this behavior helps to unblock clients if the store doesn't
+// have updates for a long period of time.
+func WatchUpdates(st store.Store) (waitForChange func(state uint64) uint64) {
+ // TODO(rogulenko): Remove dynamic type assertion here and in other places.
+ watcher := st.(*wstore).watcher
+ return func(state uint64) uint64 {
+ watcher.cond.L.Lock()
+ defer watcher.cond.L.Unlock()
+ if watcher.state != 0 && watcher.state == state {
+ watcher.cond.Wait()
+ }
+ return watcher.state
+ }
+}
+
+// GetResumeMarker returns the ResumeMarker that points to the current end
+// of the event log.
+func GetResumeMarker(st store.StoreReader) (watch.ResumeMarker, error) {
+ seq, err := getNextLogSeq(st)
+ return watch.ResumeMarker(logEntryKey(seq)), err
+}
+
+// MakeResumeMarker converts a sequence number to the resume marker.
+func MakeResumeMarker(seq uint64) watch.ResumeMarker {
+ return watch.ResumeMarker(logEntryKey(seq))
+}
+
+func logEntryKey(seq uint64) string {
+ // Note: MaxUint64 is 0xffffffffffffffff.
+ // TODO(sadovsky): Use a more space-efficient lexicographic number encoding.
+ return join(util.LogPrefix, fmt.Sprintf("%016x", seq))
+}
+
+// ReadBatchFromLog returns a batch of watch log records (a transaction) from
+// the given database and the new resume marker at the end of the batch.
+func ReadBatchFromLog(st store.Store, resumeMarker watch.ResumeMarker) ([]*LogEntry, watch.ResumeMarker, error) {
+ seq, err := parseResumeMarker(string(resumeMarker))
+ if err != nil {
+ return nil, resumeMarker, err
+ }
+ _, scanLimit := util.ScanPrefixArgs(util.LogPrefix, "")
+ scanStart := resumeMarker
+ endOfBatch := false
+
+ // Use the store directly to scan these read-only log entries; there is no
+ // need to create a snapshot since they are never overwritten. Read and
+ // buffer a batch before processing it.
+ var logs []*LogEntry
+ stream := st.Scan(scanStart, scanLimit)
+ for stream.Advance() {
+ seq++
+ var logEnt LogEntry
+ if err := vom.Decode(stream.Value(nil), &logEnt); err != nil {
+ return nil, resumeMarker, err
+ }
+
+ logs = append(logs, &logEnt)
+
+ // Stop if this is the end of the batch.
+ if !logEnt.Continued {
+ endOfBatch = true
+ break
+ }
+ }
+
+ if err = stream.Err(); err != nil {
+ return nil, resumeMarker, err
+ }
+ if !endOfBatch {
+ if len(logs) > 0 {
+ vlog.Fatalf("end of batch not found after %d entries", len(logs))
+ }
+ return nil, resumeMarker, nil
+ }
+ return logs, watch.ResumeMarker(logEntryKey(seq)), nil
+}
+
+func parseResumeMarker(resumeMarker string) (uint64, error) {
+ parts := split(resumeMarker)
+ if len(parts) != 2 {
+ return 0, verror.New(watch.ErrUnknownResumeMarker, nil, resumeMarker)
+ }
+ seq, err := strconv.ParseUint(parts[1], 16, 64)
+ if err != nil {
+ return 0, verror.New(watch.ErrUnknownResumeMarker, nil, resumeMarker)
+ }
+ return seq, nil
+}
+
+// logEntryExists returns true iff the log contains an entry with the given
+// sequence number.
+func logEntryExists(st store.StoreReader, seq uint64) (bool, error) {
+ _, err := st.Get([]byte(logEntryKey(seq)), nil)
+ if err != nil && verror.ErrorID(err) != store.ErrUnknownKey.ID {
+ return false, err
+ }
+ return err == nil, nil
+}
+
+// getNextLogSeq returns the next sequence number to be used for a new commit.
+// NOTE: this function assumes that all sequence numbers in the log represent
+// some range [start, limit] without gaps.
+func getNextLogSeq(st store.StoreReader) (uint64, error) {
+ // Determine initial value for seq.
+ // TODO(sadovsky): Consider using a bigger seq.
+
+ // Find the beginning of the log.
+ it := st.Scan(util.ScanPrefixArgs(util.LogPrefix, ""))
+ if !it.Advance() {
+ return 0, nil
+ }
+ if it.Err() != nil {
+ return 0, it.Err()
+ }
+ seq, err := parseResumeMarker(string(it.Key(nil)))
+ if err != nil {
+ return 0, err
+ }
+ var step uint64 = 1
+ // Suppose the actual value we are looking for is S. First, we estimate the
+ // range for S. We find seq, step: seq < S <= seq + step.
+ for {
+ if ok, err := logEntryExists(st, seq+step); err != nil {
+ return 0, err
+ } else if !ok {
+ break
+ }
+ seq += step
+ step *= 2
+ }
+ // Next we keep the seq < S <= seq + step invariant, reducing step to 1.
+ for step > 1 {
+ step /= 2
+ if ok, err := logEntryExists(st, seq+step); err != nil {
+ return 0, err
+ } else if ok {
+ seq += step
+ }
+ }
+ // Now seq < S <= seq + 1, thus S = seq + 1.
+ return seq + 1, nil
+}
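
A minimal sketch (not part of this change) of a watch loop combining WatchUpdates with ReadBatchFromLog; st is assumed to be a store returned by Wrap, the example package is hypothetical, and cancellation and error handling are simplified.

package example

import (
	"v.io/syncbase/x/ref/services/syncbase/server/watchable"
	"v.io/syncbase/x/ref/services/syncbase/store"
)

func watchLoop(st store.Store) error {
	waitForChange := watchable.WatchUpdates(st)
	resmark, err := watchable.GetResumeMarker(st)
	if err != nil {
		return err
	}
	state := uint64(1)
	for state != 0 {
		logs, newResmark, err := watchable.ReadBatchFromLog(st, resmark)
		if err != nil {
			return err
		}
		if logs == nil {
			// No complete batch yet; block until the store reports new data
			// (or returns 0, meaning it was closed).
			state = waitForChange(state)
			continue
		}
		for _, entry := range logs {
			_ = entry // process one entry of the committed batch
		}
		resmark = newResmark
	}
	return nil
}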
diff --git a/services/syncbase/server/watchable/watcher_test.go b/services/syncbase/server/watchable/watcher_test.go
new file mode 100644
index 0000000..c978123
--- /dev/null
+++ b/services/syncbase/server/watchable/watcher_test.go
@@ -0,0 +1,93 @@
+// Copyright 2015 The Vanadium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package watchable
+
+import (
+ "bytes"
+ "fmt"
+ "testing"
+
+ "v.io/syncbase/x/ref/services/syncbase/server/util"
+ "v.io/syncbase/x/ref/services/syncbase/store"
+)
+
+// TestWatchLogBatch tests fetching a batch of log records.
+func TestWatchLogBatch(t *testing.T) {
+ runTest(t, []string{util.RowPrefix, util.PermsPrefix}, runWatchLogBatchTest)
+}
+
+// runWatchLogBatchTest tests fetching a batch of log records.
+func runWatchLogBatchTest(t *testing.T, st store.Store) {
+ // Create a set of batches to fill the log queue.
+ numTx, numPut := 3, 4
+
+ makeKeyVal := func(batchNum, recNum int) ([]byte, []byte) {
+ key := util.JoinKeyParts(util.RowPrefix, fmt.Sprintf("foo-%d-%d", batchNum, recNum))
+ val := fmt.Sprintf("val-%d-%d", batchNum, recNum)
+ return []byte(key), []byte(val)
+ }
+
+ for i := 0; i < numTx; i++ {
+ tx := st.NewTransaction()
+ for j := 0; j < numPut; j++ {
+ key, val := makeKeyVal(i, j)
+ if err := tx.Put(key, val); err != nil {
+ t.Errorf("cannot put %s (%s): %v", key, val, err)
+ }
+ }
+ tx.Commit()
+ }
+
+ // Fetch the batches and a few more empty fetches and verify them.
+ resmark := MakeResumeMarker(0)
+ var seq uint64
+
+ for i := 0; i < (numTx + 3); i++ {
+ logs, newResmark, err := ReadBatchFromLog(st, resmark)
+ if err != nil {
+ t.Fatalf("can't get watch log batch: %v", err)
+ }
+ if i < numTx {
+ if len(logs) != numPut {
+ t.Errorf("log fetch (i=%d) wrong log seq: %d instead of %d",
+ i, len(logs), numPut)
+ }
+
+ seq += uint64(len(logs))
+ expResmark := MakeResumeMarker(seq)
+ if !bytes.Equal(newResmark, expResmark) {
+ t.Errorf("log fetch (i=%d) wrong resmark: %s instead of %s",
+ i, newResmark, expResmark)
+ }
+
+ for j, log := range logs {
+ op := log.Op.(OpPut)
+ expKey, expVal := makeKeyVal(i, j)
+ key := op.Value.Key
+ if !bytes.Equal(key, expKey) {
+ t.Errorf("log fetch (i=%d, j=%d) bad key: %s instead of %s",
+ i, j, key, expKey)
+ }
+ tx := st.NewTransaction()
+ var val []byte
+ val, err := GetAtVersion(nil, tx, key, val, op.Value.Version)
+ if err != nil {
+ t.Errorf("log fetch (i=%d, j=%d) cannot GetAtVersion(): %v", i, j, err)
+ }
+ if !bytes.Equal(val, expVal) {
+ t.Errorf("log fetch (i=%d, j=%d) bad value: %s instead of %s",
+ i, j, val, expVal)
+ }
+ tx.Abort()
+ }
+ } else {
+ if logs != nil || !bytes.Equal(newResmark, resmark) {
+ t.Errorf("NOP log fetch (i=%d) had changes: %d logs, resmask %s",
+ i, len(logs), newResmark)
+ }
+ }
+ resmark = newResmark
+ }
+}
diff --git a/services/syncbase/signing/hashcache/hashcache.go b/services/syncbase/signing/hashcache/hashcache.go
new file mode 100644
index 0000000..26e8c94
--- /dev/null
+++ b/services/syncbase/signing/hashcache/hashcache.go
@@ -0,0 +1,77 @@
+// Copyright 2015 The Vanadium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// Package hashcache implements a simple cache intended to be indexed by hash
+// values. The keys are of type []byte. Values are arbitrary interface{}
+// values. Entries may expire if not used for a duration specified by the
+// client.
+package hashcache
+
+import "sync"
+import "time"
+
+// An internalValue is the client's data plus the data's expiry time.
+type internalValue struct {
+ data interface{}
+ expiry time.Time
+}
+
+// A Cache allows the user to store arbitrary values, keyed by the contents of
+// byte vectors. Entries may be added, deleted, and looked up. They may
+// expire if not used.
+type Cache struct {
+ expiry time.Duration
+ mu sync.Mutex // protects fields below.
+ entries map[string]*internalValue
+ insertionsSinceGC int // number of insertions since last GC
+}
+
+// New() returns a pointer to a new, empty Cache.
+// Entries may expire if not used for "expiry".
+func New(expiry time.Duration) *Cache {
+ return &Cache{expiry: expiry, entries: make(map[string]*internalValue)}
+}
+
+// Lookup() returns the data associated with key[] in *c, and whether there is
+// such a value. The client may not modify the returned data; it is shared
+// with *c.
+func (c *Cache) Lookup(key []byte) (data interface{}, isPresent bool) {
+ var value *internalValue
+ c.mu.Lock()
+ value, isPresent = c.entries[string(key)]
+ if isPresent {
+ value.expiry = time.Now().Add(c.expiry)
+ data = value.data
+ }
+ c.mu.Unlock()
+ return data, isPresent
+}
+
+// Add() associates data with key[] in *c. Any data previously associated with
+// key[] is forgotten. The implementation may discard the association at some
+// future time (governed by the expiry passed to New()) to limit the size of
+// the cache. data may not be modified after this call; it is shared with *c.
+func (c *Cache) Add(key []byte, data interface{}) {
+ c.mu.Lock()
+ now := time.Now()
+ c.entries[string(key)] = &internalValue{data: data, expiry: now.Add(c.expiry)}
+ c.insertionsSinceGC++
+ // Scan to expire stale entries if insertions since the last scan exceed 20% of the entries.
+ if c.insertionsSinceGC*5 > len(c.entries) {
+ for ik, iv := range c.entries {
+ if iv.expiry.Before(now) {
+ delete(c.entries, ik)
+ }
+ }
+ c.insertionsSinceGC = 0
+ }
+ c.mu.Unlock()
+}
+
+// Delete() removes any association of data with key[] in *c.
+func (c *Cache) Delete(key []byte) {
+ c.mu.Lock()
+ delete(c.entries, string(key))
+ c.mu.Unlock()
+}
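+
+// A minimal usage sketch (illustrative only; hashOfBlob and someValue are
+// placeholders, and the expiry duration is arbitrary):
+//
+//	c := hashcache.New(time.Minute)
+//	c.Add(hashOfBlob, someValue)           // associate a value with a hash key
+//	if v, ok := c.Lookup(hashOfBlob); ok { // a hit also refreshes the expiry
+//		_ = v // shared with the cache; must not be modified
+//	}
+//	c.Delete(hashOfBlob)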
diff --git a/services/syncbase/signing/hashcache/hashcache_test.go b/services/syncbase/signing/hashcache/hashcache_test.go
new file mode 100644
index 0000000..96ba865
--- /dev/null
+++ b/services/syncbase/signing/hashcache/hashcache_test.go
@@ -0,0 +1,83 @@
+// Copyright 2015 The Vanadium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// Package hashcache_test tests the hashcache package.
+package hashcache_test
+
+import "runtime"
+import "testing"
+import "time"
+
+import "v.io/syncbase/x/ref/services/syncbase/signing/hashcache"
+
+// checkHashesWithNoData() checks that hash[start:] have no data in the cache.
+// (The start index is passed, rather than expecting the caller to sub-slice,
+// so that error messages refer to the index.)
+func checkHashesWithNoData(t *testing.T, cache *hashcache.Cache, start int, hash [][]byte) {
+ _, _, callerLine, _ := runtime.Caller(1)
+ for i := start; i != len(hash); i++ {
+ value, found := cache.Lookup(hash[i])
+ if value != nil || found {
+ t.Errorf("line %d: unset cache entry hash[%d]=%v has value %v, but is expected not to be set", callerLine, i, hash[i], value)
+ }
+ }
+}
+
+func TestCache(t *testing.T) {
+ hash := [][]byte{
+ []byte{0x00, 0x01, 0x02, 0x3},
+ []byte{0x04, 0x05, 0x06, 0x7},
+ []byte{0x08, 0x09, 0x0a, 0xb}}
+ var value interface{}
+ var found bool
+ var want string
+
+ cache := hashcache.New(5 * time.Second)
+
+ // The cache should initially have none of the keys.
+ checkHashesWithNoData(t, cache, 0, hash)
+
+ // Add the first key, and check that it's there.
+ want = "hash0"
+ cache.Add(hash[0], want)
+ value, found = cache.Lookup(hash[0])
+ if s, ok := value.(string); !found || !ok || s != want {
+ t.Errorf("cache entry hash[%d]=%v got %v, want %v", 0, hash[0], s, want)
+ }
+ checkHashesWithNoData(t, cache, 1, hash)
+
+ // Add the second key, and check that both it and the first key are there.
+ want = "hash1"
+ cache.Add(hash[1], want)
+ value, found = cache.Lookup(hash[1])
+ if s, ok := value.(string); !ok || s != want {
+ t.Errorf("cache entry hash[%d]=%v got %v, want %v", 1, hash[1], s, want)
+ }
+ want = "hash0"
+ value, found = cache.Lookup(hash[0])
+ if s, ok := value.(string); !found || !ok || s != want {
+ t.Errorf("cache entry hash[%d]=%v got %v, want %v", 0, hash[0], s, want)
+ }
+ checkHashesWithNoData(t, cache, 2, hash)
+
+ // Wait for all entries to time out.
+ time.Sleep(6 * time.Second) // sleep past expiry time
+
+ // Add the first key again, enough times to trigger garbage collection.
+ for i := 0; i != 10; i++ {
+ want = "hash0 again"
+ cache.Add(hash[0], want)
+ value, found = cache.Lookup(hash[0])
+ if s, ok := value.(string); !found || !ok || s != want {
+ t.Errorf("cache entry hash[%d]=%v got %v, want %v", 0, hash[0], s, want)
+ }
+ }
+ // The entry for hash1 should have expired, since the expiry time has
+ // passed, and many things have been inserted into the cache.
+ checkHashesWithNoData(t, cache, 1, hash)
+
+ cache.Delete(hash[0])
+ checkHashesWithNoData(t, cache, 0, hash)
+}
diff --git a/services/syncbase/signing/krl/krl.go b/services/syncbase/signing/krl/krl.go
new file mode 100644
index 0000000..422f53e
--- /dev/null
+++ b/services/syncbase/signing/krl/krl.go
@@ -0,0 +1,39 @@
+// Copyright 2015 The Vanadium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// Package krl implements a trivial, in-memory key revocation list.
+// It is a placeholder for a real key revocation mechanism.
+package krl
+
+import "crypto/sha256"
+import "time"
+
+// A KRL is a key revocation list. It maps the hashes of keys that have been revoked
+// to revocation times.
+type KRL struct {
+ table map[[sha256.Size]byte]time.Time
+}
+
+var notYetRevoked = time.Now().Add(100 * 365 * 24 * time.Hour) // far future
+
+// New() returns a pointer to a new, empty key revocation list.
+func New() *KRL {
+ return &KRL{table: make(map[[sha256.Size]byte]time.Time)}
+}
+
+// Revoke() inserts an entry into *krl recording that key[] was revoked at time
+// "when".
+func (krl *KRL) Revoke(key []byte, when time.Time) {
+ krl.table[sha256.Sum256(key)] = when
+}
+
+// RevocationTime() returns the revocation time for key[].
+// If key[] is not in the list, a time in the far future is returned.
+func (krl *KRL) RevocationTime(key []byte) (whenRevoked time.Time) {
+ var found bool
+ if whenRevoked, found = krl.table[sha256.Sum256(key)]; !found {
+ whenRevoked = notYetRevoked
+ }
+ return whenRevoked
+}
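+
+// A minimal usage sketch (illustrative only; marshalledKey and otherKey are
+// placeholder byte slices):
+//
+//	list := krl.New()
+//	list.Revoke(marshalledKey, time.Now())
+//	if list.RevocationTime(otherKey).After(time.Now()) {
+//		// otherKey is not currently known to be revoked.
+//	}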
diff --git a/services/syncbase/signing/krl/krl_test.go b/services/syncbase/signing/krl/krl_test.go
new file mode 100644
index 0000000..73f48ad
--- /dev/null
+++ b/services/syncbase/signing/krl/krl_test.go
@@ -0,0 +1,54 @@
+// Copyright 2015 The Vanadium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// Package krl_test tests the key revocation list package.
+package krl_test
+
+import "runtime"
+import "testing"
+import "time"
+
+import "v.io/syncbase/x/ref/services/syncbase/signing/krl"
+
+// checkKeysNotRevoked() checks that key[start:] have not been revoked. (The
+// start index is passed, rather than expecting the caller to sub-slice, so
+// that error messages refer to the expected index.)
+func checkKeysNotRevoked(t *testing.T, krl *krl.KRL, start int, key [][]byte, now time.Time) {
+ _, _, callerLine, _ := runtime.Caller(1)
+ year := 365 * 24 * time.Hour
+ for i := start; i != len(key); i++ {
+ revoked := krl.RevocationTime(key[i])
+ if revoked.Before(now.Add(year)) {
+ t.Errorf("line %d: unrevoked key[%d]=%v has revocation time %v, which is not far enough in the future", callerLine, i, key[i], revoked)
+ }
+ }
+}
+
+func TestKRL(t *testing.T) {
+ now := time.Now()
+ key := [][]byte{
+ []byte{0x00, 0x01, 0x02, 0x3},
+ []byte{0x04, 0x05, 0x06, 0x7},
+ []byte{0x08, 0x09, 0x0a, 0xb}}
+ var revoked time.Time
+
+ krl := krl.New()
+
+ checkKeysNotRevoked(t, krl, 0, key, now)
+
+ krl.Revoke(key[0], now)
+ if revoked = krl.RevocationTime(key[0]); !revoked.Equal(now) {
+ t.Errorf("unrevoked key %v has revocation time %v, but expected %v", key[0], revoked, now)
+ }
+ checkKeysNotRevoked(t, krl, 1, key, now)
+
+ krl.Revoke(key[1], now)
+ if revoked = krl.RevocationTime(key[0]); !revoked.Equal(now) {
+ t.Errorf("unrevoked key %v has revocation time %v, but expected %v", key[0], revoked, now)
+ }
+ if revoked = krl.RevocationTime(key[1]); !revoked.Equal(now) {
+ t.Errorf("unrevoked key %v has revocation time %v, but expected %v", key[1], revoked, now)
+ }
+ checkKeysNotRevoked(t, krl, 2, key, now)
+}
diff --git a/services/syncbase/signing/signeddata.vdl b/services/syncbase/signing/signeddata.vdl
new file mode 100644
index 0000000..4b4ceb8
--- /dev/null
+++ b/services/syncbase/signing/signeddata.vdl
@@ -0,0 +1,72 @@
+// Copyright 2015 The Vanadium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package signing
+
+import "v.io/v23/security"
+
+// A DataWithSignature represents a signed, and possibly validated, collection
+// of Item structs.
+//
+// If IsValidated==false and the AuthorSigned signature is valid, it means:
+// The signer whose Blessings have hash BlessingsHash asserts Data.
+//
+// If IsValidated==true and both the AuthorSigned and ValidatorSigned signatures are valid,
+// it means both:
+// 1) The signer whose Blessings b have hash BlessingsHash asserts Data.
+// 2) If vd is the ValidatorData with hash ValidatorDataHash, the owner of
+// vd.PublicKey asserts that it checked that at least the names vd.Names[] were
+// valid in b.
+//
+// The sender obtains:
+// - BlessingsHash (and the wire form of the blessings) with ValidationCache.AddBlessings().
+// - ValidatorDataHash (and the wire form of the ValidatorData) with ValidationCache.AddValidatorData().
+//
+// The receiver looks up:
+// - BlessingsHash with ValidationCache.LookupBlessingsData()
+// - ValidatorDataHash with ValidationCache.LookupValidatorData()
+//
+// If not yet there, the receiver inserts the values into its ValidationCache with:
+// - ValidationCache.AddWireBlessings()
+// - ValidationCache.AddValidatorData()
+type DataWithSignature struct {
+ Data []Item
+ // BlessingsHash is a key for the validation cache; the corresponding
+ // cached value is a security.Blessings.
+ BlessingsHash []byte
+ // AuthorSigned is the signature of Data and BlessingsHash using the
+ // private key associated with the blessings hashed in BlessingsHash.
+ AuthorSigned security.Signature
+
+ IsValidated bool // Whether fields below are meaningful.
+
+ // ValidatorDataHash is a key for the validation cache returned by
+ // ValidatorData.Hash(); the corresponding cached value is the
+ // ValidatorData.
+ ValidatorDataHash []byte
+ ValidatorSigned security.Signature
+}
+
+// An Item represents either a marshalled data item or its SHA-256 hash.
+// The Data field is a []byte, rather than an "any", to make signatures
+// deterministic. VOM encoding is not deterministic for two reasons:
+// - map elements may be marshalled in any order
+// - different versions of VOM may marshal in different ways.
+// Thus, the initial producer of a data item marshals the data once, and it is
+// this marshalled form that is transmitted from device to device. If the
+// data were unmarshalled and then remarshalled, the signatures might not
+// match. The Hash field is used instead of the Data field when the recipient
+// of the DataWithSignature is not permitted to see certain Items' Data
+// fields.
+type Item union {
+ Data []byte // Marshalled form of data.
+ Hash []byte // Hash of what would have been in Data, as returned by SumByteVectorWithLength(Data).
+}
+
+// WireValidatorData is the wire form of ValidatorData.
+// It excludes the unmarshalled form of the public key.
+type WireValidatorData struct {
+ Names []string // Names of valid signing blessings in the Blessings referred to by BlessingsHash.
+ MarshalledPublicKey []byte // PublicKey, marshalled with MarshalBinary().
+}
diff --git a/services/syncbase/signing/signeddata.vdl.go b/services/syncbase/signing/signeddata.vdl.go
new file mode 100644
index 0000000..f96fe50
--- /dev/null
+++ b/services/syncbase/signing/signeddata.vdl.go
@@ -0,0 +1,128 @@
+// Copyright 2015 The Vanadium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// This file was auto-generated by the vanadium vdl tool.
+// Source: signeddata.vdl
+
+package signing
+
+import (
+ // VDL system imports
+ "v.io/v23/vdl"
+
+ // VDL user imports
+ "v.io/v23/security"
+)
+
+// A DataWithSignature represents a signed, and possibly validated, collection
+// of Item structs.
+//
+// If IsValidated==false and the AuthorSigned signature is valid, it means:
+// The signer whose Blessings have hash BlessingsHash asserts Data.
+//
+// If IsValidated==true and both the AuthorSigned and ValidatorSigned signatures are valid,
+// it means both:
+// 1) The signer whose Blessings b have hash BlessingsHash asserts Data.
+// 2) If vd is the ValidatorData with hash ValidatorDataHash, the owner of
+// vd.PublicKey asserts that it checked that at least the names vd.Names[] were
+// valid in b.
+//
+// The sender obtains:
+// - BlessingsHash (and the wire form of the blessings) with ValidationCache.AddBlessings().
+// - ValidatorDataHash (and the wire form of the ValidatorData) with ValidationCache.AddValidatorData().
+//
+// The receiver looks up:
+// - BlessingsHash with ValidationCache.LookupBlessingsData()
+// - ValidatorDataHash with ValidationCache.LookupValidatorData()
+//
+// If not yet there, the receiver inserts the values into its ValidationCache with:
+// - ValidationCache.AddWireBlessings()
+// - ValidationCache.AddValidatorData()
+type DataWithSignature struct {
+ Data []Item
+ // BlessingsHash is a key for the validation cache; the corresponding
+ // cached value is a security.Blessings.
+ BlessingsHash []byte
+ // AuthorSigned is the signature of Data and BlessingsHash using the
+ // private key associated with the blessings hashed in BlessingsHash.
+ AuthorSigned security.Signature
+ IsValidated bool // Whether fields below are meaningful.
+ // ValidatorDataHash is a key for the validation cache returned by
+ // ValidatorData.Hash(); the corresponding cached value is the
+ // ValidatorData.
+ ValidatorDataHash []byte
+ ValidatorSigned security.Signature
+}
+
+func (DataWithSignature) __VDLReflect(struct {
+ Name string `vdl:"v.io/syncbase/x/ref/services/syncbase/signing.DataWithSignature"`
+}) {
+}
+
+type (
+ // Item represents any single field of the Item union type.
+ //
+ // An Item represents either a marshalled data item or its SHA-256 hash.
+ // The Data field is a []byte, rather than an "any", to make signatures
+ // deterministic. VOM encoding is not deterministic for two reasons:
+ // - map elements may be marshalled in any order
+ // - different versions of VOM may marshal in different ways.
+ // Thus, the initial producer of a data item marshals the data once, and it is
+ // this marshalled form that is transmitted from device to device. If the
+ // data were unmarshalled and then remarshalled, the signatures might not
+ // match. The Hash field is used instead of the Data field when the recipient
+ // of the DataWithSignature is not permitted to see certain Items' Data
+ // fields.
+ Item interface {
+ // Index returns the field index.
+ Index() int
+ // Interface returns the field value as an interface.
+ Interface() interface{}
+ // Name returns the field name.
+ Name() string
+ // __VDLReflect describes the Item union type.
+ __VDLReflect(__ItemReflect)
+ }
+ // ItemData represents field Data of the Item union type.
+ ItemData struct{ Value []byte } // Marshalled form of data.
+ // ItemHash represents field Hash of the Item union type.
+ ItemHash struct{ Value []byte } // Hash of what would have been in Data, as returned by SumByteVectorWithLength(Data).
+ // __ItemReflect describes the Item union type.
+ __ItemReflect struct {
+ Name string `vdl:"v.io/syncbase/x/ref/services/syncbase/signing.Item"`
+ Type Item
+ Union struct {
+ Data ItemData
+ Hash ItemHash
+ }
+ }
+)
+
+func (x ItemData) Index() int { return 0 }
+func (x ItemData) Interface() interface{} { return x.Value }
+func (x ItemData) Name() string { return "Data" }
+func (x ItemData) __VDLReflect(__ItemReflect) {}
+
+func (x ItemHash) Index() int { return 1 }
+func (x ItemHash) Interface() interface{} { return x.Value }
+func (x ItemHash) Name() string { return "Hash" }
+func (x ItemHash) __VDLReflect(__ItemReflect) {}
+
+// WireValidatorData is the wire form of ValidatorData.
+// It excludes the unmarshalled form of the public key.
+type WireValidatorData struct {
+ Names []string // Names of valid signing blessings in the Blessings referred to by BlessingsHash.
+ MarshalledPublicKey []byte // PublicKey, marshalled with MarshalBinary().
+}
+
+func (WireValidatorData) __VDLReflect(struct {
+ Name string `vdl:"v.io/syncbase/x/ref/services/syncbase/signing.WireValidatorData"`
+}) {
+}
+
+func init() {
+ vdl.Register((*DataWithSignature)(nil))
+ vdl.Register((*Item)(nil))
+ vdl.Register((*WireValidatorData)(nil))
+}
diff --git a/services/syncbase/signing/signing.go b/services/syncbase/signing/signing.go
new file mode 100644
index 0000000..0115f37
--- /dev/null
+++ b/services/syncbase/signing/signing.go
@@ -0,0 +1,358 @@
+// Copyright 2015 The Vanadium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// Package signing signs syncbase updates using public key signatures, and
+// allows these signatures to be checked on other nodes.
+//
+// The functionality is geared specifically towards syncbase synchronization
+// because it is designed to allow a signature to remain valid during its
+// propagation across the syncgroup once it has been accepted by at least one
+// member of a syncgroup, even if the original key or its blessings are
+// invalidated in the meantime.
+//
+// There are three types of participant:
+// - an "author", which creates an update, and signs it with Sign().
+// - one or more "validators", each of which receives a change directly from
+// the author, and applies Check() to validate it.
+// - zero or more "checkers', each of whom receives a change from a validator
+// or another checker, and applied Check() to check it.
+//
+// A validator checks the signature and blessings provided by the author, and
+// then appends its own signature, vouching for the fact that the author's
+// signature was good at the time the validator saw it.
+//
+// A checker checks the signatures of both the author and validator but uses
+// weaker checks for signature validity than a validator. In particular, it
+// uses a significant grace period for key expiry so that a change admitted to
+// the syncgroup by a validator has an opportunity to propagate to all the
+// nodes in the syncgroup if the keys or blessings are revoked after the change
+// is admitted, but before it is fully propagated. The intent is that the
+// grace period be chosen to be greater than the diameter of the syncgroup
+// (measured in time). One way to ensure that is to insist that members sync
+// with a central server at least every T time units, and make the grace period
+// be 2T. The central server may sign the data anew to allow new members to pick
+// it up.
+//
+// The model is further complicated by performance concerns. An update written
+// to syncbase might be quite small (perhaps tens of bytes) but:
+// a) a public key signature or verification can take on the order of a
+// millisecond. (Currently, ECDSA signing might take a little under 1ms and
+// verification just over 2ms on a workstation. A checker performs two such
+// verifications.)
+// b) unmarshalling even a simple Blessings object can take milliseconds. (!)
+// c) marshalling a public key can take 10us.
+// d) a Blessings object is on the order of a kilobyte or more, which may
+// represent substantial space overhead if duplicated.
+//
+// Because of (a), we wish to batch syncbase updates, so that a single
+// signature check applies to several updates. Thus the Data in a
+// DataWithSignature is a vector of Item, rather than a single Item.
+//
+// However, we will not always wish to put all updates in the same batch. For
+// example, an author and a validator might share two different syncgroups with
+// different memberships. In such a case, the author might keep the batches
+// for one syncgroup separate from batches for the other syncgroup, even though
+// the author blessings and validator identities are the same for all the
+// batches. Thus, because of (b,c,d), it's worth decoupling the author's
+// Blessings data and the validator's key data from the signed batches
+// themselves, so that the blessings and validator data can be processed
+// once, even though several batches of updates are being sent. A
+// ValidationCache is used to hold this data separately, and allow it to be
+// sent just once, rather than once per signature.
+//
+// Lastly, imagine that the author sends a batch of 10 updates to a validator,
+// and the validator then syncs with a checker that is permitted to see only
+// half of the updates; perhaps ACLs prevent it from seeing the others. This
+// requires that the signature on the batch remain valid even if some of the
+// updates in the batch are removed. This is accomplished via the Item type,
+// which is a VDL union type that contains either the bytes of the marshalled
+// form of the update, or (if the update must not be sent) the SHA-256 hash of
+// the data (which can be computed with SumByteVectorWithLength()).
+package signing
+
+import "bytes"
+import "crypto/sha256"
+import "encoding/binary"
+import "hash"
+import "time"
+
+import "v.io/syncbase/x/ref/services/syncbase/signing/krl"
+import "v.io/v23/context"
+import "v.io/v23/security"
+import "v.io/v23/verror"
+
+const pkgPath = "v.io/syncbase/x/ref/services/syncbase/signing"
+
+// These are among the errors that may be returned by Check(); they indicate that the
+// operation should be retried when new data has been added to the
+// ValidationCache. The errors are public to make it easier for the client to
+// test for them.
+var (
+ ErrNeedAuthorBlessingsAndValidatorDataForHash = verror.Register(
+ pkgPath+".ErrNeedAuthorBlessingsAndValidatorDataForHash",
+ verror.RetryRefetch,
+ "{1:}{2:} The ValidationCache contains neither the author blessings nor the validator data{:_}")
+ ErrNeedAuthorBlessingsForHash = verror.Register(
+ pkgPath+".ErrNeedAuthorBlessingsForHash",
+ verror.RetryRefetch,
+ "{1:}{2:} The ValidationCache does not contain the author blessings{:_}")
+ ErrNeedValidatorDataForHash = verror.Register(
+ pkgPath+".ErrNeedValidatorDataForHash",
+ verror.RetryRefetch,
+ "{1:}{2:} The ValidationCache does not contain the validator data{:_}")
+)
+
+// These errors are less likely to be tested for, and so are not exported.
+var (
+ errAuthorKeyIsRevoked = verror.Register(
+ pkgPath+".errAuthorKeyIsRevoked",
+ verror.NoRetry,
+ "{1:}{2:} The author key has been revoked{:_}")
+ errBadAuthorSignature = verror.Register(
+ pkgPath+".errBadAuthorSignature",
+ verror.NoRetry,
+ "{1:}{2:} Author signature verification failed{:_}")
+ errBadValidatorSignature = verror.Register(
+ pkgPath+".errBadValidatorSignature",
+ verror.NoRetry,
+ "{1:}{2:} Validator signature verification failed{:_}")
+ errAuthorBlessingsHaveNoValidNames = verror.Register(
+ pkgPath+".errAuthorBlessingsHaveNoValidNames",
+ verror.NoRetry,
+ "{1:}{2:} Author Blessings have no valid names{:_}")
+ errMayNotValidateOwnSignature = verror.Register(
+ pkgPath+".errMayNotValidateOwnSignature",
+ verror.NoRetry,
+ "{1:}{2:} Author may not validate its own signature{:_}")
+ errSenderIsNotAuthor = verror.Register(
+ pkgPath+".errSenderIsNotAuthor",
+ verror.NoRetry,
+ "{1:}{2:} Author is not sender of RPC; will not validate{:_}")
+ errValidatesWrongNames = verror.Register(
+ pkgPath+".errValidatesWrongNames",
+ verror.NoRetry,
+ "{1:}{2:} The validated names are not a subset of the names sent by the checker{:_}")
+ errValidatorIsSigner = verror.Register(
+ pkgPath+".errValidatorIsSigner",
+ verror.NoRetry,
+ "{1:}{2:} The signature was validated by its author; treating as invalid{:_}")
+ errValidatorKeyIsRevoked = verror.Register(
+ pkgPath+".errValidatorKeyIsRevoked",
+ verror.NoRetry,
+ "{1:}{2:} The validator key is revoked{:_}")
+)
+
+// --------------------------------------------
+
+// SignData() uses authorPrincipal to sign data using blessings (which must be
+// associated with the authorPrincipal). A pointer to a newly constructed
+// DataWithSignature with IsValidated==false is returned. Ensures that the
+// blessings are stored in *cache. Typically, "authorPrincipal" is obtained from
+// v23.GetPrincipal(ctx).
+//
+// If a recipient of the result *d complains that it does not understand the
+// hash d.BlessingsHash, the signer should present it with
+// blessingsData.MarshalledBlessings, which will allow the recipient to
+// construct the Blessings. The Blessings are transmitted out of line because
+// they are large, and may be reused for multiple signatures.
+func SignData(ctx *context.T, cache *ValidationCache, authorPrincipal security.Principal,
+ blessings security.Blessings, data []Item) (d *DataWithSignature, blessingsData *BlessingsData, err error) {
+
+ d = new(DataWithSignature)
+ d.Data = data
+ d.BlessingsHash, blessingsData, err = cache.AddBlessings(ctx, blessings)
+ if err == nil {
+ d.AuthorSigned, err = authorPrincipal.Sign(d.authorSignatureHash())
+ }
+ return d, blessingsData, err
+}
+
+// hashByteVectorWithLength() calls hasher.Write() on a representation of
+// len(b), followed by the contents of b.
+func hashByteVectorWithLength(hasher hash.Hash, b []byte) {
+ var length [8]byte
+ binary.LittleEndian.PutUint64(length[:], uint64(len(b)))
+ hasher.Write(length[:])
+ hasher.Write(b)
+}
+
+// SumByteVectorWithLength() returns a SHA-256 hash of
+// len(b), followed by the contents of b.
+func SumByteVectorWithLength(b []byte) []byte {
+ hasher := sha256.New()
+ var length [8]byte
+ binary.LittleEndian.PutUint64(length[:], uint64(len(b)))
+ hasher.Write(length[:])
+ hasher.Write(b)
+ return hasher.Sum(nil)[:]
+}
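+
+// For example, SumByteVectorWithLength([]byte("hi")) is equivalent to the
+// following sketch (standard library only):
+//
+//	h := sha256.New()
+//	h.Write([]byte{2, 0, 0, 0, 0, 0, 0, 0}) // little-endian uint64 length of "hi"
+//	h.Write([]byte("hi"))
+//	sum := h.Sum(nil)
+//
+// The length prefix removes ambiguity when several byte vectors are hashed in
+// sequence, as authorSignatureHash() and validatorSignatureHash() below do.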
+
+// authorSignatureHash() returns the hash that the author should sign.
+func (d *DataWithSignature) authorSignatureHash() []byte {
+ hasher := sha256.New()
+ var length [8]byte
+ binary.LittleEndian.PutUint64(length[:], uint64(len(d.Data)))
+ hasher.Write(length[:])
+ for i := range d.Data {
+ if data, gotData := d.Data[i].(ItemData); gotData {
+ hasher.Write(SumByteVectorWithLength(data.Value))
+ } else if hash, gotHash := d.Data[i].(ItemHash); gotHash {
+ hasher.Write(hash.Value)
+ } else {
+ // d.Data[i] is neither a Data nor a Hash. This shouldn't
+ // happen unless the marshalled data is somehow
+ // corrupted. The signature will not match unless the
+ // original author of the data saw the same corruption.
+ hasher.Write([]byte("no data"))
+ }
+ }
+ hashByteVectorWithLength(hasher, d.BlessingsHash)
+ return hasher.Sum(nil)[:]
+}
+
+// validatorSignatureHash() returns the hash that the validator should sign,
+// given the hash that the author signed.
+func (d *DataWithSignature) validatorSignatureHash(authorSignatureHash []byte) []byte {
+ var buffer [32]byte
+ var buf []byte = buffer[:]
+ if len(d.AuthorSigned.Hash) > len(buf) {
+ buf = make([]byte, len(d.AuthorSigned.Hash))
+ }
+ hasher := sha256.New()
+ hashByteVectorWithLength(hasher, authorSignatureHash)
+ hashByteVectorWithLength(hasher, d.AuthorSigned.Purpose)
+ hashByteVectorWithLength(hasher, buf[:copy(buf, d.AuthorSigned.Hash)])
+ hashByteVectorWithLength(hasher, d.AuthorSigned.R)
+ hashByteVectorWithLength(hasher, d.AuthorSigned.S)
+ hashByteVectorWithLength(hasher, d.ValidatorDataHash)
+ return hasher.Sum(nil)[:]
+}
+
+// Check() verifies the signature(s) on *d:
+//
+// If d.IsValidated==false, checks that:
+// 1. the author's signature is available in *cache.
+// 2. the author's signature over its blessings and the data is
+// cryptographically valid.
+// 3. security.SigningBlessingNames() yields a non-empty list of names when
+// applied to the author's blessings.
+// 4. the author's public key is not known to be revoked.
+// 5. the local principal's public key (call.LocalPrincipal().PublicKey()) is not known
+// to be revoked.
+// 6. the author's public key is the public key of the RPC caller.
+// 7. the author's public key and the local public key differ.
+// If checks pass and there are no other errors:
+// - records the list of names found in check (3) in the ValidatorData
+// - adds a validation signature using the local public key (which is now the
+// validator)
+// - sets d.IsValidated
+// - returns the list of names found in check (3), and a nil error.
+// Otherwise returns a nil list of names and a non-nil error.
+//
+// If d.IsValidated==true, checks that:
+// 1. the author's signature and the validator data are available in *cache.
+// 2. the author's signature over its blessings and the data is
+// cryptographically valid.
+// 8. the list of names stored in the ValidatorData by the validator is
+// non-empty.
+// 9. the author's public key and the validator's public key differ.
+// 10. the list of names stored in the ValidatorData by the validator is a
+// subset of the list of names that the author's blessings could have
+// represented.
+// 11. the author's public key is not known to be revoked more than
+// gracePeriod ago.
+// 12. the validator's public key is not known to be revoked more than
+// gracePeriod ago.
+// 13. the validator's signature is cryptographically valid.
+// If checks pass and there are no other errors:
+// - returns the list of names in the validator's data, and a nil error.
+// Otherwise returns a nil list of names and a non-nil error.
+func (d *DataWithSignature) Check(ctx *context.T, cache *ValidationCache, call security.Call,
+ krl *krl.KRL, gracePeriod time.Duration) (names []string, err error) {
+
+ // Verify that we have the Blessings and ValidatorData.
+ var authorBlessingsData *BlessingsData = cache.LookupBlessingsData(ctx, d.BlessingsHash)
+ var validatorData *ValidatorData
+ if d.IsValidated {
+ validatorData = cache.LookupValidatorData(ctx, d.ValidatorDataHash)
+ }
+ if authorBlessingsData == nil || (validatorData == nil && d.IsValidated) { // Check (1).
+ if authorBlessingsData == nil && (validatorData == nil && d.IsValidated) {
+ err = verror.New(ErrNeedAuthorBlessingsAndValidatorDataForHash, ctx)
+ } else if authorBlessingsData == nil {
+ err = verror.New(ErrNeedAuthorBlessingsForHash, ctx)
+ } else {
+ err = verror.New(ErrNeedValidatorDataForHash, ctx)
+ }
+ }
+
+ // Check the author signature.
+ var authorSignatureHash []byte
+ if err == nil {
+ authorSignatureHash = d.authorSignatureHash()
+ if !d.AuthorSigned.Verify(authorBlessingsData.UnmarshalledBlessings.PublicKey(), authorSignatureHash) { // Check (2).
+ err = verror.New(errBadAuthorSignature, ctx)
+ }
+ }
+
+ // Check or create the validator signature.
+ now := time.Now()
+ if err != nil {
+ // err already set
+ } else if !d.IsValidated {
+ // Not yet validated, so this run will attempt to validate.
+ var validatedNames []string
+ var localKeyMarshalled []byte
+ var senderKeyMarshalled []byte
+ validatedNames, _ = security.SigningBlessingNames(ctx, call.LocalPrincipal(),
+ authorBlessingsData.UnmarshalledBlessings)
+ if len(validatedNames) == 0 { // Check (3).
+ err = verror.New(errAuthorBlessingsHaveNoValidNames, ctx)
+ } else if localKeyMarshalled, err = call.LocalPrincipal().PublicKey().MarshalBinary(); err != nil {
+ // err already set
+ } else if krl.RevocationTime(authorBlessingsData.MarshalledPublicKey).Before(now) { // Check (4).
+ err = verror.New(errAuthorKeyIsRevoked, ctx)
+ } else if krl.RevocationTime(localKeyMarshalled).Before(now) { // Check (5).
+ err = verror.New(errValidatorKeyIsRevoked, ctx)
+ } else if senderKeyMarshalled, err = call.RemoteBlessings().PublicKey().MarshalBinary(); err != nil {
+ // err already set
+ } else if !bytes.Equal(senderKeyMarshalled, authorBlessingsData.MarshalledPublicKey) { // Check (6).
+ err = verror.New(errSenderIsNotAuthor, ctx)
+ } else if bytes.Equal(localKeyMarshalled, authorBlessingsData.MarshalledPublicKey) { // Check (7).
+ err = verror.New(errMayNotValidateOwnSignature, ctx)
+ } else {
+ // Local principal is different from author, so can validate.
+ validatorData = &ValidatorData{
+ Names: validatedNames,
+ PublicKey: call.LocalPrincipal().PublicKey(),
+ MarshalledPublicKey: localKeyMarshalled,
+ }
+ d.ValidatorDataHash = cache.AddValidatorData(ctx, validatorData)
+ d.ValidatorSigned, err = call.LocalPrincipal().Sign(d.validatorSignatureHash(authorSignatureHash))
+ d.IsValidated = (err == nil)
+ }
+ } else { // Data already validated; check the validator signature.
+ if len(validatorData.Names) == 0 { // Check (8).
+ err = verror.New(errAuthorBlessingsHaveNoValidNames, ctx)
+ } else if bytes.Equal(validatorData.MarshalledPublicKey, authorBlessingsData.MarshalledPublicKey) { // Check (9).
+ err = verror.New(errValidatorIsSigner, ctx)
+ } else if !authorBlessingsData.UnmarshalledBlessings.CouldHaveNames(validatorData.Names) { // Check (10).
+ err = verror.New(errValidatesWrongNames, ctx)
+ } else if krl.RevocationTime(authorBlessingsData.MarshalledPublicKey).Before(now.Add(-gracePeriod)) { // Check (11).
+ err = verror.New(errAuthorKeyIsRevoked, ctx)
+ } else if krl.RevocationTime(validatorData.MarshalledPublicKey).Before(now.Add(-gracePeriod)) { // Check (12).
+ err = verror.New(errValidatorKeyIsRevoked, ctx)
+ } else if !d.ValidatorSigned.Verify(validatorData.PublicKey, d.validatorSignatureHash(authorSignatureHash)) { // Check (13).
+ err = verror.New(errBadValidatorSignature, ctx)
+ } // else success.
+ }
+
+ // If there were no errors, return the list of names from the validator.
+ if err == nil {
+ names = make([]string, len(validatorData.Names))
+ copy(names, validatorData.Names)
+ }
+
+ return names, err
+}
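+
+// Typical end-to-end flow (illustrative sketch only; error handling is elided,
+// and ctx, cache, call, krl, and gracePeriod are assumed to come from the
+// surrounding RPC and syncgroup machinery):
+//
+//	// Author side:
+//	d, blessingsData, _ := SignData(ctx, cache, principal, blessings, items)
+//	// ... send d (and blessingsData.MarshalledBlessings on request) ...
+//
+//	// Validator or checker side, on receiving d:
+//	names, err := d.Check(ctx, cache, call, krl, gracePeriod)
+//	// If verror.ErrorID(err) matches one of the three exported IDs above
+//	// (e.g. ErrNeedAuthorBlessingsForHash.ID), fetch the missing blessings or
+//	// validator data, add it with AddWireBlessings()/AddValidatorData(), and retry.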
diff --git a/services/syncbase/signing/signing_test.go b/services/syncbase/signing/signing_test.go
new file mode 100644
index 0000000..96881f3
--- /dev/null
+++ b/services/syncbase/signing/signing_test.go
@@ -0,0 +1,421 @@
+// Copyright 2015 The Vanadium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// Package signing_test implements a test for the package
+// v.io/syncbase/x/ref/services/syncbase/signing
+package signing_test
+
+import "crypto/sha256"
+import "testing"
+import "time"
+
+import "v.io/syncbase/x/ref/services/syncbase/signing"
+import "v.io/syncbase/x/ref/services/syncbase/signing/krl"
+import "v.io/v23/naming"
+import "v.io/v23/security"
+import "v.io/v23/vdl"
+import "v.io/v23/vom"
+import "v.io/v23/verror"
+import "v.io/x/ref/test"
+import lib_security "v.io/x/ref/lib/security"
+
+import _ "v.io/x/ref/runtime/factories/generic"
+
+// --------------------------------------
+// The following implements a fake security.Call.
+type fakeCall struct {
+ localPrincipal security.Principal
+ localBlessings security.Blessings
+ remoteBlessings security.Blessings
+}
+
+func (fc *fakeCall) Timestamp() time.Time { return time.Now() }
+func (fc *fakeCall) Method() string { return "the_method_name" }
+func (fc *fakeCall) MethodTags() []*vdl.Value { return nil }
+func (fc *fakeCall) Suffix() string { return "the_suffix" }
+func (fc *fakeCall) LocalDischarges() map[string]security.Discharge { return nil }
+func (fc *fakeCall) RemoteDischarges() map[string]security.Discharge { return nil }
+func (fc *fakeCall) LocalPrincipal() security.Principal { return fc.localPrincipal }
+func (fc *fakeCall) LocalBlessings() security.Blessings { return fc.localBlessings }
+func (fc *fakeCall) RemoteBlessings() security.Blessings { return fc.remoteBlessings }
+func (fc *fakeCall) LocalEndpoint() naming.Endpoint { return nil }
+func (fc *fakeCall) RemoteEndpoint() naming.Endpoint { return nil }
+
+// --------------------------------------
+
+// A principalDesc holds the local state of a single principal in the tests below.
+type principalDesc struct {
+ name string
+ principal security.Principal
+ blessings security.Blessings
+ krl *krl.KRL
+ authorBlessingsData *signing.BlessingsData
+ names []string
+ marshalledBlessings []byte
+ blessingsHash []byte
+ validatorData *signing.ValidatorData
+ validatorHash []byte
+ cache *signing.ValidationCache
+ data *signing.DataWithSignature
+}
+
+// makePrincipal() returns a pointer to a newly-initialized principalDesc,
+// with a unique key, and a single blessing named with its own name.
+func makePrincipal(t testing.TB, name string) (desc *principalDesc) {
+ var err error
+ desc = new(principalDesc)
+ desc.name = name
+ desc.principal, err = lib_security.NewPrincipal()
+ if err != nil {
+ t.Fatalf("security.CreatePrincipal %q failed: %v", desc.name, err)
+ }
+ desc.blessings, err = desc.principal.BlessSelf(desc.name)
+ if err != nil {
+ t.Fatalf("principal.BlessSelf %q failed: %v", desc.name, err)
+ }
+ desc.krl = krl.New()
+ desc.cache = signing.NewValidationCache(5 * time.Second)
+ return desc
+}
+
+// makePrincipals() creates one principal per name, and adds
+// the blessings of each to the roots of all.
+func makePrincipals(t testing.TB, names ...string) (principals []*principalDesc) {
+ for i := range names {
+ principals = append(principals, makePrincipal(t, names[i]))
+ }
+ for i := range principals {
+ for j := range principals {
+ principals[j].principal.AddToRoots(principals[i].blessings)
+ }
+ }
+ return principals
+}
+
+// BenchmarkHashData() measures the time taken to compute a cryptographic hash
+// of 1kB of data.
+func BenchmarkHashData(b *testing.B) {
+ var block [1024]byte
+ hasher := sha256.New()
+ b.ResetTimer()
+
+ for i := 0; i < b.N; i++ {
+ hasher.Write(block[:])
+ }
+}
+
+// BenchmarkSignData() measures the time taken to sign something with
+// signing.SignData().
+func BenchmarkSignData(b *testing.B) {
+ ctx, shutdown := test.V23Init()
+ defer shutdown()
+ var err error
+ author := makePrincipal(b, "author")
+ dataToSign := []signing.Item{signing.ItemData{Value: []byte("hello")}}
+ b.ResetTimer()
+
+ for i := 0; i < b.N; i++ {
+ author.data, author.authorBlessingsData, err =
+ signing.SignData(ctx, author.cache, author.principal, author.blessings, dataToSign)
+ }
+ if err != nil {
+ panic(err)
+ }
+}
+
+// BenchmarkSign1000Data() measures the time taken to sign 1000 small data
+// items with signing.SignData().
+func BenchmarkSign1000Data(b *testing.B) {
+ ctx, shutdown := test.V23Init()
+ defer shutdown()
+ var err error
+ author := makePrincipal(b, "author")
+ var dataToSign []signing.Item
+ for i := 0; i != 1000; i++ {
+ dataToSign = append(dataToSign, signing.ItemData{Value: []byte("hello")})
+ }
+ b.ResetTimer()
+
+ for i := 0; i < b.N; i++ {
+ author.data, author.authorBlessingsData, err =
+ signing.SignData(ctx, author.cache, author.principal, author.blessings, dataToSign)
+ }
+ if err != nil {
+ panic(err)
+ }
+}
+
+// BenchmarkCheckData() measures the time taken to check a validated signature
+// with DataWithSignature.Check().
+func BenchmarkCheckData(b *testing.B) {
+ ctx, shutdown := test.V23Init()
+ defer shutdown()
+ var err error
+
+ principals := makePrincipals(b, "author", "validator", "checker")
+ author := principals[0]
+ validator := principals[1]
+ checker := principals[2]
+
+ dataToSign := []signing.Item{signing.ItemData{Value: []byte("hello")}}
+ author.data, author.authorBlessingsData, err =
+ signing.SignData(ctx, author.cache, author.principal, author.blessings, dataToSign)
+ if err != nil {
+ panic(err)
+ }
+ callToValidator := fakeCall{
+ localPrincipal: validator.principal,
+ localBlessings: validator.blessings,
+ remoteBlessings: author.blessings,
+ }
+ validator.names, err = author.data.Check(ctx, author.cache, &callToValidator, validator.krl, 24*30*time.Hour)
+ if err != nil {
+ panic(err)
+ }
+ callToChecker := fakeCall{
+ localPrincipal: checker.principal,
+ localBlessings: checker.blessings,
+ remoteBlessings: validator.blessings,
+ }
+ b.ResetTimer()
+ for i := 0; i < b.N; i++ {
+ checker.names, err = author.data.Check(ctx, author.cache, &callToChecker, checker.krl, 24*30*time.Hour)
+ }
+}
+
+// BenchmarkCheck1000Data() measures the time taken to check a validated
+// signature over 1000 small data items with DataWithSignature.Check().
+func BenchmarkCheck1000Data(b *testing.B) {
+ ctx, shutdown := test.V23Init()
+ defer shutdown()
+ var err error
+
+ principals := makePrincipals(b, "author", "validator", "checker")
+ author := principals[0]
+ validator := principals[1]
+ checker := principals[2]
+
+ var dataToSign []signing.Item
+ for i := 0; i != 1000; i++ {
+ dataToSign = append(dataToSign, signing.ItemData{Value: []byte("hello")})
+ }
+ author.data, author.authorBlessingsData, err =
+ signing.SignData(ctx, author.cache, author.principal, author.blessings, dataToSign)
+ if err != nil {
+ panic(err)
+ }
+ callToValidator := fakeCall{
+ localPrincipal: validator.principal,
+ localBlessings: validator.blessings,
+ remoteBlessings: author.blessings,
+ }
+ validator.names, err = author.data.Check(ctx, author.cache, &callToValidator, validator.krl, 24*30*time.Hour)
+ if err != nil {
+ panic(err)
+ }
+ callToChecker := fakeCall{
+ localPrincipal: checker.principal,
+ localBlessings: checker.blessings,
+ remoteBlessings: validator.blessings,
+ }
+ b.ResetTimer()
+ for i := 0; i < b.N; i++ {
+ checker.names, err = author.data.Check(ctx, author.cache, &callToChecker, checker.krl, 24*30*time.Hour)
+ }
+}
+
+// BenchmarkMarshallBlessings() measures the time taken to marshal a Blessings.
+func BenchmarkMarshallBlessings(b *testing.B) {
+ var err error
+ author := makePrincipal(b, "author")
+ b.ResetTimer()
+ for i := 0; i < b.N; i++ {
+ author.marshalledBlessings, err = vom.Encode(author.blessings)
+ }
+ if err != nil {
+ b.Fatalf("vom.Encode failed: %v", err)
+ }
+}
+
+// BenchmarkUnmarshallBlessings() measures the time taken to unmarshal a Blessings.
+func BenchmarkUnmarshallBlessings(b *testing.B) {
+ var err error
+ author := makePrincipal(b, "author")
+ author.marshalledBlessings, err = vom.Encode(author.blessings)
+ if err != nil {
+ b.Fatalf("vom.Encode failed: %v", err)
+ }
+ var blessings security.Blessings
+ b.ResetTimer()
+ for i := 0; i < b.N; i++ {
+ err = vom.Decode(author.marshalledBlessings, &blessings)
+ }
+ if err != nil {
+ b.Fatalf("vom.Encode failed: %v", err)
+ }
+}
+
+// BenchmarkMarshallPublicKey() measures the time taken to marshal a PublicKey.
+func BenchmarkMarshallPublicKey(b *testing.B) {
+ var err error
+ author := makePrincipal(b, "author")
+ b.ResetTimer()
+ for i := 0; i < b.N; i++ {
+ _, err = author.principal.PublicKey().MarshalBinary()
+ }
+ if err != nil {
+ b.Fatalf("MarshalBinary() failed: %v", err)
+ }
+}
+
+// BenchmarkUnmarshallPublicKey() measures the time taken to unmarshal a PublicKey.
+func BenchmarkUnmarshallPublicKey(b *testing.B) {
+ var err error
+ author := makePrincipal(b, "author")
+ var marshalledKey []byte
+ marshalledKey, err = author.principal.PublicKey().MarshalBinary()
+ if err != nil {
+ b.Fatalf("MarshalBinary() failed: %v", err)
+ }
+ b.ResetTimer()
+ for i := 0; i < b.N; i++ {
+ _, err = security.UnmarshalPublicKey(marshalledKey)
+ }
+ if err != nil {
+ b.Fatalf("MarshalBinary() failed: %v", err)
+ }
+}
+
+// TestSignData() tests that a complete flow of signing, validating, and
+// checking works on a DataWithSignature.
+func TestSignData(t *testing.T) {
+ ctx, shutdown := test.V23Init()
+ defer shutdown()
+
+ var err error
+
+ principals := makePrincipals(t, "author", "validator", "checker")
+ author := principals[0]
+ validator := principals[1]
+ checker := principals[2]
+
+ // Add each principal's blessings to each principal's roots.
+ pdList := []*principalDesc{author, validator, checker}
+ for i := 0; i != len(pdList); i++ {
+ for j := 0; j != len(pdList); j++ {
+ pdList[j].principal.AddToRoots(pdList[i].blessings)
+ }
+ }
+
+ // --------------------------------------
+ // Author
+ // Sign some data.
+ dataToSign := []signing.Item{
+ signing.ItemData{Value: []byte("hello")},
+ signing.ItemData{Value: []byte("world")},
+ signing.ItemData{Value: []byte("!")},
+ }
+ author.data, author.authorBlessingsData, err =
+ signing.SignData(ctx, author.cache, author.principal, author.blessings, dataToSign)
+ if err != nil {
+ t.Fatalf("signing.SignData failed: %v", err)
+ }
+ if author.data.IsValidated {
+ t.Fatalf("signing.SignData generated data with IsValidated set")
+ }
+
+ // --------------------------------------
+ // Validator
+ callToValidator := fakeCall{
+ localPrincipal: validator.principal,
+ localBlessings: validator.blessings,
+ remoteBlessings: author.blessings,
+ }
+ // The validator receives author.data from the author.
+ validator.data = new(signing.DataWithSignature)
+ *validator.data = *author.data
+ // Initially the validator doesn't have the author BlessingsData.
+ validator.authorBlessingsData = validator.cache.LookupBlessingsData(ctx, validator.data.BlessingsHash)
+ if validator.authorBlessingsData != nil {
+ t.Errorf("found non-nil BlessingsData for validator.data.BlessingsHash in validator's ValidationCache")
+ }
+ validator.names, err = validator.data.Check(ctx, validator.cache, &callToValidator, validator.krl, 24*30*time.Hour)
+ if verror.ErrorID(err) != signing.ErrNeedAuthorBlessingsForHash.ID {
+ t.Fatalf("validator.data.Check got err %v, want %s", err, signing.ErrNeedAuthorBlessingsForHash.ID)
+ }
+
+ // The validator receives the author's marshalled blessings from the author.
+ validator.marshalledBlessings = author.authorBlessingsData.MarshalledBlessings
+ validator.blessingsHash, validator.authorBlessingsData, err = validator.cache.AddWireBlessings(ctx, validator.marshalledBlessings)
+ if err != nil {
+ t.Fatalf("validator can't add author's marshalled belssings to its ValidationCache: %v", err)
+ }
+
+ validator.names, err = validator.data.Check(ctx, validator.cache, &callToValidator, validator.krl, 24*30*time.Hour)
+ if err != nil {
+ t.Fatalf("validator error calling Check() on data: %v", err)
+ }
+ if !validator.data.IsValidated {
+ t.Fatalf("signing.Check didn't set IsValidated")
+ }
+ // Validator's cache should now have the author's BlessingData, and the validator's ValidatorData.
+ validator.authorBlessingsData = validator.cache.LookupBlessingsData(ctx, validator.data.BlessingsHash)
+ if validator.authorBlessingsData == nil {
+ t.Errorf("didn't finf BlessingsData for validator.data.BlessingsHash in validator's ValidationCache")
+ }
+ validator.validatorData = validator.cache.LookupValidatorData(ctx, validator.data.ValidatorDataHash)
+
+ // --------------------------------------
+ // Checker
+ callToChecker := fakeCall{
+ localPrincipal: checker.principal,
+ localBlessings: checker.blessings,
+ remoteBlessings: validator.blessings,
+ }
+ // The checker receives validator.data from the validator, except that
+ // data item 1 is replaced by its hash, because (for example) the
+ // checker is not allowed to see it.
+ checker.data = new(signing.DataWithSignature)
+ *checker.data = *validator.data
+ checker.data.Data[1] = signing.ItemHash{Value: signing.SumByteVectorWithLength(checker.data.Data[1].(signing.ItemData).Value)}
+
+ // Initially the checker doesn't have the author BlessingsData, or the validator ValidatorData.
+ checker.authorBlessingsData = checker.cache.LookupBlessingsData(ctx, checker.data.BlessingsHash)
+ if checker.authorBlessingsData != nil {
+ t.Errorf("found non-nil blessings data for checker.data.BlessingsHash hash in checker's ValidationCache")
+ }
+ checker.names, err = checker.data.Check(ctx, checker.cache, &callToChecker, checker.krl, 24*30*time.Hour)
+ if verror.ErrorID(err) != signing.ErrNeedAuthorBlessingsAndValidatorDataForHash.ID {
+ t.Fatalf("checker.data.Check got err %v, want %s", err, signing.ErrNeedAuthorBlessingsAndValidatorDataForHash.ID)
+ }
+
+ // The checker receives the author's marshalled blessings from the validator.
+ checker.marshalledBlessings = validator.marshalledBlessings
+ checker.blessingsHash, checker.authorBlessingsData, err = checker.cache.AddWireBlessings(ctx, checker.marshalledBlessings)
+ if err != nil {
+ t.Fatalf("checker can't add author's marshalled belssings to its ValidationCache: %v", err)
+ }
+ checker.names, err = checker.data.Check(ctx, checker.cache, &callToChecker, checker.krl, 24*30*time.Hour)
+ if verror.ErrorID(err) != signing.ErrNeedValidatorDataForHash.ID {
+ t.Fatalf("checker.data.Check got err %v, want %s", err, signing.ErrNeedValidatorDataForHash.ID)
+ }
+
+ // The checker receives the validator's data from the validator, passing through the wire format.
+ wvd := signing.ToWireValidatorData(validator.validatorData)
+ var vd signing.ValidatorData
+ vd, err = signing.FromWireValidatorData(&wvd)
+ if err != nil {
+ t.Fatalf("signing.FromWireValidatorData got error: %v", err)
+ }
+ checker.validatorData = &vd
+
+ // The checker adds the ValidatorData to its cache.
+ checker.validatorHash = checker.cache.AddValidatorData(ctx, checker.validatorData)
+
+ // And now the Check() operation should work.
+ checker.names, err = checker.data.Check(ctx, checker.cache, &callToChecker, checker.krl, 24*30*time.Hour)
+ if err != nil {
+ t.Fatalf("checker.data.Check got unexpected err %v", err)
+ }
+}
diff --git a/services/syncbase/signing/validationcache.go b/services/syncbase/signing/validationcache.go
new file mode 100644
index 0000000..1c95bd0
--- /dev/null
+++ b/services/syncbase/signing/validationcache.go
@@ -0,0 +1,190 @@
+// Copyright 2015 The Vanadium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// This module implements a cache of data associated with the
+// signatures, keyed by hash values of the data. The intent is that
+// communicating devices will refer to the data using hashes, and transmit the
+// data itself only if the device on the other side does not have the data in
+// its cache.
+
+package signing
+
+import "crypto/sha256"
+import "encoding/binary"
+import "time"
+
+import "v.io/syncbase/x/ref/services/syncbase/signing/hashcache"
+import "v.io/v23/context"
+import "v.io/v23/security"
+import "v.io/v23/vom"
+
+// --------------------------------------------
+
+// A BlessingsData contains information about a security.Blessings object. The
+// object itself is referred to by UnmarshalledBlessings. The implementation
+// constructs all instances; the client should not modify fields.
+type BlessingsData struct {
+ UnmarshalledBlessings security.Blessings // The Blessings.
+ MarshalledBlessings []byte // VOM encoded Blessings.
+ MarshalledPublicKey []byte // Value from blessings.PublicKey().MarshalBinary().
+}
+
+// A ValidatorData is the extra data that a validator signs when validating and
+// signing a DataWithSignature. Clients may construct instances to pass to
+// AddValidatorData(), but should not modify the fields of a constructed
+// ValidatorData.
+type ValidatorData struct {
+ Names []string // Names of valid signing blessings in the Blessings referred to by BlessingsHash.
+ PublicKey security.PublicKey // The key used to create ValidatorSigned.
+ MarshalledPublicKey []byte // PublicKey, marshalled with MarshalBinary().
+}
+
+// hash() returns the hash of *vd. This hash should be used in the
+// ValidatorDataHash field of DataWithSignature, and as the cache key of *vd
+// in a ValidationCache.
+func (vd *ValidatorData) hash() []byte {
+ hasher := sha256.New()
+ var buffer [256]byte
+ var buf []byte = buffer[:]
+ binary.LittleEndian.PutUint64(buf[:], uint64(len(vd.Names)))
+ hasher.Write(buf[:8])
+ for i := range vd.Names {
+ if len(vd.Names[i]) > len(buf) {
+ buf = make([]byte, len(vd.Names[i])+256)
+ }
+ hashByteVectorWithLength(hasher, []byte(vd.Names[i]))
+ }
+ hashByteVectorWithLength(hasher, vd.MarshalledPublicKey)
+ return hasher.Sum(nil)[:]
+}
+
+// A ValidationCache records recently-seen instances of BlessingsData and
+// ValidatorData values, keyed by hashes of the blessings and validator keys
+// respectively. Values may expire from the cache if unused for a duration
+// specified with NewValidationCache().
+type ValidationCache struct {
+ blessingsCache *hashcache.Cache
+ validatorCache *hashcache.Cache
+}
+
+// NewValidationCache() returns a pointer to a new, empty ValidationCache with
+// the specified expiry duration.
+func NewValidationCache(expiry time.Duration) *ValidationCache {
+ return &ValidationCache{
+ blessingsCache: hashcache.New(expiry),
+ validatorCache: hashcache.New(expiry)}
+}
+
+// LookupBlessingsData() returns a pointer to the BlessingsData associated with
+// blessingsHash in *vc. blessingsHash should have been returned by a previous
+// call to AddBlessings() or AddWireBlessings() (possibly on another machine).
+// nil is returned if the data is not present. The client should not modify
+// *result, since it is shared with *vc.
+func (vc *ValidationCache) LookupBlessingsData(ctx *context.T, blessingsHash []byte) (result *BlessingsData) {
+ value, found := vc.blessingsCache.Lookup(blessingsHash)
+ if found {
+ result = value.(*BlessingsData)
+ }
+ return result
+}
+
+// addBlessings() adds a BlessingsData for blessings to *vc, and returns a hash
+// value, which if passed to LookupBlessingsData() will yield a pointer to the
+// BlessingsData, or a non-nil error. The fields of BlessingsData other than
+// MarshalledBlessings and UnmarshalledBlessings are constructed by this
+// routine. Requires that blessings and marshalledBlessings represent the same
+// data, or that marshalledBlessings be nil.
+func (vc *ValidationCache) addBlessings(ctx *context.T, blessings security.Blessings,
+ marshalledBlessings []byte) (blessingsHash []byte, data *BlessingsData, err error) {
+
+ blessingsHash = blessings.UniqueID()
+ if value, found := vc.blessingsCache.Lookup(blessingsHash); found {
+ data = value.(*BlessingsData)
+ } else { // not found
+ var marshalledKey []byte
+ if marshalledBlessings == nil {
+ marshalledBlessings, err = vom.Encode(blessings)
+ }
+ if err == nil {
+ marshalledKey, err = blessings.PublicKey().MarshalBinary()
+ }
+ if err == nil {
+ data = &BlessingsData{
+ UnmarshalledBlessings: blessings,
+ MarshalledBlessings: marshalledBlessings,
+ MarshalledPublicKey: marshalledKey}
+ vc.blessingsCache.Add(blessingsHash, data)
+ }
+ }
+ return blessingsHash, data, err
+}
+
+// AddBlessings() adds a BlessingsData for blessings to *vc, and
+// returns a hash value, which if passed to LookupBlessingsData() will yield a
+// pointer to the BlessingsData, or a non-nil error. The fields of
+// BlessingsData other than UnmarshalledBlessings are constructed by this
+// routine.
+func (vc *ValidationCache) AddBlessings(ctx *context.T, blessings security.Blessings) (blessingsHash []byte, data *BlessingsData, err error) {
+ return vc.addBlessings(ctx, blessings, nil)
+}
+
+// AddWireBlessings() adds a BlessingsData for blessings to *vc and returns a
+// hash value which, if passed to LookupBlessingsData(), will yield a pointer
+// to the BlessingsData, or a non-nil error on failure. The fields of
+// BlessingsData other than MarshalledBlessings are constructed by this
+// routine.
+func (vc *ValidationCache) AddWireBlessings(ctx *context.T,
+ marshalledBlessings []byte) (blessingsHash []byte, data *BlessingsData, err error) {
+
+ var blessings security.Blessings
+ err = vom.Decode(marshalledBlessings, &blessings)
+ if err == nil {
+ blessingsHash, data, err = vc.addBlessings(ctx, blessings, marshalledBlessings)
+ }
+ return blessingsHash, data, err
+}
+
+// LookupValidatorData() returns a pointer to the ValidatorData associated with
+// hash validatorHash in *vc. validatorHash should have been returned by a
+// previous call to AddValidatorData() (possibly on another machine). nil is
+// returned if the data is not present. The client should not modify *result,
+// since it is shared with *vc.
+func (vc *ValidationCache) LookupValidatorData(ctx *context.T, validatorHash []byte) (result *ValidatorData) {
+ value, found := vc.validatorCache.Lookup(validatorHash)
+ if found {
+ result = value.(*ValidatorData)
+ }
+ return result
+}
+
+// AddValidatorData() adds a ValidatorData *vd to cache *vc, and returns a hash
+// value, which if passed to LookupValidatorData() will yield a pointer to the
+// ValidatorData. The client should not modify *vd after the call, since it is
+// shared with *vc.
+func (vc *ValidationCache) AddValidatorData(ctx *context.T, vd *ValidatorData) (validatorDataHash []byte) {
+ validatorDataHash = vd.hash()
+ vc.validatorCache.Add(validatorDataHash, vd)
+ return validatorDataHash
+}
+
+// ToWireValidatorData() returns the wire form of ValidatorData *vd.
+func ToWireValidatorData(vd *ValidatorData) (wvd WireValidatorData) {
+ wvd.Names = make([]string, len(vd.Names))
+ copy(wvd.Names, vd.Names)
+ wvd.MarshalledPublicKey = make([]byte, len(vd.MarshalledPublicKey))
+ copy(wvd.MarshalledPublicKey, vd.MarshalledPublicKey)
+ return wvd
+}
+
+// FromWireValidatorData() returns the in-memory form of WireValidatorData *wvd.
+func FromWireValidatorData(wvd *WireValidatorData) (vd ValidatorData, err error) {
+ vd.PublicKey, err = security.UnmarshalPublicKey(wvd.MarshalledPublicKey)
+ if err == nil {
+ vd.Names = make([]string, len(wvd.Names))
+ copy(vd.Names, wvd.Names)
+ vd.MarshalledPublicKey = make([]byte, len(wvd.MarshalledPublicKey))
+ copy(vd.MarshalledPublicKey, wvd.MarshalledPublicKey)
+ }
+ return vd, err
+}
diff --git a/services/syncbase/signing/validationcache_test.go b/services/syncbase/signing/validationcache_test.go
new file mode 100644
index 0000000..a45835b
--- /dev/null
+++ b/services/syncbase/signing/validationcache_test.go
@@ -0,0 +1,189 @@
+// Copyright 2015 The Vanadium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// This file tests the validationcache.go module.
+
+package signing_test
+
+import "bytes"
+import "testing"
+import "time"
+
+import "v.io/syncbase/x/ref/services/syncbase/signing"
+import "v.io/v23/security"
+import "v.io/x/ref/test"
+import _ "v.io/x/ref/runtime/factories/generic"
+import lib_security "v.io/x/ref/lib/security"
+
+// A principalVDesc holds the local state of a single principal in the tests below.
+type principalVDesc struct {
+ name string
+ principal security.Principal
+ blessings security.Blessings
+ blessingsHash []byte
+ blessingsData *signing.BlessingsData
+ validatorHash []byte
+ validatorData *signing.ValidatorData
+ cache *signing.ValidationCache
+}
+
+// makePrincipalVDesc() returns a pointer to a newly-initialized principalVDesc
+// with a unique key and a single self-blessing bearing its own name.
+func makePrincipalVDesc(t *testing.T, name string) (desc *principalVDesc) {
+ var err error
+ desc = new(principalVDesc)
+ desc.name = name
+ desc.principal, err = lib_security.NewPrincipal()
+ if err != nil {
+ t.Fatalf("lib_security.NewPrincipal %q failed: %v", desc.name, err)
+ }
+ desc.blessings, err = desc.principal.BlessSelf(desc.name)
+ if err != nil {
+ t.Fatalf("principal.BlessSelf %q failed: %v", desc.name, err)
+ }
+ desc.cache = signing.NewValidationCache(5 * time.Second)
+ return desc
+}
+
+func TestValidationCache(t *testing.T) {
+ ctx, shutdown := test.V23Init()
+ defer shutdown()
+
+ var err error
+
+ // Make a principalVDesc for each of the author, validator, and checker.
+ // (The author creates a signed change; the validator is a device the
+ // author syncs with; the checker is a device the validator syncs with.)
+ author := makePrincipalVDesc(t, "author")
+ validator := makePrincipalVDesc(t, "validator")
+ checker := makePrincipalVDesc(t, "checker")
+
+ // Add each principal's blessings to each principal's roots.
+ pdList := []*principalVDesc{author, validator, checker}
+ for i := 0; i != len(pdList); i++ {
+ for j := 0; j != len(pdList); j++ {
+ pdList[j].principal.AddToRoots(pdList[i].blessings)
+ }
+ }
+
+ // --------------------------------------
+ // Author
+ arbitraryBlessingsData := author.cache.LookupBlessingsData(ctx, []byte{0x00})
+ if arbitraryBlessingsData != nil {
+ t.Errorf("found non-nil blessings data for nonsense hash in author's ValidationCache")
+ }
+ author.blessingsHash, author.blessingsData, err = author.cache.AddBlessings(ctx, author.blessings)
+ if err != nil {
+ t.Fatalf("error from author.cache.AddBlessings(): %v", err)
+ }
+ // Check that the author's data is as we expect.
+ if author.cache.LookupBlessingsData(ctx, author.blessingsHash) != author.blessingsData {
+ t.Fatalf("found wrong blessings data for hash in author's ValidationCache: %v vs %v",
+ author.cache.LookupBlessingsData(ctx, author.blessingsHash), author.blessingsData)
+ }
+
+ // --------------------------------------
+ // Validator
+ // The validator receives author.blessingsHash from the author.
+ // Initially the validator doesn't have the author's BlessingsData.
+ authorBlessingsData := validator.cache.LookupBlessingsData(ctx, author.blessingsHash)
+ if authorBlessingsData != nil {
+ t.Errorf("found non-nil blessings data for author.blessingsHash hash in validator's ValidationCache")
+ }
+ // The validator receives the author's marshalled blessings from the author.
+ validator.blessingsHash, validator.blessingsData, err =
+ validator.cache.AddWireBlessings(ctx, author.blessingsData.MarshalledBlessings)
+ if err != nil {
+ t.Fatalf("validator can't add author's marshalled blessings to its ValidationCache: %v", err)
+ }
+ if !bytes.Equal(author.blessingsHash, validator.blessingsHash) {
+ t.Errorf("validator's copy of the blessingsHash different from author's")
+ }
+ // Check that we could have got the blessingsData with a lookup if this were the second time.
+ if validator.cache.LookupBlessingsData(ctx, validator.blessingsHash) != validator.blessingsData {
+ t.Fatalf("found wrong blessings data for hash in validator's ValidationCache")
+ }
+ var marshalledPublicKey []byte
+ marshalledPublicKey, err = validator.principal.PublicKey().MarshalBinary()
+ if err != nil {
+ t.Fatalf("validator.principal.PublicKey().MarshalBinary() got error: %v", err)
+ }
+
+ var validatedNames []string
+ validatedNames, _ = security.SigningBlessingNames(ctx, validator.principal,
+ validator.blessingsData.UnmarshalledBlessings)
+ validator.validatorData = &signing.ValidatorData{
+ Names: validatedNames,
+ PublicKey: validator.principal.PublicKey(),
+ MarshalledPublicKey: marshalledPublicKey}
+ validator.validatorHash = validator.cache.AddValidatorData(ctx, validator.validatorData)
+ if validator.cache.LookupValidatorData(ctx, validator.validatorHash) != validator.validatorData {
+ t.Fatalf("LookupValidatorData returned wrong ValidatorData pointer in validator")
+ }
+
+ // --------------------------------------
+ // Checker
+ // The checker receives validator.blessingsHash from the validator.
+ // Initially the checker doesn't have the author's BlessingsData.
+ authorBlessingsData = checker.cache.LookupBlessingsData(ctx, validator.blessingsHash)
+ if authorBlessingsData != nil {
+ t.Errorf("found non-nil blessings data for author.blessingsHash hash in checker's ValidationCache")
+ }
+ // The checker receives the author's marshalled blessings from the validator.
+ checker.blessingsHash, checker.blessingsData, err =
+ checker.cache.AddWireBlessings(ctx, validator.blessingsData.MarshalledBlessings)
+ if err != nil {
+ t.Fatalf("checker can't add author's marshalled blessings (from validator) to ValidationCache: %v", err)
+ }
+ if !bytes.Equal(author.blessingsHash, checker.blessingsHash) {
+ t.Errorf("checker's copy of the blessingsHash different from author's")
+ }
+ // Check that we could have got the blessingsData with a lookup if this were the second time.
+ if checker.cache.LookupBlessingsData(ctx, checker.blessingsHash) != checker.blessingsData {
+ t.Fatalf("found wrong blessings data for hash in checker's ValidationCache")
+ }
+ // The checker receives validator.validatorHash from the validator.
+ // Initially the checker doesn't have the ValidatorData.
+ validatorData := checker.cache.LookupValidatorData(ctx, validator.validatorHash)
+ if validatorData != nil {
+ t.Errorf("found non-nil validator data for validator.validatorHash hash in checker's ValidationCache")
+ }
+ // The checker receives the validator's data from the validator (or another checker).
+ checker.validatorHash = checker.cache.AddValidatorData(ctx, validator.validatorData)
+ if !bytes.Equal(validator.validatorHash, checker.validatorHash) {
+ t.Fatalf("checker's copy of the validatorHash different from validator's")
+ }
+ // Get the validatorData
+ checker.validatorData = checker.cache.LookupValidatorData(ctx, checker.validatorHash)
+ if checker.validatorData == nil {
+ t.Fatalf("found nil valdidatorData for checker.validatorHash hash in checker's ValidationCache")
+ }
+}
+
+func TestWireValidatorData(t *testing.T) {
+ var err error
+
+ pDesc := makePrincipalVDesc(t, "some_principal")
+
+ var vd signing.ValidatorData
+ vd.Names = []string{"wombat", "foo"}
+ vd.PublicKey = pDesc.principal.PublicKey()
+ vd.MarshalledPublicKey, err = vd.PublicKey.MarshalBinary()
+ if err != nil {
+ t.Fatalf("failed to marshel public key: %v\n", err)
+ }
+
+ var wvd signing.WireValidatorData
+ var vd2 signing.ValidatorData
+
+ wvd = signing.ToWireValidatorData(&vd)
+ vd2, err = signing.FromWireValidatorData(&wvd)
+ if err != nil {
+ t.Fatalf("FromWireValidatorData failed: %v\n", err)
+ }
+ if len(vd.Names) != len(vd2.Names) {
+ t.Fatalf("ToWireValidatorData/FromWireValidatorData failed to transfer Names list correctly:\nold\n%v\n\nnew\n%v\n\nwire\n%v\n",
+ vd, vd2, wvd)
+ }
+}
diff --git a/services/syncbase/store/benchmark/benchmark.go b/services/syncbase/store/benchmark/benchmark.go
new file mode 100644
index 0000000..c794fa9
--- /dev/null
+++ b/services/syncbase/store/benchmark/benchmark.go
@@ -0,0 +1,144 @@
+// Copyright 2015 The Vanadium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package benchmark
+
+import (
+ "fmt"
+ "math/rand"
+ "testing"
+
+ "v.io/syncbase/x/ref/services/syncbase/store"
+)
+
+// RandomGenerator is a helper for generating random data.
+type RandomGenerator struct {
+ rand.Rand
+ data []byte
+ pos int
+}
+
+// NewRandomGenerator returns a new generator of pseudo-random byte sequences
+// seeded with the given value. Every N bytes produced by this generator can be
+// compressed to (compressionRatio * N) bytes.
+func NewRandomGenerator(seed int64, compressionRatio float64) *RandomGenerator {
+ gen := &RandomGenerator{
+ *rand.New(rand.NewSource(seed)),
+ []byte{},
+ 0,
+ }
+ for len(gen.data) < 1000*1000 {
+ // We generate compressible byte sequences to test the Snappy compression
+ // engine used by LevelDB.
+ gen.data = append(gen.data, gen.compressibleBytes(100, compressionRatio)...)
+ }
+ return gen
+}
+
+// randomBytes generates n pseudo-random bytes from range [' '..'~'].
+func (r *RandomGenerator) randomBytes(n int) (bytes []byte) {
+ for i := 0; i < n; i++ {
+ bytes = append(bytes, byte(' '+r.Intn(95))) // ' ' .. '~'
+ }
+ return
+}
+
+// compressibleBytes generates a sequence of n pseudo-random bytes that can
+// be compressed to ~(compressionRatio * n) bytes.
+func (r *RandomGenerator) compressibleBytes(n int, compressionRatio float64) (bytes []byte) {
+ raw := int(float64(n) * compressionRatio)
+ if raw < 1 {
+ raw = 1
+ }
+ rawData := r.randomBytes(raw)
+ // Duplicate the random data until we have filled n bytes.
+ for len(bytes) < n {
+ bytes = append(bytes, rawData...)
+ }
+ return bytes[0:n]
+}
+
+// generate returns a sequence of n pseudo-random bytes.
+func (r *RandomGenerator) generate(n int) []byte {
+ if r.pos+n > len(r.data) {
+ r.pos = 0
+ if n >= len(r.data) {
+ panic(fmt.Sprintf("length(%d) is too big", n))
+ }
+ }
+ r.pos += n
+ return r.data[r.pos-n : r.pos]
+}
+
+// Config is a set of settings required to run a benchmark.
+type Config struct {
+ Rand *RandomGenerator
+ // St is the database to use. Initially it should be empty.
+ St store.Store
+ KeyLen int // size of each key
+ ValueLen int // size of each value
+}
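+
+// Illustrative wiring sketch for a storage engine's benchmark test (it mirrors
+// the engine-specific tests added in this change; newTestStore is an assumed
+// helper that returns an empty store.Store):
+//
+//	func BenchmarkWriteSequential(b *testing.B) {
+//		st := newTestStore()
+//		defer st.Close()
+//		WriteSequential(b, &Config{
+//			Rand:     NewRandomGenerator(23917, 0.5),
+//			St:       st,
+//			KeyLen:   20,
+//			ValueLen: 100,
+//		})
+//	}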
+
+// WriteSequential writes b.N values in sequential key order.
+func WriteSequential(b *testing.B, config *Config) {
+ doWrite(b, config, true)
+}
+
+// WriteRandom writes b.N values in random key order.
+func WriteRandom(b *testing.B, config *Config) {
+ doWrite(b, config, false)
+}
+
+func doWrite(b *testing.B, config *Config, seq bool) {
+ b.ResetTimer()
+ for i := 0; i < b.N; i++ {
+ var k int
+ if seq {
+ k = i
+ } else {
+ k = config.Rand.Intn(b.N)
+ }
+ // Zero-pad the numeric key to KeyLen digits so keys sort in numeric order.
+ key := []byte(fmt.Sprintf("%0[2]*[1]d", k, config.KeyLen))
+ if err := config.St.Put(key, config.Rand.generate(config.ValueLen)); err != nil {
+ b.Fatalf("put error: %v", err)
+ }
+ }
+}
+
+// ReadSequential reads b.N values in sequential key order.
+func ReadSequential(b *testing.B, config *Config) {
+ WriteSequential(b, config)
+ b.ResetTimer()
+ s := config.St.Scan([]byte("0"), []byte("z"))
+ var key, value []byte
+ for i := 0; i < b.N; i++ {
+ if !s.Advance() {
+ b.Fatalf("can't read next value: %v", s.Err())
+ }
+ key = s.Key(key)
+ value = s.Value(value)
+ }
+ s.Cancel()
+}
+
+// ReadRandom reads b.N values in random key order.
+func ReadRandom(b *testing.B, config *Config) {
+ WriteSequential(b, config)
+ b.ResetTimer()
+ var value []byte
+ var err error
+ for i := 0; i < b.N; i++ {
+ key := []byte(fmt.Sprintf("%0[2]*[1]d", config.Rand.Intn(b.N), config.KeyLen))
+ if value, err = config.St.Get(key, value); err != nil {
+ b.Fatalf("can't read value for key %s: %v", key, err)
+ }
+ }
+}
+
+// Overwrite overwrites b.N values in random key order.
+func Overwrite(b *testing.B, config *Config) {
+ WriteSequential(b, config)
+ b.ResetTimer()
+ WriteRandom(b, config)
+}
diff --git a/services/syncbase/store/constants.go b/services/syncbase/store/constants.go
new file mode 100644
index 0000000..26551fa
--- /dev/null
+++ b/services/syncbase/store/constants.go
@@ -0,0 +1,15 @@
+// Copyright 2015 The Vanadium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package store
+
+// TODO(sadovsky): Maybe define verrors for these.
+const (
+ ErrMsgClosedStore = "closed store"
+ ErrMsgAbortedSnapshot = "aborted snapshot"
+ ErrMsgCanceledStream = "canceled stream"
+ ErrMsgCommittedTxn = "already called commit"
+ ErrMsgAbortedTxn = "already called abort"
+ ErrMsgExpiredTxn = "expired transaction"
+)
diff --git a/services/syncbase/store/invalid_types.go b/services/syncbase/store/invalid_types.go
new file mode 100644
index 0000000..a230684
--- /dev/null
+++ b/services/syncbase/store/invalid_types.go
@@ -0,0 +1,121 @@
+// Copyright 2015 The Vanadium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package store
+
+import (
+ "v.io/v23/verror"
+)
+
+// InvalidSnapshot is a Snapshot for which all methods return errors.
+type InvalidSnapshot struct {
+ SnapshotSpecImpl
+ Error error // returned by all methods
+}
+
+// InvalidStream is a Stream for which all methods return errors.
+type InvalidStream struct {
+ Error error // returned by all methods
+}
+
+// InvalidTransaction is a Transaction for which all methods return errors.
+type InvalidTransaction struct {
+ Error error // returned by all methods
+}
+
+var (
+ _ Snapshot = (*InvalidSnapshot)(nil)
+ _ Stream = (*InvalidStream)(nil)
+ _ Transaction = (*InvalidTransaction)(nil)
+)
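+
+// Illustrative usage sketch (someStore is an assumed receiver type): a store
+// implementation that has been closed returns these invalid values instead of
+// panicking, as the LevelDB and memstore implementations in this change do:
+//
+//	func (d *someStore) Scan(start, limit []byte) Stream {
+//		if d.err != nil {
+//			return &InvalidStream{Error: d.err}
+//		}
+//		// ... normal scan path ...
+//	}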
+
+////////////////////////////////////////////////////////////
+// InvalidSnapshot
+
+// Abort implements the store.Snapshot interface.
+func (s *InvalidSnapshot) Abort() error {
+ return convertError(s.Error)
+}
+
+// Get implements the store.StoreReader interface.
+func (s *InvalidSnapshot) Get(key, valbuf []byte) ([]byte, error) {
+ return valbuf, convertError(s.Error)
+}
+
+// Scan implements the store.StoreReader interface.
+func (s *InvalidSnapshot) Scan(start, limit []byte) Stream {
+ return &InvalidStream{s.Error}
+}
+
+////////////////////////////////////////////////////////////
+// InvalidStream
+
+// Advance implements the store.Stream interface.
+func (s *InvalidStream) Advance() bool {
+ return false
+}
+
+// Key implements the store.Stream interface.
+func (s *InvalidStream) Key(keybuf []byte) []byte {
+ panic(s.Error)
+}
+
+// Value implements the store.Stream interface.
+func (s *InvalidStream) Value(valbuf []byte) []byte {
+ panic(s.Error)
+}
+
+// Err implements the store.Stream interface.
+func (s *InvalidStream) Err() error {
+ return convertError(s.Error)
+}
+
+// Cancel implements the store.Stream interface.
+func (s *InvalidStream) Cancel() {
+}
+
+////////////////////////////////////////////////////////////
+// InvalidTransaction
+
+// ResetForRetry implements the store.Transaction interface.
+func (tx *InvalidTransaction) ResetForRetry() {
+ panic(tx.Error)
+}
+
+// Get implements the store.StoreReader interface.
+func (tx *InvalidTransaction) Get(key, valbuf []byte) ([]byte, error) {
+ return valbuf, convertError(tx.Error)
+}
+
+// Scan implements the store.StoreReader interface.
+func (tx *InvalidTransaction) Scan(start, limit []byte) Stream {
+ return &InvalidStream{tx.Error}
+}
+
+// Put implements the store.StoreWriter interface.
+func (tx *InvalidTransaction) Put(key, value []byte) error {
+ return convertError(tx.Error)
+}
+
+// Delete implements the store.StoreWriter interface.
+func (tx *InvalidTransaction) Delete(key []byte) error {
+ return convertError(tx.Error)
+}
+
+// Commit implements the store.Transaction interface.
+func (tx *InvalidTransaction) Commit() error {
+ return convertError(tx.Error)
+}
+
+// Abort implements the store.Transaction interface.
+func (tx *InvalidTransaction) Abort() error {
+ return convertError(tx.Error)
+}
+
+////////////////////////////////////////////////////////////
+// Internal helpers
+
+func convertError(err error) error {
+ return verror.Convert(verror.IDAction{}, nil, err)
+}
diff --git a/services/syncbase/store/leveldb/benchmark_test.go b/services/syncbase/store/leveldb/benchmark_test.go
new file mode 100644
index 0000000..7de0062
--- /dev/null
+++ b/services/syncbase/store/leveldb/benchmark_test.go
@@ -0,0 +1,52 @@
+// Copyright 2015 The Vanadium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package leveldb
+
+import (
+ "testing"
+
+ "v.io/syncbase/x/ref/services/syncbase/store"
+ "v.io/syncbase/x/ref/services/syncbase/store/benchmark"
+)
+
+func testConfig(db store.Store) *benchmark.Config {
+ return &benchmark.Config{
+ Rand: benchmark.NewRandomGenerator(23917, 0.5),
+ St: db,
+ KeyLen: 20,
+ ValueLen: 100,
+ }
+}
+
+func runBenchmark(b *testing.B, f func(*testing.B, *benchmark.Config)) {
+ db, dbPath := newDB()
+ defer destroyDB(db, dbPath)
+ f(b, testConfig(db))
+}
+
+// BenchmarkWriteSequential writes b.N values in sequential key order.
+func BenchmarkWriteSequential(b *testing.B) {
+ runBenchmark(b, benchmark.WriteSequential)
+}
+
+// BenchmarkWriteRandom writes b.N values in random key order.
+func BenchmarkWriteRandom(b *testing.B) {
+ runBenchmark(b, benchmark.WriteRandom)
+}
+
+// BenchmarkOverwrite overwrites b.N values in random key order.
+func BenchmarkOverwrite(b *testing.B) {
+ runBenchmark(b, benchmark.Overwrite)
+}
+
+// BenchmarkReadSequential reads b.N values in sequential key order.
+func BenchmarkReadSequential(b *testing.B) {
+ runBenchmark(b, benchmark.ReadSequential)
+}
+
+// BenchmarkReadRandom reads b.N values in random key order.
+func BenchmarkReadRandom(b *testing.B) {
+ runBenchmark(b, benchmark.ReadRandom)
+}
diff --git a/services/syncbase/store/leveldb/db.go b/services/syncbase/store/leveldb/db.go
new file mode 100644
index 0000000..0430561
--- /dev/null
+++ b/services/syncbase/store/leveldb/db.go
@@ -0,0 +1,176 @@
+// Copyright 2015 The Vanadium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// Package leveldb provides a LevelDB-based implementation of store.Store.
+package leveldb
+
+// #cgo LDFLAGS: -lleveldb -lsnappy
+// #include <stdlib.h>
+// #include "leveldb/c.h"
+// #include "syncbase_leveldb.h"
+import "C"
+import (
+ "fmt"
+ "sync"
+ "unsafe"
+
+ "v.io/syncbase/x/ref/services/syncbase/store"
+ "v.io/syncbase/x/ref/services/syncbase/store/transactions"
+ "v.io/v23/verror"
+)
+
+// db is a wrapper around LevelDB that implements the transactions.BatchStore
+// interface.
+type db struct {
+ // mu protects the state of the db.
+ mu sync.RWMutex
+ node *store.ResourceNode
+ cDb *C.leveldb_t
+ // Default read/write options.
+ readOptions *C.leveldb_readoptions_t
+ writeOptions *C.leveldb_writeoptions_t
+ err error
+}
+
+// OpenOptions configures how Open creates or opens a database.
+type OpenOptions struct {
+ // CreateIfMissing specifies whether to create the database if it does not
+ // already exist.
+ CreateIfMissing bool
+ // ErrorIfExists specifies whether to fail if the database already exists.
+ ErrorIfExists bool
+}
+
+// Open opens the database located at the given path.
+func Open(path string, opts OpenOptions) (store.Store, error) {
+ var cError *C.char
+ cPath := C.CString(path)
+ defer C.free(unsafe.Pointer(cPath))
+
+ var cOptsCreateIfMissing, cOptsErrorIfExists C.uchar
+ if opts.CreateIfMissing {
+ cOptsCreateIfMissing = 1
+ }
+ if opts.ErrorIfExists {
+ cOptsErrorIfExists = 1
+ }
+
+ cOpts := C.leveldb_options_create()
+ C.leveldb_options_set_create_if_missing(cOpts, cOptsCreateIfMissing)
+ C.leveldb_options_set_error_if_exists(cOpts, cOptsErrorIfExists)
+ C.leveldb_options_set_paranoid_checks(cOpts, 1)
+ defer C.leveldb_options_destroy(cOpts)
+
+ cDb := C.leveldb_open(cOpts, cPath, &cError)
+ if err := goError(cError); err != nil {
+ return nil, err
+ }
+ readOptions := C.leveldb_readoptions_create()
+ C.leveldb_readoptions_set_verify_checksums(readOptions, 1)
+ return transactions.Wrap(&db{
+ node: store.NewResourceNode(),
+ cDb: cDb,
+ readOptions: readOptions,
+ writeOptions: C.leveldb_writeoptions_create(),
+ }), nil
+}
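+
+// Illustrative usage sketch (the path is a placeholder; db_test.go in this
+// change exercises the full open/close/destroy cycle):
+//
+//	st, err := Open("/path/to/db", OpenOptions{CreateIfMissing: true})
+//	if err != nil {
+//		// handle the error
+//	}
+//	defer st.Close()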
+
+// Close implements the store.Store interface.
+func (d *db) Close() error {
+ d.mu.Lock()
+ defer d.mu.Unlock()
+ if d.err != nil {
+ return store.ConvertError(d.err)
+ }
+ d.node.Close()
+ C.leveldb_close(d.cDb)
+ d.cDb = nil
+ C.leveldb_readoptions_destroy(d.readOptions)
+ d.readOptions = nil
+ C.leveldb_writeoptions_destroy(d.writeOptions)
+ d.writeOptions = nil
+ d.err = verror.New(verror.ErrCanceled, nil, store.ErrMsgClosedStore)
+ return nil
+}
+
+// Destroy removes all physical data of the database located at the given path.
+func Destroy(path string) error {
+ var cError *C.char
+ cPath := C.CString(path)
+ defer C.free(unsafe.Pointer(cPath))
+ cOpts := C.leveldb_options_create()
+ defer C.leveldb_options_destroy(cOpts)
+ C.leveldb_destroy_db(cOpts, cPath, &cError)
+ return goError(cError)
+}
+
+// Get implements the store.StoreReader interface.
+func (d *db) Get(key, valbuf []byte) ([]byte, error) {
+ return d.getWithOpts(key, valbuf, d.readOptions)
+}
+
+// Scan implements the store.StoreReader interface.
+func (d *db) Scan(start, limit []byte) store.Stream {
+ d.mu.RLock()
+ defer d.mu.RUnlock()
+ if d.err != nil {
+ return &store.InvalidStream{Error: d.err}
+ }
+ return newStream(d, d.node, start, limit, d.readOptions)
+}
+
+// NewSnapshot implements the store.Store interface.
+func (d *db) NewSnapshot() store.Snapshot {
+ d.mu.RLock()
+ defer d.mu.RUnlock()
+ if d.err != nil {
+ return &store.InvalidSnapshot{Error: d.err}
+ }
+ return newSnapshot(d, d.node)
+}
+
+// WriteBatch implements the transactions.BatchStore interface.
+func (d *db) WriteBatch(batch ...transactions.WriteOp) error {
+ d.mu.Lock()
+ defer d.mu.Unlock()
+ if d.err != nil {
+ return d.err
+ }
+ cBatch := C.leveldb_writebatch_create()
+ defer C.leveldb_writebatch_destroy(cBatch)
+ for _, write := range batch {
+ switch write.T {
+ case transactions.PutOp:
+ cKey, cKeyLen := cSlice(write.Key)
+ cVal, cValLen := cSlice(write.Value)
+ C.leveldb_writebatch_put(cBatch, cKey, cKeyLen, cVal, cValLen)
+ case transactions.DeleteOp:
+ cKey, cKeyLen := cSlice(write.Key)
+ C.leveldb_writebatch_delete(cBatch, cKey, cKeyLen)
+ default:
+ panic(fmt.Sprintf("unknown write operation type: %v", write.T))
+ }
+ }
+ var cError *C.char
+ C.leveldb_write(d.cDb, d.writeOptions, cBatch, &cError)
+ return goError(cError)
+}
+
+// getWithOpts returns the value for the given key.
+// cOpts may contain a pointer to a snapshot.
+func (d *db) getWithOpts(key, valbuf []byte, cOpts *C.leveldb_readoptions_t) ([]byte, error) {
+ d.mu.RLock()
+ defer d.mu.RUnlock()
+ if d.err != nil {
+ return valbuf, store.ConvertError(d.err)
+ }
+ var cError *C.char
+ var valLen C.size_t
+ cStr, cLen := cSlice(key)
+ val := C.leveldb_get(d.cDb, cOpts, cStr, cLen, &valLen, &cError)
+ if err := goError(cError); err != nil {
+ return valbuf, err
+ }
+ if val == nil {
+ return valbuf, verror.New(store.ErrUnknownKey, nil, string(key))
+ }
+ defer C.leveldb_free(unsafe.Pointer(val))
+ return store.CopyBytes(valbuf, goBytes(val, valLen)), nil
+}
diff --git a/services/syncbase/store/leveldb/db_test.go b/services/syncbase/store/leveldb/db_test.go
new file mode 100644
index 0000000..88cddc2
--- /dev/null
+++ b/services/syncbase/store/leveldb/db_test.go
@@ -0,0 +1,123 @@
+// Copyright 2015 The Vanadium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package leveldb
+
+import (
+ "fmt"
+ "io/ioutil"
+ "runtime"
+ "testing"
+
+ "v.io/syncbase/x/ref/services/syncbase/store"
+ "v.io/syncbase/x/ref/services/syncbase/store/test"
+)
+
+func init() {
+ runtime.GOMAXPROCS(10)
+}
+
+func TestStream(t *testing.T) {
+ runTest(t, test.RunStreamTest)
+}
+
+func TestSnapshot(t *testing.T) {
+ runTest(t, test.RunSnapshotTest)
+}
+
+func TestStoreState(t *testing.T) {
+ runTest(t, test.RunStoreStateTest)
+}
+
+func TestClose(t *testing.T) {
+ runTest(t, test.RunCloseTest)
+}
+
+func TestReadWriteBasic(t *testing.T) {
+ runTest(t, test.RunReadWriteBasicTest)
+}
+
+func TestReadWriteRandom(t *testing.T) {
+ runTest(t, test.RunReadWriteRandomTest)
+}
+
+func TestConcurrentTransactions(t *testing.T) {
+ runTest(t, test.RunConcurrentTransactionsTest)
+}
+
+func TestTransactionState(t *testing.T) {
+ runTest(t, test.RunTransactionStateTest)
+}
+
+func TestTransactionsWithGet(t *testing.T) {
+ runTest(t, test.RunTransactionsWithGetTest)
+}
+
+func TestOpenOptions(t *testing.T) {
+ path, err := ioutil.TempDir("", "syncbase_leveldb")
+ if err != nil {
+ t.Fatalf("can't create temp dir: %v", err)
+ }
+ // DB is missing => call should fail.
+ st, err := Open(path, OpenOptions{CreateIfMissing: false, ErrorIfExists: false})
+ if err == nil {
+ t.Fatalf("open should've failed")
+ }
+ // DB is missing => call should succeed.
+ st, err = Open(path, OpenOptions{CreateIfMissing: true, ErrorIfExists: false})
+ if err != nil {
+ t.Fatalf("open failed: %v", err)
+ }
+ st.Close()
+ // DB exists => call should succeed.
+ st, err = Open(path, OpenOptions{CreateIfMissing: false, ErrorIfExists: false})
+ if err != nil {
+ t.Fatalf("open failed: %v", err)
+ }
+ st.Close()
+ // DB exists => call should fail.
+ st, err = Open(path, OpenOptions{CreateIfMissing: false, ErrorIfExists: true})
+ if err == nil {
+ t.Fatalf("open should've failed")
+ }
+ // DB exists => call should fail.
+ st, err = Open(path, OpenOptions{CreateIfMissing: true, ErrorIfExists: true})
+ if err == nil {
+ t.Fatalf("open should've failed")
+ }
+ // DB exists => call should succeed.
+ st, err = Open(path, OpenOptions{CreateIfMissing: true, ErrorIfExists: false})
+ if err != nil {
+ t.Fatalf("open failed: %v", err)
+ }
+ st.Close()
+ if err := Destroy(path); err != nil {
+ t.Fatalf("destroy failed: %v", err)
+ }
+}
+
+func runTest(t *testing.T, f func(t *testing.T, st store.Store)) {
+ st, dbPath := newDB()
+ defer destroyDB(st, dbPath)
+ f(t, st)
+}
+
+func newDB() (store.Store, string) {
+ path, err := ioutil.TempDir("", "syncbase_leveldb")
+ if err != nil {
+ panic(fmt.Sprintf("can't create temp dir: %v", err))
+ }
+ st, err := Open(path, OpenOptions{CreateIfMissing: true, ErrorIfExists: true})
+ if err != nil {
+ panic(fmt.Sprintf("can't open db at %v: %v", path, err))
+ }
+ return st, path
+}
+
+func destroyDB(st store.Store, path string) {
+ st.Close()
+ if err := Destroy(path); err != nil {
+ panic(fmt.Sprintf("can't destroy db at %v: %v", path, err))
+ }
+}
diff --git a/services/syncbase/store/leveldb/snapshot.go b/services/syncbase/store/leveldb/snapshot.go
new file mode 100644
index 0000000..a403127
--- /dev/null
+++ b/services/syncbase/store/leveldb/snapshot.go
@@ -0,0 +1,82 @@
+// Copyright 2015 The Vanadium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package leveldb
+
+// #include "leveldb/c.h"
+import "C"
+import (
+ "sync"
+
+ "v.io/syncbase/x/ref/services/syncbase/store"
+ "v.io/v23/verror"
+)
+
+// snapshot is a wrapper around a LevelDB snapshot that implements
+// the store.Snapshot interface.
+type snapshot struct {
+ store.SnapshotSpecImpl
+ // mu protects the state of the snapshot.
+ mu sync.RWMutex
+ node *store.ResourceNode
+ d *db
+ cSnapshot *C.leveldb_snapshot_t
+ cOpts *C.leveldb_readoptions_t
+ err error
+}
+
+var _ store.Snapshot = (*snapshot)(nil)
+
+func newSnapshot(d *db, parent *store.ResourceNode) *snapshot {
+ cSnapshot := C.leveldb_create_snapshot(d.cDb)
+ cOpts := C.leveldb_readoptions_create()
+ C.leveldb_readoptions_set_verify_checksums(cOpts, 1)
+ C.leveldb_readoptions_set_snapshot(cOpts, cSnapshot)
+ s := &snapshot{
+ node: store.NewResourceNode(),
+ d: d,
+ cSnapshot: cSnapshot,
+ cOpts: cOpts,
+ }
+ parent.AddChild(s.node, func() {
+ s.Abort()
+ })
+ return s
+}
+
+// Abort implements the store.Snapshot interface.
+func (s *snapshot) Abort() error {
+ s.mu.Lock()
+ defer s.mu.Unlock()
+ if s.err != nil {
+ return store.ConvertError(s.err)
+ }
+ s.node.Close()
+ C.leveldb_readoptions_destroy(s.cOpts)
+ s.cOpts = nil
+ C.leveldb_release_snapshot(s.d.cDb, s.cSnapshot)
+ s.cSnapshot = nil
+ s.err = verror.New(verror.ErrCanceled, nil, store.ErrMsgAbortedSnapshot)
+ return nil
+}
+
+// Get implements the store.StoreReader interface.
+func (s *snapshot) Get(key, valbuf []byte) ([]byte, error) {
+ s.mu.RLock()
+ defer s.mu.RUnlock()
+ if s.err != nil {
+ return valbuf, store.ConvertError(s.err)
+ }
+ return s.d.getWithOpts(key, valbuf, s.cOpts)
+}
+
+// Scan implements the store.StoreReader interface.
+func (s *snapshot) Scan(start, limit []byte) store.Stream {
+ s.mu.RLock()
+ defer s.mu.RUnlock()
+ if s.err != nil {
+ return &store.InvalidStream{Error: s.err}
+ }
+ return newStream(s.d, s.node, start, limit, s.cOpts)
+}
diff --git a/services/syncbase/store/leveldb/stream.go b/services/syncbase/store/leveldb/stream.go
new file mode 100644
index 0000000..2d592b4
--- /dev/null
+++ b/services/syncbase/store/leveldb/stream.go
@@ -0,0 +1,150 @@
+// Copyright 2015 The Vanadium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package leveldb
+
+// #include "leveldb/c.h"
+// #include "syncbase_leveldb.h"
+import "C"
+import (
+ "bytes"
+ "sync"
+
+ "v.io/syncbase/x/ref/services/syncbase/store"
+ "v.io/v23/verror"
+)
+
+// stream is a wrapper around a LevelDB iterator that implements
+// the store.Stream interface.
+type stream struct {
+ // mu protects the state of the stream.
+ mu sync.Mutex
+ node *store.ResourceNode
+ cIter *C.syncbase_leveldb_iterator_t
+ limit []byte
+
+ hasAdvanced bool
+ err error
+
+ // hasValue is true iff a value has been staged. If hasValue is true,
+ // key and value point to the staged key/value pair. The underlying buffers
+ // of key and value are allocated on the C heap until Cancel is called,
+ // at which point they are copied to the Go heap.
+ hasValue bool
+ key []byte
+ value []byte
+}
+
+var _ store.Stream = (*stream)(nil)
+
+func newStream(d *db, parent *store.ResourceNode, start, limit []byte, cOpts *C.leveldb_readoptions_t) *stream {
+ cStr, size := cSlice(start)
+ cIter := C.syncbase_leveldb_create_iterator(d.cDb, cOpts, cStr, size)
+ s := &stream{
+ node: store.NewResourceNode(),
+ cIter: cIter,
+ limit: limit,
+ }
+ parent.AddChild(s.node, func() {
+ s.Cancel()
+ })
+ return s
+}
+
+// destroyLeveldbIter destroys the underlying C iterator.
+// Assumes mu is held.
+func (s *stream) destroyLeveldbIter() {
+ s.node.Close()
+ C.syncbase_leveldb_iter_destroy(s.cIter)
+ s.cIter = nil
+}
+
+// Advance implements the store.Stream interface.
+func (s *stream) Advance() bool {
+ s.mu.Lock()
+ defer s.mu.Unlock()
+ s.hasValue = false
+ if s.cIter == nil {
+ return false
+ }
+ // The C iterator starts out initialized, pointing at the first value; we
+ // shouldn't move it during the first Advance() call.
+ if !s.hasAdvanced {
+ s.hasAdvanced = true
+ } else {
+ C.syncbase_leveldb_iter_next(s.cIter)
+ }
+ if s.cIter.is_valid != 0 && (len(s.limit) == 0 || bytes.Compare(s.cKey(), s.limit) < 0) {
+ s.hasValue = true
+ s.key = s.cKey()
+ s.value = s.cVal()
+ return true
+ }
+
+ var cError *C.char
+ C.syncbase_leveldb_iter_get_error(s.cIter, &cError)
+ s.err = goError(cError)
+ s.destroyLeveldbIter()
+ return false
+}
+
+// Key implements the store.Stream interface.
+func (s *stream) Key(keybuf []byte) []byte {
+ s.mu.Lock()
+ defer s.mu.Unlock()
+ if !s.hasValue {
+ panic("nothing staged")
+ }
+ return store.CopyBytes(keybuf, s.key)
+}
+
+// Value implements the store.Stream interface.
+func (s *stream) Value(valbuf []byte) []byte {
+ s.mu.Lock()
+ defer s.mu.Unlock()
+ if !s.hasValue {
+ panic("nothing staged")
+ }
+ return store.CopyBytes(valbuf, s.value)
+}
+
+// Err implements the store.Stream interface.
+func (s *stream) Err() error {
+ s.mu.Lock()
+ defer s.mu.Unlock()
+ return store.ConvertError(s.err)
+}
+
+// Cancel implements the store.Stream interface.
+// TODO(rogulenko): make Cancel non-blocking.
+func (s *stream) Cancel() {
+ s.mu.Lock()
+ defer s.mu.Unlock()
+ if s.cIter == nil {
+ return
+ }
+ // s.hasValue will be false if Advance has never been called.
+ if s.hasValue {
+ // We copy the key and the value from the C heap to the Go heap before
+ // deallocating the C iterator.
+ s.key = store.CopyBytes(nil, s.cKey())
+ s.value = store.CopyBytes(nil, s.cVal())
+ }
+ s.err = verror.New(verror.ErrCanceled, nil, store.ErrMsgCanceledStream)
+ s.destroyLeveldbIter()
+}
+
+// cKey returns the current key.
+// The returned []byte points to a buffer allocated on the C heap. This buffer
+// is valid until the next call to Advance or Cancel.
+func (it *stream) cKey() []byte {
+ return goBytes(it.cIter.key, it.cIter.key_len)
+}
+
+// cVal returns the current value.
+// The returned []byte points to a buffer allocated on the C heap. This buffer
+// is valid until the next call to Advance or Cancel.
+func (it *stream) cVal() []byte {
+ return goBytes(it.cIter.val, it.cIter.val_len)
+}
diff --git a/services/syncbase/store/leveldb/syncbase_leveldb.cc b/services/syncbase/store/leveldb/syncbase_leveldb.cc
new file mode 100644
index 0000000..8c6f7e6
--- /dev/null
+++ b/services/syncbase/store/leveldb/syncbase_leveldb.cc
@@ -0,0 +1,47 @@
+// Copyright 2015 The Vanadium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// This file is intended to be C++ so that we can access the C++ LevelDB
+// interface directly if necessary.
+
+#include "syncbase_leveldb.h"
+
+extern "C" {
+
+static void PopulateIteratorFields(syncbase_leveldb_iterator_t* iter) {
+ iter->is_valid = leveldb_iter_valid(iter->rep);
+ if (!iter->is_valid) {
+ return;
+ }
+ iter->key = leveldb_iter_key(iter->rep, &iter->key_len);
+ iter->val = leveldb_iter_value(iter->rep, &iter->val_len);
+}
+
+syncbase_leveldb_iterator_t* syncbase_leveldb_create_iterator(
+ leveldb_t* db,
+ const leveldb_readoptions_t* options,
+ const char* start, size_t start_len) {
+ syncbase_leveldb_iterator_t* result = new syncbase_leveldb_iterator_t;
+ result->rep = leveldb_create_iterator(db, options);
+ leveldb_iter_seek(result->rep, start, start_len);
+ PopulateIteratorFields(result);
+ return result;
+}
+
+void syncbase_leveldb_iter_destroy(syncbase_leveldb_iterator_t* iter) {
+ leveldb_iter_destroy(iter->rep);
+ delete iter;
+}
+
+void syncbase_leveldb_iter_next(syncbase_leveldb_iterator_t* iter) {
+ leveldb_iter_next(iter->rep);
+ PopulateIteratorFields(iter);
+}
+
+void syncbase_leveldb_iter_get_error(
+ const syncbase_leveldb_iterator_t* iter, char** errptr) {
+ leveldb_iter_get_error(iter->rep, errptr);
+}
+
+} // end extern "C"
diff --git a/services/syncbase/store/leveldb/syncbase_leveldb.h b/services/syncbase/store/leveldb/syncbase_leveldb.h
new file mode 100644
index 0000000..d2faa82
--- /dev/null
+++ b/services/syncbase/store/leveldb/syncbase_leveldb.h
@@ -0,0 +1,61 @@
+// Copyright 2015 The Vanadium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+//
+// This file contains helpers to minimize the number of cgo calls, which have
+// some overhead.
+// Some conventions:
+//
+// Errors are represented by a null-terminated C string. NULL means no error.
+// All operations that can raise an error are passed a "char** errptr" as the
+// last argument. *errptr should be NULL.
+// On failure, leveldb sets *errptr to a malloc()ed error message.
+//
+// All of the pointer arguments must be non-NULL.
+
+#ifndef V_IO_SYNCBASE_X_REF_SERVICES_SYNCBASE_STORE_LEVELDB_SYNCBASE_LEVELDB_H_
+#define V_IO_SYNCBASE_X_REF_SERVICES_SYNCBASE_STORE_LEVELDB_SYNCBASE_LEVELDB_H_
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#include "leveldb/c.h"
+
+// Fields of this struct are accessed from Go directly, without cgo calls.
+struct syncbase_leveldb_iterator_t {
+ leveldb_iterator_t* rep;
+ unsigned char is_valid;
+ char const* key;
+ size_t key_len;
+ char const* val;
+ size_t val_len;
+};
+
+typedef struct syncbase_leveldb_iterator_t syncbase_leveldb_iterator_t;
+
+// Returns an iterator that points to the first key that is not less than
+// |start|.
+// The returned iterator must be passed to |syncbase_leveldb_iter_destroy|
+// when finished.
+syncbase_leveldb_iterator_t* syncbase_leveldb_create_iterator(
+ leveldb_t* db,
+ const leveldb_readoptions_t* options,
+ const char* start, size_t start_len);
+
+// Deallocates iterator returned by |syncbase_leveldb_create_iterator|.
+void syncbase_leveldb_iter_destroy(syncbase_leveldb_iterator_t*);
+
+// Moves to the next entry in the source. After this call, |is_valid| is
+// true iff the iterator was not positioned at the last entry in the source.
+// REQUIRES: |is_valid| is true.
+void syncbase_leveldb_iter_next(syncbase_leveldb_iterator_t* iter);
+
+// Reports any error encountered by the iterator via *errptr, following the
+// error conventions described at the top of this file.
+void syncbase_leveldb_iter_get_error(
+ const syncbase_leveldb_iterator_t* iter, char** errptr);
+
+#ifdef __cplusplus
+} // end extern "C"
+#endif
+
+#endif // V_IO_SYNCBASE_X_REF_SERVICES_SYNCBASE_STORE_LEVELDB_SYNCBASE_LEVELDB_H_
diff --git a/services/syncbase/store/leveldb/util.go b/services/syncbase/store/leveldb/util.go
new file mode 100644
index 0000000..dce69bb
--- /dev/null
+++ b/services/syncbase/store/leveldb/util.go
@@ -0,0 +1,46 @@
+// Copyright 2015 The Vanadium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package leveldb
+
+// #include "leveldb/c.h"
+import "C"
+import (
+ "reflect"
+ "unsafe"
+
+ "v.io/v23/verror"
+)
+
+// goError copies a C error message into the Go heap and frees the C buffer.
+func goError(cError *C.char) error {
+ if cError == nil {
+ return nil
+ }
+ err := verror.New(verror.ErrInternal, nil, C.GoString(cError))
+ C.leveldb_free(unsafe.Pointer(cError))
+ return err
+}
+
+// cSlice converts Go []byte to C string without copying the data.
+// This function behaves similarly to standard Go slice copying or sub-slicing,
+// in that the caller need not worry about ownership or garbage collection.
+func cSlice(str []byte) (*C.char, C.size_t) {
+ if len(str) == 0 {
+ return nil, 0
+ }
+ data := unsafe.Pointer((*reflect.StringHeader)(unsafe.Pointer(&str)).Data)
+ return (*C.char)(data), C.size_t(len(str))
+}
+
+// goBytes converts C string to Go []byte without copying the data.
+// This function behaves similarly to cSlice.
+func goBytes(str *C.char, size C.size_t) []byte {
+ ptr := unsafe.Pointer(&reflect.SliceHeader{
+ Data: uintptr(unsafe.Pointer(str)),
+ Len: int(size),
+ Cap: int(size),
+ })
+ return *(*[]byte)(ptr)
+}
diff --git a/services/syncbase/store/memstore/snapshot.go b/services/syncbase/store/memstore/snapshot.go
new file mode 100644
index 0000000..310f6e2
--- /dev/null
+++ b/services/syncbase/store/memstore/snapshot.go
@@ -0,0 +1,74 @@
+// Copyright 2015 The Vanadium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package memstore
+
+import (
+ "sync"
+
+ "v.io/syncbase/x/ref/services/syncbase/store"
+ "v.io/v23/verror"
+)
+
+type snapshot struct {
+ store.SnapshotSpecImpl
+ mu sync.Mutex
+ node *store.ResourceNode
+ data map[string][]byte
+ err error
+}
+
+var _ store.Snapshot = (*snapshot)(nil)
+
+// Assumes st lock is held.
+func newSnapshot(st *memstore, parent *store.ResourceNode) *snapshot {
+ dataCopy := make(map[string][]byte, len(st.data))
+ for k, v := range st.data {
+ dataCopy[k] = v
+ }
+ s := &snapshot{
+ node: store.NewResourceNode(),
+ data: dataCopy,
+ }
+ parent.AddChild(s.node, func() {
+ s.Abort()
+ })
+ return s
+}
+
+// Abort implements the store.Snapshot interface.
+func (s *snapshot) Abort() error {
+ s.mu.Lock()
+ defer s.mu.Unlock()
+ if s.err != nil {
+ return store.ConvertError(s.err)
+ }
+ s.node.Close()
+ s.err = verror.New(verror.ErrCanceled, nil, store.ErrMsgAbortedSnapshot)
+ return nil
+}
+
+// Get implements the store.StoreReader interface.
+func (s *snapshot) Get(key, valbuf []byte) ([]byte, error) {
+ s.mu.Lock()
+ defer s.mu.Unlock()
+ if s.err != nil {
+ return valbuf, store.ConvertError(s.err)
+ }
+ value, ok := s.data[string(key)]
+ if !ok {
+ return valbuf, verror.New(store.ErrUnknownKey, nil, string(key))
+ }
+ return store.CopyBytes(valbuf, value), nil
+}
+
+// Scan implements the store.StoreReader interface.
+func (s *snapshot) Scan(start, limit []byte) store.Stream {
+ s.mu.Lock()
+ defer s.mu.Unlock()
+ if s.err != nil {
+ return &store.InvalidStream{Error: s.err}
+ }
+ return newStream(s, s.node, start, limit)
+}
diff --git a/services/syncbase/store/memstore/store.go b/services/syncbase/store/memstore/store.go
new file mode 100644
index 0000000..15a2988
--- /dev/null
+++ b/services/syncbase/store/memstore/store.go
@@ -0,0 +1,98 @@
+// Copyright 2015 The Vanadium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// Package memstore provides a simple, in-memory implementation of store.Store.
+// Since it's a prototype implementation, it makes no attempt to be performant.
+package memstore
+
+import (
+ "fmt"
+ "sync"
+
+ "v.io/syncbase/x/ref/services/syncbase/store"
+ "v.io/syncbase/x/ref/services/syncbase/store/transactions"
+ "v.io/v23/verror"
+)
+
+type memstore struct {
+ mu sync.Mutex
+ node *store.ResourceNode
+ data map[string][]byte
+ err error
+}
+
+// New creates a new memstore.
+func New() store.Store {
+ return transactions.Wrap(&memstore{
+ data: map[string][]byte{},
+ node: store.NewResourceNode(),
+ })
+}
+
+// Close implements the store.Store interface.
+func (st *memstore) Close() error {
+ st.mu.Lock()
+ defer st.mu.Unlock()
+ if st.err != nil {
+ return store.ConvertError(st.err)
+ }
+ st.node.Close()
+ st.err = verror.New(verror.ErrCanceled, nil, store.ErrMsgClosedStore)
+ return nil
+}
+
+// Get implements the store.StoreReader interface.
+func (st *memstore) Get(key, valbuf []byte) ([]byte, error) {
+ st.mu.Lock()
+ defer st.mu.Unlock()
+ if st.err != nil {
+ return valbuf, store.ConvertError(st.err)
+ }
+ value, ok := st.data[string(key)]
+ if !ok {
+ return valbuf, verror.New(store.ErrUnknownKey, nil, string(key))
+ }
+ return store.CopyBytes(valbuf, value), nil
+}
+
+// Scan implements the store.StoreReader interface.
+func (st *memstore) Scan(start, limit []byte) store.Stream {
+ st.mu.Lock()
+ defer st.mu.Unlock()
+ if st.err != nil {
+ return &store.InvalidStream{Error: st.err}
+ }
+ // TODO(sadovsky): Close snapshot once stream is closed or canceled.
+ return newSnapshot(st, st.node).Scan(start, limit)
+}
+
+// NewSnapshot implements the store.Store interface.
+func (st *memstore) NewSnapshot() store.Snapshot {
+ st.mu.Lock()
+ defer st.mu.Unlock()
+ if st.err != nil {
+ return &store.InvalidSnapshot{Error: st.err}
+ }
+ return newSnapshot(st, st.node)
+}
+
+// WriteBatch implements the transactions.BatchStore interface.
+func (st *memstore) WriteBatch(batch ...transactions.WriteOp) error {
+ st.mu.Lock()
+ defer st.mu.Unlock()
+ if st.err != nil {
+ return store.ConvertError(st.err)
+ }
+ for _, write := range batch {
+ switch write.T {
+ case transactions.PutOp:
+ st.data[string(write.Key)] = write.Value
+ case transactions.DeleteOp:
+ delete(st.data, string(write.Key))
+ default:
+ panic(fmt.Sprintf("unknown write operation type: %v", write.T))
+ }
+ }
+ return nil
+}
diff --git a/services/syncbase/store/memstore/store_test.go b/services/syncbase/store/memstore/store_test.go
new file mode 100644
index 0000000..0b04032
--- /dev/null
+++ b/services/syncbase/store/memstore/store_test.go
@@ -0,0 +1,59 @@
+// Copyright 2015 The Vanadium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package memstore
+
+import (
+ "runtime"
+ "testing"
+
+ "v.io/syncbase/x/ref/services/syncbase/store"
+ "v.io/syncbase/x/ref/services/syncbase/store/test"
+)
+
+func init() {
+ runtime.GOMAXPROCS(10)
+}
+
+func TestStream(t *testing.T) {
+ runTest(t, test.RunStreamTest)
+}
+
+func TestSnapshot(t *testing.T) {
+ runTest(t, test.RunSnapshotTest)
+}
+
+func TestStoreState(t *testing.T) {
+ runTest(t, test.RunStoreStateTest)
+}
+
+func TestClose(t *testing.T) {
+ runTest(t, test.RunCloseTest)
+}
+
+func TestReadWriteBasic(t *testing.T) {
+ runTest(t, test.RunReadWriteBasicTest)
+}
+
+func TestReadWriteRandom(t *testing.T) {
+ runTest(t, test.RunReadWriteRandomTest)
+}
+
+func TestConcurrentTransactions(t *testing.T) {
+ runTest(t, test.RunConcurrentTransactionsTest)
+}
+
+func TestTransactionState(t *testing.T) {
+ runTest(t, test.RunTransactionStateTest)
+}
+
+func TestTransactionsWithGet(t *testing.T) {
+ runTest(t, test.RunTransactionsWithGetTest)
+}
+
+func runTest(t *testing.T, f func(t *testing.T, st store.Store)) {
+ st := New()
+ defer st.Close()
+ f(t, st)
+}
diff --git a/services/syncbase/store/memstore/stream.go b/services/syncbase/store/memstore/stream.go
new file mode 100644
index 0000000..345ea93
--- /dev/null
+++ b/services/syncbase/store/memstore/stream.go
@@ -0,0 +1,103 @@
+// Copyright 2015 The Vanadium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package memstore
+
+import (
+ "sort"
+ "sync"
+
+ "v.io/syncbase/x/ref/services/syncbase/store"
+ "v.io/v23/verror"
+)
+
+type stream struct {
+ mu sync.Mutex
+ node *store.ResourceNode
+ sn *snapshot
+ keys []string
+ currIndex int
+ currKey *string
+ err error
+ done bool
+}
+
+var _ store.Stream = (*stream)(nil)
+
+func newStream(sn *snapshot, parent *store.ResourceNode, start, limit []byte) *stream {
+ keys := []string{}
+ for k := range sn.data {
+ if k >= string(start) && (len(limit) == 0 || k < string(limit)) {
+ keys = append(keys, k)
+ }
+ }
+ sort.Strings(keys)
+ s := &stream{
+ node: store.NewResourceNode(),
+ sn: sn,
+ keys: keys,
+ currIndex: -1,
+ }
+ parent.AddChild(s.node, func() {
+ s.Cancel()
+ })
+ return s
+}
+
+// Advance implements the store.Stream interface.
+func (s *stream) Advance() bool {
+ s.mu.Lock()
+ defer s.mu.Unlock()
+ s.currKey = nil
+ if s.done {
+ return false
+ }
+ s.currIndex++
+ if s.currIndex < len(s.keys) {
+ s.currKey = &s.keys[s.currIndex]
+ } else {
+ s.done = true
+ s.currKey = nil
+ }
+ return !s.done
+}
+
+// Key implements the store.Stream interface.
+func (s *stream) Key(keybuf []byte) []byte {
+ s.mu.Lock()
+ defer s.mu.Unlock()
+ if s.currKey == nil {
+ panic("nothing staged")
+ }
+ return store.CopyBytes(keybuf, []byte(*s.currKey))
+}
+
+// Value implements the store.Stream interface.
+func (s *stream) Value(valbuf []byte) []byte {
+ s.mu.Lock()
+ defer s.mu.Unlock()
+ if s.currKey == nil {
+ panic("nothing staged")
+ }
+ return store.CopyBytes(valbuf, s.sn.data[*s.currKey])
+}
+
+// Err implements the store.Stream interface.
+func (s *stream) Err() error {
+ s.mu.Lock()
+ defer s.mu.Unlock()
+ return store.ConvertError(s.err)
+}
+
+// Cancel implements the store.Stream interface.
+func (s *stream) Cancel() {
+ s.mu.Lock()
+ defer s.mu.Unlock()
+ if s.done {
+ return
+ }
+ s.done = true
+ s.node.Close()
+ s.err = verror.New(verror.ErrCanceled, nil, store.ErrMsgCanceledStream)
+}
diff --git a/services/syncbase/store/model.go b/services/syncbase/store/model.go
new file mode 100644
index 0000000..be7265d
--- /dev/null
+++ b/services/syncbase/store/model.go
@@ -0,0 +1,147 @@
+// Copyright 2015 The Vanadium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// Package store defines the API for the syncbase storage engine.
+// Currently, this API and its implementations are meant to be internal.
+package store
+
+// TODO(sadovsky): Decide whether to defensively copy passed-in []byte's vs.
+// requiring clients not to modify passed-in []byte's.
+
+// StoreReader reads data from a CRUD-capable storage engine.
+type StoreReader interface {
+ // Get returns the value for the given key. The returned slice may be a
+ // sub-slice of valbuf if valbuf was large enough to hold the entire value.
+ // Otherwise, a newly allocated slice will be returned. It is valid to pass a
+ // nil valbuf.
+ // If the given key is unknown, valbuf is returned unchanged and the function
+ // fails with ErrUnknownKey.
+ Get(key, valbuf []byte) ([]byte, error)
+
+ // Scan returns all rows with keys in range [start, limit). If limit is "",
+ // all rows with keys >= start are included.
+ // Concurrency semantics: It is legal to perform writes concurrently with
+ // Scan. The returned stream may or may not reflect subsequent writes to keys
+ // not yet reached by the stream.
+ Scan(start, limit []byte) Stream
+}
+
+// StoreWriter writes data to a CRUD-capable storage engine.
+type StoreWriter interface {
+ // Put writes the given value for the given key.
+ Put(key, value []byte) error
+
+ // Delete deletes the entry for the given key.
+ // Succeeds (no-op) if the given key is unknown.
+ Delete(key []byte) error
+}
+
+// storeReadWriter combines StoreReader and StoreWriter.
+type storeReadWriter interface {
+ StoreReader
+ StoreWriter
+}
+
+// Store is a CRUD-capable storage engine that supports transactions.
+type Store interface {
+ storeReadWriter
+
+ // Close closes the store.
+ Close() error
+
+ // NewTransaction creates a transaction.
+ // TODO(rogulenko): add transaction options.
+ NewTransaction() Transaction
+
+ // NewSnapshot creates a snapshot.
+ // TODO(rogulenko): add snapshot options.
+ NewSnapshot() Snapshot
+}
+
+// SnapshotOrTransaction represents a Snapshot or a Transaction.
+type SnapshotOrTransaction interface {
+ StoreReader
+
+ // Abort closes the snapshot or transaction.
+ // Any subsequent method calls will fail.
+ // NOTE: this method is also used to distinguish between StoreReader and
+ // SnapshotOrTransaction.
+ Abort() error
+}
+
+// Snapshot is a handle to a particular state in time of a Store.
+//
+// All read operations are executed against a consistent view of Store commit
+// history. Snapshots don't acquire locks and thus don't block transactions.
+type Snapshot interface {
+ SnapshotOrTransaction
+
+ // __snapshotSpec is a utility method to distinguish between Snapshot and
+ // SnapshotOrTransaction. This is a no-op.
+ __snapshotSpec()
+}
+
+// Transaction provides a mechanism for atomic reads and writes. Instead of
+// creating transactions directly via NewTransaction(), clients are encouraged
+// to use the RunInTransaction() helper function, which detects "concurrent
+// transaction" errors and handles retries internally.
+//
+// Default concurrency semantics:
+// - Reads (e.g. gets, scans) inside a transaction operate over a consistent
+// snapshot taken during NewTransaction(), and will see the effects of prior
+// writes performed inside the transaction.
+// - Commit() may fail with ErrConcurrentTransaction, indicating that after
+// NewTransaction() but before Commit(), some concurrent routine wrote to a
+// key that matches a key or row-range read inside this transaction.
+// - Other methods will never fail with error ErrConcurrentTransaction, even if
+// it is known that Commit() will fail with this error.
+//
+// Once a transaction has been committed or aborted, subsequent method calls
+// will fail with no effect.
+type Transaction interface {
+ SnapshotOrTransaction
+ StoreWriter
+
+ // Commit commits the transaction.
+ // Fails if writes from outside this transaction conflict with reads from
+ // within this transaction.
+ Commit() error
+}
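+
+// Minimal usage sketch (illustrative only; st is assumed to be a Store, and
+// real callers should prefer the RunInTransaction() helper mentioned above):
+//
+//	tx := st.NewTransaction()
+//	if err := tx.Put([]byte("k"), []byte("v")); err != nil {
+//		tx.Abort()
+//		return err
+//	}
+//	if err := tx.Commit(); err != nil {
+//		// err may be ErrConcurrentTransaction; retry or surface it.
+//		return err
+//	}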
+
+// Stream is an interface for iterating through a collection of key-value pairs.
+type Stream interface {
+ // Advance stages an element so the client can retrieve it with Key or Value.
+ // Advance returns true iff there is an element to retrieve. The client must
+ // call Advance before calling Key or Value. The client must call Cancel if it
+ // does not iterate through all elements (i.e. until Advance returns false).
+ // Advance may block if an element is not immediately available.
+ Advance() bool
+
+ // Key returns the key of the element that was staged by Advance. The returned
+ // slice may be a sub-slice of keybuf if keybuf was large enough to hold the
+ // entire key. Otherwise, a newly allocated slice will be returned. It is
+ // valid to pass a nil keybuf.
+ // Key may panic if Advance returned false or was not called at all.
+ // Key does not block.
+ Key(keybuf []byte) []byte
+
+ // Value returns the value of the element that was staged by Advance. The
+ // returned slice may be a sub-slice of valbuf if valbuf was large enough to
+ // hold the entire value. Otherwise, a newly allocated slice will be returned.
+ // It is valid to pass a nil valbuf.
+ // Value may panic if Advance returned false or was not called at all.
+ // Value does not block.
+ Value(valbuf []byte) []byte
+
+ // Err returns a non-nil error iff the stream encountered any errors. Err does
+ // not block.
+ Err() error
+
+ // Cancel notifies the stream provider that it can stop producing elements.
+ // The client must call Cancel if it does not iterate through all elements
+ // (i.e. until Advance returns false). Cancel is idempotent and can be called
+ // concurrently with a goroutine that is iterating via Advance/Key/Value.
+ // Cancel causes Advance to subsequently return false. Cancel does not block.
+ Cancel()
+}
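+
+// Typical iteration sketch over a Stream s (illustrative only; a caller that
+// stops early must call Cancel instead of draining the stream):
+//
+//	for s.Advance() {
+//		key := s.Key(nil)
+//		value := s.Value(nil)
+//		// ... use key and value ...
+//	}
+//	if err := s.Err(); err != nil {
+//		// handle the error
+//	}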
diff --git a/services/syncbase/store/model.vdl b/services/syncbase/store/model.vdl
new file mode 100644
index 0000000..6a56e66
--- /dev/null
+++ b/services/syncbase/store/model.vdl
@@ -0,0 +1,14 @@
+// Copyright 2015 The Vanadium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package store
+
+error (
+ // ConcurrentTransaction means that the current transaction failed to commit
+ // because its read set was invalidated by some other transaction.
+ ConcurrentTransaction() {"en":"Concurrent transaction{:_}"}
+
+ // UnknownKey means the given key does not exist in the store.
+ UnknownKey() {"en":"Unknown key{:_}"}
+)
diff --git a/services/syncbase/store/model.vdl.go b/services/syncbase/store/model.vdl.go
new file mode 100644
index 0000000..eec8747
--- /dev/null
+++ b/services/syncbase/store/model.vdl.go
@@ -0,0 +1,38 @@
+// Copyright 2015 The Vanadium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// This file was auto-generated by the vanadium vdl tool.
+// Source: model.vdl
+
+package store
+
+import (
+ // VDL system imports
+ "v.io/v23/context"
+ "v.io/v23/i18n"
+ "v.io/v23/verror"
+)
+
+var (
+ // ConcurrentTransaction means that the current transaction failed to commit
+ // because its read set was invalidated by some other transaction.
+ ErrConcurrentTransaction = verror.Register("v.io/syncbase/x/ref/services/syncbase/store.ConcurrentTransaction", verror.NoRetry, "{1:}{2:} Concurrent transaction{:_}")
+ // UnknownKey means the given key does not exist in the store.
+ ErrUnknownKey = verror.Register("v.io/syncbase/x/ref/services/syncbase/store.UnknownKey", verror.NoRetry, "{1:}{2:} Unknown key{:_}")
+)
+
+func init() {
+ i18n.Cat().SetWithBase(i18n.LangID("en"), i18n.MsgID(ErrConcurrentTransaction.ID), "{1:}{2:} Concurrent transaction{:_}")
+ i18n.Cat().SetWithBase(i18n.LangID("en"), i18n.MsgID(ErrUnknownKey.ID), "{1:}{2:} Unknown key{:_}")
+}
+
+// NewErrConcurrentTransaction returns an error with the ErrConcurrentTransaction ID.
+func NewErrConcurrentTransaction(ctx *context.T) error {
+ return verror.New(ErrConcurrentTransaction, ctx)
+}
+
+// NewErrUnknownKey returns an error with the ErrUnknownKey ID.
+func NewErrUnknownKey(ctx *context.T) error {
+ return verror.New(ErrUnknownKey, ctx)
+}
diff --git a/services/syncbase/store/resource_node.go b/services/syncbase/store/resource_node.go
new file mode 100644
index 0000000..c7228b2
--- /dev/null
+++ b/services/syncbase/store/resource_node.go
@@ -0,0 +1,73 @@
+// Copyright 2015 The Vanadium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package store
+
+import (
+ "sync"
+)
+
+// ResourceNode is a node in a dependency graph. This graph is used to ensure
+// that when a resource is freed, downstream resources are also freed. For
+// example, closing a store closes all downstream transactions, snapshots and
+// streams.
+type ResourceNode struct {
+ mu sync.Mutex
+ parent *ResourceNode
+ children map[*ResourceNode]func()
+}
+
+// NewResourceNode creates a new isolated node in the dependency graph.
+func NewResourceNode() *ResourceNode {
+ return &ResourceNode{
+ children: make(map[*ResourceNode]func()),
+ }
+}
+
+// AddChild adds a parent-child relation between this node and the provided
+// node. The provided function is called to close the child when this node is
+// closed.
+func (r *ResourceNode) AddChild(node *ResourceNode, closefn func()) {
+ r.mu.Lock()
+ defer r.mu.Unlock()
+ if r.children == nil {
+ panic("already closed")
+ }
+ node.parent = r
+ r.children[node] = closefn
+}
+
+// removeChild removes the parent-child relation between this node and the
+// provided node, enabling Go's garbage collector to free the resources
+// associated with the child node if there are no more references to it.
+func (r *ResourceNode) removeChild(node *ResourceNode) {
+ r.mu.Lock()
+ defer r.mu.Unlock()
+ if r.children == nil {
+ // Already closed.
+ return
+ }
+ delete(r.children, node)
+}
+
+// Close closes this node and detaches it from its parent. All of this node's
+// children are closed using close functions provided to AddChild.
+func (r *ResourceNode) Close() {
+ r.mu.Lock()
+ if r.parent != nil {
+ // If there is a node V with parent P and we decide to explicitly close V,
+ // then we need to remove V from P's children list so that we don't close
+ // V again when P is closed.
+ r.parent.removeChild(r)
+ r.parent = nil
+ }
+ // Copy the children map to a local variable so that the removeChild step
+ // executed from children won't affect the map while we iterate through it.
+ children := r.children
+ r.children = nil
+ r.mu.Unlock()
+ for _, closefn := range children {
+ closefn()
+ }
+}
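+
+// Illustrative sketch of the intended wiring (the close function passed to
+// AddChild typically invokes the child resource's own close/abort method):
+//
+//   storeNode := NewResourceNode()
+//   snapNode := NewResourceNode()
+//   storeNode.AddChild(snapNode, func() { /* release the snapshot */ })
+//   ...
+//   storeNode.Close() // also closes snapNode via the provided function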
diff --git a/services/syncbase/store/test/snapshot.go b/services/syncbase/store/test/snapshot.go
new file mode 100644
index 0000000..04dee18
--- /dev/null
+++ b/services/syncbase/store/test/snapshot.go
@@ -0,0 +1,42 @@
+// Copyright 2015 The Vanadium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package test
+
+import (
+ "testing"
+
+ "v.io/syncbase/x/ref/services/syncbase/store"
+ "v.io/v23/verror"
+)
+
+// RunSnapshotTest verifies store.Snapshot operations.
+func RunSnapshotTest(t *testing.T, st store.Store) {
+ key1, value1 := []byte("key1"), []byte("value1")
+ st.Put(key1, value1)
+ snapshot := st.NewSnapshot()
+ key2, value2 := []byte("key2"), []byte("value2")
+ st.Put(key2, value2)
+
+ // Test Get and Scan.
+ verifyGet(t, snapshot, key1, value1)
+ verifyGet(t, snapshot, key2, nil)
+ s := snapshot.Scan([]byte("a"), []byte("z"))
+ verifyAdvance(t, s, key1, value1)
+ verifyAdvance(t, s, nil, nil)
+
+ // Test functions after Abort.
+ if err := snapshot.Abort(); err != nil {
+ t.Fatalf("can't abort the snapshot: %v", err)
+ }
+ expectedErrMsg := store.ErrMsgAbortedSnapshot
+ verifyError(t, snapshot.Abort(), verror.ErrCanceled.ID, expectedErrMsg)
+
+ _, err := snapshot.Get(key1, nil)
+ verifyError(t, err, verror.ErrCanceled.ID, expectedErrMsg)
+
+ s = snapshot.Scan([]byte("a"), []byte("z"))
+ verifyAdvance(t, s, nil, nil)
+ verifyError(t, s.Err(), verror.ErrCanceled.ID, expectedErrMsg)
+}
diff --git a/services/syncbase/store/test/store.go b/services/syncbase/store/test/store.go
new file mode 100644
index 0000000..48022f9
--- /dev/null
+++ b/services/syncbase/store/test/store.go
@@ -0,0 +1,239 @@
+// Copyright 2015 The Vanadium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package test
+
+import (
+ "fmt"
+ "math/rand"
+ "testing"
+
+ "v.io/syncbase/x/ref/services/syncbase/store"
+ "v.io/v23/verror"
+)
+
+type operation int
+
+const (
+ Put operation = 0
+ Delete operation = 1
+)
+
+type testStep struct {
+ op operation
+ key int
+}
+
+func randomBytes(rnd *rand.Rand, length int) []byte {
+ var res []byte
+ for i := 0; i < length; i++ {
+ res = append(res, '0'+byte(rnd.Intn(10)))
+ }
+ return res
+}
+
+// storeState is the in-memory representation of the store state.
+type storeState struct {
+ // We assume that the database has keys [0..size).
+ size int
+ rnd *rand.Rand
+ memtable map[string][]byte
+}
+
+func newStoreState(size int) *storeState {
+ return &storeState{
+ size,
+ rand.New(rand.NewSource(239017)),
+ make(map[string][]byte),
+ }
+}
+
+func (s *storeState) clone() *storeState {
+ other := &storeState{
+ s.size,
+ s.rnd,
+ make(map[string][]byte),
+ }
+ for k, v := range s.memtable {
+ other.memtable[k] = v
+ }
+ return other
+}
+
+// lowerBound returns the smallest key in the store that is not less than the
+// provided key. If there is no such key, returns size.
+func (s *storeState) lowerBound(key int) int {
+ for key < s.size {
+ if _, ok := s.memtable[fmt.Sprintf("%05d", key)]; ok {
+ return key
+ }
+ key++
+ }
+ return key
+}
+
+// verify checks that various read operations on store.Store and memtable return
+// the same results.
+func (s *storeState) verify(t *testing.T, st store.StoreReader) {
+ // Verify Get().
+ for i := 0; i < s.size; i++ {
+ keystr := fmt.Sprintf("%05d", i)
+ answer, ok := s.memtable[keystr]
+ if ok {
+ verifyGet(t, st, []byte(keystr), answer)
+ } else {
+ verifyGet(t, st, []byte(keystr), nil)
+ }
+ }
+ // Verify 10 random Scan() calls.
+ for i := 0; i < 10; i++ {
+ start, limit := s.rnd.Intn(s.size), s.rnd.Intn(s.size)
+ if start > limit {
+ start, limit = limit, start
+ }
+ limit++
+ stream := st.Scan([]byte(fmt.Sprintf("%05d", start)), []byte(fmt.Sprintf("%05d", limit)))
+ for start = s.lowerBound(start); start < limit; start = s.lowerBound(start + 1) {
+ keystr := fmt.Sprintf("%05d", start)
+ verifyAdvance(t, stream, []byte(keystr), s.memtable[keystr])
+ }
+ verifyAdvance(t, stream, nil, nil)
+ }
+}
+
+// runReadWriteTest verifies read/write/snapshot operations.
+func runReadWriteTest(t *testing.T, st store.Store, size int, steps []testStep) {
+ s := newStoreState(size)
+ // We verify database state no more than ~100 times to prevent the test from
+ // being slow.
+ frequency := (len(steps) + 99) / 100
+ var states []*storeState
+ var snapshots []store.Snapshot
+ for i, step := range steps {
+ if step.key < 0 || step.key >= s.size {
+ t.Fatalf("invalid test step %v", step)
+ }
+ key := fmt.Sprintf("%05d", step.key)
+ switch step.op {
+ case Put:
+ value := randomBytes(s.rnd, 100)
+ s.memtable[key] = value
+ st.Put([]byte(key), value)
+ case Delete:
+ if _, ok := s.memtable[key]; ok {
+ delete(s.memtable, key)
+ st.Delete([]byte(key))
+ }
+ default:
+ t.Fatalf("invalid test step %v", step)
+ }
+ if i%frequency == 0 {
+ s.verify(t, st)
+ states = append(states, s.clone())
+ snapshots = append(snapshots, st.NewSnapshot())
+ }
+ }
+ s.verify(t, st)
+ for i := 0; i < len(states); i++ {
+ states[i].verify(t, snapshots[i])
+ snapshots[i].Abort()
+ }
+}
+
+// RunReadWriteBasicTest runs a basic test that verifies reads, writes and
+// snapshots.
+func RunReadWriteBasicTest(t *testing.T, st store.Store) {
+ runReadWriteTest(t, st, 3, []testStep{
+ testStep{Put, 1},
+ testStep{Put, 2},
+ testStep{Delete, 1},
+ testStep{Put, 1},
+ testStep{Put, 2},
+ })
+}
+
+// RunReadWriteRandomTest runs a randomly generated test that verifies reads,
+// writes and snapshots.
+func RunReadWriteRandomTest(t *testing.T, st store.Store) {
+ rnd := rand.New(rand.NewSource(239017))
+ var steps []testStep
+ size := 50
+ for i := 0; i < 10000; i++ {
+ steps = append(steps, testStep{operation(rnd.Intn(2)), rnd.Intn(size)})
+ }
+ runReadWriteTest(t, st, size, steps)
+}
+
+// RunStoreStateTest verifies operations that modify the state of a store.Store.
+func RunStoreStateTest(t *testing.T, st store.Store) {
+ key1, value1 := []byte("key1"), []byte("value1")
+ st.Put(key1, value1)
+ key2 := []byte("key2")
+
+ // Test Get and Scan.
+ verifyGet(t, st, key1, value1)
+ verifyGet(t, st, key2, nil)
+ s := st.Scan([]byte("a"), []byte("z"))
+ verifyAdvance(t, s, key1, value1)
+ verifyAdvance(t, s, nil, nil)
+
+ // Test functions after Close.
+ if err := st.Close(); err != nil {
+ t.Fatalf("can't close the store: %v", err)
+ }
+ expectedErrMsg := store.ErrMsgClosedStore
+ verifyError(t, st.Close(), verror.ErrCanceled.ID, expectedErrMsg)
+
+ s = st.Scan([]byte("a"), []byte("z"))
+ verifyAdvance(t, s, nil, nil)
+ verifyError(t, s.Err(), verror.ErrCanceled.ID, expectedErrMsg)
+
+ snapshot := st.NewSnapshot()
+ _, err := snapshot.Get(key1, nil)
+ verifyError(t, err, verror.ErrCanceled.ID, expectedErrMsg)
+
+ tx := st.NewTransaction()
+ _, err = tx.Get(key1, nil)
+ verifyError(t, err, verror.ErrCanceled.ID, expectedErrMsg)
+
+ _, err = st.Get(key1, nil)
+ verifyError(t, err, verror.ErrCanceled.ID, expectedErrMsg)
+ verifyError(t, st.Put(key1, value1), verror.ErrCanceled.ID, expectedErrMsg)
+ verifyError(t, st.Delete(key1), verror.ErrCanceled.ID, expectedErrMsg)
+}
+
+// RunCloseTest verifies that child objects are closed when the parent object is
+// closed.
+func RunCloseTest(t *testing.T, st store.Store) {
+ key1, value1 := []byte("key1"), []byte("value1")
+ st.Put(key1, value1)
+
+ var streams []store.Stream
+ var snapshots []store.Snapshot
+ var transactions []store.Transaction
+ for i := 0; i < 10; i++ {
+ streams = append(streams, st.Scan([]byte("a"), []byte("z")))
+ snapshot := st.NewSnapshot()
+ tx := st.NewTransaction()
+ for j := 0; j < 10; j++ {
+ streams = append(streams, snapshot.Scan([]byte("a"), []byte("z")))
+ streams = append(streams, tx.Scan([]byte("a"), []byte("z")))
+ }
+ snapshots = append(snapshots, snapshot)
+ transactions = append(transactions, tx)
+ }
+ st.Close()
+
+ for _, stream := range streams {
+ verifyError(t, stream.Err(), verror.ErrCanceled.ID, store.ErrMsgCanceledStream)
+ }
+ for _, snapshot := range snapshots {
+ _, err := snapshot.Get(key1, nil)
+ verifyError(t, err, verror.ErrCanceled.ID, store.ErrMsgAbortedSnapshot)
+ }
+ for _, tx := range transactions {
+ _, err := tx.Get(key1, nil)
+ verifyError(t, err, verror.ErrCanceled.ID, store.ErrMsgAbortedTxn)
+ }
+}
diff --git a/services/syncbase/store/test/stream.go b/services/syncbase/store/test/stream.go
new file mode 100644
index 0000000..e058fc0
--- /dev/null
+++ b/services/syncbase/store/test/stream.go
@@ -0,0 +1,53 @@
+// Copyright 2015 The Vanadium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package test
+
+import (
+ "bytes"
+ "testing"
+
+ "v.io/syncbase/x/ref/services/syncbase/store"
+ "v.io/v23/verror"
+)
+
+// RunStreamTest verifies store.Stream operations.
+func RunStreamTest(t *testing.T, st store.Store) {
+ // Test that advancing or canceling a stream that has reached its end
+ // doesn't cause a panic.
+ s := st.Scan([]byte("a"), []byte("z"))
+ verifyAdvance(t, s, nil, nil)
+ verifyAdvance(t, s, nil, nil)
+ if s.Err() != nil {
+ t.Fatalf("unexpected error: %v", s.Err())
+ }
+ s.Cancel()
+ if s.Err() != nil {
+ t.Fatalf("unexpected error: %v", s.Err())
+ }
+
+ key1, value1 := []byte("key1"), []byte("value1")
+ st.Put(key1, value1)
+ key2, value2 := []byte("key2"), []byte("value2")
+ st.Put(key2, value2)
+ key3, value3 := []byte("key3"), []byte("value3")
+ st.Put(key3, value3)
+ s = st.Scan([]byte("a"), []byte("z"))
+ verifyAdvance(t, s, key1, value1)
+ if !s.Advance() {
+ t.Fatalf("can't advance the stream")
+ }
+ s.Cancel()
+ for i := 0; i < 2; i++ {
+ var key, value []byte
+ if key = s.Key(key); !bytes.Equal(key, key2) {
+ t.Fatalf("unexpected key: got %q, want %q", key, key2)
+ }
+ if value = s.Value(value); !bytes.Equal(value, value2) {
+ t.Fatalf("unexpected value: got %q, want %q", value, value2)
+ }
+ }
+ verifyAdvance(t, s, nil, nil)
+ verifyError(t, s.Err(), verror.ErrCanceled.ID, store.ErrMsgCanceledStream)
+}
diff --git a/services/syncbase/store/test/transaction.go b/services/syncbase/store/test/transaction.go
new file mode 100644
index 0000000..6cf26e8
--- /dev/null
+++ b/services/syncbase/store/test/transaction.go
@@ -0,0 +1,216 @@
+// Copyright 2015 The Vanadium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package test
+
+import (
+ "bytes"
+ "fmt"
+ "math/rand"
+ "strconv"
+ "sync"
+ "testing"
+
+ "v.io/syncbase/x/ref/services/syncbase/store"
+ "v.io/v23/verror"
+)
+
+// RunTransactionStateTest verifies operations that modify the state of a
+// store.Transaction.
+func RunTransactionStateTest(t *testing.T, st store.Store) {
+ finalizeFns := []func(t *testing.T, tx store.Transaction) (verror.ID, string){
+ func(t *testing.T, tx store.Transaction) (verror.ID, string) {
+ if err := tx.Abort(); err != nil {
+ Fatalf(t, "can't abort the transaction: %v", err)
+ }
+ return verror.ErrCanceled.ID, store.ErrMsgAbortedTxn
+ },
+ func(t *testing.T, tx store.Transaction) (verror.ID, string) {
+ if err := tx.Commit(); err != nil {
+ Fatalf(t, "can't commit the transaction: %v", err)
+ }
+ return verror.ErrBadState.ID, store.ErrMsgCommittedTxn
+ },
+ }
+ for _, fn := range finalizeFns {
+ key1, value1 := []byte("key1"), []byte("value1")
+ st.Put(key1, value1)
+ key2 := []byte("key2")
+ tx := st.NewTransaction()
+
+ // Test Get and Scan.
+ verifyGet(t, tx, key1, value1)
+ verifyGet(t, tx, key2, nil)
+ s := tx.Scan([]byte("a"), []byte("z"))
+ verifyAdvance(t, s, key1, value1)
+ verifyAdvance(t, s, nil, nil)
+
+ // Test Put then Get & Scan inside the transaction.
+ key3, value3 := []byte("key3"), []byte("value3")
+ tx.Put(key3, value3)
+ verifyGet(t, tx, key3, value3)
+ s = tx.Scan([]byte("a"), []byte("z"))
+ verifyAdvance(t, s, key1, value1)
+ verifyAdvance(t, s, key3, value3)
+ verifyAdvance(t, s, nil, nil)
+
+ // Test Delete of old key then Get inside the transaction.
+ tx.Delete(key1)
+ verifyGet(t, tx, key1, nil)
+
+ // Test Delete of new key then Get inside the transaction.
+ tx.Delete(key3)
+ verifyGet(t, tx, key3, nil)
+
+ // Test functions after finalize.
+ expectedID, expectedErrMsg := fn(t, tx)
+ verifyError(t, tx.Abort(), expectedID, expectedErrMsg)
+ verifyError(t, tx.Commit(), expectedID, expectedErrMsg)
+
+ s = tx.Scan([]byte("a"), []byte("z"))
+ verifyAdvance(t, s, nil, nil)
+ verifyError(t, s.Err(), expectedID, expectedErrMsg)
+
+ _, err := tx.Get(key1, nil)
+ verifyError(t, err, expectedID, expectedErrMsg)
+ verifyError(t, tx.Put(key1, value1), expectedID, expectedErrMsg)
+ verifyError(t, tx.Delete(key1), expectedID, expectedErrMsg)
+ }
+}
+
+// RunConcurrentTransactionsTest verifies that concurrent transactions
+// invalidate each other as expected.
+func RunConcurrentTransactionsTest(t *testing.T, st store.Store) {
+ st.Put([]byte("a"), []byte("0"))
+ st.Put([]byte("b"), []byte("0"))
+ st.Put([]byte("c"), []byte("0"))
+ // Test that a read via Get is invalidated by a concurrent committed write.
+ txA := st.NewTransaction()
+ txB := st.NewTransaction()
+ txA.Get([]byte("a"), nil)
+ txB.Get([]byte("a"), nil)
+ txA.Put([]byte("a"), []byte("a"))
+ txB.Put([]byte("a"), []byte("b"))
+ if err := txA.Commit(); err != nil {
+ t.Fatalf("can't commit the transaction: %v", err)
+ }
+ if err := txB.Commit(); verror.ErrorID(err) != store.ErrConcurrentTransaction.ID {
+ t.Fatalf("unexpected commit error: %v", err)
+ }
+ if value, _ := st.Get([]byte("a"), nil); !bytes.Equal(value, []byte("a")) {
+ t.Fatalf("unexpected value: got %q, want %q", value, "a")
+ }
+ // Test that a read via Scan is invalidated by a concurrent committed write.
+ txA = st.NewTransaction()
+ txB = st.NewTransaction()
+ txA.Scan([]byte("a"), []byte("z"))
+ txB.Scan([]byte("a"), []byte("z"))
+ txA.Put([]byte("aa"), []byte("a"))
+ txB.Put([]byte("bb"), []byte("b"))
+ if err := txA.Commit(); err != nil {
+ t.Fatalf("can't commit the transaction: %v", err)
+ }
+ if err := txB.Commit(); verror.ErrorID(err) != store.ErrConcurrentTransaction.ID {
+ t.Fatalf("unexpected commit error: %v", err)
+ }
+ if value, _ := st.Get([]byte("aa"), nil); !bytes.Equal(value, []byte("a")) {
+ t.Fatalf("unexpected value: got %q, want %q", value, "a")
+ }
+ // Test that both transactions commit when neither writes a key read by the other.
+ txA = st.NewTransaction()
+ txB = st.NewTransaction()
+ txA.Scan([]byte("a"), []byte("b"))
+ txB.Scan([]byte("b"), []byte("c"))
+ txA.Get([]byte("c"), nil)
+ txB.Get([]byte("c"), nil)
+ txA.Put([]byte("a"), []byte("a"))
+ txB.Put([]byte("b"), []byte("b"))
+ if err := txA.Commit(); err != nil {
+ t.Fatalf("can't commit the transaction: %v", err)
+ }
+ if err := txB.Commit(); err != nil {
+ t.Fatalf("can't commit the transaction: %v", err)
+ }
+}
+
+// RunTransactionsWithGetTest tests transactions that use Put and Get
+// operations.
+// NOTE: consider setting GOMAXPROCS to something greater than 1.
+func RunTransactionsWithGetTest(t *testing.T, st store.Store) {
+ // Invariant: the value mapped to key n is the sum of the values mapped to
+ // keys 0..n-1. Each of the k transactions picks m distinct random keys in
+ // 0..n-1, adds 1 to each of their values, and adds m to the value mapped to
+ // n. The correctness of the sums is checked after all transactions have
+ // committed.
+ n, m, k := 10, 3, 100
+ for i := 0; i <= n; i++ {
+ if err := st.Put([]byte(fmt.Sprintf("%05d", i)), []byte{'0'}); err != nil {
+ t.Fatalf("can't write to database")
+ }
+ }
+ var wg sync.WaitGroup
+ wg.Add(k)
+ for i := 0; i < k; i++ {
+ go func(idx int) {
+ rnd := rand.New(rand.NewSource(239017 * int64(idx)))
+ perm := rnd.Perm(n)
+ if err := store.RunInTransaction(st, func(tx store.Transaction) error {
+ for j := 0; j <= m; j++ {
+ var keystr string
+ if j < m {
+ keystr = fmt.Sprintf("%05d", perm[j])
+ } else {
+ keystr = fmt.Sprintf("%05d", n)
+ }
+ key := []byte(keystr)
+ val, err := tx.Get(key, nil)
+ if err != nil {
+ return fmt.Errorf("can't get key %q: %v", key, err)
+ }
+ intValue, err := strconv.ParseInt(string(val), 10, 64)
+ if err != nil {
+ return fmt.Errorf("can't parse int from %q: %v", val, err)
+ }
+ var newValue int64
+ if j < m {
+ newValue = intValue + 1
+ } else {
+ newValue = intValue + int64(m)
+ }
+ if err := tx.Put(key, []byte(fmt.Sprintf("%d", newValue))); err != nil {
+ return fmt.Errorf("can't put {%q: %v}: %v", key, newValue, err)
+ }
+ }
+ return nil
+ }); err != nil {
+ panic(fmt.Errorf("can't commit transaction: %v", err))
+ }
+ wg.Done()
+ }(i)
+ }
+ wg.Wait()
+ var sum int64
+ for j := 0; j <= n; j++ {
+ keystr := fmt.Sprintf("%05d", j)
+ key := []byte(keystr)
+ val, err := st.Get(key, nil)
+ if err != nil {
+ t.Fatalf("can't get key %q: %v", key, err)
+ }
+ intValue, err := strconv.ParseInt(string(val), 10, 64)
+ if err != nil {
+ t.Fatalf("can't parse int from %q: %v", val, err)
+ }
+ if j < n {
+ sum += intValue
+ } else {
+ if intValue != int64(m*k) {
+ t.Fatalf("invalid sum value in the database: got %d, want %d", intValue, m*k)
+ }
+ }
+ }
+ if sum != int64(m*k) {
+ t.Fatalf("invalid sum of values in the database: got %d, want %d", sum, m*k)
+ }
+}
diff --git a/services/syncbase/store/test/util.go b/services/syncbase/store/test/util.go
new file mode 100644
index 0000000..55b886a
--- /dev/null
+++ b/services/syncbase/store/test/util.go
@@ -0,0 +1,79 @@
+// Copyright 2015 The Vanadium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package test
+
+import (
+ "bytes"
+ "runtime/debug"
+ "strings"
+ "testing"
+
+ "v.io/syncbase/x/ref/services/syncbase/store"
+ "v.io/v23/verror"
+)
+
+// verifyGet verifies that st.Get(key) == value. If value is nil, verifies that
+// the key is not found.
+func verifyGet(t *testing.T, st store.StoreReader, key, value []byte) {
+ valbuf := []byte("tmp")
+ var err error
+ if value != nil {
+ if valbuf, err = st.Get(key, valbuf); err != nil {
+ Fatalf(t, "can't get value of %q: %v", key, err)
+ }
+ if !bytes.Equal(valbuf, value) {
+ Fatalf(t, "unexpected value: got %q, want %q", valbuf, value)
+ }
+ } else {
+ valbuf, err = st.Get(key, valbuf)
+ verifyError(t, err, store.ErrUnknownKey.ID, string(key))
+ valcopy := []byte("tmp")
+ // Verify that valbuf is not modified if the key is not found.
+ if !bytes.Equal(valbuf, valcopy) {
+ Fatalf(t, "unexpected value: got %q, want %q", valbuf, valcopy)
+ }
+ }
+}
+
+// verifyAdvance verifies the next key/value pair of the provided stream.
+// If key is nil, verifies that the next Advance call on the stream returns false.
+func verifyAdvance(t *testing.T, s store.Stream, key, value []byte) {
+ ok := s.Advance()
+ if key == nil {
+ if ok {
+ Fatalf(t, "advance returned true unexpectedly")
+ }
+ return
+ }
+ if !ok {
+ Fatalf(t, "can't advance the stream")
+ }
+ var k, v []byte
+ for i := 0; i < 2; i++ {
+ if k = s.Key(k); !bytes.Equal(k, key) {
+ Fatalf(t, "unexpected key: got %q, want %q", k, key)
+ }
+ if v = s.Value(v); !bytes.Equal(v, value) {
+ Fatalf(t, "unexpected value: got %q, want %q", v, value)
+ }
+ }
+}
+
+// verifyError verifies that the given error has the given errorID and that the
+// error string contains the given substr. Pass an empty substr to skip the
+// substr check.
+func verifyError(t *testing.T, err error, errorID verror.ID, substr string) {
+ if got := verror.ErrorID(err); got != errorID {
+ Fatalf(t, "unexpected error ID: got %v, want %v", got, errorID)
+ }
+ if !strings.Contains(err.Error(), substr) {
+ Fatalf(t, "unexpected error: %q not found in %q", substr, err)
+ }
+}
+
+func Fatalf(t *testing.T, format string, args ...interface{}) {
+ debug.PrintStack()
+ t.Fatalf(format, args...)
+}
diff --git a/services/syncbase/store/transactions/manager.go b/services/syncbase/store/transactions/manager.go
new file mode 100644
index 0000000..254812f
--- /dev/null
+++ b/services/syncbase/store/transactions/manager.go
@@ -0,0 +1,194 @@
+// Copyright 2015 The Vanadium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package transactions
+
+import (
+ "container/list"
+ "sync"
+
+ "v.io/syncbase/x/ref/services/syncbase/store"
+ "v.io/v23/verror"
+)
+
+// BatchStore is a CRUD-capable storage engine that supports atomic batch
+// writes. BatchStore doesn't support transactions.
+// This interface is a Go version of the C++ LevelDB interface. It serves as
+// an intermediate layer between store.Store and the LevelDB API.
+type BatchStore interface {
+ store.StoreReader
+
+ // WriteBatch atomically writes a list of write operations to the database.
+ WriteBatch(batch ...WriteOp) error
+
+ // Close closes the store.
+ Close() error
+
+ // NewSnapshot creates a snapshot.
+ NewSnapshot() store.Snapshot
+}
+
+// manager handles transaction-related operations of the store.
+type manager struct {
+ BatchStore
+ // mu protects the variables below, and is also held during transaction
+ // commits. It must always be acquired before the store-level lock.
+ mu sync.Mutex
+ // events is a queue of create/commit transaction events.
+ events *list.List
+ seq uint64
+ // txTable is a set of keys written by recent transactions. This set
+ // includes all write sets of transactions committed after the oldest living
+ // (in-flight) transaction.
+ txTable *trie
+}
+
+// committedTransaction is only used as an element of manager.events.
+type committedTransaction struct {
+ seq uint64
+ batch [][]byte
+}
+
+// Wrap wraps the BatchStore with transaction functionality.
+func Wrap(bs BatchStore) store.Store {
+ return &manager{
+ BatchStore: bs,
+ events: list.New(),
+ txTable: newTrie(),
+ }
+}
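+
+// Illustrative sketch (bs is an assumed BatchStore implementation, e.g. a
+// LevelDB-backed engine; openBatchStore is hypothetical):
+//
+//   bs := openBatchStore()
+//   st := Wrap(bs) // st implements store.Store with transaction support
+//   tx := st.NewTransaction()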
+
+// Close implements the store.Store interface.
+func (mg *manager) Close() error {
+ mg.mu.Lock()
+ defer mg.mu.Unlock()
+ if mg.txTable == nil {
+ return verror.New(verror.ErrCanceled, nil, store.ErrMsgClosedStore)
+ }
+ mg.BatchStore.Close()
+ for event := mg.events.Front(); event != nil; event = event.Next() {
+ if tx, ok := event.Value.(*transaction); ok {
+ // tx.Abort() internally removes tx from the mg.events list under
+ // the mg.mu lock, which is already held here. To break that deadlock,
+ // we set tx.event to nil so that the removal becomes a no-op.
+ tx.mu.Lock()
+ tx.event = nil
+ tx.mu.Unlock()
+ tx.Abort()
+ }
+ }
+ mg.events = nil
+ mg.txTable = nil
+ return nil
+}
+
+// NewTransaction implements the store.Store interface.
+func (mg *manager) NewTransaction() store.Transaction {
+ mg.mu.Lock()
+ defer mg.mu.Unlock()
+ if mg.txTable == nil {
+ return &store.InvalidTransaction{
+ Error: verror.New(verror.ErrCanceled, nil, store.ErrMsgClosedStore),
+ }
+ }
+ return newTransaction(mg)
+}
+
+// Put implements the store.StoreWriter interface.
+func (mg *manager) Put(key, value []byte) error {
+ mg.mu.Lock()
+ defer mg.mu.Unlock()
+ if mg.txTable == nil {
+ return verror.New(verror.ErrCanceled, nil, store.ErrMsgClosedStore)
+ }
+ write := WriteOp{
+ T: PutOp,
+ Key: key,
+ Value: value,
+ }
+ if err := mg.BatchStore.WriteBatch(write); err != nil {
+ return err
+ }
+ mg.trackBatch(write)
+ return nil
+}
+
+// Delete implements the store.StoreWriter interface.
+func (mg *manager) Delete(key []byte) error {
+ mg.mu.Lock()
+ defer mg.mu.Unlock()
+ if mg.txTable == nil {
+ return verror.New(verror.ErrCanceled, nil, store.ErrMsgClosedStore)
+ }
+ write := WriteOp{
+ T: DeleteOp,
+ Key: key,
+ }
+ if err := mg.BatchStore.WriteBatch(write); err != nil {
+ return err
+ }
+ mg.trackBatch(write)
+ return nil
+}
+
+// trackBatch writes the batch to txTable and adds a commit event to
+// the events queue.
+// Assumes mu is held.
+func (mg *manager) trackBatch(batch ...WriteOp) {
+ if mg.events.Len() == 0 {
+ return
+ }
+ // TODO(rogulenko): do GC.
+ mg.seq++
+ var keys [][]byte
+ for _, write := range batch {
+ mg.txTable.add(write.Key, mg.seq)
+ keys = append(keys, write.Key)
+ }
+ tx := &committedTransaction{
+ seq: mg.seq,
+ batch: keys,
+ }
+ mg.events.PushBack(tx)
+}
+
+//////////////////////////////////////////////////////////////
+// Read and Write types used for storing transaction reads
+// and uncommitted writes.
+
+type WriteType int
+
+const (
+ PutOp WriteType = iota
+ DeleteOp
+)
+
+type WriteOp struct {
+ T WriteType
+ Key []byte
+ Value []byte
+}
+
+type scanRange struct {
+ Start, Limit []byte
+}
+
+type readSet struct {
+ Keys [][]byte
+ Ranges []scanRange
+}
+
+type writeOpArray []WriteOp
+
+func (a writeOpArray) Len() int {
+ return len(a)
+}
+
+func (a writeOpArray) Less(i, j int) bool {
+ return string(a[i].Key) < string(a[j].Key)
+}
+
+func (a writeOpArray) Swap(i, j int) {
+ a[i], a[j] = a[j], a[i]
+}
diff --git a/services/syncbase/store/transactions/merged_stream.go b/services/syncbase/store/transactions/merged_stream.go
new file mode 100644
index 0000000..4ab10be
--- /dev/null
+++ b/services/syncbase/store/transactions/merged_stream.go
@@ -0,0 +1,149 @@
+// Copyright 2015 The Vanadium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package transactions
+
+import (
+ "sort"
+
+ "v.io/syncbase/x/ref/services/syncbase/store"
+)
+
+//////////////////////////////////////////////////////////////
+// mergedStream implementation of Stream
+//
+// This implementation of Stream must overlay the transaction's own
+// uncommitted writes, performed after its snapshot was taken, on top of
+// the snapshot stream.
+//
+// The mergeWritesWithStream() function requires the uncommitted
+// changes to be passed in as an array of WriteOp.
+
+// mergeWritesWithStream returns a new stream that merges a snapshot stream
+// with an array of uncommitted write operations.
+func mergeWritesWithStream(sn store.Snapshot, w []WriteOp, start, limit []byte) store.Stream {
+ // Collect writes with the range specified, then sort them.
+ // Note: Writes could contain more than one write for a given key.
+ // The last write is the current state.
+ writesMap := map[string]WriteOp{}
+ for _, write := range w {
+ if string(write.Key) >= string(start) && (string(limit) == "" || string(write.Key) < string(limit)) {
+ writesMap[string(write.Key)] = write
+ }
+ }
+ var writesArray writeOpArray
+ for _, writeOp := range writesMap {
+ writesArray = append(writesArray, writeOp)
+ }
+ sort.Sort(writesArray)
+ return &mergedStream{
+ snapshotStream: sn.Scan(start, limit),
+ writesArray: writesArray,
+ writesCursor: 0,
+ unusedSnapshotValue: false,
+ snapshotStreamEOF: false,
+ hasValue: false,
+ }
+}
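+
+// Worked example (sketch): if the snapshot contains {a:1, b:2, c:3} and the
+// uncommitted writes are [Put(b,20), Delete(c), Put(d,4)], then scanning
+// ["a","z") through the merged stream yields a:1, b:20, d:4. The put on "b"
+// overrides the snapshot value, the delete hides "c", and "d" is added.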
+
+type mergedStream struct {
+ snapshotStream store.Stream
+ writesArray []WriteOp
+ writesCursor int
+ unusedSnapshotValue bool
+ snapshotStreamEOF bool
+ hasValue bool // if true, Key() and Value() can be called
+ key []byte
+ value []byte
+}
+
+// writesArrayEOF reports whether the cursor has moved past the end of
+// writesArray.
+func (s *mergedStream) writesArrayEOF() bool {
+ return s.writesCursor >= len(s.writesArray)
+}
+
+// stageSnapshotKeyValue ensures a key-value pair from the snapshot stream is
+// staged: if none is on deck, it calls Advance on the snapshot stream and sets
+// unusedSnapshotValue. If EOF is encountered, it sets snapshotStreamEOF; if an
+// error is encountered, it returns it.
+func (s *mergedStream) stageSnapshotKeyValue() error {
+ if !s.snapshotStreamEOF && !s.unusedSnapshotValue {
+ if !s.snapshotStream.Advance() {
+ s.snapshotStreamEOF = true
+ if err := s.snapshotStream.Err(); err != nil {
+ return err
+ }
+ }
+ s.unusedSnapshotValue = true
+ }
+ return nil
+}
+
+// pickKeyValue stages the next key-value pair from either the snapshot stream
+// or the uncommitted writes array and reports whether a pair was staged; an
+// uncommitted write with the same key as the current snapshot entry overrides
+// that entry. If the picked write is a delete, it is skipped (the cursor
+// advances, and the snapshot entry with the same key, if any, is consumed)
+// and false is returned so that Advance can retry.
+func (s *mergedStream) pickKeyValue() bool {
+ if !s.snapshotStreamEOF && (s.writesArrayEOF() || string(s.writesArray[s.writesCursor].Key) > string(s.snapshotStream.Key(nil))) {
+ s.key = s.snapshotStream.Key(s.key)
+ s.value = s.snapshotStream.Value(s.value)
+ s.unusedSnapshotValue = false
+ return true
+ }
+ if !s.snapshotStreamEOF && string(s.writesArray[s.writesCursor].Key) == string(s.snapshotStream.Key(nil)) {
+ s.unusedSnapshotValue = false
+ }
+ if s.writesArrayEOF() || s.writesArray[s.writesCursor].T == DeleteOp {
+ s.writesCursor++
+ return false
+ }
+ s.key = store.CopyBytes(s.key, s.writesArray[s.writesCursor].Key)
+ s.value = store.CopyBytes(s.value, s.writesArray[s.writesCursor].Value)
+ s.writesCursor++
+ return true
+}
+
+func (s *mergedStream) Advance() bool {
+ s.hasValue = false
+ for true {
+ if err := s.stageSnapshotKeyValue(); err != nil {
+ return false
+ }
+ if s.snapshotStreamEOF && s.writesArrayEOF() {
+ return false
+ }
+ if s.pickKeyValue() {
+ s.hasValue = true
+ return true
+ }
+ }
+ return false // compiler insists on this line
+}
+
+// Key implements the Stream interface.
+func (s *mergedStream) Key(keybuf []byte) []byte {
+ if !s.hasValue {
+ panic("nothing staged")
+ }
+ return store.CopyBytes(keybuf, s.key)
+}
+
+// Value implements the Stream interface.
+func (s *mergedStream) Value(valbuf []byte) []byte {
+ if !s.hasValue {
+ panic("nothing staged")
+ }
+ return store.CopyBytes(valbuf, s.value)
+}
+
+// Err implements the Stream interface.
+func (s *mergedStream) Err() error {
+ return s.snapshotStream.Err()
+}
+
+// Cancel implements the Stream interface.
+func (s *mergedStream) Cancel() {
+ s.snapshotStream.Cancel()
+ s.hasValue = false
+ s.snapshotStreamEOF = true
+ s.writesCursor = len(s.writesArray)
+}
diff --git a/services/syncbase/store/transactions/transaction.go b/services/syncbase/store/transactions/transaction.go
new file mode 100644
index 0000000..dcf7569
--- /dev/null
+++ b/services/syncbase/store/transactions/transaction.go
@@ -0,0 +1,193 @@
+// Copyright 2015 The Vanadium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package transactions
+
+import (
+ "bytes"
+ "container/list"
+ "sync"
+
+ "v.io/syncbase/x/ref/services/syncbase/store"
+ "v.io/v23/verror"
+ "v.io/x/lib/vlog"
+)
+
+// transaction is a wrapper on top of a BatchStore and a store.Snapshot that
+// implements the store.Transaction interface.
+type transaction struct {
+ // mu protects the state of the transaction.
+ mu sync.Mutex
+ mg *manager
+ seq uint64
+ event *list.Element // pointer to element of mg.events
+ snapshot store.Snapshot
+ reads readSet
+ writes []WriteOp
+ err error
+}
+
+var _ store.Transaction = (*transaction)(nil)
+
+func newTransaction(mg *manager) *transaction {
+ tx := &transaction{
+ mg: mg,
+ snapshot: mg.BatchStore.NewSnapshot(),
+ seq: mg.seq,
+ }
+ tx.event = mg.events.PushFront(tx)
+ return tx
+}
+
+// close removes this transaction from the mg.events queue and aborts
+// the underlying snapshot.
+// Assumes mu is held.
+func (tx *transaction) close() {
+ tx.removeEvent()
+ tx.snapshot.Abort()
+}
+
+// removeEvent removes this transaction from the mg.events queue.
+// Assumes mu is held.
+func (tx *transaction) removeEvent() {
+ // tx.event may already be nil, e.g. if the transaction was committed
+ // (Commit() explicitly calls removeEvent) or if the store was closed.
+ if tx.event == nil {
+ return
+ }
+ tx.mg.mu.Lock()
+ tx.mg.events.Remove(tx.event)
+ tx.mg.mu.Unlock()
+ tx.event = nil
+}
+
+// Get implements the store.StoreReader interface.
+func (tx *transaction) Get(key, valbuf []byte) ([]byte, error) {
+ tx.mu.Lock()
+ defer tx.mu.Unlock()
+ if tx.err != nil {
+ return valbuf, store.ConvertError(tx.err)
+ }
+ tx.reads.Keys = append(tx.reads.Keys, key)
+
+ // Reflect the state of the transaction: the "writes" (puts and
+ // deletes) override the values in the transaction snapshot.
+ // Find the last "writes" entry for this key, if one exists.
+ // Note: this step could be optimized by using maps (puts and
+ // deletes) instead of an array.
+ for i := len(tx.writes) - 1; i >= 0; i-- {
+ op := &tx.writes[i]
+ if bytes.Equal(op.Key, key) {
+ if op.T == PutOp {
+ return op.Value, nil
+ }
+ return valbuf, verror.New(store.ErrUnknownKey, nil, string(key))
+ }
+ }
+
+ return tx.snapshot.Get(key, valbuf)
+}
+
+// Scan implements the store.StoreReader interface.
+func (tx *transaction) Scan(start, limit []byte) store.Stream {
+ tx.mu.Lock()
+ defer tx.mu.Unlock()
+ if tx.err != nil {
+ return &store.InvalidStream{Error: tx.err}
+ }
+
+ tx.reads.Ranges = append(tx.reads.Ranges, scanRange{
+ Start: start,
+ Limit: limit,
+ })
+
+ // Return a stream that merges the snapshot stream with the uncommitted changes.
+ return mergeWritesWithStream(tx.snapshot, tx.writes, start, limit)
+}
+
+// Put implements the store.StoreWriter interface.
+func (tx *transaction) Put(key, value []byte) error {
+ tx.mu.Lock()
+ defer tx.mu.Unlock()
+ if tx.err != nil {
+ return store.ConvertError(tx.err)
+ }
+ tx.writes = append(tx.writes, WriteOp{
+ T: PutOp,
+ Key: key,
+ Value: value,
+ })
+ return nil
+}
+
+// Delete implements the store.StoreWriter interface.
+func (tx *transaction) Delete(key []byte) error {
+ tx.mu.Lock()
+ defer tx.mu.Unlock()
+ if tx.err != nil {
+ return store.ConvertError(tx.err)
+ }
+ tx.writes = append(tx.writes, WriteOp{
+ T: DeleteOp,
+ Key: key,
+ })
+ return nil
+}
+
+// validateReadSet returns true iff the read set of this transaction has not
+// been invalidated by other transactions.
+// Assumes tx.mg.mu is held.
+func (tx *transaction) validateReadSet() bool {
+ for _, key := range tx.reads.Keys {
+ if tx.mg.txTable.get(key) > tx.seq {
+ vlog.VI(3).Infof("key conflict: %q", key)
+ return false
+ }
+ }
+ for _, r := range tx.reads.Ranges {
+ if tx.mg.txTable.rangeMax(r.Start, r.Limit) > tx.seq {
+ vlog.VI(3).Infof("range conflict: {%q, %q}", r.Start, r.Limit)
+ return false
+ }
+
+ }
+ return true
+}
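+
+// Worked example (sketch): suppose tx started at seq 5 and read key "a", and a
+// concurrent transaction later committed a write to "a", so trackBatch
+// recorded that key in txTable at seq 6. At commit time txTable.get("a") == 6,
+// which is greater than 5, so validateReadSet returns false and Commit()
+// returns ErrConcurrentTransaction.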
+
+// Commit implements the store.Transaction interface.
+func (tx *transaction) Commit() error {
+ tx.mu.Lock()
+ defer tx.mu.Unlock()
+ if tx.err != nil {
+ return store.ConvertError(tx.err)
+ }
+ tx.err = verror.New(verror.ErrBadState, nil, store.ErrMsgCommittedTxn)
+ // Explicitly remove this transaction from the event queue. If this was the
+ // only active transaction, the event queue becomes empty and trackBatch will
+ // not add this transaction's write set to txTable.
+ tx.removeEvent()
+ defer tx.close()
+ tx.mg.mu.Lock()
+ defer tx.mg.mu.Unlock()
+ if !tx.validateReadSet() {
+ return store.NewErrConcurrentTransaction(nil)
+ }
+ if err := tx.mg.BatchStore.WriteBatch(tx.writes...); err != nil {
+ return err
+ }
+ tx.mg.trackBatch(tx.writes...)
+ return nil
+}
+
+// Abort implements the store.Transaction interface.
+func (tx *transaction) Abort() error {
+ tx.mu.Lock()
+ defer tx.mu.Unlock()
+ if tx.err != nil {
+ return store.ConvertError(tx.err)
+ }
+ tx.err = verror.New(verror.ErrCanceled, nil, store.ErrMsgAbortedTxn)
+ tx.close()
+ return nil
+}
diff --git a/services/syncbase/store/transactions/trie.go b/services/syncbase/store/transactions/trie.go
new file mode 100644
index 0000000..51a99a3
--- /dev/null
+++ b/services/syncbase/store/transactions/trie.go
@@ -0,0 +1,75 @@
+// Copyright 2015 The Vanadium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package transactions
+
+import (
+ "fmt"
+)
+
+// trie is an in-memory data structure that keeps track of recently-written
+// keys, and exposes an interface for asking when a key or key range was most
+// recently written to. It is used to check whether the read set of a
+// transaction pending commit is still valid. The transaction can be committed
+// iff its read set is valid.
+// TODO(rogulenko): replace this dummy implementation with an actual trie.
+type trie struct {
+ seqs map[string]uint64
+}
+
+func newTrie() *trie {
+ return &trie{
+ seqs: make(map[string]uint64),
+ }
+}
+
+// add updates the given key to the given seq, which must not be less than the
+// current seq for that key (if one exists). Seqs of subsequent calls must be
+// in ascending order.
+func (t *trie) add(key []byte, seq uint64) {
+ keystr := string(key)
+ if oldSeq, ok := t.seqs[keystr]; ok && seq < oldSeq {
+ panic(fmt.Sprintf("seq for key %q should be at least %d, but got %d", key, oldSeq, seq))
+ }
+ t.seqs[keystr] = seq
+}
+
+// remove reverts effect of add(key, seq).
+// Seqs of subsequent calls must be in ascending order.
+func (t *trie) remove(key []byte, seq uint64) {
+ keystr := string(key)
+ oldSeq, ok := t.seqs[keystr]
+ if !ok {
+ panic(fmt.Sprintf("key %q was not found", key))
+ }
+ if oldSeq > seq {
+ return
+ } else if oldSeq == seq {
+ delete(t.seqs, keystr)
+ } else {
+ panic(fmt.Sprintf("seq for key %q is too big: got %v, want %v", keystr, seq, oldSeq))
+ }
+}
+
+// get returns the seq associated with the given key.
+func (t *trie) get(key []byte) uint64 {
+ keystr := string(key)
+ if seq, ok := t.seqs[keystr]; ok {
+ return seq
+ }
+ return 0
+}
+
+// rangeMax returns the max seq associated with keys in range
+// [start, limit). Empty limit means no limit.
+func (t *trie) rangeMax(start, limit []byte) uint64 {
+ var result uint64 = 0
+ s, e := string(start), string(limit)
+ for key, seq := range t.seqs {
+ if key >= s && (e == "" || key < e) && seq > result {
+ result = seq
+ }
+ }
+ return result
+}
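+
+// Illustrative sketch of the intended semantics (t is a fresh trie):
+//
+//   t.add([]byte("bar"), 3)
+//   t.add([]byte("baz"), 5)
+//   t.get([]byte("bar"))                  // 3
+//   t.get([]byte("qux"))                  // 0 (never written)
+//   t.rangeMax([]byte("b"), []byte("c"))  // 5
+//   t.rangeMax([]byte("a"), nil)          // 5 (empty limit means no limit)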
diff --git a/services/syncbase/store/util.go b/services/syncbase/store/util.go
new file mode 100644
index 0000000..19695ed
--- /dev/null
+++ b/services/syncbase/store/util.go
@@ -0,0 +1,61 @@
+// Copyright 2015 The Vanadium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package store
+
+import (
+ "v.io/v23/verror"
+)
+
+type SnapshotSpecImpl struct{}
+
+func (s *SnapshotSpecImpl) __snapshotSpec() {}
+
+// RunInTransaction runs the given fn in a transaction, managing retries and
+// commit/abort.
+func RunInTransaction(st Store, fn func(tx Transaction) error) error {
+ // TODO(rogulenko): Make the number of attempts configurable.
+ // TODO(rogulenko): Change the default number of attempts to 3. Currently,
+ // some storage engine tests fail when the number of attempts is that low.
+ var err error
+ for i := 0; i < 100; i++ {
+ // TODO(sadovsky): Should NewTransaction return an error? If not, how will
+ // we deal with RPC errors when talking to remote storage engines? (Note,
+ // client-side BeginBatch returns an error.)
+ tx := st.NewTransaction()
+ if err = fn(tx); err != nil {
+ tx.Abort()
+ return err
+ }
+ // TODO(sadovsky): Commit() can fail for a number of reasons, e.g. RPC
+ // failure or ErrConcurrentTransaction. Depending on the cause of failure,
+ // it may be desirable to retry the Commit() and/or to call Abort().
+ if err = tx.Commit(); verror.ErrorID(err) != ErrConcurrentTransaction.ID {
+ return err
+ }
+ }
+ return err
+}
+
+// CopyBytes copies elements from a source slice into a destination slice.
+// The returned slice may be a sub-slice of dst if dst was large enough to hold
+// src. Otherwise, a newly allocated slice will be returned.
+// TODO(rogulenko): add some tests.
+func CopyBytes(dst, src []byte) []byte {
+ if cap(dst) < len(src) {
+ newlen := cap(dst)*2 + 2
+ if newlen < len(src) {
+ newlen = len(src)
+ }
+ dst = make([]byte, newlen)
+ }
+ dst = dst[:len(src)]
+ copy(dst, src)
+ return dst
+}
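+
+// Illustrative sketch of the reuse semantics (a dst with sufficient capacity
+// is reused, otherwise a new slice is allocated):
+//
+//   buf := make([]byte, 0, 64)
+//   buf = CopyBytes(buf, []byte("abc"))     // reuses buf's backing array
+//   buf = CopyBytes(buf, make([]byte, 128)) // allocates a new, larger slice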
+
+// ConvertError returns a copy of the verror, appending the current stack to it.
+func ConvertError(err error) error {
+ return verror.Convert(verror.IDAction{}, nil, err)
+}
diff --git a/services/syncbase/syncbased/main.go b/services/syncbase/syncbased/main.go
new file mode 100644
index 0000000..8def540
--- /dev/null
+++ b/services/syncbase/syncbased/main.go
@@ -0,0 +1,81 @@
+// Copyright 2015 The Vanadium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package main
+
+import (
+ "flag"
+
+ "v.io/syncbase/x/ref/services/syncbase/server"
+ "v.io/v23"
+ "v.io/v23/context"
+ "v.io/v23/rpc"
+ "v.io/v23/security"
+ "v.io/v23/security/access"
+ "v.io/x/lib/vlog"
+ "v.io/x/ref/lib/security/securityflag"
+ _ "v.io/x/ref/runtime/factories/roaming"
+)
+
+var (
+ name = flag.String("name", "", "Name to mount at.")
+ rootDir = flag.String("root-dir", "/var/lib/syncbase", "Root dir for storage engines and other data")
+ engine = flag.String("engine", "leveldb", "Storage engine to use. Currently supported: memstore and leveldb.")
+)
+
+// defaultPerms returns a permissions object that grants all permissions to the
+// provided blessing patterns.
+func defaultPerms(blessingPatterns []security.BlessingPattern) access.Permissions {
+ perms := access.Permissions{}
+ for _, tag := range access.AllTypicalTags() {
+ for _, bp := range blessingPatterns {
+ perms.Add(bp, string(tag))
+ }
+ }
+ return perms
+}
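+
+// Illustrative sketch: defaultPerms([]security.BlessingPattern{"root/alice"})
+// returns a Permissions map that lists "root/alice" under every typical tag
+// (e.g. Admin, Read, Write).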
+
+// TODO(sadovsky): We return rpc.Server and rpc.Dispatcher as a quick hack to
+// support Mojo.
+func Serve(ctx *context.T) (rpc.Server, rpc.Dispatcher) {
+ s, err := v23.NewServer(ctx)
+ if err != nil {
+ vlog.Fatal("v23.NewServer() failed: ", err)
+ }
+ if _, err := s.Listen(v23.GetListenSpec(ctx)); err != nil {
+ vlog.Fatal("s.Listen() failed: ", err)
+ }
+
+ perms, err := securityflag.PermissionsFromFlag()
+ if err != nil {
+ vlog.Fatal("securityflag.PermissionsFromFlag() failed: ", err)
+ }
+ if perms != nil {
+ vlog.Info("Using perms from command line flag.")
+ } else {
+ vlog.Info("Perms flag not set. Giving local principal all perms.")
+ perms = defaultPerms(security.DefaultBlessingPatterns(v23.GetPrincipal(ctx)))
+ }
+ vlog.Infof("Perms: %v", perms)
+ service, err := server.NewService(ctx, nil, server.ServiceOptions{
+ Perms: perms,
+ RootDir: *rootDir,
+ Engine: *engine,
+ Server: s,
+ })
+ if err != nil {
+ vlog.Fatal("server.NewService() failed: ", err)
+ }
+ d := server.NewDispatcher(service)
+
+ // Publish the service in the mount table.
+ if err := s.ServeDispatcher(*name, d); err != nil {
+ vlog.Fatal("s.ServeDispatcher() failed: ", err)
+ }
+ if *name != "" {
+ vlog.Info("Mounted at: ", *name)
+ }
+
+ return s, d
+}
diff --git a/services/syncbase/syncbased/mojo_main.go b/services/syncbase/syncbased/mojo_main.go
new file mode 100644
index 0000000..74a247f
--- /dev/null
+++ b/services/syncbase/syncbased/mojo_main.go
@@ -0,0 +1,79 @@
+// Copyright 2015 The Vanadium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// +build mojo
+
+package main
+
+// To build:
+// cd $V23_ROOT/experimental/projects/ether
+// make build
+
+import (
+ "log"
+
+ "mojo/public/go/application"
+ "mojo/public/go/bindings"
+ "mojo/public/go/system"
+
+ "mojom/syncbase"
+
+ "v.io/syncbase/x/ref/services/syncbase/server"
+ "v.io/v23"
+ "v.io/v23/context"
+ "v.io/v23/rpc"
+)
+
+//#include "mojo/public/c/system/types.h"
+import "C"
+
+type delegate struct {
+ ctx *context.T
+ srv rpc.Server
+ disp rpc.Dispatcher
+ stubs []*bindings.Stub
+}
+
+func (d *delegate) Initialize(ctx application.Context) {
+ d.srv, d.disp = Serve(d.ctx)
+}
+
+func (d *delegate) Create(req syncbase.Syncbase_Request) {
+ impl := server.NewMojoImpl(d.ctx, d.srv, d.disp)
+ stub := syncbase.NewSyncbaseStub(req, impl, bindings.GetAsyncWaiter())
+ d.stubs = append(d.stubs, stub)
+ go func() {
+ for {
+ if err := stub.ServeRequest(); err != nil {
+ connErr, ok := err.(*bindings.ConnectionError)
+ if !ok || !connErr.Closed() {
+ log.Println(err)
+ }
+ break
+ }
+ }
+ }()
+}
+
+func (d *delegate) AcceptConnection(conn *application.Connection) {
+ conn.ProvideServices(&syncbase.Syncbase_ServiceFactory{d})
+}
+
+func (d *delegate) Quit() {
+ for _, stub := range d.stubs {
+ stub.Close()
+ }
+}
+
+//export MojoMain
+func MojoMain(handle C.MojoHandle) C.MojoResult {
+ ctx, shutdown := v23.Init()
+ defer shutdown()
+ application.Run(&delegate{ctx: ctx}, system.MojoHandle(handle))
+ return C.MOJO_RESULT_OK
+}
+
+// NOTE(nlacasse): Mojo runs Go code by calling MojoMain(). The main() method
+// below is still needed because the Go tool won't build without it.
+func main() {}
diff --git a/services/syncbase/syncbased/v23_main.go b/services/syncbase/syncbased/v23_main.go
new file mode 100644
index 0000000..1651dc1
--- /dev/null
+++ b/services/syncbase/syncbased/v23_main.go
@@ -0,0 +1,23 @@
+// Copyright 2015 The Vanadium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// +build !mojo
+
+// syncbased is a syncbase daemon.
+package main
+
+// Example invocation:
+// syncbased --veyron.tcp.address="127.0.0.1:0" --name=syncbased
+
+import (
+ "v.io/v23"
+ "v.io/x/ref/lib/signals"
+)
+
+func main() {
+ ctx, shutdown := v23.Init()
+ defer shutdown()
+ Serve(ctx)
+ <-signals.ShutdownOnSignals(ctx)
+}
diff --git a/services/syncbase/vsync/blob.go b/services/syncbase/vsync/blob.go
new file mode 100644
index 0000000..ff04066
--- /dev/null
+++ b/services/syncbase/vsync/blob.go
@@ -0,0 +1,429 @@
+// Copyright 2015 The Vanadium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package vsync
+
+import (
+ "io"
+
+ wire "v.io/syncbase/v23/services/syncbase/nosql"
+ blob "v.io/syncbase/x/ref/services/syncbase/localblobstore"
+ "v.io/syncbase/x/ref/services/syncbase/server/interfaces"
+ "v.io/syncbase/x/ref/services/syncbase/server/util"
+ "v.io/v23/context"
+ "v.io/v23/naming"
+ "v.io/v23/rpc"
+ "v.io/v23/verror"
+ "v.io/x/lib/vlog"
+)
+
+const (
+ chunkSize = 8 * 1024
+)
+
+// blobLocInfo contains the location information about a BlobRef. This location
+// information is merely a hint used to search for the blob.
+type blobLocInfo struct {
+ peer string // Syncbase from which the presence of this BlobRef was first learned.
+ source string // Syncbase that originated this blob.
+ sgIds map[interfaces.GroupId]struct{} // SyncGroups through which the BlobRef was learned.
+}
+
+////////////////////////////////////////////////////////////
+// RPCs for managing blobs between Syncbase and its clients.
+
+func (sd *syncDatabase) CreateBlob(ctx *context.T, call rpc.ServerCall) (wire.BlobRef, error) {
+ vlog.VI(2).Infof("sync: CreateBlob: begin")
+ defer vlog.VI(2).Infof("sync: CreateBlob: end")
+
+ // Get this Syncbase's blob store handle.
+ ss := sd.sync.(*syncService)
+ bst := ss.bst
+
+ writer, err := bst.NewBlobWriter(ctx, "")
+ if err != nil {
+ return wire.NullBlobRef, err
+ }
+ defer writer.CloseWithoutFinalize()
+
+ name := writer.Name()
+ vlog.VI(4).Infof("sync: CreateBlob: blob ref %s", name)
+ return wire.BlobRef(name), nil
+}
+
+func (sd *syncDatabase) PutBlob(ctx *context.T, call wire.BlobManagerPutBlobServerCall, br wire.BlobRef) error {
+ vlog.VI(2).Infof("sync: PutBlob: begin br %v", br)
+ defer vlog.VI(2).Infof("sync: PutBlob: end br %v", br)
+
+ // Get this Syncbase's blob store handle.
+ ss := sd.sync.(*syncService)
+ bst := ss.bst
+
+ writer, err := bst.ResumeBlobWriter(ctx, string(br))
+ if err != nil {
+ return err
+ }
+ defer writer.CloseWithoutFinalize()
+
+ stream := call.RecvStream()
+ for stream.Advance() {
+ item := blob.BlockOrFile{Block: stream.Value()}
+ if err = writer.AppendFragment(item); err != nil {
+ return err
+ }
+ }
+ return stream.Err()
+}
+
+func (sd *syncDatabase) CommitBlob(ctx *context.T, call rpc.ServerCall, br wire.BlobRef) error {
+ vlog.VI(2).Infof("sync: CommitBlob: begin br %v", br)
+ defer vlog.VI(2).Infof("sync: CommitBlob: end br %v", br)
+
+ // Get this Syncbase's blob store handle.
+ ss := sd.sync.(*syncService)
+ bst := ss.bst
+
+ writer, err := bst.ResumeBlobWriter(ctx, string(br))
+ if err != nil {
+ return err
+ }
+ return writer.Close()
+}
+
+func (sd *syncDatabase) GetBlobSize(ctx *context.T, call rpc.ServerCall, br wire.BlobRef) (int64, error) {
+ vlog.VI(2).Infof("sync: GetBlobSize: begin br %v", br)
+ defer vlog.VI(2).Infof("sync: GetBlobSize: end br %v", br)
+
+ // Get this Syncbase's blob store handle.
+ ss := sd.sync.(*syncService)
+ bst := ss.bst
+
+ reader, err := bst.NewBlobReader(ctx, string(br))
+ if err != nil {
+ return 0, err
+ }
+ defer reader.Close()
+
+ return reader.Size(), nil
+}
+
+func (sd *syncDatabase) DeleteBlob(ctx *context.T, call rpc.ServerCall, br wire.BlobRef) error {
+ return verror.NewErrNotImplemented(ctx)
+}
+
+func (sd *syncDatabase) GetBlob(ctx *context.T, call wire.BlobManagerGetBlobServerCall, br wire.BlobRef, offset int64) error {
+ vlog.VI(2).Infof("sync: GetBlob: begin br %v", br)
+ defer vlog.VI(2).Infof("sync: GetBlob: end br %v", br)
+
+ // First get the blob locally if available.
+ ss := sd.sync.(*syncService)
+ err := getLocalBlob(ctx, call.SendStream(), ss.bst, br, offset)
+ if err == nil || verror.ErrorID(err) == wire.ErrBlobNotCommitted.ID {
+ return err
+ }
+
+ return sd.fetchBlobRemote(ctx, br, nil, call, offset)
+}
+
+func (sd *syncDatabase) FetchBlob(ctx *context.T, call wire.BlobManagerFetchBlobServerCall, br wire.BlobRef, priority uint64) error {
+ vlog.VI(2).Infof("sync: FetchBlob: begin br %v", br)
+ defer vlog.VI(2).Infof("sync: FetchBlob: end br %v", br)
+
+ clientStream := call.SendStream()
+
+ // Check if BlobRef already exists locally.
+ ss := sd.sync.(*syncService)
+ bst := ss.bst
+
+ bReader, err := bst.NewBlobReader(ctx, string(br))
+ if err == nil {
+ finalized := bReader.IsFinalized()
+ bReader.Close()
+
+ if !finalized {
+ return wire.NewErrBlobNotCommitted(ctx)
+ }
+ clientStream.Send(wire.BlobFetchStatus{State: wire.BlobFetchStateDone})
+ return nil
+ }
+
+ // Wait for this blob's turn.
+ // TODO(hpucha): Implement a blob queue.
+ clientStream.Send(wire.BlobFetchStatus{State: wire.BlobFetchStatePending})
+
+ return sd.fetchBlobRemote(ctx, br, call, nil, 0)
+}
+
+func (sd *syncDatabase) PinBlob(ctx *context.T, call rpc.ServerCall, br wire.BlobRef) error {
+ return verror.NewErrNotImplemented(ctx)
+}
+
+func (sd *syncDatabase) UnpinBlob(ctx *context.T, call rpc.ServerCall, br wire.BlobRef) error {
+ return verror.NewErrNotImplemented(ctx)
+}
+
+func (sd *syncDatabase) KeepBlob(ctx *context.T, call rpc.ServerCall, br wire.BlobRef, rank uint64) error {
+ return verror.NewErrNotImplemented(ctx)
+}
+
+////////////////////////////////////////////////////////////
+// RPC for blob fetch between Syncbases.
+
+func (s *syncService) FetchBlob(ctx *context.T, call interfaces.SyncFetchBlobServerCall, br wire.BlobRef) error {
+ vlog.VI(2).Infof("sync: FetchBlob: sb-sb begin br %v", br)
+ defer vlog.VI(2).Infof("sync: FetchBlob: sb-sb end br %v", br)
+ return getLocalBlob(ctx, call.SendStream(), s.bst, br, 0)
+}
+
+func (s *syncService) HaveBlob(ctx *context.T, call rpc.ServerCall, br wire.BlobRef) (int64, error) {
+ vlog.VI(2).Infof("sync: HaveBlob: begin br %v", br)
+ defer vlog.VI(2).Infof("sync: HaveBlob: end br %v", br)
+
+ bReader, err := s.bst.NewBlobReader(ctx, string(br))
+ if err != nil {
+ return 0, err
+ }
+ defer bReader.Close()
+ if !bReader.IsFinalized() {
+ return 0, wire.NewErrBlobNotCommitted(ctx)
+ }
+ return bReader.Size(), nil
+}
+
+func (s *syncService) FetchBlobRecipe(ctx *context.T, call interfaces.SyncFetchBlobRecipeServerCall, br wire.BlobRef) error {
+ return verror.NewErrNotImplemented(ctx)
+}
+
+func (s *syncService) FetchChunks(ctx *context.T, call interfaces.SyncFetchChunksServerCall) error {
+ return verror.NewErrNotImplemented(ctx)
+}
+
+////////////////////////////////////////////////////////////
+// Helpers.
+
+type byteStream interface {
+ Send(item []byte) error
+}
+
+// getLocalBlob looks for a blob in the local store and, if found, reads it
+// starting at the given offset and sends its bytes into the client stream.
+func getLocalBlob(ctx *context.T, stream byteStream, bst blob.BlobStore, br wire.BlobRef, offset int64) error {
+ vlog.VI(4).Infof("sync: getLocalBlob: begin br %v, offset %v", br, offset)
+ defer vlog.VI(4).Infof("sync: getLocalBlob: end br %v, offset %v", br, offset)
+
+ reader, err := bst.NewBlobReader(ctx, string(br))
+ if err != nil {
+ return err
+ }
+ defer reader.Close()
+
+ if !reader.IsFinalized() {
+ return wire.NewErrBlobNotCommitted(ctx)
+ }
+
+ buf := make([]byte, chunkSize)
+ for {
+ nbytes, err := reader.ReadAt(buf, offset)
+ if err != nil && err != io.EOF {
+ return err
+ }
+ if nbytes <= 0 {
+ break
+ }
+ offset += int64(nbytes)
+ stream.Send(buf[:nbytes])
+ if err == io.EOF {
+ break
+ }
+ }
+
+ return nil
+}
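+
+// Illustrative sketch, not part of the sync service: a minimal in-memory
+// byteStream that could be used, for example in a test, to capture the bytes
+// emitted by getLocalBlob. The bufferStream name is hypothetical. Each chunk
+// is copied because getLocalBlob reuses its read buffer across iterations.
+type bufferStream struct {
+ data []byte
+}
+
+func (b *bufferStream) Send(item []byte) error {
+ b.data = append(b.data, item...)
+ return nil
+}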
+
+func (sd *syncDatabase) fetchBlobRemote(ctx *context.T, br wire.BlobRef, statusCall wire.BlobManagerFetchBlobServerCall, dataCall wire.BlobManagerGetBlobServerCall, offset int64) error {
+ vlog.VI(4).Infof("sync: fetchBlobRemote: begin br %v, offset %v", br, offset)
+ defer vlog.VI(4).Infof("sync: fetchBlobRemote: end br %v, offset %v", br, offset)
+
+ var sendStatus, sendData bool
+ var statusStream interface {
+ Send(item wire.BlobFetchStatus) error
+ }
+ var dataStream interface {
+ Send(item []byte) error
+ }
+
+ if statusCall != nil {
+ sendStatus = true
+ statusStream = statusCall.SendStream()
+ }
+ if dataCall != nil {
+ sendData = true
+ dataStream = dataCall.SendStream()
+ }
+
+ if sendStatus {
+ // Start blob source discovery.
+ statusStream.Send(wire.BlobFetchStatus{State: wire.BlobFetchStateLocating})
+ }
+
+ // Locate blob.
+ peer, size, err := sd.locateBlob(ctx, br)
+ if err != nil {
+ return err
+ }
+
+ // Start blob fetching.
+ status := wire.BlobFetchStatus{State: wire.BlobFetchStateFetching, Total: size}
+ if sendStatus {
+ statusStream.Send(status)
+ }
+
+ ss := sd.sync.(*syncService)
+ bst := ss.bst
+
+ bWriter, err := bst.NewBlobWriter(ctx, string(br))
+ if err != nil {
+ return err
+ }
+
+ c := interfaces.SyncClient(peer)
+ ctxPeer, cancel := context.WithRootCancel(ctx)
+ stream, err := c.FetchBlob(ctxPeer, br)
+ if err == nil {
+ peerStream := stream.RecvStream()
+ for peerStream.Advance() {
+ item := blob.BlockOrFile{Block: peerStream.Value()}
+ if err = bWriter.AppendFragment(item); err != nil {
+ break
+ }
+ curSize := int64(len(item.Block))
+ status.Received += curSize
+ if sendStatus {
+ statusStream.Send(status)
+ }
+ if sendData {
+ if curSize <= offset {
+ offset -= curSize
+ } else if offset != 0 {
+ dataStream.Send(item.Block[offset:])
+ offset = 0
+ } else {
+ dataStream.Send(item.Block)
+ }
+ }
+ }
+
+ if err != nil {
+ cancel()
+ stream.Finish()
+ } else {
+ err = peerStream.Err()
+ if terr := stream.Finish(); err == nil {
+ err = terr
+ }
+ cancel()
+ }
+ }
+
+ bWriter.Close()
+ if err != nil {
+ // Clean up the partially downloaded blob so that it can be
+ // downloaded again. Ignore any error from deletion.
+ bst.DeleteBlob(ctx, string(br))
+ } else {
+ status := wire.BlobFetchStatus{State: wire.BlobFetchStateDone}
+ if sendStatus {
+ statusStream.Send(status)
+ }
+ }
+ return err
+}
+
+// TODO(hpucha): Add syncgroup driven blob discovery.
+func (sd *syncDatabase) locateBlob(ctx *context.T, br wire.BlobRef) (string, int64, error) {
+ vlog.VI(4).Infof("sync: locateBlob: begin br %v", br)
+ defer vlog.VI(4).Infof("sync: locateBlob: end br %v", br)
+
+ ss := sd.sync.(*syncService)
+ loc, err := ss.getBlobLocInfo(ctx, br)
+ if err != nil {
+ return "", 0, err
+ }
+
+ // Search for the blob at its source peer and at the peer it was learned from.
+ var peers = []string{loc.source, loc.peer}
+ for _, p := range peers {
+ vlog.VI(4).Infof("sync: locateBlob: attempting %s", p)
+ // Get the mounttables for this peer.
+ mtTables, err := sd.getMountTables(ctx, p)
+ if err != nil {
+ continue
+ }
+
+ for mt := range mtTables {
+ absName := naming.Join(mt, p, util.SyncbaseSuffix)
+ c := interfaces.SyncClient(absName)
+ size, err := c.HaveBlob(ctx, br)
+ if err == nil {
+ vlog.VI(4).Infof("sync: locateBlob: found blob on %s", absName)
+ return absName, size, nil
+ }
+ }
+ }
+
+ return "", 0, verror.New(verror.ErrInternal, ctx, "blob not found")
+
+}
+
+func (sd *syncDatabase) getMountTables(ctx *context.T, peer string) (map[string]struct{}, error) {
+ ss := sd.sync.(*syncService)
+ mInfo := ss.copyMemberInfo(ctx, peer)
+
+ mtTables := make(map[string]struct{})
+ for gdbName, sgInfo := range mInfo.db2sg {
+ appName, dbName, err := splitAppDbName(ctx, gdbName)
+ if err != nil {
+ return nil, err
+ }
+ st, err := ss.getDbStore(ctx, nil, appName, dbName)
+ if err != nil {
+ return nil, err
+ }
+
+ for id := range sgInfo {
+ sg, err := getSyncGroupById(ctx, st, id)
+ if err != nil {
+ continue
+ }
+ if _, ok := sg.Joiners[peer]; !ok {
+ // Peer is no longer part of the SyncGroup.
+ continue
+ }
+ for _, mt := range sg.Spec.MountTables {
+ mtTables[mt] = struct{}{}
+ }
+ }
+ }
+ return mtTables, nil
+}
+
+// TODO(hpucha): Persist the blob directory periodically.
+func (s *syncService) addBlobLocInfo(ctx *context.T, br wire.BlobRef, info *blobLocInfo) error {
+ s.blobDirLock.Lock()
+ defer s.blobDirLock.Unlock()
+
+ s.blobDirectory[br] = info
+ return nil
+}
+
+func (s *syncService) getBlobLocInfo(ctx *context.T, br wire.BlobRef) (*blobLocInfo, error) {
+ s.blobDirLock.Lock()
+ defer s.blobDirLock.Unlock()
+
+ if info, ok := s.blobDirectory[br]; ok {
+ return info, nil
+ }
+ return nil, verror.New(verror.ErrInternal, ctx, "blob state not found", br)
+}
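+
+// Illustrative sketch, not part of this file: a hypothetical helper showing
+// how a caller that learns about a BlobRef from a peer could record a location
+// hint for locateBlob to use later. It assumes blobLocInfo has the source and
+// peer fields referenced by locateBlob above; any other fields are left at
+// their zero values. The noteBlobSeen name is an assumption for this example.
+func (s *syncService) noteBlobSeen(ctx *context.T, br wire.BlobRef, source, peer string) error {
+ return s.addBlobLocInfo(ctx, br, &blobLocInfo{source: source, peer: peer})
+}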
diff --git a/services/syncbase/vsync/conflict_resolution.go b/services/syncbase/vsync/conflict_resolution.go
new file mode 100644
index 0000000..a8e41f6
--- /dev/null
+++ b/services/syncbase/vsync/conflict_resolution.go
@@ -0,0 +1,144 @@
+// Copyright 2015 The Vanadium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package vsync
+
+import (
+ "v.io/v23/context"
+ "v.io/v23/verror"
+ "v.io/x/lib/vlog"
+)
+
+// Policies for conflict resolution.
+// TODO(hpucha): Move relevant parts to client-facing vdl.
+const (
+ // Resolves conflicts by picking the update with the most recent timestamp.
+ useTime = iota
+
+ // TODO(hpucha): implement other policies.
+ // Resolves conflicts by using the app conflict resolver callbacks via store.
+ useCallback
+)
+
+var (
+ // conflictResolutionPolicy is the policy used to resolve conflicts.
+ conflictResolutionPolicy = useTime
+)
+
+// resolutionType represents how a conflict is resolved.
+type resolutionType byte
+
+const (
+ pickLocal resolutionType = iota // local update was chosen as the resolution.
+ pickRemote // remote update was chosen as the resolution.
+ createNew // new update was created as the resolution.
+)
+
+// conflictResolution represents the state of a conflict resolution.
+type conflictResolution struct {
+ ty resolutionType
+ rec *localLogRec // Valid only if ty == createNew.
+ val []byte // Valid only if ty == createNew.
+}
+
+// resolveConflicts resolves conflicts for updated objects. Conflicts may be
+// resolved by adding new versions or picking either the local or the remote
+// version.
+func (iSt *initiationState) resolveConflicts(ctx *context.T) error {
+ for obj, st := range iSt.updObjects {
+ if !st.isConflict {
+ continue
+ }
+
+ // TODO(hpucha): Look up policy from the schema. Currently,
+ // hardcoded to time.
+ var err error
+ st.res, err = iSt.resolveObjConflict(ctx, obj, st.oldHead, st.newHead, st.ancestor)
+ if err != nil {
+ return err
+ }
+ }
+
+ return nil
+}
+
+// resolveObjConflict resolves a conflict for an object given its ID and the 3
+// versions that express the conflict: the object's local version, its remote
+// version (from the device contacted), and the closest common ancestor (see
+// dag.go on how the ancestor is chosen). The function returns the new object
+// value according to the conflict resolution policy.
+func (iSt *initiationState) resolveObjConflict(ctx *context.T, oid, local, remote, ancestor string) (*conflictResolution, error) {
+ // Fetch the log records of the 3 object versions.
+ versions := []string{local, remote, ancestor}
+ lrecs, err := iSt.getLogRecsBatch(ctx, oid, versions)
+ if err != nil {
+ return nil, err
+ }
+
+ // The local and remote records must exist; however, it is valid for the
+ // common ancestor to not exist. This happens when two Syncbases separately
+ // create their first versions for the same object (key).
+ locRec, remRec, ancRec := lrecs[0], lrecs[1], lrecs[2]
+ if locRec == nil || remRec == nil {
+ vlog.Fatalf("sync: resolveObjConflict: oid %s: invalid local (%s: %v) or remote recs (%s: %v)",
+ oid, local, locRec, remote, remRec)
+ }
+
+ // Resolve the conflict according to the resolution policy.
+ switch conflictResolutionPolicy {
+ case useTime:
+ return iSt.resolveObjConflictByTime(ctx, oid, locRec, remRec, ancRec)
+ default:
+ return nil, verror.New(verror.ErrInternal, ctx, "unknown conflict resolution policy", conflictResolutionPolicy)
+ }
+}
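+
+// Illustrative sketch only: one way the useCallback policy mentioned in the
+// TODO above could plug into the switch in resolveObjConflict. The appResolver
+// interface and its Resolve method are assumptions for this example, not an
+// existing API; an app-supplied resolver would receive the same three log
+// records and return a conflictResolution, typically of type createNew.
+type appResolver interface {
+ Resolve(ctx *context.T, oid string, local, remote, ancestor *localLogRec) (*conflictResolution, error)
+}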
+
+// resolveObjConflictByTime resolves conflicts using the timestamps of the
+// conflicting mutations. It picks a mutation with the larger timestamp,
+// i.e. the most recent update. If the timestamps are equal, it uses the
+// mutation version numbers as a tie-breaker, picking the mutation with the
+// larger version. Instead of creating a new version that resolves the
+// conflict, we pick an existing version as the conflict resolution.
+func (iSt *initiationState) resolveObjConflictByTime(ctx *context.T, oid string, local, remote, ancestor *localLogRec) (*conflictResolution, error) {
+ var res conflictResolution
+ switch {
+ case local.Metadata.UpdTime.After(remote.Metadata.UpdTime):
+ res.ty = pickLocal
+ case local.Metadata.UpdTime.Before(remote.Metadata.UpdTime):
+ res.ty = pickRemote
+ case local.Metadata.CurVers > remote.Metadata.CurVers:
+ res.ty = pickLocal
+ case local.Metadata.CurVers < remote.Metadata.CurVers:
+ res.ty = pickRemote
+ default:
+ vlog.Fatalf("sync: resolveObjConflictByTime: local and remote update times and versions are the same, local %v remote %v", local, remote)
+ }
+
+ return &res, nil
+}
+
+// getLogRecsBatch gets the log records for an array of versions for a given object.
+func (iSt *initiationState) getLogRecsBatch(ctx *context.T, obj string, versions []string) ([]*localLogRec, error) {
+ lrecs := make([]*localLogRec, len(versions))
+ for p, v := range versions {
+ if v == NoVersion {
+ lrecs[p] = nil
+ continue
+ }
+
+ logKey, err := getLogRecKey(ctx, iSt.tx, obj, v)
+ if err != nil {
+ return nil, err
+ }
+ dev, gen, err := splitLogRecKey(ctx, logKey)
+ if err != nil {
+ return nil, err
+ }
+ lrecs[p], err = getLogRec(ctx, iSt.tx, dev, gen)
+ if err != nil {
+ return nil, err
+ }
+ }
+ return lrecs, nil
+}
diff --git a/services/syncbase/vsync/dag.go b/services/syncbase/vsync/dag.go
new file mode 100644
index 0000000..121f594
--- /dev/null
+++ b/services/syncbase/vsync/dag.go
@@ -0,0 +1,855 @@
+// Copyright 2015 The Vanadium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package vsync
+
+// Syncbase DAG (directed acyclic graph) utility functions.
+//
+// The DAG is used to track the version history of synced objects in order
+// to detect and resolve conflicts (concurrent changes on different devices).
+//
+// Note: the sync code uses the words "object" and "object ID" (oid) as a
+// generic way to refer to syncable entities, whether they are actual user data
+// (table row and its row key), prefix-ACLs (permission entry and its prefix),
+// or other metadata such as SyncGroups (SyncGroup value and its internal key
+// based on the SyncGroup ID).
+//
+// * Object IDs are globally unique across all devices.
+// * Syncable objects have version numbers associated with their mutations.
+// * For a given object ID, the version number is globally unique across all
+// devices, i.e. the (oid, version) tuple is globally unique.
+// * Each (oid, version) tuple is represented by a node in the DAG.
+// * The previous version of an object is its parent in the DAG, i.e. the
+// new version is derived from that parent.
+// * DAG nodes have child-to-parent pointers.
+// * When there are no conflicts, the parent node has a single child node
+// that points to it.
+// * When a parent node has more than one child, this indicates concurrent
+// mutations which are treated as a conflict to be resolved.
+// * When a conflict is resolved, the new version has pointers back to each of
+// the two parents to indicate that it is derived from both nodes.
+// * During a sync operation from a source device to a target device, the
+// target receives a DAG fragment from the source. That fragment has to
+// be incorporated (grafted) into the target device's DAG. It may be a
+// continuation of the DAG of an object, with the attachment (graft) point
+// being the current head of the DAG, in which case there are no conflicts.
+// Or the graft point(s) may be older nodes, which means the new fragment
+// is a divergence in the graph causing a conflict that must be resolved
+// in order to re-converge the two DAG fragments.
+//
+// In the diagrams below:
+// (h) represents the head node in the local device.
+// (nh) represents the new head node received from the remote device.
+// (g) represents a graft node, where new nodes attach to the existing DAG.
+// <- represents a derived-from mutation, i.e. a child-to-parent pointer
+//
+// a- No-conflict example: the new nodes (v4, v5) attach to the head node (v3).
+// In this case the new head becomes the head node, the new DAG fragment
+// being a continuation of the existing DAG.
+//
+// Before:
+// v1 <- v2 <- v3(h)
+//
+// Sync updates applied, no conflict detected:
+// v1 <- v2 <- v3(h,g) <- v4 <- v5 (nh)
+//
+// After:
+// v1 <- v2 <- v3 <- v4 <- v5 (h)
+//
+// b- Conflict example: the new nodes (v4, v5) attach to an old node (v2).
+// The current head node (v3) and the new head node (v5) are divergent
+// (concurrent) mutations that need to be resolved. The conflict
+// resolution function is passed the old head (v3), new head (v5), and
+// the common ancestor (v2). It resolves the conflict with (v6) which
+// is represented in the DAG as derived from both v3 and v5 (2 parents).
+//
+// Before:
+// v1 <- v2 <- v3(h)
+//
+// Sync updates applied, conflict detected (v3 not a graft node):
+// v1 <- v2(g) <- v3(h)
+// <- v4 <- v5 (nh)
+//
+// After: conflict resolver creates v6 having 2 parents (v3, v5):
+// v1 <- v2(g) <- v3 <------- v6(h)
+// <- v4 <- v5 <-
+//
+// The DAG does not grow indefinitely. During a sync operation each device
+// learns what the other device already knows -- where it's at in the version
+// history for the objects. When a device determines that all devices that
+// sync an object (members of matching SyncGroups) have moved past some version
+// for that object, the DAG for that object can be pruned up to that common
+// version, deleting all prior (ancestor) nodes.
+//
+// The DAG contains three tables persisted to disk (nodes, heads, batches):
+//
+// * nodes: one entry per (oid, version) with references to parent node(s)
+// it is derived from, a reference to the log record identifying
+// that mutation, a reference to its write batch (if any), and a
+// boolean to indicate whether this was an object deletion.
+//
+// * heads: one entry per object pointing to its most recent version.
+//
+// * batches: one entry per batch ID containing the set of objects in the
+// write batch and their versions.
+
+import (
+ "container/list"
+ "fmt"
+
+ "v.io/syncbase/x/ref/services/syncbase/server/util"
+ "v.io/syncbase/x/ref/services/syncbase/store"
+ "v.io/v23/context"
+ "v.io/v23/verror"
+ "v.io/x/lib/vlog"
+)
+
+const (
+ NoVersion = ""
+ NoBatchId = uint64(0)
+)
+
+// dagNode holds the information on an object mutation in the DAG.
+// Note: the batch ID and deleted flag are copies of information in the log
+// record. They are also stored in the DAG node to improve DAG traversal for
+// conflict resolution and pruning without having to fetch the full log record
+// every time.
+type dagNode struct {
+ Level uint64 // node distance from root
+ Parents []string // references to parent versions
+ Logrec string // reference to log record
+ BatchId uint64 // ID of a write batch
+ Deleted bool // true if the change was a delete
+}
+
+// batchSet holds information on a set of write batches.
+type batchSet map[uint64]*batchInfo
+
+// batchInfo holds the information on a write batch:
+// - The map of syncable (versioned) objects: {oid: version}
+// - The total count of batch objects, including non-syncable ones.
+// TODO(rdaoud): add support to track the read and scan sets.
+type batchInfo struct {
+ Objects map[string]string
+ Count uint64
+}
+
+// graftMap holds the state of DAG node grafting (attaching) per object.
+type graftMap map[string]*graftInfo
+
+// graftInfo holds the state of an object's node grafting in the DAG.
+// It is ephemeral (in-memory), used during a single sync operation to track
+// where the new DAG fragments are attached to the existing DAG for the object:
+// - newNodes: the set of newly added nodes; used to detect the type of edges
+// between nodes (new-node to old-node or vice versa).
+// - newHeads: the set of new candidate head nodes; used to detect conflicts.
+// - graftNodes: the set of old nodes on which new nodes were added, and their
+// level in the DAG; used to find common ancestors for conflicts.
+// - oldHeadSnap: snapshot of the current local head known by sync, used in
+// conflict detection, particularly when conflict detection needs
+// to be retried because the sync DAG state is stale compared
+// to the local store.
+//
+// After the received mutations are applied, if there are two heads in the
+// newHeads set, there is a conflict to be resolved for the object. Otherwise,
+// if there is one head, no conflict was triggered and the new head becomes the
+// current object version. In case of conflict, the graftNodes set is used to
+// select a common ancestor.
+// TODO(rdaoud): support open DAGs to handle delayed conflict resolution by
+// tracking multiple dangling remote heads in addition to the local head node.
+type graftInfo struct {
+ newNodes map[string]bool
+ newHeads map[string]bool
+ graftNodes map[string]uint64
+ oldHeadSnap string
+}
+
+// newBatchInfo allocates and initializes a batch info entry.
+func newBatchInfo() *batchInfo {
+ return &batchInfo{Objects: make(map[string]string), Count: 0}
+}
+
+// startBatch marks the start of a batch. If no batch ID is given, it generates
+// a new one and returns it to the caller. The batch ID is used to track DAG
+// nodes that are part of the same batch and is stored in the log records.
+// If a batch ID is given by the caller, its in-memory entry is looked up,
+// refetching it from the store if needed.
+func (s *syncService) startBatch(ctx *context.T, st store.StoreReader, btid uint64) uint64 {
+ s.batchesLock.Lock()
+ defer s.batchesLock.Unlock()
+
+ // If no batch ID is given, generate a new unused one.
+ if btid == NoBatchId {
+ for (btid == NoBatchId) || (s.batches[btid] != nil) {
+ btid = rand64()
+ }
+
+ s.batches[btid] = newBatchInfo()
+ return btid
+ }
+
+ // Use the given batch ID and, if needed, refetch its in-memory entry
+ // from the store. It is OK not to find it in the store; it means sync
+ // is learning about this batch ID for the first time from another Syncbase.
+ if s.batches[btid] == nil {
+ info, err := getBatch(ctx, st, btid)
+ if err != nil {
+ info = newBatchInfo()
+ }
+ s.batches[btid] = info
+ }
+
+ return btid
+}
+
+// addNodeToBatch adds a node (oid, version) to a batch under construction.
+func (s *syncService) addNodeToBatch(ctx *context.T, btid uint64, oid, version string) error {
+ s.batchesLock.Lock()
+ defer s.batchesLock.Unlock()
+
+ if btid == NoBatchId {
+ return verror.New(verror.ErrInternal, ctx, "invalid batch id", btid)
+ }
+
+ info := s.batches[btid]
+ if info == nil {
+ return verror.New(verror.ErrInternal, ctx, "unknown batch id", btid)
+ }
+
+ info.Objects[oid] = version
+ return nil
+}
+
+// endBatch marks the end of a given batch. The batch information is persisted
+// to the store and removed from the temporary in-memory entry.
+func (s *syncService) endBatch(ctx *context.T, tx store.Transaction, btid, count uint64) error {
+ s.batchesLock.Lock()
+ defer s.batchesLock.Unlock()
+
+ if btid == NoBatchId || count == 0 {
+ return verror.New(verror.ErrInternal, ctx, "invalid batch info", btid, count)
+ }
+
+ info := s.batches[btid]
+ if info == nil {
+ return verror.New(verror.ErrInternal, ctx, "unknown batch id", btid)
+ }
+
+ // The first time a batch is ended, info.Count is zero. Subsequently,
+ // if this batch ID is started and ended again, info.Count should be
+ // the same as the "count" value given.
+ if info.Count != 0 && info.Count != count {
+ return verror.New(verror.ErrInternal, ctx, "wrong counts for batch", btid, info.Count, count)
+ }
+
+ // Only save non-empty batches.
+ if len(info.Objects) > 0 {
+ info.Count = count
+ if err := setBatch(ctx, tx, btid, info); err != nil {
+ return err
+ }
+ }
+
+ delete(s.batches, btid)
+ return nil
+}
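+
+// Illustrative sketch, assuming a hypothetical caller: the expected batch
+// lifecycle when a group of object mutations committed together is added to
+// the DAG. startBatch hands out (or restores) the batch ID, each addNode call
+// ties its node to that batch, and endBatch persists the batch once the total
+// object count of the original write batch is known. The exampleAddBatch name
+// and the muts/logrecs parameters are assumptions for this example.
+func (s *syncService) exampleAddBatch(ctx *context.T, tx store.Transaction, muts, logrecs map[string]string, totalCount uint64) error {
+ btid := s.startBatch(ctx, tx, NoBatchId)
+ for oid, version := range muts {
+ // nil parents means each object's first version; real callers pass
+ // the actual parent versions of the mutation.
+ if err := s.addNode(ctx, tx, oid, version, logrecs[oid], false, nil, btid, nil); err != nil {
+ return err
+ }
+ }
+ return s.endBatch(ctx, tx, btid, totalCount)
+}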
+
+// addNode adds a new node for a DAG object, linking it to its parent nodes.
+// It verifies that the node does not exist and its parent nodes are valid.
+// If a batch ID is given, track the node membership in the batch.
+//
+// Note: an in-memory grafting structure is passed to track DAG attachments
+// during a sync operation. This is needed when nodes are being added due to
+// remote changes fetched by the sync protocol. The Initiator allocates a
+// grafting structure at the start of a sync operation and passes it across
+// calls to addNode() to update the DAG grafting state:
+// - If a parent node is not new, mark it as a DAG graft point.
+// - Mark this version as a new node.
+// - Update the new head node pointer of the grafted DAG.
+//
+// The grafting structure is not needed when nodes are added locally by the
+// Watcher, which passes a nil grafting structure.
+func (s *syncService) addNode(ctx *context.T, tx store.Transaction, oid, version, logrec string, deleted bool, parents []string, btid uint64, graft graftMap) error {
+ if parents != nil {
+ if len(parents) > 2 {
+ return verror.New(verror.ErrInternal, ctx, "cannot have more than 2 parents")
+ }
+ if len(parents) == 0 {
+ parents = nil // replace an empty array with a nil
+ }
+ }
+
+ // The new node must not exist.
+ if ok, err := hasNode(ctx, tx, oid, version); err != nil {
+ return err
+ } else if ok {
+ return verror.New(verror.ErrInternal, ctx, "DAG node already exists", oid, version)
+ }
+
+ // Verify the parents and determine the node level. Also save the parent
+ // node levels for the graft updates later in this function.
+ parentLevels := make(map[string]uint64)
+ var level uint64
+ for _, parent := range parents {
+ pnode, err := getNode(ctx, tx, oid, parent)
+ if err != nil {
+ return err
+ }
+ parentLevels[parent] = pnode.Level
+ if level <= pnode.Level {
+ level = pnode.Level + 1
+ }
+ }
+
+ // If a batch ID is given, add the node to that batch.
+ if btid != NoBatchId {
+ if err := s.addNodeToBatch(ctx, btid, oid, version); err != nil {
+ return err
+ }
+ }
+
+ // Add the node entry to the DAG.
+ node := &dagNode{
+ Level: level,
+ Parents: parents,
+ Logrec: logrec,
+ BatchId: btid,
+ Deleted: deleted,
+ }
+ if err := setNode(ctx, tx, oid, version, node); err != nil {
+ return err
+ }
+
+ // We are done if grafting is not being tracked (a local node add).
+ if graft == nil {
+ return nil
+ }
+
+ // Get the object's graft info entry in order to update it. It happens
+ // when addNode() is called by the sync Initiator and the DAG is updated
+ // with new nodes fetched from other devices.
+ //
+ // During a sync operation, each mutated object gets new nodes added in
+ // its DAG. These new nodes are either derived from nodes that were
+ // previously known on this device (i.e. their parent nodes are pre-
+ // existing, or they have no parents (new root nodes)), or they are
+ // derived from other new DAG nodes being discovered during this sync
+ // (i.e. their parent nodes were also just added to the DAG).
+ //
+ // To detect a conflict and find the most recent common ancestor to
+ // pass to the conflict resolver, the DAG graft info keeps track of the
+ // new nodes that have old parent nodes. These old-to-new edges are
+ // points where new DAG fragments are attached (grafted) onto the
+ // existing DAG. The old nodes are the graft nodes forming the set of
+ // common ancestors to use in conflict resolution:
+ //
+ // 1- A conflict happens when the current "head node" for an object is
+ // not in the set of graft nodes. It means the object mutations
+ // were not derived from what the device knows, but are divergent
+ // changes at a prior point.
+ //
+ // 2- The most recent common ancestor to use in resolving the conflict
+ // is the object graft node with the deepest level (furthest from
+ // the root node), representing the most up-to-date common knowledge
+ // between the devices.
+ info := getObjectGraftInfo(ctx, tx, graft, oid)
+
+ for _, parent := range parents {
+ // If this parent is an old node, it's a graft point.
+ if !info.newNodes[parent] {
+ info.graftNodes[parent] = parentLevels[parent]
+ }
+
+ // A parent cannot be a candidate for a new head.
+ delete(info.newHeads, parent)
+ }
+
+ // This new node is a candidate for new head version.
+ info.newNodes[version] = true
+ info.newHeads[version] = true
+ return nil
+}
+
+// addParent adds to the DAG node (oid, version) linkage to this parent node.
+//
+// Note: as with the addNode() call, an in-memory grafting structure is passed
+// to track DAG attachments during a sync operation. It is not needed if the
+// parent linkage is due to a local change (from conflict resolution selecting
+// an existing version).
+func (s *syncService) addParent(ctx *context.T, tx store.Transaction, oid, version, parent string, graft graftMap) error {
+ if version == parent {
+ return verror.New(verror.ErrInternal, ctx, "object", oid, version, "cannot be its own parent")
+ }
+
+ node, err := getNode(ctx, tx, oid, version)
+ if err != nil {
+ return err
+ }
+ pnode, err := getNode(ctx, tx, oid, parent)
+ if err != nil {
+ return err
+ }
+
+ // Check if the parent is already linked to this node.
+ found := false
+ for _, p := range node.Parents {
+ if p == parent {
+ found = true
+ break
+ }
+ }
+
+ // Add the parent if it is not yet linked.
+ if !found {
+ // Make sure that adding the link does not create a DAG cycle.
+ // Verify that the node is not an ancestor of the parent that
+ // it is being linked to.
+ err = forEachAncestor(ctx, tx, oid, pnode.Parents, func(v string, nd *dagNode) error {
+ if v == version {
+ return verror.New(verror.ErrInternal, ctx, "cycle on object",
+ oid, ": node", version, "is ancestor of parent", parent)
+ }
+ return nil
+ })
+ if err != nil {
+ return err
+ }
+ node.Parents = append(node.Parents, parent)
+ if err = setNode(ctx, tx, oid, version, node); err != nil {
+ return err
+ }
+ }
+
+ // If no grafting structure is given (i.e. local changes), we are done.
+ if graft == nil {
+ return nil
+ }
+
+ // Update graft: if the node and its parent are new/old or old/new then
+ // add the parent as a graft point (a potential common ancestor).
+ info := getObjectGraftInfo(ctx, tx, graft, oid)
+
+ _, nodeNew := info.newNodes[version]
+ _, parentNew := info.newNodes[parent]
+ if (nodeNew && !parentNew) || (!nodeNew && parentNew) {
+ info.graftNodes[parent] = pnode.Level
+ }
+
+ // The parent node can no longer be a candidate for a new head version.
+ delete(info.newHeads, parent)
+ return nil
+}
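+
+// Illustrative sketch, assuming a hypothetical helper name: the local use of
+// addParent described above, where conflict resolution picks an existing
+// version as the winner. The losing head becomes an extra parent of the winner
+// so both branches converge, and the winner then becomes the new head. No
+// graft map is passed because this is a local change.
+func (s *syncService) examplePickExisting(ctx *context.T, tx store.Transaction, oid, winner, loser string) error {
+ if err := s.addParent(ctx, tx, oid, winner, loser, nil); err != nil {
+ return err
+ }
+ return moveHead(ctx, tx, oid, winner)
+}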
+
+// moveHead moves the object head node in the DAG.
+func moveHead(ctx *context.T, tx store.Transaction, oid, head string) error {
+ // Verify that the node exists.
+ if ok, err := hasNode(ctx, tx, oid, head); err != nil {
+ return err
+ } else if !ok {
+ return verror.New(verror.ErrInternal, ctx, "node", oid, head, "does not exist")
+ }
+
+ return setHead(ctx, tx, oid, head)
+}
+
+// hasConflict determines if an object has a conflict between its new and old
+// head nodes.
+// - Yes: return (true, newHead, oldHead, ancestor) -- from a common past
+// - Yes: return (true, newHead, oldHead, NoVersion) -- from disjoint pasts
+// - No: return (false, newHead, oldHead, NoVersion) -- no conflict
+//
+// A conflict exists when there are two new-head nodes in the graft structure.
+// It means the newly added object versions are not derived in part from this
+// device's current knowledge. A conflict also exists if the snapshotted local
+// head is different from the current local head. If there is a single new head
+// and the snapshot head is the same as the current local head, the object
+// changes were applied without triggering a conflict.
+func hasConflict(ctx *context.T, st store.StoreReader, oid string, graft graftMap) (isConflict bool, newHead, oldHead, ancestor string, err error) {
+ isConflict = false
+ oldHead = NoVersion
+ newHead = NoVersion
+ ancestor = NoVersion
+ err = nil
+
+ if graft == nil {
+ err = verror.New(verror.ErrInternal, ctx, "no DAG graft map given")
+ return
+ }
+
+ info := graft[oid]
+ if info == nil {
+ err = verror.New(verror.ErrInternal, ctx, "node", oid, "has no DAG graft info")
+ return
+ }
+
+ numHeads := len(info.newHeads)
+ if numHeads < 1 || numHeads > 2 {
+ err = verror.New(verror.ErrInternal, ctx, "node", oid, "invalid count of new heads", numHeads)
+ return
+ }
+
+ // Fetch the current head for this object if it exists. The error from
+ // getHead() is ignored because a newly received object is not yet known
+ // on this device and will not trigger a conflict.
+ oldHead, _ = getHead(ctx, st, oid)
+
+ // If there is only one new head node and the snapshotted old head is
+ // still unchanged, there is no conflict. The new head is that single
+ // one, even if it might also be the same old node.
+ if numHeads == 1 {
+ for head := range info.newHeads {
+ newHead = head
+ }
+ if newHead == info.oldHeadSnap {
+ // Only link log records could've been received.
+ newHead = oldHead
+ return
+ } else if oldHead == info.oldHeadSnap {
+ return
+ }
+ }
+
+ // The new head is the non-old one.
+ for head := range info.newHeads {
+ if head != info.oldHeadSnap {
+ newHead = head
+ break
+ }
+ }
+
+ // There wasn't a conflict at the old snapshot, but now there is. The
+ // snapshotted head is the common ancestor.
+ isConflict = true
+ if numHeads == 1 {
+ vlog.VI(4).Infof("sync: hasConflict: old graft snapshot %v, head %s", graft, oldHead)
+ ancestor = info.oldHeadSnap
+ return
+ }
+
+ // There is a conflict: the best choice ancestor is the graft node with
+ // the largest level (farthest from the root). It is possible in some
+ // corner cases to have multiple graft nodes at the same level. This
+ // would still be a single conflict, but the multiple same-level graft
+// nodes represent equivalent conflict resolutions on different
+ // devices that are now merging their resolutions. In such a case it
+ // does not matter which node is chosen as the ancestor because the
+ // conflict resolver function is assumed to be convergent. However it
+ // is nicer to make that selection deterministic so all devices see the
+ // same choice: the version number is used as a tie-breaker.
+ // Note: for the case of a conflict from disjoint pasts, there are no
+ // graft nodes (empty set) and thus no common ancestor because the two
+ // DAG fragments were created from distinct root nodes. The "NoVersion"
+ // value is returned as the ancestor.
+ var maxLevel uint64
+ for node, level := range info.graftNodes {
+ if maxLevel < level || (maxLevel == level && ancestor < node) {
+ maxLevel = level
+ ancestor = node
+ }
+ }
+ return
+}
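+
+// Illustrative sketch, assuming hypothetical names (remoteDagNode,
+// exampleApplyFragment): how the helpers above combine when a received DAG
+// fragment for one object is applied. New nodes are added against a graft map,
+// hasConflict then classifies the outcome, and if there is no conflict the
+// head simply moves forward; real initiator code also resolves conflicts and
+// updates the store data, which is elided here.
+type remoteDagNode struct {
+ version, logrec string
+ parents []string
+ deleted bool
+}
+
+func (s *syncService) exampleApplyFragment(ctx *context.T, tx store.Transaction, oid string, frag []remoteDagNode) error {
+ graft := newGraft()
+ for _, n := range frag {
+ if err := s.addNode(ctx, tx, oid, n.version, n.logrec, n.deleted, n.parents, NoBatchId, graft); err != nil {
+ return err
+ }
+ }
+ isConflict, newHead, _, _, err := hasConflict(ctx, tx, oid, graft)
+ if err != nil {
+ return err
+ }
+ if isConflict {
+ // Conflict resolution (see conflict_resolution.go) picks or creates the new head.
+ return nil
+ }
+ return moveHead(ctx, tx, oid, newHead)
+}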
+
+// newGraft allocates a graftMap to track DAG node grafting during sync.
+func newGraft() graftMap {
+ return make(graftMap)
+}
+
+// getObjectGraftInfo returns the graftInfo for an object ID. If the graftMap is
+// nil, a nil graftInfo is returned because grafting is not being tracked.
+func getObjectGraftInfo(ctx *context.T, sntx store.SnapshotOrTransaction, graft graftMap, oid string) *graftInfo {
+ if graft == nil {
+ return nil
+ }
+ if info := graft[oid]; info != nil {
+ return info
+ }
+
+ info := &graftInfo{
+ newNodes: make(map[string]bool),
+ newHeads: make(map[string]bool),
+ graftNodes: make(map[string]uint64),
+ }
+
+ // If the object has a head node, include it in the set of new heads.
+ if head, err := getHead(ctx, sntx, oid); err == nil {
+ info.newHeads[head] = true
+ info.oldHeadSnap = head
+ }
+
+ graft[oid] = info
+ return info
+}
+
+// forEachAncestor loops over the DAG ancestor nodes of an object in a breadth-
+// first traversal starting from the given version nodes. It calls the given
+// callback function once for each ancestor node.
+func forEachAncestor(ctx *context.T, st store.StoreReader, oid string, startVersions []string, callback func(version string, node *dagNode) error) error {
+ visited := make(map[string]bool)
+ queue := list.New()
+ for _, version := range startVersions {
+ queue.PushBack(version)
+ visited[version] = true
+ }
+
+ for queue.Len() > 0 {
+ version := queue.Remove(queue.Front()).(string)
+ node, err := getNode(ctx, st, oid, version)
+ if err != nil {
+ // Ignore it, the parent was previously pruned.
+ continue
+ }
+ for _, parent := range node.Parents {
+ if !visited[parent] {
+ queue.PushBack(parent)
+ visited[parent] = true
+ }
+ }
+ if err = callback(version, node); err != nil {
+ return err
+ }
+ }
+ return nil
+}
+
+// newBatchPruning allocates an in-memory structure to track batches affected
+// by a DAG pruning operation across objects.
+func newBatchPruning() batchSet {
+ return make(batchSet)
+}
+
+// prune trims the DAG of an object at a given version (node) by deleting all
+// its ancestor nodes, making it the new root node. For each deleted node it
+// calls the given callback function to delete its log record.
+//
+// Note: this function should only be used when sync determines that all devices
+// that know about this object have gotten past this version.
+//
+// The batch set passed is used to track batches affected by the deletion of DAG
+// objects across multiple calls to prune(). It is later given to pruneDone()
+// to do GC on these batches.
+func prune(ctx *context.T, tx store.Transaction, oid, version string, batches batchSet, delLogRec func(ctx *context.T, tx store.Transaction, logrec string) error) error {
+ if batches == nil {
+ return verror.New(verror.ErrInternal, ctx, "missing batch set")
+ }
+
+ // Get the node at the pruning point and set its parents to nil.
+ // It will become the oldest DAG node (root) for the object.
+ node, err := getNode(ctx, tx, oid, version)
+ if err != nil {
+ return err
+ }
+ if node.Parents == nil {
+ // Nothing to do, this node is already the root.
+ return nil
+ }
+
+ parents := node.Parents
+ node.Parents = nil
+ if err = setNode(ctx, tx, oid, version, node); err != nil {
+ return err
+ }
+
+ // Delete all ancestor nodes and their log records. Delete as many as
+ // possible and track the error counts. Update the batch set to track
+ // their pruning.
+ nodeErrs, logErrs := 0, 0
+ forEachAncestor(ctx, tx, oid, parents, func(v string, nd *dagNode) error {
+ if btid := nd.BatchId; btid != NoBatchId {
+ if batches[btid] == nil {
+ batches[btid] = newBatchInfo()
+ }
+ batches[btid].Objects[oid] = v
+ }
+
+ if err := delLogRec(ctx, tx, nd.Logrec); err != nil {
+ logErrs++
+ }
+ if err := delNode(ctx, tx, oid, v); err != nil {
+ nodeErrs++
+ }
+ return nil
+ })
+ if nodeErrs != 0 || logErrs != 0 {
+ return verror.New(verror.ErrInternal, ctx,
+ "prune failed to delete nodes and logs:", nodeErrs, logErrs)
+ }
+ return nil
+}
+
+// pruneDone is called when object pruning is finished within a single pass of
+// the sync garbage collector. It updates the batch sets affected by objects
+// deleted by prune().
+func pruneDone(ctx *context.T, tx store.Transaction, batches batchSet) error {
+ // Update batch sets by removing the pruned objects from them.
+ for btid, pruneInfo := range batches {
+ info, err := getBatch(ctx, tx, btid)
+ if err != nil {
+ return err
+ }
+
+ for oid := range pruneInfo.Objects {
+ delete(info.Objects, oid)
+ }
+
+ if len(info.Objects) > 0 {
+ err = setBatch(ctx, tx, btid, info)
+ } else {
+ err = delBatch(ctx, tx, btid)
+ }
+ if err != nil {
+ return err
+ }
+ }
+ return nil
+}
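+
+// Illustrative sketch, assuming a hypothetical exampleGCPass helper: how prune
+// and pruneDone cooperate within one garbage collection pass. Each object is
+// pruned at the version all devices are known to have passed, the affected
+// batches accumulate in a shared batchSet, and pruneDone then reconciles or
+// deletes those batch entries. The delLogRec callback is a placeholder; the
+// real collector deletes the log record named by logrec.
+func exampleGCPass(ctx *context.T, tx store.Transaction, pruneAt map[string]string) error {
+ batches := newBatchPruning()
+ for oid, version := range pruneAt {
+ err := prune(ctx, tx, oid, version, batches, func(ctx *context.T, tx store.Transaction, logrec string) error {
+ return nil // placeholder for deleting the log record
+ })
+ if err != nil {
+ return err
+ }
+ }
+ return pruneDone(ctx, tx, batches)
+}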
+
+// getLogRecKey returns the key of the log record for a given object version.
+func getLogRecKey(ctx *context.T, st store.StoreReader, oid, version string) (string, error) {
+ node, err := getNode(ctx, st, oid, version)
+ if err != nil {
+ return "", err
+ }
+ return node.Logrec, nil
+}
+
+// Low-level utility functions to access DB entries without tracking their
+// relationships. Use the functions above to manipulate the DAG.
+
+// nodeKey returns the key used to access a DAG node (oid, version).
+func nodeKey(oid, version string) string {
+ return util.JoinKeyParts(util.SyncPrefix, dagPrefix, "n", oid, version)
+}
+
+// setNode stores the DAG node entry.
+func setNode(ctx *context.T, tx store.Transaction, oid, version string, node *dagNode) error {
+ if version == NoVersion {
+ return verror.New(verror.ErrInternal, ctx, "invalid version", version)
+ }
+
+ return util.Put(ctx, tx, nodeKey(oid, version), node)
+}
+
+// getNode retrieves the DAG node entry for the given (oid, version).
+func getNode(ctx *context.T, st store.StoreReader, oid, version string) (*dagNode, error) {
+ if version == NoVersion {
+ return nil, verror.New(verror.ErrInternal, ctx, "invalid version", version)
+ }
+
+ var node dagNode
+ key := nodeKey(oid, version)
+ if err := util.Get(ctx, st, key, &node); err != nil {
+ return nil, err
+ }
+ return &node, nil
+}
+
+// delNode deletes the DAG node entry.
+func delNode(ctx *context.T, tx store.Transaction, oid, version string) error {
+ if version == NoVersion {
+ return verror.New(verror.ErrInternal, ctx, "invalid version", version)
+ }
+
+ return util.Delete(ctx, tx, nodeKey(oid, version))
+}
+
+// hasNode returns true if the node (oid, version) exists in the DAG.
+func hasNode(ctx *context.T, st store.StoreReader, oid, version string) (bool, error) {
+ // TODO(rdaoud): optimize to avoid the unneeded fetch/decode of the data.
+ if _, err := getNode(ctx, st, oid, version); err != nil {
+ if verror.ErrorID(err) == verror.ErrNoExist.ID {
+ err = nil
+ }
+ return false, err
+ }
+ return true, nil
+}
+
+// headKey returns the key used to access the DAG object head.
+func headKey(oid string) string {
+ return util.JoinKeyParts(util.SyncPrefix, dagPrefix, "h", oid)
+}
+
+// setHead stores version as the DAG object head.
+func setHead(ctx *context.T, tx store.Transaction, oid, version string) error {
+ if version == NoVersion {
+ return verror.New(verror.ErrInternal, ctx, fmt.Errorf("invalid version: %s", version))
+ }
+
+ return util.Put(ctx, tx, headKey(oid), version)
+}
+
+// getHead retrieves the DAG object head.
+func getHead(ctx *context.T, st store.StoreReader, oid string) (string, error) {
+ var version string
+ key := headKey(oid)
+ if err := util.Get(ctx, st, key, &version); err != nil {
+ return NoVersion, err
+ }
+ return version, nil
+}
+
+// delHead deletes the DAG object head.
+func delHead(ctx *context.T, tx store.Transaction, oid string) error {
+ return util.Delete(ctx, tx, headKey(oid))
+}
+
+// batchKey returns the key used to access the DAG batch info.
+func batchKey(btid uint64) string {
+ return util.JoinKeyParts(util.SyncPrefix, dagPrefix, "b", fmt.Sprintf("%d", btid))
+}
+
+// setBatch stores the DAG batch entry.
+func setBatch(ctx *context.T, tx store.Transaction, btid uint64, info *batchInfo) error {
+ if btid == NoBatchId {
+ return verror.New(verror.ErrInternal, ctx, "invalid batch id", btid)
+ }
+
+ return util.Put(ctx, tx, batchKey(btid), info)
+}
+
+// getBatch retrieves the DAG batch entry.
+func getBatch(ctx *context.T, st store.StoreReader, btid uint64) (*batchInfo, error) {
+ if btid == NoBatchId {
+ return nil, verror.New(verror.ErrInternal, ctx, "invalid batch id", btid)
+ }
+
+ var info batchInfo
+ key := batchKey(btid)
+ if err := util.Get(ctx, st, key, &info); err != nil {
+ return nil, err
+ }
+ return &info, nil
+}
+
+// delBatch deletes the DAG batch entry.
+func delBatch(ctx *context.T, tx store.Transaction, btid uint64) error {
+ if btid == NoBatchId {
+ return verror.New(verror.ErrInternal, ctx, "invalid batch id", btid)
+ }
+
+ return util.Delete(ctx, tx, batchKey(btid))
+}
+
+// getParentMap is a testing and debug helper function that returns a map of an
+// object's DAG (node-to-parents relations). If a graft structure
+// is given, include its fragments in the map.
+func getParentMap(ctx *context.T, st store.StoreReader, oid string, graft graftMap) map[string][]string {
+ parentMap := make(map[string][]string)
+ var start []string
+
+ if head, err := getHead(ctx, st, oid); err == nil {
+ start = append(start, head)
+ }
+ if graft != nil && graft[oid] != nil {
+ for v := range graft[oid].newHeads {
+ start = append(start, v)
+ }
+ }
+
+ forEachAncestor(ctx, st, oid, start, func(v string, nd *dagNode) error {
+ parentMap[v] = nd.Parents
+ return nil
+ })
+ return parentMap
+}
diff --git a/services/syncbase/vsync/dag_test.go b/services/syncbase/vsync/dag_test.go
new file mode 100644
index 0000000..9ef43f7
--- /dev/null
+++ b/services/syncbase/vsync/dag_test.go
@@ -0,0 +1,1632 @@
+// Copyright 2015 The Vanadium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package vsync
+
+// Tests for the Syncbase DAG.
+
+import (
+ "errors"
+ "fmt"
+ "reflect"
+ "strconv"
+ "testing"
+
+ "v.io/syncbase/x/ref/services/syncbase/store"
+ "v.io/v23/context"
+ _ "v.io/x/ref/runtime/factories/generic"
+)
+
+// TestSetNode tests setting and getting a DAG node.
+func TestSetNode(t *testing.T) {
+ svc := createService(t)
+ defer destroyService(t, svc)
+ st := svc.St()
+
+ oid, version := "1111", "1"
+
+ node, err := getNode(nil, st, oid, version)
+ if err == nil || node != nil {
+ t.Errorf("found non-existent object %s:%s: %v", oid, version, node)
+ }
+
+ if ok, err := hasNode(nil, st, oid, version); err != nil || ok {
+ t.Errorf("hasNode() found non-existent object %s:%s", oid, version)
+ }
+
+ if logrec, err := getLogRecKey(nil, st, oid, version); err == nil || logrec != "" {
+ t.Errorf("non-existent object %s:%s has a logrec: %v", oid, version, logrec)
+ }
+
+ node = &dagNode{Level: 15, Parents: []string{"444", "555"}, Logrec: "logrec-23"}
+
+ tx := st.NewTransaction()
+ if err = setNode(nil, tx, oid, version, node); err != nil {
+ t.Fatalf("cannot set object %s:%s (%v): %v", oid, version, node, err)
+ }
+ tx.Commit()
+
+ node2, err := getNode(nil, st, oid, version)
+ if err != nil || node2 == nil {
+ t.Errorf("cannot find stored object %s:%s: %v", oid, version, err)
+ }
+
+ if ok, err := hasNode(nil, st, oid, version); err != nil || !ok {
+ t.Errorf("hasNode() did not find object %s:%s", oid, version)
+ }
+
+ if !reflect.DeepEqual(node, node2) {
+ t.Errorf("object %s:%s has wrong data: %v instead of %v", oid, version, node2, node)
+ }
+
+ if logrec, err := getLogRecKey(nil, st, oid, version); err != nil || logrec != "logrec-23" {
+ t.Errorf("object %s:%s has wrong logrec: %s", oid, version, logrec)
+ }
+}
+
+// TestDelNode tests deleting a DAG node.
+func TestDelNode(t *testing.T) {
+ svc := createService(t)
+ defer destroyService(t, svc)
+ st := svc.St()
+
+ oid, version := "2222", "2"
+
+ node := &dagNode{Level: 123, Parents: []string{"333"}, Logrec: "logrec-789"}
+
+ tx := st.NewTransaction()
+ if err := setNode(nil, tx, oid, version, node); err != nil {
+ t.Fatalf("cannot set object %s:%s (%v): %v", oid, version, node, err)
+ }
+ tx.Commit()
+
+ tx = st.NewTransaction()
+ if err := delNode(nil, tx, oid, version); err != nil {
+ t.Fatalf("cannot delete object %s:%s: %v", oid, version, err)
+ }
+ tx.Commit()
+
+ node2, err := getNode(nil, st, oid, version)
+ if err == nil || node2 != nil {
+ t.Errorf("found deleted object %s:%s (%v)", oid, version, node2)
+ }
+
+ if ok, err := hasNode(nil, st, oid, version); err != nil || ok {
+ t.Errorf("hasNode() found deleted object %s:%s", oid, version)
+ }
+
+ if logrec, err := getLogRecKey(nil, st, oid, version); err == nil || logrec != "" {
+ t.Errorf("deleted object %s:%s has logrec: %s", oid, version, logrec)
+ }
+}
+
+// TestAddParent tests adding parents to a DAG node.
+func TestAddParent(t *testing.T) {
+ svc := createService(t)
+ defer destroyService(t, svc)
+ st := svc.St()
+ s := svc.sync
+
+ oid, version := "foo1", "7"
+
+ tx := st.NewTransaction()
+ if err := s.addParent(nil, tx, oid, version, "haha", nil); err == nil {
+ t.Errorf("addParent() did not fail for an unknown object %s:%s", oid, version)
+ }
+ tx.Abort()
+
+ if _, err := s.dagReplayCommands(nil, "local-init-00.log.sync"); err != nil {
+ t.Fatal(err)
+ }
+
+ node := &dagNode{Level: 15, Logrec: "logrec-22"}
+
+ tx = st.NewTransaction()
+ if err := setNode(nil, tx, oid, version, node); err != nil {
+ t.Fatalf("cannot set object %s:%s (%v): %v", oid, version, node, err)
+ }
+ tx.Commit()
+
+ graft := newGraft()
+ tx = st.NewTransaction()
+ if err := s.addParent(nil, tx, oid, version, version, graft); err == nil {
+ t.Errorf("addParent() did not fail on a self-parent for object %s:%s", oid, version)
+ }
+ tx.Abort()
+
+ remote := true
+ expParents := []string{"4", "5", "6"}
+
+ for _, parent := range expParents {
+ tx = st.NewTransaction()
+ if err := s.addParent(nil, tx, oid, version, parent, graft); err == nil {
+ t.Errorf("addParent() did not reject invalid parent %s for object %s:%s",
+ parent, oid, version)
+ }
+ tx.Abort()
+
+ pnode := &dagNode{Level: 11, Logrec: fmt.Sprintf("logrec-%s", parent), Parents: []string{"3"}}
+
+ tx = st.NewTransaction()
+ if err := setNode(nil, tx, oid, parent, pnode); err != nil {
+ t.Fatalf("cannot set parent object %s:%s (%v): %v", oid, parent, pnode, err)
+ }
+ tx.Commit()
+
+ var g graftMap
+ if remote {
+ g = graft
+ }
+
+ // addParent() twice to verify it is idempotent.
+ for i := 0; i < 2; i++ {
+ tx = st.NewTransaction()
+ if err := s.addParent(nil, tx, oid, version, parent, g); err != nil {
+ t.Errorf("addParent() failed on parent %s, remote %t (i=%d) for %s:%s: %v",
+ parent, remote, i, oid, version, err)
+ }
+ tx.Commit()
+ }
+
+ remote = !remote
+ }
+
+ node2, err := getNode(nil, st, oid, version)
+ if err != nil || node2 == nil {
+ t.Errorf("cannot find object %s:%s: %v", oid, version, err)
+ }
+
+ if !reflect.DeepEqual(node2.Parents, expParents) {
+ t.Errorf("invalid parents for object %s:%s: %v instead of %v",
+ oid, version, node2.Parents, expParents)
+ }
+
+ // Creating cycles should fail.
+ for v := 1; v < 7; v++ {
+ ver := fmt.Sprintf("%d", v)
+ tx = st.NewTransaction()
+ if err = s.addParent(nil, tx, oid, ver, version, nil); err == nil {
+ t.Errorf("addParent() failed to reject a cycle for %s: from ancestor %s to node %s",
+ oid, ver, version)
+ }
+ tx.Abort()
+ }
+}
+
+// TestSetHead tests setting and getting a DAG head node.
+func TestSetHead(t *testing.T) {
+ svc := createService(t)
+ defer destroyService(t, svc)
+ st := svc.St()
+
+ oid := "3333"
+
+ version, err := getHead(nil, st, oid)
+ if err == nil {
+ t.Errorf("found non-existent object head %s:%s", oid, version)
+ }
+
+ for i := 0; i < 2; i++ {
+ version = fmt.Sprintf("v%d", 555+i)
+
+ tx := st.NewTransaction()
+ if err = setHead(nil, tx, oid, version); err != nil {
+ t.Fatalf("cannot set object head %s:%s (i=%d)", oid, version, i)
+ }
+ tx.Commit()
+
+ version2, err := getHead(nil, st, oid)
+ if err != nil {
+ t.Errorf("cannot find stored object head %s (i=%d)", oid, i)
+ }
+ if version != version2 {
+ t.Errorf("object %s has wrong head data (i=%d): %s instead of %s",
+ oid, i, version2, version)
+ }
+ }
+}
+
+// TestLocalUpdates tests the sync handling of initial local updates: an object
+// is created (v1) and updated twice (v2, v3) on this device. The DAG should
+// show: v1 -> v2 -> v3 and the head should point to v3.
+func TestLocalUpdates(t *testing.T) {
+ svc := createService(t)
+ defer destroyService(t, svc)
+ st := svc.St()
+ s := svc.sync
+
+ oid := "foo1"
+
+ if _, err := s.dagReplayCommands(nil, "local-init-00.log.sync"); err != nil {
+ t.Fatal(err)
+ }
+
+ // The head must have moved to v3 and the parent map shows the updated DAG.
+ if head, err := getHead(nil, st, oid); err != nil || head != "3" {
+ t.Errorf("invalid object %s head: %s", oid, head)
+ }
+
+ pmap := getParentMap(nil, st, oid, nil)
+
+ exp := map[string][]string{"1": nil, "2": {"1"}, "3": {"2"}}
+
+ if !reflect.DeepEqual(pmap, exp) {
+ t.Errorf("invalid object %s parent map: (%v) instead of (%v)", oid, pmap, exp)
+ }
+
+ // Make sure an existing node cannot be added again.
+ tx := st.NewTransaction()
+ if err := s.addNode(nil, tx, oid, "2", "foo", false, []string{"1", "3"}, NoBatchId, nil); err == nil {
+ t.Errorf("addNode() did not fail when given an existing node")
+ }
+
+ // Make sure a new node cannot have more than 2 parents.
+ if err := s.addNode(nil, tx, oid, "4", "foo", false, []string{"1", "2", "3"}, NoBatchId, nil); err == nil {
+ t.Errorf("addNode() did not fail when given 3 parents")
+ }
+
+ // Make sure a new node cannot have an invalid parent.
+ if err := s.addNode(nil, tx, oid, "4", "foo", false, []string{"1", "555"}, NoBatchId, nil); err == nil {
+ t.Errorf("addNode() did not fail when using an invalid parent")
+ }
+
+ // Make sure a new root node (no parents) can be added once a root exists.
+ // For the parents array, check both the "nil" and the empty array as input.
+ if err := s.addNode(nil, tx, oid, "6789", "foo", false, nil, NoBatchId, nil); err != nil {
+ t.Errorf("cannot add another root node (nil parents) for object %s: %v", oid, err)
+ }
+ if err := s.addNode(nil, tx, oid, "9999", "foo", false, []string{}, NoBatchId, nil); err != nil {
+ t.Errorf("cannot add another root node (empty parents) for object %s: %v", oid, err)
+ }
+
+ tx.Abort()
+}
+
+// TestRemoteUpdates tests the sync handling of initial remote updates:
+// an object is created (v1) and updated twice (v2, v3) on another device and
+// we learn about it during sync. The updated DAG should show: v1 -> v2 -> v3
+// and report no conflicts with the new head pointing at v3.
+func TestRemoteUpdates(t *testing.T) {
+ svc := createService(t)
+ defer destroyService(t, svc)
+ st := svc.St()
+ s := svc.sync
+
+ oid := "foo1"
+
+ graft, err := s.dagReplayCommands(nil, "remote-init-00.log.sync")
+ if err != nil {
+ t.Fatal(err)
+ }
+
+ // The head must not have moved (i.e. still undefined) and the parent
+ // map shows the newly grafted DAG fragment.
+ if head, err := getHead(nil, st, oid); err == nil {
+ t.Errorf("object %s head found: %s", oid, head)
+ }
+
+ pmap := getParentMap(nil, st, oid, graft)
+
+ exp := map[string][]string{"1": nil, "2": {"1"}, "3": {"2"}}
+
+ if !reflect.DeepEqual(pmap, exp) {
+ t.Errorf("invalid object %s parent map: (%v) instead of (%v)", oid, pmap, exp)
+ }
+
+ // Verify the grafting of remote nodes.
+ g := graft[oid]
+
+ expNewHeads := map[string]bool{"3": true}
+
+ if !reflect.DeepEqual(g.newHeads, expNewHeads) {
+ t.Errorf("object %s has invalid newHeads: (%v) instead of (%v)", oid, g.newHeads, expNewHeads)
+ }
+
+ expGrafts := map[string]uint64{}
+ if !reflect.DeepEqual(g.graftNodes, expGrafts) {
+ t.Errorf("invalid object %s graft: (%v) instead of (%v)", oid, g.graftNodes, expGrafts)
+ }
+
+ // There should be no conflict.
+ isConflict, newHead, oldHead, ancestor, errConflict := hasConflict(nil, st, oid, graft)
+ if !(!isConflict && newHead == "3" && oldHead == NoVersion && ancestor == NoVersion && errConflict == nil) {
+ t.Errorf("object %s: wrong conflict info: flag %t, newHead %s, oldHead %s, ancestor %s, err %v",
+ oid, isConflict, newHead, oldHead, ancestor, errConflict)
+ }
+
+ if logrec, err := getLogRecKey(nil, st, oid, newHead); err != nil || logrec != "$sync:log:11:3" {
+ t.Errorf("invalid logrec for newhead object %s:%s: %v", oid, newHead, logrec)
+ }
+
+ // Make sure an unknown node cannot become the new head.
+ tx := st.NewTransaction()
+ if err := moveHead(nil, tx, oid, "55"); err == nil {
+ t.Errorf("moveHead() did not fail on an invalid node")
+ }
+ tx.Abort()
+
+ // Then move the head.
+ tx = st.NewTransaction()
+ if err := moveHead(nil, tx, oid, newHead); err != nil {
+ t.Errorf("object %s cannot move head to %s: %v", oid, newHead, err)
+ }
+ tx.Commit()
+}
+
+// TestRemoteNoConflict tests sync of remote updates on top of a local initial
+// state without conflict. An object is created locally and updated twice
+// (v1 -> v2 -> v3). Another device, having gotten this info, makes 3 updates
+// on top of that (v3 -> v4 -> v5 -> v6) and sends this info in a later sync.
+// The updated DAG should show (v1 -> v2 -> v3 -> v4 -> v5 -> v6) and report
+// no conflicts with the new head pointing at v6. It should also report v3 as
+// the graft point on which the new fragment (v4 -> v5 -> v6) gets attached.
+func TestRemoteNoConflict(t *testing.T) {
+ svc := createService(t)
+ defer destroyService(t, svc)
+ st := svc.St()
+ s := svc.sync
+
+ oid := "foo1"
+
+ if _, err := s.dagReplayCommands(nil, "local-init-00.log.sync"); err != nil {
+ t.Fatal(err)
+ }
+ graft, err := s.dagReplayCommands(nil, "remote-noconf-00.log.sync")
+ if err != nil {
+ t.Fatal(err)
+ }
+
+ // The head must not have moved (i.e. still at v3) and the parent map
+ // shows the newly grafted DAG fragment on top of the prior DAG.
+ if head, err := getHead(nil, st, oid); err != nil || head != "3" {
+ t.Errorf("object %s has wrong head: %s", oid, head)
+ }
+
+ pmap := getParentMap(nil, st, oid, graft)
+
+ exp := map[string][]string{"1": nil, "2": {"1"}, "3": {"2"}, "4": {"3"}, "5": {"4"}, "6": {"5"}}
+
+ if !reflect.DeepEqual(pmap, exp) {
+ t.Errorf("invalid object %s parent map: (%v) instead of (%v)", oid, pmap, exp)
+ }
+
+ // Verify the grafting of remote nodes.
+ g := graft[oid]
+
+ expNewHeads := map[string]bool{"6": true}
+ if !reflect.DeepEqual(g.newHeads, expNewHeads) {
+ t.Errorf("object %s has invalid newHeads: (%v) instead of (%v)", oid, g.newHeads, expNewHeads)
+ }
+
+ expGrafts := map[string]uint64{"3": 2}
+ if !reflect.DeepEqual(g.graftNodes, expGrafts) {
+ t.Errorf("invalid object %s graft: (%v) instead of (%v)", oid, g.graftNodes, expGrafts)
+ }
+
+ // There should be no conflict.
+ isConflict, newHead, oldHead, ancestor, errConflict := hasConflict(nil, st, oid, graft)
+ if !(!isConflict && newHead == "6" && oldHead == "3" && ancestor == NoVersion && errConflict == nil) {
+ t.Errorf("object %s: wrong conflict info: flag %t, newHead %s, oldHead %s, ancestor %s, err %v",
+ oid, isConflict, newHead, oldHead, ancestor, errConflict)
+ }
+
+ if logrec, err := getLogRecKey(nil, st, oid, oldHead); err != nil || logrec != "$sync:log:10:3" {
+ t.Errorf("invalid logrec for oldhead object %s:%s: %v", oid, oldHead, logrec)
+ }
+ if logrec, err := getLogRecKey(nil, st, oid, newHead); err != nil || logrec != "$sync:log:11:3" {
+ t.Errorf("invalid logrec for newhead object %s:%s: %v", oid, newHead, logrec)
+ }
+
+ // Then move the head.
+ tx := st.NewTransaction()
+ if err := moveHead(nil, tx, oid, newHead); err != nil {
+ t.Errorf("object %s cannot move head to %s: %v", oid, newHead, err)
+ }
+ tx.Commit()
+
+ // Verify that hasConflict() fails without graft data.
+ isConflict, newHead, oldHead, ancestor, errConflict = hasConflict(nil, st, oid, nil)
+ if errConflict == nil {
+ t.Errorf("hasConflict() on %s did not fail w/o graft data: flag %t, newHead %s, oldHead %s, ancestor %s, err %v",
+ oid, isConflict, newHead, oldHead, ancestor, errConflict)
+ }
+}
+
+// TestRemoteConflict tests sync handling of remote updates that build on the
+// local initial state and trigger a conflict. An object is created locally
+// and updated twice (v1 -> v2 -> v3). Another device, having only gotten
+// the v1 -> v2 history, makes 3 updates on top of v2 (v2 -> v4 -> v5 -> v6)
+// and sends this info during a later sync. Separately, the local device
+// makes a conflicting (concurrent) update v2 -> v3. The updated DAG should
+// show the branches: (v1 -> v2 -> v3) and (v1 -> v2 -> v4 -> v5 -> v6) and
+// report the conflict between v3 and v6 (current and new heads). It should
+// also report v2 as the graft point and the common ancestor in the conflict.
+// The conflict is resolved locally by creating v7 that is derived from both
+// v3 and v6 and it becomes the new head.
+func TestRemoteConflict(t *testing.T) {
+ svc := createService(t)
+ defer destroyService(t, svc)
+ st := svc.St()
+ s := svc.sync
+
+ oid := "foo1"
+
+ if _, err := s.dagReplayCommands(nil, "local-init-00.log.sync"); err != nil {
+ t.Fatal(err)
+ }
+ graft, err := s.dagReplayCommands(nil, "remote-conf-00.log.sync")
+ if err != nil {
+ t.Fatal(err)
+ }
+
+ // The head must not have moved (i.e. still at v3) and the parent map
+ // shows the newly grafted DAG fragment on top of the prior DAG.
+ if head, err := getHead(nil, st, oid); err != nil || head != "3" {
+ t.Errorf("object %s has wrong head: %s", oid, head)
+ }
+
+ pmap := getParentMap(nil, st, oid, graft)
+
+ exp := map[string][]string{"1": nil, "2": {"1"}, "3": {"2"}, "4": {"2"}, "5": {"4"}, "6": {"5"}}
+
+ if !reflect.DeepEqual(pmap, exp) {
+ t.Errorf("invalid object %s parent map: (%v) instead of (%v)", oid, pmap, exp)
+ }
+
+ // Verify the grafting of remote nodes.
+ g := graft[oid]
+
+ expNewHeads := map[string]bool{"3": true, "6": true}
+ if !reflect.DeepEqual(g.newHeads, expNewHeads) {
+ t.Errorf("object %s has invalid newHeads: (%v) instead of (%v)", oid, g.newHeads, expNewHeads)
+ }
+
+ expGrafts := map[string]uint64{"2": 1}
+ if !reflect.DeepEqual(g.graftNodes, expGrafts) {
+ t.Errorf("invalid object %s graft: (%v) instead of (%v)", oid, g.graftNodes, expGrafts)
+ }
+
+ // There should be a conflict between v3 and v6 with v2 as ancestor.
+ isConflict, newHead, oldHead, ancestor, errConflict := hasConflict(nil, st, oid, graft)
+ if !(isConflict && newHead == "6" && oldHead == "3" && ancestor == "2" && errConflict == nil) {
+ t.Errorf("object %s: wrong conflict info: flag %t, newHead %s, oldHead %s, ancestor %s, err %v",
+ oid, isConflict, newHead, oldHead, ancestor, errConflict)
+ }
+
+ if logrec, err := getLogRecKey(nil, st, oid, oldHead); err != nil || logrec != "$sync:log:10:3" {
+ t.Errorf("invalid logrec for oldhead object %s:%s: %s", oid, oldHead, logrec)
+ }
+ if logrec, err := getLogRecKey(nil, st, oid, newHead); err != nil || logrec != "$sync:log:11:3" {
+ t.Errorf("invalid logrec for newhead object %s:%s: %s", oid, newHead, logrec)
+ }
+ if logrec, err := getLogRecKey(nil, st, oid, ancestor); err != nil || logrec != "$sync:log:10:2" {
+ t.Errorf("invalid logrec for ancestor object %s:%s: %s", oid, ancestor, logrec)
+ }
+
+ // Resolve the conflict by adding a new local v7 derived from v3 and v6 (this replay moves the head).
+ if _, err := s.dagReplayCommands(nil, "local-resolve-00.sync"); err != nil {
+ t.Fatal(err)
+ }
+
+ // Verify that the head moved to v7 and the parent map shows the resolution.
+ if head, err := getHead(nil, st, oid); err != nil || head != "7" {
+ t.Errorf("object %s has wrong head after conflict resolution: %s", oid, head)
+ }
+
+ exp["7"] = []string{"3", "6"}
+ pmap = getParentMap(nil, st, oid, nil)
+ if !reflect.DeepEqual(pmap, exp) {
+ t.Errorf("invalid object %s parent map after conflict resolution: (%v) instead of (%v)",
+ oid, pmap, exp)
+ }
+}
+
+// TestRemoteConflictTwoGrafts tests sync handling of remote updates that build
+// on the local initial state and trigger a conflict with 2 graft points.
+// An object is created locally and updated twice (v1 -> v2 -> v3). Another
+// device first learns about v1 and makes its own conflicting update v1 -> v4.
+// That remote device later learns about v2 and resolves the v2/v4 conflict by
+// creating v5. Then it makes a last v5 -> v6 update -- which will conflict
+// with v3 but it doesn't know that.
+// Now the sync order is reversed and the local device learns all of what
+// happened on the remote device. The local DAG should be augmented by
+// a subtree with 2 graft points: v1 and v2. It receives this new branch:
+// v1 -> v4 -> v5 -> v6. Note that v5 is also derived from v2 as a remote
+// conflict resolution. This should report a conflict between v3 and v6
+// (current and new heads), with v1 and v2 as graft points, and v2 as the
+// most-recent common ancestor for that conflict. The conflict is resolved
+// locally by creating v7, derived from both v3 and v6, becoming the new head.
+func TestRemoteConflictTwoGrafts(t *testing.T) {
+ svc := createService(t)
+ defer destroyService(t, svc)
+ st := svc.St()
+ s := svc.sync
+
+ oid := "foo1"
+
+ if _, err := s.dagReplayCommands(nil, "local-init-00.log.sync"); err != nil {
+ t.Fatal(err)
+ }
+ graft, err := s.dagReplayCommands(nil, "remote-conf-01.log.sync")
+ if err != nil {
+ t.Fatal(err)
+ }
+
+ // The head must not have moved (i.e. still at v3) and the parent map
+ // shows the newly grafted DAG fragment on top of the prior DAG.
+ if head, err := getHead(nil, st, oid); err != nil || head != "3" {
+ t.Errorf("object %s has wrong head: %s", oid, head)
+ }
+
+ pmap := getParentMap(nil, st, oid, graft)
+
+ exp := map[string][]string{"1": nil, "2": {"1"}, "3": {"2"}, "4": {"1"}, "5": {"2", "4"}, "6": {"5"}}
+
+ if !reflect.DeepEqual(pmap, exp) {
+ t.Errorf("invalid object %s parent map: (%v) instead of (%v)", oid, pmap, exp)
+ }
+
+ // Verify the grafting of remote nodes.
+ g := graft[oid]
+
+ expNewHeads := map[string]bool{"3": true, "6": true}
+ if !reflect.DeepEqual(g.newHeads, expNewHeads) {
+ t.Errorf("object %s has invalid newHeads: (%v) instead of (%v)", oid, g.newHeads, expNewHeads)
+ }
+
+ expGrafts := map[string]uint64{"1": 0, "2": 1}
+ if !reflect.DeepEqual(g.graftNodes, expGrafts) {
+ t.Errorf("invalid object %s graft: (%v) instead of (%v)", oid, g.graftNodes, expGrafts)
+ }
+
+ // There should be a conflict between v3 and v6 with v2 as ancestor.
+ isConflict, newHead, oldHead, ancestor, errConflict := hasConflict(nil, st, oid, graft)
+ if !(isConflict && newHead == "6" && oldHead == "3" && ancestor == "2" && errConflict == nil) {
+ t.Errorf("object %s: wrong conflict info: flag %t, newHead %s, oldHead %s, ancestor %s, err %v",
+ oid, isConflict, newHead, oldHead, ancestor, errConflict)
+ }
+
+ if logrec, err := getLogRecKey(nil, st, oid, oldHead); err != nil || logrec != "$sync:log:10:3" {
+ t.Errorf("invalid logrec for oldhead object %s:%s: %s", oid, oldHead, logrec)
+ }
+ if logrec, err := getLogRecKey(nil, st, oid, newHead); err != nil || logrec != "$sync:log:11:2" {
+ t.Errorf("invalid logrec for newhead object %s:%s: %s", oid, newHead, logrec)
+ }
+ if logrec, err := getLogRecKey(nil, st, oid, ancestor); err != nil || logrec != "$sync:log:10:2" {
+ t.Errorf("invalid logrec for ancestor object %s:%s: %s", oid, ancestor, logrec)
+ }
+
+ // Resolve the conflict by adding a new local v7 derived from v3 and v6 (this replay moves the head).
+ if _, err := s.dagReplayCommands(nil, "local-resolve-00.sync"); err != nil {
+ t.Fatal(err)
+ }
+
+ // Verify that the head moved to v7 and the parent map shows the resolution.
+ if head, err := getHead(nil, st, oid); err != nil || head != "7" {
+ t.Errorf("object %s has wrong head after conflict resolution: %s", oid, head)
+ }
+
+ exp["7"] = []string{"3", "6"}
+ pmap = getParentMap(nil, st, oid, nil)
+ if !reflect.DeepEqual(pmap, exp) {
+ t.Errorf("invalid object %s parent map after conflict resolution: (%v) instead of (%v)",
+ oid, pmap, exp)
+ }
+}
+
+// TestRemoteConflictNoAncestor tests sync handling of remote updates that create
+// the same object independently of the local initial state (no common past) and
+// trigger a conflict with no common ancestors (no graft points). An object is
+// created locally and updated twice (v1 -> v2 -> v3). Another device creates
+// the same object from scratch and updates it twice (v4 -> v5 -> v6). When
+// the local device learns of what happened on the remote device, it should
+// detect a conflict between v3 and v6 with no common ancestor. The conflict
+// is resolved locally by creating v7, derived from both v3 and v6, becoming
+// the new head.
+func TestRemoteConflictNoAncestor(t *testing.T) {
+ svc := createService(t)
+ defer destroyService(t, svc)
+ st := svc.St()
+ s := svc.sync
+
+ oid := "foo1"
+
+ if _, err := s.dagReplayCommands(nil, "local-init-00.log.sync"); err != nil {
+ t.Fatal(err)
+ }
+ graft, err := s.dagReplayCommands(nil, "remote-conf-03.log.sync")
+ if err != nil {
+ t.Fatal(err)
+ }
+
+ // The head must not have moved (i.e. still at v3) and the parent map
+ // shows the newly grafted DAG fragment on top of the prior DAG.
+ if head, err := getHead(nil, st, oid); err != nil || head != "3" {
+ t.Errorf("object %s has wrong head: %s", oid, head)
+ }
+
+ pmap := getParentMap(nil, st, oid, graft)
+
+ exp := map[string][]string{"1": nil, "2": {"1"}, "3": {"2"}, "4": nil, "5": {"4"}, "6": {"5"}}
+
+ if !reflect.DeepEqual(pmap, exp) {
+ t.Errorf("invalid object %s parent map: (%v) instead of (%v)", oid, pmap, exp)
+ }
+
+ // Verify the grafting of remote nodes.
+ g := graft[oid]
+
+ expNewHeads := map[string]bool{"3": true, "6": true}
+ if !reflect.DeepEqual(g.newHeads, expNewHeads) {
+ t.Errorf("object %s has invalid newHeads: (%v) instead of (%v)", oid, g.newHeads, expNewHeads)
+ }
+
+ expGrafts := map[string]uint64{}
+ if !reflect.DeepEqual(g.graftNodes, expGrafts) {
+ t.Errorf("invalid object %s graft: (%v) instead of (%v)", oid, g.graftNodes, expGrafts)
+ }
+
+ // There should be a conflict between v3 and v6 with no ancestor.
+ isConflict, newHead, oldHead, ancestor, errConflict := hasConflict(nil, st, oid, graft)
+ if !(isConflict && newHead == "6" && oldHead == "3" && ancestor == NoVersion && errConflict == nil) {
+ t.Errorf("object %s: wrong conflict info: flag %t, newHead %s, oldHead %s, ancestor %s, err %v",
+ oid, isConflict, newHead, oldHead, ancestor, errConflict)
+ }
+
+ if logrec, err := getLogRecKey(nil, st, oid, oldHead); err != nil || logrec != "$sync:log:10:3" {
+ t.Errorf("invalid logrec for oldhead object %s:%s: %s", oid, oldHead, logrec)
+ }
+ if logrec, err := getLogRecKey(nil, st, oid, newHead); err != nil || logrec != "$sync:log:11:3" {
+ t.Errorf("invalid logrec for newhead object %s:%s: %s", oid, newHead, logrec)
+ }
+
+ // Resolve the conflict by adding a new local v7 derived from v3 and v6 (this replay moves the head).
+ if _, err := s.dagReplayCommands(nil, "local-resolve-00.sync"); err != nil {
+ t.Fatal(err)
+ }
+
+ // Verify that the head moved to v7 and the parent map shows the resolution.
+ if head, err := getHead(nil, st, oid); err != nil || head != "7" {
+ t.Errorf("object %s has wrong head after conflict resolution: %s", oid, head)
+ }
+
+ exp["7"] = []string{"3", "6"}
+ pmap = getParentMap(nil, st, oid, nil)
+ if !reflect.DeepEqual(pmap, exp) {
+ t.Errorf("invalid object %s parent map after conflict resolution: (%v) instead of (%v)",
+ oid, pmap, exp)
+ }
+}
+
+// TestAncestorIterator checks that the iterator goes over the correct set
+// of ancestor nodes for an object given a starting node. It should traverse
+// reconvergent DAG branches only visiting each ancestor once:
+// v1 -> v2 -> v3 -> v5 -> v6 -> v8 -> v9
+//        |--> v4 ---|           |
+//        +--> v7 ---------------+
+// - Starting at v1 it should only cover v1.
+// - Starting at v3 it should only cover v1-v3.
+// - Starting at v6 it should only cover v1-v6.
+// - Starting at v9 it should cover all nodes (v1-v9).
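+//
+// The callback contract exercised below, as a minimal sketch (nil context
+// arguments mirror the rest of this file):
+//
+//    err := forEachAncestor(nil, st, oid, []string{"9"}, func(v string, nd *dagNode) error {
+//        // Each ancestor version v is visited exactly once; returning a
+//        // non-nil error aborts the traversal and is propagated to the caller.
+//        return nil
+//    })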
+func TestAncestorIterator(t *testing.T) {
+ svc := createService(t)
+ defer destroyService(t, svc)
+ st := svc.St()
+ s := svc.sync
+
+ oid := "1234"
+
+ if _, err := s.dagReplayCommands(nil, "local-init-01.sync"); err != nil {
+ t.Fatal(err)
+ }
+
+ // Loop checking the iteration behavior for different starting nodes.
+ for _, start := range []int{1, 3, 6, 9} {
+ visitCount := make(map[string]int)
+ vstart := fmt.Sprintf("%d", start)
+ forEachAncestor(nil, st, oid, []string{vstart}, func(v string, nd *dagNode) error {
+ visitCount[v]++
+ return nil
+ })
+
+ // Check that all prior nodes are visited only once.
+ for i := 1; i <= start; i++ {
+ vv := fmt.Sprintf("%d", i)
+ if visitCount[vv] != 1 {
+ t.Errorf("wrong visit count on object %s:%s starting from %s: %d instead of 1",
+ oid, vv, vstart, visitCount[vv])
+ }
+ }
+ }
+
+ // Make sure an error in the callback is returned.
+ cbErr := errors.New("callback error")
+ err := forEachAncestor(nil, st, oid, []string{"9"}, func(v string, nd *dagNode) error {
+ if v == "1" {
+ return cbErr
+ }
+ return nil
+ })
+ if err != cbErr {
+ t.Errorf("wrong error returned from callback: %v instead of %v", err, cbErr)
+ }
+}
+
+// TestPruning tests sync pruning of the DAG for an object with 3 concurrent
+// updates (i.e. 2 conflict resolution convergent points). The pruning must
+// get rid of the DAG branches across the reconvergence points:
+// v1 -> v2 -> v3 -> v5 -> v6 -> v8 -> v9
+//        |--> v4 ---|           |
+//        +--> v7 ---------------+
+// By pruning at v1, nothing is deleted.
+// Then by pruning at v2, only v1 is deleted.
+// Then by pruning at v6, v2-v5 are deleted leaving v6 and "v7 -> v8 -> v9".
+// Then by pruning at v8, v6-v7 are deleted leaving "v8 -> v9".
+// Then by pruning at v9, v8 is deleted leaving v9 as the head.
+// Then by pruning again at v9 nothing changes.
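+//
+// The delete callback passed to prune() receives the log record key of each
+// pruned node. The tests below only count invocations; a real caller would
+// presumably delete that record in the same transaction, e.g. (sketch, assuming
+// the log record key is usable directly as a store key):
+//
+//    err := prune(nil, tx, oid, version, batches,
+//        func(ctx *context.T, tx store.Transaction, logrec string) error {
+//            return tx.Delete([]byte(logrec))
+//        })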
+func TestPruning(t *testing.T) {
+ svc := createService(t)
+ defer destroyService(t, svc)
+ st := svc.St()
+ s := svc.sync
+
+ oid := "1234"
+
+ if _, err := s.dagReplayCommands(nil, "local-init-01.sync"); err != nil {
+ t.Fatal(err)
+ }
+
+ exp := map[string][]string{"1": nil, "2": {"1"}, "3": {"2"}, "4": {"2"}, "5": {"3", "4"}, "6": {"5"}, "7": {"2"}, "8": {"6", "7"}, "9": {"8"}}
+
+ // Loop pruning at an invalid version (333) then at different valid versions.
+ testVersions := []string{"333", "1", "2", "6", "8", "9", "9"}
+ delCounts := []int{0, 0, 1, 4, 2, 1, 0}
+ which := "prune-snip-"
+ remain := 9
+
+ for i, version := range testVersions {
+ batches := newBatchPruning()
+ tx := st.NewTransaction()
+ del := 0
+ err := prune(nil, tx, oid, version, batches,
+ func(ctx *context.T, tx store.Transaction, lr string) error {
+ del++
+ return nil
+ })
+ tx.Commit()
+
+ if i == 0 && err == nil {
+ t.Errorf("pruning non-existent object %s:%s did not fail", oid, version)
+ } else if i > 0 && err != nil {
+ t.Errorf("pruning object %s:%s failed: %v", oid, version, err)
+ }
+
+ if del != delCounts[i] {
+ t.Errorf("pruning object %s:%s deleted %d log records instead of %d",
+ oid, version, del, delCounts[i])
+ }
+
+ which += "*"
+ remain -= del
+
+ if head, err := getHead(nil, st, oid); err != nil || head != "9" {
+ t.Errorf("object %s has wrong head: %s", oid, head)
+ }
+
+ tx = st.NewTransaction()
+ err = pruneDone(nil, tx, batches)
+ if err != nil {
+ t.Errorf("pruneDone() failed: %v", err)
+ }
+ tx.Commit()
+
+ // Remove the pruned nodes from the expected parent map used for validation,
+ // and set the parents of the prune point to nil.
+ intVersion, err := strconv.ParseInt(version, 10, 32)
+ if err != nil {
+ t.Errorf("invalid version: %s", version)
+ }
+
+ if intVersion < 10 {
+ for j := int64(0); j < intVersion; j++ {
+ delete(exp, fmt.Sprintf("%d", j))
+ }
+ exp[version] = nil
+ }
+
+ pmap := getParentMap(nil, st, oid, nil)
+ if !reflect.DeepEqual(pmap, exp) {
+ t.Errorf("invalid object %s parent map: (%v) instead of (%v)", oid, pmap, exp)
+ }
+ }
+}
+
+// TestPruningCallbackError tests sync pruning of the DAG when the callback
+// function returns an error. The pruning must try to delete as many nodes
+// and log records as possible and properly adjust the parent pointers of the
+// node at which the pruning occurs. The object DAG is:
+// v1 -> v2 -> v3 -> v5 -> v6 -> v8 -> v9
+//        |--> v4 ---|           |
+//        +--> v7 ---------------+
+// By pruning at v9 and having the callback function fail for v4, all other
+// nodes must be deleted and only v9 remains as the head.
+func TestPruningCallbackError(t *testing.T) {
+ svc := createService(t)
+ defer destroyService(t, svc)
+ st := svc.St()
+ s := svc.sync
+
+ oid := "1234"
+
+ if _, err := s.dagReplayCommands(nil, "local-init-01.sync"); err != nil {
+ t.Fatal(err)
+ }
+
+ exp := map[string][]string{"9": nil}
+
+ // Prune at v9 with a callback function that fails for v4.
+ del, expDel := 0, 8
+ version := "9"
+
+ batches := newBatchPruning()
+ tx := st.NewTransaction()
+ err := prune(nil, tx, oid, version, batches,
+ func(ctx *context.T, tx store.Transaction, lr string) error {
+ del++
+ if lr == "logrec-03" {
+ return fmt.Errorf("refuse to delete %s", lr)
+ }
+ return nil
+ })
+ tx.Commit()
+
+ if err == nil {
+ t.Errorf("pruning object %s:%s did not fail", oid, version)
+ }
+ if del != expDel {
+ t.Errorf("pruning object %s:%s deleted %d log records instead of %d", oid, version, del, expDel)
+ }
+
+ tx = st.NewTransaction()
+ err = pruneDone(nil, tx, batches)
+ if err != nil {
+ t.Errorf("pruneDone() failed: %v", err)
+ }
+ tx.Commit()
+
+ if head, err := getHead(nil, st, oid); err != nil || head != version {
+ t.Errorf("object %s has wrong head: %s", oid, head)
+ }
+
+ pmap := getParentMap(nil, st, oid, nil)
+ if !reflect.DeepEqual(pmap, exp) {
+ t.Errorf("invalid object %s parent map: (%v) instead of (%v)", oid, pmap, exp)
+ }
+}
+
+// TestRemoteLinkedNoConflictSameHead tests sync of remote updates that contain
+// linked nodes (conflict resolution by selecting an existing version) on top of
+// a local initial state without conflict. An object is created locally and
+// updated twice (v1 -> v2 -> v3). Another device learns about v1, then creates
+// (v1 -> v4), then learns about (v1 -> v2) and resolves the (v2/v4) conflict by
+// selecting v2 over v4. It sends that new info (v4 and the v2/v4 link) back to
+// the original (local) device. Instead of a v3/v4 conflict, the device sees
+// that v2 was chosen over v4 and resolves it as a no-conflict case.
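+//
+// In DAG terms such a "link" is just an extra parent edge: the remote
+// resolution gives v2 the parent set {v1, v4}, which the expected parent map
+// below encodes as "2": {"1", "4"}.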
+func TestRemoteLinkedNoConflictSameHead(t *testing.T) {
+ svc := createService(t)
+ defer destroyService(t, svc)
+ st := svc.St()
+ s := svc.sync
+
+ oid := "foo1"
+
+ if _, err := s.dagReplayCommands(nil, "local-init-00.log.sync"); err != nil {
+ t.Fatal(err)
+ }
+ graft, err := s.dagReplayCommands(nil, "remote-noconf-link-00.log.sync")
+ if err != nil {
+ t.Fatal(err)
+ }
+
+ // The head must not have moved (i.e. still at v3) and the parent map
+ // shows the newly grafted DAG fragment on top of the prior DAG.
+ if head, err := getHead(nil, st, oid); err != nil || head != "3" {
+ t.Errorf("object %s has wrong head: %s", oid, head)
+ }
+
+ pmap := getParentMap(nil, st, oid, graft)
+
+ exp := map[string][]string{"1": nil, "2": {"1", "4"}, "3": {"2"}, "4": {"1"}}
+
+ if !reflect.DeepEqual(pmap, exp) {
+ t.Errorf("invalid object %s parent map: (%v) instead of (%v)", oid, pmap, exp)
+ }
+
+ // Verify the grafting of remote nodes.
+ g := graft[oid]
+
+ expNewHeads := map[string]bool{"3": true}
+ if !reflect.DeepEqual(g.newHeads, expNewHeads) {
+ t.Errorf("object %s has invalid newHeads: (%v) instead of (%v)", oid, g.newHeads, expNewHeads)
+ }
+
+ expGrafts := map[string]uint64{"1": 0, "4": 1}
+ if !reflect.DeepEqual(g.graftNodes, expGrafts) {
+ t.Errorf("invalid object %s graft: (%v) instead of (%v)", oid, g.graftNodes, expGrafts)
+ }
+
+ // There should be no conflict.
+ isConflict, newHead, oldHead, ancestor, errConflict := hasConflict(nil, st, oid, graft)
+ if !(!isConflict && newHead == "3" && oldHead == "3" && ancestor == NoVersion && errConflict == nil) {
+ t.Errorf("object %s: wrong conflict info: flag %t, newHead %s, oldHead %s, ancestor %s, err %v",
+ oid, isConflict, newHead, oldHead, ancestor, errConflict)
+ }
+
+ // Verify that hasConflict() fails with a nil or empty graft map.
+ isConflict, newHead, oldHead, ancestor, errConflict = hasConflict(nil, st, oid, nil)
+ if errConflict == nil {
+ t.Errorf("hasConflict() on %v did not fail with a nil graft map: flag %t, newHead %s, oldHead %s, ancestor %s, err %v",
+ oid, isConflict, newHead, oldHead, ancestor, errConflict)
+ }
+ isConflict, newHead, oldHead, ancestor, errConflict = hasConflict(nil, st, oid, newGraft())
+ if errConflict == nil {
+ t.Errorf("hasConflict() on %v did not fail with an empty graft map: flag %t, newHead %s, oldHead %s, ancestor %s, err %v",
+ oid, isConflict, newHead, oldHead, ancestor, errConflict)
+ }
+}
+
+// TestRemoteLinkedConflict tests sync of remote updates that contain linked
+// nodes (conflict resolution by selecting an existing version) on top of a local
+// initial state triggering a local conflict. An object is created locally and
+// updated twice (v1 -> v2 -> v3). Another device learned about v1 along the
+// way, created (v1 -> v4), then learned about (v1 -> v2) and resolved that
+// conflict by selecting v4 over v2. Now it sends that new info (v4 and the
+// v4/v2 link) back to the original (local) device which sees a v3/v4 conflict.
+func TestRemoteLinkedConflict(t *testing.T) {
+ svc := createService(t)
+ defer destroyService(t, svc)
+ st := svc.St()
+ s := svc.sync
+
+ oid := "foo1"
+
+ if _, err := s.dagReplayCommands(nil, "local-init-00.log.sync"); err != nil {
+ t.Fatal(err)
+ }
+ graft, err := s.dagReplayCommands(nil, "remote-conf-link.log.sync")
+ if err != nil {
+ t.Fatal(err)
+ }
+
+ // The head must not have moved (i.e. still at v3) and the parent map
+ // shows the newly grafted DAG fragment on top of the prior DAG.
+ if head, err := getHead(nil, st, oid); err != nil || head != "3" {
+ t.Errorf("object %s has wrong head: %s", oid, head)
+ }
+
+ pmap := getParentMap(nil, st, oid, graft)
+
+ exp := map[string][]string{"1": nil, "2": {"1"}, "3": {"2"}, "4": {"1", "2"}}
+
+ if !reflect.DeepEqual(pmap, exp) {
+ t.Errorf("invalid object %s parent map: (%v) instead of (%v)", oid, pmap, exp)
+ }
+
+ // Verify the grafting of remote nodes.
+ g := graft[oid]
+
+ expNewHeads := map[string]bool{"3": true, "4": true}
+ if !reflect.DeepEqual(g.newHeads, expNewHeads) {
+ t.Errorf("object %s has invalid newHeads: (%v) instead of (%v)", oid, g.newHeads, expNewHeads)
+ }
+
+ expGrafts := map[string]uint64{"1": 0, "2": 1}
+ if !reflect.DeepEqual(g.graftNodes, expGrafts) {
+ t.Errorf("invalid object %s graft: (%v) instead of (%v)", oid, g.graftNodes, expGrafts)
+ }
+
+ // There should be a conflict.
+ isConflict, newHead, oldHead, ancestor, errConflict := hasConflict(nil, st, oid, graft)
+ if !(isConflict && newHead == "4" && oldHead == "3" && ancestor == "2" && errConflict == nil) {
+ t.Errorf("object %s: wrong conflict info: flag %t, newHead %s, oldHead %s, ancestor %s, err %v",
+ oid, isConflict, newHead, oldHead, ancestor, errConflict)
+ }
+}
+
+// TestRemoteLinkedNoConflictNewHead tests sync of remote updates that contain
+// linked nodes (conflict resolution by selecting an existing version) on top of
+// a local initial state without conflict, but move the head node to a new one.
+// An object is created locally and updated twice (v1 -> v2 -> v3). Another
+// device learned about v1 along the way, created (v1 -> v4), then learned
+// about (v1 -> v2 -> v3) and resolved that conflict by selecting v4 over v3.
+// Now it sends that new info (v4 and the v4/v3 link) back to the original
+// (local) device. The device sees that the new head v4 is "derived" from v3
+// thus no conflict.
+func TestRemoteLinkedNoConflictNewHead(t *testing.T) {
+ svc := createService(t)
+ defer destroyService(t, svc)
+ st := svc.St()
+ s := svc.sync
+
+ oid := "foo1"
+
+ if _, err := s.dagReplayCommands(nil, "local-init-00.log.sync"); err != nil {
+ t.Fatal(err)
+ }
+ graft, err := s.dagReplayCommands(nil, "remote-noconf-link-01.log.sync")
+ if err != nil {
+ t.Fatal(err)
+ }
+
+ // The head must not have moved (i.e. still at v3) and the parent map
+ // shows the newly grafted DAG fragment on top of the prior DAG.
+ if head, err := getHead(nil, st, oid); err != nil || head != "3" {
+ t.Errorf("object %s has wrong head: %s", oid, head)
+ }
+
+ pmap := getParentMap(nil, st, oid, graft)
+
+ exp := map[string][]string{"1": nil, "2": {"1"}, "3": {"2"}, "4": {"1", "3"}}
+
+ if !reflect.DeepEqual(pmap, exp) {
+ t.Errorf("invalid object %s parent map: (%v) instead of (%v)", oid, pmap, exp)
+ }
+
+ // Verify the grafting of remote nodes.
+ g := graft[oid]
+
+ expNewHeads := map[string]bool{"4": true}
+ if !reflect.DeepEqual(g.newHeads, expNewHeads) {
+ t.Errorf("object %s has invalid newHeads: (%v) instead of (%v)", oid, g.newHeads, expNewHeads)
+ }
+
+ expGrafts := map[string]uint64{"1": 0, "3": 2}
+ if !reflect.DeepEqual(g.graftNodes, expGrafts) {
+ t.Errorf("invalid object %s graft: (%v) instead of (%v)", oid, g.graftNodes, expGrafts)
+ }
+
+ // There should be no conflict.
+ isConflict, newHead, oldHead, ancestor, errConflict := hasConflict(nil, st, oid, graft)
+ if !(!isConflict && newHead == "4" && oldHead == "3" && ancestor == NoVersion && errConflict == nil) {
+ t.Errorf("object %s: wrong conflict info: flag %t, newHead %s, oldHead %s, ancestor %s, err %v",
+ oid, isConflict, newHead, oldHead, ancestor, errConflict)
+ }
+}
+
+// TestRemoteLinkedNoConflictNewHeadOvertake tests sync of remote updates that
+// contain linked nodes (conflict resolution by selecting an existing version)
+// on top of a local initial state without conflict, but move the head node
+// to a new one that overtook the linked node.
+//
+// An object is created locally and updated twice (v1 -> v2 -> v3). Another
+// device learned about v1 along the way, created (v1 -> v4), then learned
+// about (v1 -> v2 -> v3) and resolved that conflict by selecting v3 over v4.
+// Then it creates a new update v5 from v3 (v3 -> v5). Now it sends that new
+// info (v4, the v3/v4 link, and v5) back to the original (local) device.
+// The device sees that the new head v5 is "derived" from v3 thus no conflict.
+func TestRemoteLinkedNoConflictNewHeadOvertake(t *testing.T) {
+ svc := createService(t)
+ defer destroyService(t, svc)
+ st := svc.St()
+ s := svc.sync
+
+ oid := "foo1"
+
+ if _, err := s.dagReplayCommands(nil, "local-init-00.log.sync"); err != nil {
+ t.Fatal(err)
+ }
+ graft, err := s.dagReplayCommands(nil, "remote-noconf-link-02.log.sync")
+ if err != nil {
+ t.Fatal(err)
+ }
+
+ // The head must not have moved (i.e. still at v3) and the parent map
+ // shows the newly grafted DAG fragment on top of the prior DAG.
+ if head, err := getHead(nil, st, oid); err != nil || head != "3" {
+ t.Errorf("object %s has wrong head: %s", oid, head)
+ }
+
+ pmap := getParentMap(nil, st, oid, graft)
+
+ exp := map[string][]string{"1": nil, "2": {"1"}, "3": {"2", "4"}, "4": {"1"}, "5": {"3"}}
+
+ if !reflect.DeepEqual(pmap, exp) {
+ t.Errorf("invalid object %s parent map: (%v) instead of (%v)", oid, pmap, exp)
+ }
+
+ // Verify the grafting of remote nodes.
+ g := graft[oid]
+
+ expNewHeads := map[string]bool{"5": true}
+ if !reflect.DeepEqual(g.newHeads, expNewHeads) {
+ t.Errorf("object %s has invalid newHeads: (%v) instead of (%v)", oid, g.newHeads, expNewHeads)
+ }
+
+ expGrafts := map[string]uint64{"1": 0, "3": 2, "4": 1}
+ if !reflect.DeepEqual(g.graftNodes, expGrafts) {
+ t.Errorf("invalid object %s graft: (%v) instead of (%v)", oid, g.graftNodes, expGrafts)
+ }
+
+ // There should be no conflict.
+ isConflict, newHead, oldHead, ancestor, errConflict := hasConflict(nil, st, oid, graft)
+ if !(!isConflict && newHead == "5" && oldHead == "3" && ancestor == NoVersion && errConflict == nil) {
+ t.Errorf("object %s: wrong conflict info: flag %t, newHead %s, oldHead %s, ancestor %s, err %v",
+ oid, isConflict, newHead, oldHead, ancestor, errConflict)
+ }
+
+ // Move the head.
+ tx := st.NewTransaction()
+ if err = moveHead(nil, tx, oid, newHead); err != nil {
+ t.Errorf("object %s cannot move head to %s: %v", oid, newHead, err)
+ }
+ tx.Commit()
+
+ // Now new info comes from another device repeating the v3/v4 link.
+ // Verify that it is a NOP (no changes).
+ graft, err = s.dagReplayCommands(nil, "remote-noconf-link-repeat.log.sync")
+ if err != nil {
+ t.Fatal(err)
+ }
+
+ if head, err := getHead(nil, st, oid); err != nil || head != "5" {
+ t.Errorf("object %s has wrong head: %s", oid, head)
+ }
+
+ g = graft[oid]
+
+ if !reflect.DeepEqual(g.newHeads, expNewHeads) {
+ t.Errorf("object %s has invalid newHeads: (%v) instead of (%v)", oid, g.newHeads, expNewHeads)
+ }
+
+ expGrafts = map[string]uint64{}
+ if !reflect.DeepEqual(g.graftNodes, expGrafts) {
+ t.Errorf("invalid object %s graft: (%v) instead of (%v)", oid, g.graftNodes, expGrafts)
+ }
+
+ isConflict, newHead, oldHead, ancestor, errConflict = hasConflict(nil, st, oid, graft)
+ if !(!isConflict && newHead == "5" && oldHead == "5" && ancestor == NoVersion && errConflict == nil) {
+ t.Errorf("object %s: wrong conflict info: flag %t, newHead %s, oldHead %s, ancestor %s, err %v",
+ oid, isConflict, newHead, oldHead, ancestor, errConflict)
+ }
+}
+
+// TestAddNodeBatch tests adding multiple DAG nodes grouped within a batch.
+func TestAddNodeBatch(t *testing.T) {
+ svc := createService(t)
+ defer destroyService(t, svc)
+ st := svc.St()
+ s := svc.sync
+
+ if _, err := s.dagReplayCommands(nil, "local-init-02.sync"); err != nil {
+ t.Fatal(err)
+ }
+
+ oid_a, oid_b, oid_c := "1234", "6789", "2222"
+
+ tx := st.NewTransaction()
+
+ // Verify NoBatchId is reported as an error.
+ if err := s.endBatch(nil, tx, NoBatchId, 0); err == nil {
+ t.Errorf("endBatch() did not fail for invalid 'NoBatchId' value")
+ }
+ if _, err := getBatch(nil, st, NoBatchId); err == nil {
+ t.Errorf("getBatch() did not fail for invalid 'NoBatchId' value")
+ }
+ if err := setBatch(nil, tx, NoBatchId, nil); err == nil {
+ t.Errorf("setBatch() did not fail for invalid 'NoBatchId' value")
+ }
+ if err := delBatch(nil, tx, NoBatchId); err == nil {
+ t.Errorf("delBatch() did not fail for invalid 'NoBatchId' value")
+ }
+
+ // Mutate 2 objects within a batch.
+ btid_1 := s.startBatch(nil, st, NoBatchId)
+ if btid_1 == NoBatchId {
+ t.Fatal("cannot start 1st DAG batch")
+ }
+ if err := s.endBatch(nil, tx, btid_1, 0); err == nil {
+ t.Errorf("endBatch() did not fail for a zero-count batch")
+ }
+
+ info := s.batches[btid_1]
+ if info == nil {
+ t.Errorf("batches state for ID %v not found", btid_1)
+ }
+ if n := len(info.Objects); n != 0 {
+ t.Errorf("batch info map for ID %v has length %d instead of 0", btid_1, n)
+ }
+
+ if err := s.addNode(nil, tx, oid_a, "3", "logrec-a-03", false, []string{"2"}, btid_1, nil); err != nil {
+ t.Errorf("cannot addNode() on object %s and batch ID %v: %v", oid_a, btid_1, err)
+ }
+
+ if id := s.startBatch(nil, st, btid_1); id != btid_1 {
+ t.Fatalf("restarting batch failed: got %v instead of %v", id, btid_1)
+ }
+
+ if err := s.addNode(nil, tx, oid_b, "3", "logrec-b-03", false, []string{"2"}, btid_1, nil); err != nil {
+ t.Errorf("cannot addNode() on object %s and batch ID %v: %v", oid_b, btid_1, err)
+ }
+
+ // At the same time mutate the 3rd object in another batch.
+ btid_2 := s.startBatch(nil, st, NoBatchId)
+ if btid_2 == NoBatchId {
+ t.Fatal("cannot start 2nd DAG batch")
+ }
+
+ info = s.batches[btid_2]
+ if info == nil {
+ t.Errorf("batches state for ID %v not found", btid_2)
+ }
+ if n := len(info.Objects); n != 0 {
+ t.Errorf("batch info map for ID %v has length %d instead of 0", btid_2, n)
+ }
+
+ if err := s.addNode(nil, tx, oid_c, "2", "logrec-c-02", false, []string{"1"}, btid_2, nil); err != nil {
+ t.Errorf("cannot addNode() on object %s and batch ID %v: %v", oid_c, btid_2, err)
+ }
+
+ // Verify the in-memory batch sets constructed.
+ info = s.batches[btid_1]
+ if info == nil {
+ t.Errorf("batches state for ID %v not found", btid_1)
+ }
+
+ expInfo := &batchInfo{map[string]string{oid_a: "3", oid_b: "3"}, 0}
+ if !reflect.DeepEqual(info, expInfo) {
+ t.Errorf("invalid batch info for ID %v: %v instead of %v", btid_1, info, expInfo)
+ }
+
+ info = s.batches[btid_2]
+ if info == nil {
+ t.Errorf("batches state for ID %v not found", btid_2)
+ }
+
+ expInfo = &batchInfo{map[string]string{oid_c: "2"}, 0}
+ if !reflect.DeepEqual(info, expInfo) {
+ t.Errorf("invalid batch info for ID %v: %v instead of %v", btid_2, info, expInfo)
+ }
+
+ // Verify failing to use a batch ID not returned by startBatch().
+ bad_btid := btid_1 + 1
+ for bad_btid == NoBatchId || bad_btid == btid_2 {
+ bad_btid++
+ }
+
+ if err := s.addNode(nil, tx, oid_c, "3", "logrec-c-03", false, []string{"2"}, bad_btid, nil); err == nil {
+ t.Errorf("addNode() did not fail on object %s for a bad batch ID %v", oid_c, bad_btid)
+ }
+ if err := s.endBatch(nil, tx, bad_btid, 1); err == nil {
+ t.Errorf("endBatch() did not fail for a bad batch ID %v", bad_btid)
+ }
+
+ // End the 1st batch and verify the in-memory and in-store data.
+ if err := s.endBatch(nil, tx, btid_1, 2); err != nil {
+ t.Errorf("cannot endBatch() for ID %v: %v", btid_1, err)
+ }
+ tx.Commit()
+
+ if info = s.batches[btid_1]; info != nil {
+ t.Errorf("batch info for ID %v still exists", btid_1)
+ }
+
+ info, err := getBatch(nil, st, btid_1)
+ if err != nil {
+ t.Errorf("cannot getBatch() for ID %v: %v", btid_1, err)
+ }
+
+ expInfo = &batchInfo{map[string]string{oid_a: "3", oid_b: "3"}, 2}
+ if !reflect.DeepEqual(info, expInfo) {
+ t.Errorf("invalid batch info from DAG storage for ID %v: %v instead of %v",
+ btid_1, info, expInfo)
+ }
+
+ info = s.batches[btid_2]
+ if info == nil {
+ t.Errorf("batches state for ID %v not found", btid_2)
+ }
+
+ expInfo = &batchInfo{map[string]string{oid_c: "2"}, 0}
+ if !reflect.DeepEqual(info, expInfo) {
+ t.Errorf("invalid batch info for ID %v: %v instead of %v", btid_2, info, expInfo)
+ }
+
+ // End the 2nd batch and re-verify the in-memory and in-store data.
+ tx = st.NewTransaction()
+ if err := s.endBatch(nil, tx, btid_2, 1); err != nil {
+ t.Errorf("cannot endBatch() for ID %v: %v", btid_2, err)
+ }
+ tx.Commit()
+
+ if info = s.batches[btid_2]; info != nil {
+ t.Errorf("batch info for ID %v still exists", btid_2)
+ }
+
+ info, err = getBatch(nil, st, btid_2)
+ if err != nil {
+ t.Errorf("cannot getBatch() for ID %v: %v", btid_2, err)
+ }
+
+ expInfo = &batchInfo{map[string]string{oid_c: "2"}, 1}
+ if !reflect.DeepEqual(info, expInfo) {
+ t.Errorf("invalid batch info from DAG storage for ID %v: %v instead of %v", btid_2, info, expInfo)
+ }
+
+ if n := len(s.batches); n != 0 {
+ t.Errorf("batches set in-memory: %d entries found, should be empty", n)
+ }
+
+ // Test incrementally filling up a batch.
+ btid_3 := uint64(100)
+ if s.batches[btid_3] != nil {
+ t.Errorf("batch info for ID %v found", btid_3)
+ }
+
+ if id := s.startBatch(nil, st, btid_3); id != btid_3 {
+ t.Fatalf("cannot start batch %v", btid_3)
+ }
+
+ info = s.batches[btid_3]
+ if info == nil {
+ t.Errorf("batches state for ID %v not found", btid_3)
+ }
+ if n := len(info.Objects); n != 0 {
+ t.Errorf("batch info map for ID %v has length %d instead of 0", btid_3, n)
+ }
+
+ tx = st.NewTransaction()
+ if err := s.addNode(nil, tx, oid_a, "4", "logrec-a-04", false, []string{"3"}, btid_3, nil); err != nil {
+ t.Errorf("cannot addNode() on object %s and batch ID %v: %v", oid_a, btid_3, err)
+ }
+
+ if err := s.endBatch(nil, tx, btid_3, 2); err != nil {
+ t.Errorf("cannot endBatch() for ID %v: %v", btid_3, err)
+ }
+ tx.Commit()
+
+ if s.batches[btid_3] != nil {
+ t.Errorf("batch info for ID %v still exists", btid_3)
+ }
+
+ info, err = getBatch(nil, st, btid_3)
+ if err != nil {
+ t.Errorf("cannot getBatch() for ID %v: %v", btid_3, err)
+ }
+
+ expInfo = &batchInfo{map[string]string{oid_a: "4"}, 2}
+ if !reflect.DeepEqual(info, expInfo) {
+ t.Errorf("invalid batch info from DAG storage for ID %v: %v instead of %v",
+ btid_3, info, expInfo)
+ }
+
+ if id := s.startBatch(nil, st, btid_3); id != btid_3 {
+ t.Fatalf("cannot start batch %v", btid_3)
+ }
+
+ info = s.batches[btid_3]
+ if info == nil {
+ t.Errorf("batch state for ID %v not found", btid_3)
+ }
+
+ if !reflect.DeepEqual(info, expInfo) {
+ t.Errorf("invalid batch info from DAG storage for ID %v: %v instead of %v",
+ btid_3, info, expInfo)
+ }
+
+ tx = st.NewTransaction()
+ if err := s.addNode(nil, tx, oid_b, "4", "logrec-b-04", false, []string{"3"}, btid_3, nil); err != nil {
+ t.Errorf("cannot addNode() on object %s and batch ID %v: %v", oid_b, btid_3, err)
+ }
+
+ if err := s.endBatch(nil, tx, btid_3, 3); err == nil {
+ t.Errorf("endBatch() didn't fail for ID %v: %v", btid_3, err)
+ }
+
+ if err := s.endBatch(nil, tx, btid_3, 2); err != nil {
+ t.Errorf("cannot endBatch() for ID %v: %v", btid_3, err)
+ }
+ tx.Commit()
+
+ info, err = getBatch(nil, st, btid_3)
+ if err != nil {
+ t.Errorf("cannot getBatch() for ID %v: %v", btid_3, err)
+ }
+
+ expInfo = &batchInfo{map[string]string{oid_a: "4", oid_b: "4"}, 2}
+ if !reflect.DeepEqual(info, expInfo) {
+ t.Errorf("invalid batch state from DAG storage for ID %v: %v instead of %v",
+ btid_3, info, expInfo)
+ }
+
+ // Get the 3 new nodes from the DAG and verify their batch IDs.
+ type nodeTest struct {
+ oid string
+ version string
+ btid uint64
+ }
+ tests := []nodeTest{
+ {oid_a, "3", btid_1},
+ {oid_a, "4", btid_3},
+ {oid_b, "3", btid_1},
+ {oid_b, "4", btid_3},
+ {oid_c, "2", btid_2},
+ }
+
+ for _, test := range tests {
+ node, err := getNode(nil, st, test.oid, test.version)
+ if err != nil {
+ t.Errorf("cannot find object %s:%s: %v", test.oid, test.version, err)
+ }
+ if node.BatchId != test.btid {
+ t.Errorf("invalid batch ID for object %s:%s: %v instead of %v",
+ test.oid, test.version, node.BatchId, test.btid)
+ }
+ }
+}
+
+// TestPruningBatches tests pruning DAG nodes grouped within batches.
+func TestPruningBatches(t *testing.T) {
+ svc := createService(t)
+ defer destroyService(t, svc)
+ st := svc.St()
+ s := svc.sync
+
+ if _, err := s.dagReplayCommands(nil, "local-init-02.sync"); err != nil {
+ t.Fatal(err)
+ }
+
+ oid_a, oid_b, oid_c := "1234", "6789", "2222"
+
+ // Mutate objects in 2 batches then add non-batch mutations to act as
+ // the pruning points. Before pruning the DAG is:
+ // a1 -- a2 -- (a3) --- a4
+ // b1 -- b2 -- (b3) -- (b4) -- b5
+ // c1 ---------------- (c2)
+ // Now by pruning at (a4, b5, c2), the new DAG should be:
+ // a4
+ // b5
+ // (c2)
+ // Batch 1 (a3, b3) gets deleted, but batch 2 (b4, c2) still has (c2)
+ // dangling waiting for a future pruning.
+ btid_1 := s.startBatch(nil, st, NoBatchId)
+ if btid_1 == NoBatchId {
+ t.Fatal("cannot start 1st DAG addNode() batch")
+ }
+
+ tx := st.NewTransaction()
+ if err := s.addNode(nil, tx, oid_a, "3", "logrec-a-03", false, []string{"2"}, btid_1, nil); err != nil {
+ t.Errorf("cannot addNode() on object %s and batch ID %v: %v", oid_a, btid_1, err)
+ }
+ if err := s.addNode(nil, tx, oid_b, "3", "logrec-b-03", false, []string{"2"}, btid_1, nil); err != nil {
+ t.Errorf("cannot addNode() on object %s and batch ID %v: %v", oid_b, btid_1, err)
+ }
+ if err := s.endBatch(nil, tx, btid_1, 2); err != nil {
+ t.Errorf("cannot endBatch() for ID %v: %v", btid_1, err)
+ }
+ tx.Commit()
+
+ btid_2 := s.startBatch(nil, st, NoBatchId)
+ if btid_2 == NoBatchId {
+ t.Fatal("cannot start 2nd DAG addNode() batch")
+ }
+
+ tx = st.NewTransaction()
+ if err := s.addNode(nil, tx, oid_b, "4", "logrec-b-04", false, []string{"3"}, btid_2, nil); err != nil {
+ t.Errorf("cannot addNode() on object %s and batch ID %v: %v", oid_b, btid_2, err)
+ }
+ if err := s.addNode(nil, tx, oid_c, "2", "logrec-c-02", false, []string{"1"}, btid_2, nil); err != nil {
+ t.Errorf("cannot addNode() on object %s and batch ID %v: %v", oid_c, btid_2, err)
+ }
+ if err := s.endBatch(nil, tx, btid_2, 2); err != nil {
+ t.Errorf("cannot endBatch() for ID %v: %v", btid_2, err)
+ }
+ tx.Commit()
+
+ tx = st.NewTransaction()
+ if err := s.addNode(nil, tx, oid_a, "4", "logrec-a-04", false, []string{"3"}, NoBatchId, nil); err != nil {
+ t.Errorf("cannot addNode() on object %s: %v", oid_a, err)
+ }
+ if err := s.addNode(nil, tx, oid_b, "5", "logrec-b-05", false, []string{"4"}, NoBatchId, nil); err != nil {
+ t.Errorf("cannot addNode() on object %s: %v", oid_b, err)
+ }
+
+ if err := moveHead(nil, tx, oid_a, "4"); err != nil {
+ t.Errorf("object %s cannot move head: %v", oid_a, err)
+ }
+ if err := moveHead(nil, tx, oid_b, "5"); err != nil {
+ t.Errorf("object %s cannot move head: %v", oid_b, err)
+ }
+ if err := moveHead(nil, tx, oid_c, "2"); err != nil {
+ t.Errorf("object %s cannot move head: %v", oid_c, err)
+ }
+ tx.Commit()
+
+ // Verify the batch sets.
+ info, err := getBatch(nil, st, btid_1)
+ if err != nil {
+ t.Errorf("cannot getBatch() for ID %v: %v", btid_1, err)
+ }
+
+ expInfo := &batchInfo{map[string]string{oid_a: "3", oid_b: "3"}, 2}
+ if !reflect.DeepEqual(info, expInfo) {
+ t.Errorf("invalid batch info from DAG storage for ID %v: %v instead of %v",
+ btid_1, info, expInfo)
+ }
+
+ info, err = getBatch(nil, st, btid_2)
+ if err != nil {
+ t.Errorf("cannot getBatch() for ID %v: %v", btid_2, err)
+ }
+
+ expInfo = &batchInfo{map[string]string{oid_b: "4", oid_c: "2"}, 2}
+ if !reflect.DeepEqual(info, expInfo) {
+ t.Errorf("invalid batch info from DAG storage for ID %v: %v instead of %v",
+ btid_2, info, expInfo)
+ }
+
+ // Prune the 3 objects at their head nodes.
+ batches := newBatchPruning()
+ tx = st.NewTransaction()
+ for _, oid := range []string{oid_a, oid_b, oid_c} {
+ head, err := getHead(nil, st, oid)
+ if err != nil {
+ t.Errorf("cannot getHead() on object %s: %v", oid, err)
+ }
+ err = prune(nil, tx, oid, head, batches,
+ func(ctx *context.T, itx store.Transaction, lr string) error {
+ return nil
+ })
+ if err != nil {
+ t.Errorf("cannot prune() on object %s: %v", oid, err)
+ }
+ }
+
+ if err = pruneDone(nil, tx, batches); err != nil {
+ t.Errorf("pruneDone() failed: %v", err)
+ }
+ tx.Commit()
+
+ // Verify that batch-1 was deleted and batch-2 still has c2 in it.
+ info, err = getBatch(nil, st, btid_1)
+ if err == nil {
+ t.Errorf("getBatch() did not fail for ID %v: %v", btid_1, info)
+ }
+
+ info, err = getBatch(nil, st, btid_2)
+ if err != nil {
+ t.Errorf("cannot getBatch() for ID %v: %v", btid_2, err)
+ }
+
+ expInfo = &batchInfo{map[string]string{oid_c: "2"}, 2}
+ if !reflect.DeepEqual(info, expInfo) {
+ t.Errorf("invalid batch info for ID %v: %v instead of %v", btid_2, info, expInfo)
+ }
+
+ // Add c3 as a new head and prune at that point. This should GC batch-2.
+ tx = st.NewTransaction()
+ if err := s.addNode(nil, tx, oid_c, "3", "logrec-c-03", false, []string{"2"}, NoBatchId, nil); err != nil {
+ t.Errorf("cannot addNode() on object %s: %v", oid_c, err)
+ }
+ if err = moveHead(nil, tx, oid_c, "3"); err != nil {
+ t.Errorf("object %s cannot move head: %v", oid_c, err)
+ }
+
+ batches = newBatchPruning()
+ err = prune(nil, tx, oid_c, "3", batches,
+ func(ctx *context.T, itx store.Transaction, lr string) error {
+ return nil
+ })
+ if err != nil {
+ t.Errorf("cannot prune() on object %s: %v", oid_c, err)
+ }
+ if err = pruneDone(nil, tx, batches); err != nil {
+ t.Errorf("pruneDone() #2 failed: %v", err)
+ }
+ tx.Commit()
+
+ info, err = getBatch(nil, st, btid_2)
+ if err == nil {
+ t.Errorf("getBatch() did not fail for ID %v: %v", btid_2, info)
+ }
+}
diff --git a/services/syncbase/vsync/initiator.go b/services/syncbase/vsync/initiator.go
new file mode 100644
index 0000000..cccf217
--- /dev/null
+++ b/services/syncbase/vsync/initiator.go
@@ -0,0 +1,968 @@
+// Copyright 2015 The Vanadium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package vsync
+
+// Initiator is a goroutine that periodically picks a peer from all the known
+// remote peers, and requests deltas from that peer for all the SyncGroups in
+// common across all apps/databases. It then modifies the sync metadata (DAG and
+// local log records) based on the deltas, detects and resolves conflicts if
+// any, and suitably updates the local Databases.
+
+import (
+ "sort"
+ "strings"
+ "time"
+
+ "v.io/syncbase/v23/services/syncbase/nosql"
+ "v.io/syncbase/x/ref/services/syncbase/server/interfaces"
+ "v.io/syncbase/x/ref/services/syncbase/server/util"
+ "v.io/syncbase/x/ref/services/syncbase/server/watchable"
+ "v.io/syncbase/x/ref/services/syncbase/store"
+ "v.io/v23/context"
+ "v.io/v23/naming"
+ "v.io/v23/vdl"
+ "v.io/v23/verror"
+ "v.io/v23/vom"
+ "v.io/x/lib/vlog"
+)
+
+// Policies to pick a peer to sync with.
+const (
+ // Picks a peer at random from the available set.
+ selectRandom = iota
+
+ // TODO(hpucha): implement other policies.
+ // Picks a peer with most differing generations.
+ selectMostDiff
+
+ // Picks a peer that was synced with the furthest in the past.
+ selectOldest
+)
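+
+// As an illustration of the selectRandom policy, a picker could be sketched as
+// follows (hypothetical helper; the actual pickPeer implementation lives
+// elsewhere and consults the member view, and "math/rand" is assumed):
+//
+//    func pickRandomPeer(ctx *context.T, peers []string) (string, error) {
+//        if len(peers) == 0 {
+//            return "", verror.New(verror.ErrInternal, ctx, "no known peers")
+//        }
+//        return peers[rand.Intn(len(peers))], nil
+//    }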
+
+var (
+ // peerSyncInterval is the duration between two consecutive peer
+ // contacts. During every peer contact, the initiator obtains any
+ // pending updates from that peer.
+ peerSyncInterval = 50 * time.Millisecond
+
+ // peerSelectionPolicy is the policy used to select a peer when
+ // the initiator gets a chance to sync.
+ peerSelectionPolicy = selectRandom
+)
+
+// syncer wakes up every peerSyncInterval to do work: (1) Act as an initiator
+// for SyncGroup metadata by selecting a SyncGroup Admin, and syncing SyncGroup
+// metadata with it (getting updates from the remote peer, detecting and
+// resolving conflicts); (2) Refresh memberView if needed and act as an
+// initiator for data by selecting a peer, and syncing data corresponding to all
+// common SyncGroups across all Databases; (3) Act as a SyncGroup publisher to
+// publish pending SyncGroups; (4) Garbage collect older generations.
+//
+// TODO(hpucha): Currently only does initiation. Add rest.
+func (s *syncService) syncer(ctx *context.T) {
+ defer s.pending.Done()
+
+ ticker := time.NewTicker(peerSyncInterval)
+ defer ticker.Stop()
+
+ for {
+ select {
+ case <-s.closed:
+ vlog.VI(1).Info("sync: syncer: channel closed, stop work and exit")
+ return
+
+ case <-ticker.C:
+ }
+
+ // TODO(hpucha): Cut a gen for the responder even if there is no
+ // one to initiate to?
+
+ // Do work.
+ peer, err := s.pickPeer(ctx)
+ if err != nil {
+ continue
+ }
+ s.getDeltasFromPeer(ctx, peer)
+ }
+}
+
+// getDeltasFromPeer performs an initiation round to the specified
+// peer. An initiation round consists of:
+// * Contacting the peer to receive all the deltas based on the local genvector.
+// * Processing those deltas to discover objects which have been updated.
+// * Processing updated objects to detect and resolve any conflicts if needed.
+// * Communicating relevant object updates to the Database, and updating local
+// genvector to catch up to the received remote genvector.
+//
+// The processing of the deltas is done one Database at a time. If a local error
+// is encountered during the processing of a Database, that Database is skipped
+// and the initiator continues on to the next one. If the connection to the peer
+// encounters an error, this initiation round is aborted. Note that until the
+// local genvector is updated based on the received deltas (the last step in an
+// initiation round), the work done by the initiator is idempotent.
+//
+// TODO(hpucha): Check the idempotence, esp in addNode in DAG.
+func (s *syncService) getDeltasFromPeer(ctxIn *context.T, peer string) {
+ vlog.VI(2).Infof("sync: getDeltasFromPeer: begin: contacting peer %s", peer)
+ defer vlog.VI(2).Infof("sync: getDeltasFromPeer: end: contacting peer %s", peer)
+
+ ctx, cancel := context.WithRootCancel(ctxIn)
+
+ info := s.copyMemberInfo(ctx, peer)
+ if info == nil {
+ vlog.Fatalf("sync: getDeltasFromPeer: missing information in member view for %q", peer)
+ }
+ connected := false
+ var stream interfaces.SyncGetDeltasClientCall
+
+ // Sync each Database that may have SyncGroups common with this peer,
+ // one at a time.
+ for gdbName, sgInfo := range info.db2sg {
+
+ // Initialize initiation state for syncing this Database.
+ iSt, err := newInitiationState(ctx, s, peer, gdbName, sgInfo)
+ if err != nil {
+ vlog.Errorf("sync: getDeltasFromPeer: couldn't initialize initiator state for peer %s, gdb %s, err %v", peer, gdbName, err)
+ continue
+ }
+
+ if len(iSt.sgIds) == 0 || len(iSt.sgPfxs) == 0 {
+ vlog.Errorf("sync: getDeltasFromPeer: didn't find any SyncGroups for peer %s, gdb %s, err %v", peer, gdbName, err)
+ continue
+ }
+
+ // Make contact with the peer once.
+ if !connected {
+ stream, connected = iSt.connectToPeer(ctx)
+ if !connected {
+ // Try a different Database. Perhaps there are
+ // different mount tables.
+ continue
+ }
+ }
+
+ // Create local genvec so that it contains knowledge only about common prefixes.
+ if err := iSt.createLocalGenVec(ctx); err != nil {
+ vlog.Errorf("sync: getDeltasFromPeer: error creating local genvec for gdb %s, err %v", gdbName, err)
+ continue
+ }
+
+ iSt.stream = stream
+ req := interfaces.DeltaReq{
+ AppName: iSt.appName,
+ DbName: iSt.dbName,
+ SgIds: iSt.sgIds,
+ InitVec: iSt.local,
+ }
+
+ vlog.VI(3).Infof("sync: getDeltasFromPeer: send request: %v", req)
+ sender := iSt.stream.SendStream()
+ sender.Send(req)
+
+ // Obtain deltas from the peer over the network.
+ if err := iSt.recvAndProcessDeltas(ctx); err != nil {
+ vlog.Errorf("sync: getDeltasFromPeer: error receiving deltas for gdb %s, err %v", gdbName, err)
+ // Returning here since something could be wrong with
+ // the connection, and no point in attempting the next
+ // Database.
+ cancel()
+ stream.Finish()
+ return
+ }
+ vlog.VI(3).Infof("sync: getDeltasFromPeer: got reply: %v", iSt.remote)
+
+ if err := iSt.processUpdatedObjects(ctx); err != nil {
+ vlog.Errorf("sync: getDeltasFromPeer: error processing objects for gdb %s, err %v", gdbName, err)
+ // Move to the next Database even if processing updates
+ // failed.
+ continue
+ }
+ }
+
+ if connected {
+ stream.Finish()
+ }
+ cancel()
+}
+
+type sgSet map[interfaces.GroupId]struct{}
+
+// initiationState is accumulated for each Database during an initiation round.
+type initiationState struct {
+ // Relative name of the peer to sync with.
+ peer string
+
+ // Collection of mount tables that this peer may have registered with.
+ mtTables map[string]struct{}
+
+ // SyncGroups being requested in the initiation round.
+ sgIds sgSet
+
+ // SyncGroup prefixes being requested in the initiation round, and their
+ // corresponding SyncGroup ids.
+ sgPfxs map[string]sgSet
+
+ // Local generation vector.
+ local interfaces.GenVector
+
+ // Generation vector from the remote peer.
+ remote interfaces.GenVector
+
+ // Updated local generation vector at the end of the initiation round.
+ updLocal interfaces.GenVector
+
+ // State to track updated objects during a log replay.
+ updObjects map[string]*objConflictState
+
+ // DAG state that tracks conflicts and common ancestors.
+ dagGraft graftMap
+
+ sync *syncService
+ appName string
+ dbName string
+ st store.Store // Store handle to the Database.
+ stream interfaces.SyncGetDeltasClientCall // Stream handle for the GetDeltas RPC.
+
+ // Transaction handle for the initiation round. Used during the update
+ // of objects in the Database.
+ tx store.Transaction
+}
+
+// objConflictState contains the conflict state for an object that is updated
+// during an initiator round.
+type objConflictState struct {
+ isConflict bool
+ newHead string
+ oldHead string
+ ancestor string
+ res *conflictResolution
+}
+
+// newInitiationState creates new initiation state.
+func newInitiationState(ctx *context.T, s *syncService, peer string, name string, sgInfo sgMemberInfo) (*initiationState, error) {
+ iSt := &initiationState{}
+ iSt.peer = peer
+ iSt.updObjects = make(map[string]*objConflictState)
+ iSt.dagGraft = newGraft()
+ iSt.sync = s
+
+ // TODO(hpucha): Would be nice to standardize on the combined "app:db"
+// name across sync (not syncbase) so we only split/join them at
+ // the boundary with the store part.
+ var err error
+ iSt.appName, iSt.dbName, err = splitAppDbName(ctx, name)
+ if err != nil {
+ return nil, err
+ }
+
+ // TODO(hpucha): nil rpc.ServerCall ok?
+ iSt.st, err = s.getDbStore(ctx, nil, iSt.appName, iSt.dbName)
+ if err != nil {
+ return nil, err
+ }
+
+ iSt.peerMtTblsAndSgInfo(ctx, peer, sgInfo)
+
+ return iSt, nil
+}
+
+// peerMtTblsAndSgInfo computes the possible mount tables, the SyncGroup Ids and
+// prefixes common with a remote peer in a particular Database by consulting the
+// SyncGroups in the specified Database.
+func (iSt *initiationState) peerMtTblsAndSgInfo(ctx *context.T, peer string, info sgMemberInfo) {
+ iSt.mtTables = make(map[string]struct{})
+ iSt.sgIds = make(sgSet)
+ iSt.sgPfxs = make(map[string]sgSet)
+
+ for id := range info {
+ sg, err := getSyncGroupById(ctx, iSt.st, id)
+ if err != nil {
+ continue
+ }
+ if _, ok := sg.Joiners[peer]; !ok {
+ // Peer is no longer part of the SyncGroup.
+ continue
+ }
+ for _, mt := range sg.Spec.MountTables {
+ iSt.mtTables[mt] = struct{}{}
+ }
+ iSt.sgIds[id] = struct{}{}
+
+ for _, p := range sg.Spec.Prefixes {
+ sgs, ok := iSt.sgPfxs[p]
+ if !ok {
+ sgs = make(sgSet)
+ iSt.sgPfxs[p] = sgs
+ }
+ sgs[id] = struct{}{}
+ }
+ }
+}
+
+// connectToPeer attempts to connect to the remote peer using the mount tables
+// obtained from the SyncGroups being synced in the current Database.
+func (iSt *initiationState) connectToPeer(ctx *context.T) (interfaces.SyncGetDeltasClientCall, bool) {
+ if len(iSt.mtTables) < 1 {
+ vlog.Errorf("sync: connectToPeer: no mount tables found to connect to peer %s, app %s db %s", iSt.peer, iSt.appName, iSt.dbName)
+ return nil, false
+ }
+ for mt := range iSt.mtTables {
+ absName := naming.Join(mt, iSt.peer, util.SyncbaseSuffix)
+ c := interfaces.SyncClient(absName)
+ stream, err := c.GetDeltas(ctx, iSt.sync.name)
+ if err == nil {
+ vlog.VI(3).Infof("sync: connectToPeer: established on %s", absName)
+ return stream, true
+ }
+ }
+ return nil, false
+}
+
+// createLocalGenVec creates the generation vector with local knowledge for the
+// initiator to send to the responder.
+//
+// TODO(hpucha): Refactor this code with computeDelta code in sync_state.go.
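+//
+// A small worked example (illustrative values, not from a real run): with
+// SyncGroup prefixes {"foo", "foobar"} and local genvec entries for
+// {"f", "foo", "foobarbaz"}, only "foo" starts a prefix group ("foobar" nests
+// under it). The result keeps local["foo"] under "foo" and copies
+// local["foobarbaz"] unchanged; "f" only serves as the closest enclosing local
+// prefix until the exact "foo" entry is found.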
+func (iSt *initiationState) createLocalGenVec(ctx *context.T) error {
+ iSt.sync.thLock.Lock()
+ defer iSt.sync.thLock.Unlock()
+
+ // Freeze the most recent batch of local changes before fetching
+ // remote changes from a peer. This frozen state is used by the
+ // responder when responding to GetDeltas RPC.
+ //
+ // We only allow an initiator to freeze local generations (not
+ // responders/watcher) in order to maintain a static baseline
+ // for the duration of a sync. This addresses the following race
+ // condition: If we allow responders to use newer local
+ // generations while the initiator is in progress, they may beat
+ // the initiator and send these new generations to remote
+ // devices. These remote devices in turn can send these
+ // generations back to the initiator in progress which was
+ // started with older generation information.
+ if err := iSt.sync.checkptLocalGen(ctx, iSt.appName, iSt.dbName); err != nil {
+ return err
+ }
+
+ local, lgen, err := iSt.sync.copyDbGenInfo(ctx, iSt.appName, iSt.dbName)
+ if err != nil {
+ return err
+ }
+ localPfxs := extractAndSortPrefixes(local)
+
+ sgPfxs := make([]string, len(iSt.sgPfxs))
+ i := 0
+ for p := range iSt.sgPfxs {
+ sgPfxs[i] = p
+ i++
+ }
+ sort.Strings(sgPfxs)
+
+ iSt.local = make(interfaces.GenVector)
+
+ if len(sgPfxs) == 0 {
+ return verror.New(verror.ErrInternal, ctx, "no syncgroups for syncing")
+ }
+
+ pfx := sgPfxs[0]
+ for _, p := range sgPfxs {
+ if strings.HasPrefix(p, pfx) && p != pfx {
+ continue
+ }
+
+ // Process this prefix as this is the start of a new set of
+ // nested prefixes.
+ pfx = p
+ var lpStart string
+ for _, lp := range localPfxs {
+ if !strings.HasPrefix(lp, pfx) && !strings.HasPrefix(pfx, lp) {
+ // No relationship with pfx.
+ continue
+ }
+ if strings.HasPrefix(pfx, lp) {
+ lpStart = lp
+ } else {
+ iSt.local[lp] = local[lp]
+ }
+ }
+ // Deal with the starting point.
+ if lpStart == "" {
+ // No matching prefixes for pfx were found.
+ iSt.local[pfx] = make(interfaces.PrefixGenVector)
+ iSt.local[pfx][iSt.sync.id] = lgen
+ } else {
+ iSt.local[pfx] = local[lpStart]
+ }
+ }
+ return nil
+}
+
+// recvAndProcessDeltas first receives the log records and generation vector
+// from the GetDeltas RPC and puts them in the Database. It also replays the
+// entire log stream as the log records arrive. These records span multiple
+// generations from different devices. It does not perform any conflict
+// resolution during replay. This avoids resolving conflicts that have already
+// been resolved by other devices.
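+//
+// The expected shape of the response stream, as enforced below, is roughly:
+//
+//    DeltaRespStart, (DeltaRespRec | DeltaRespRespVec)*, DeltaRespFinish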
+func (iSt *initiationState) recvAndProcessDeltas(ctx *context.T) error {
+ iSt.sync.thLock.Lock()
+ defer iSt.sync.thLock.Unlock()
+
+ // TODO(hpucha): This works for now, but figure out a long term solution
+ // as this may be implementation dependent. It currently works because
+ // the RecvStream call is stateless, and grabbing a handle to it
+ // repeatedly doesn't affect what data is seen next.
+ rcvr := iSt.stream.RecvStream()
+ start, finish := false, false
+
+ // TODO(hpucha): See if we can avoid committing the entire delta stream
+ // as one batch. Currently the dependency is between the log records and
+ // the batch info.
+ tx := iSt.st.NewTransaction()
+ committed := false
+
+ defer func() {
+ if !committed {
+ tx.Abort()
+ }
+ }()
+
+ // Track received batches (BatchId --> BatchCount mapping).
+ batchMap := make(map[uint64]uint64)
+
+ for rcvr.Advance() {
+ resp := rcvr.Value()
+ switch v := resp.(type) {
+ case interfaces.DeltaRespStart:
+ if start {
+ return verror.New(verror.ErrInternal, ctx, "received start followed by start in delta response stream")
+ }
+ start = true
+
+ case interfaces.DeltaRespFinish:
+ if finish {
+ return verror.New(verror.ErrInternal, ctx, "received finish followed by finish in delta response stream")
+ }
+ finish = true
+
+ case interfaces.DeltaRespRespVec:
+ iSt.remote = v.Value
+
+ case interfaces.DeltaRespRec:
+ // Insert log record in Database.
+ // TODO(hpucha): Should we reserve more positions in a batch?
+ // TODO(hpucha): Handle if SyncGroup is left/destroyed while sync is in progress.
+ pos := iSt.sync.reservePosInDbLog(ctx, iSt.appName, iSt.dbName, 1)
+ rec := &localLogRec{Metadata: v.Value.Metadata, Pos: pos}
+ batchId := rec.Metadata.BatchId
+ if batchId != NoBatchId {
+ if cnt, ok := batchMap[batchId]; !ok {
+ if iSt.sync.startBatch(ctx, tx, batchId) != batchId {
+ return verror.New(verror.ErrInternal, ctx, "failed to create batch info")
+ }
+ batchMap[batchId] = rec.Metadata.BatchCount
+ } else if cnt != rec.Metadata.BatchCount {
+ return verror.New(verror.ErrInternal, ctx, "inconsistent counts for tid", batchId, cnt, rec.Metadata.BatchCount)
+ }
+ }
+
+ vlog.VI(4).Infof("sync: recvAndProcessDeltas: processing rec %v", rec)
+ if err := iSt.insertRecInLogDagAndDb(ctx, rec, batchId, v.Value.Value, tx); err != nil {
+ return err
+ }
+
+ // Check for BlobRefs, and process them.
+ if err := iSt.processBlobRefs(ctx, &rec.Metadata, v.Value.Value); err != nil {
+ return err
+ }
+
+ // Mark object dirty.
+ iSt.updObjects[rec.Metadata.ObjId] = &objConflictState{}
+ }
+
+ // Break out of the stream.
+ if finish {
+ break
+ }
+ }
+
+ if !(start && finish) {
+ return verror.New(verror.ErrInternal, ctx, "didn't receive start/finish delimiters in delta response stream")
+ }
+
+ if err := rcvr.Err(); err != nil {
+ return err
+ }
+
+ // End the started batches if any.
+ for bid, cnt := range batchMap {
+ if err := iSt.sync.endBatch(ctx, tx, bid, cnt); err != nil {
+ return err
+ }
+ }
+
+ // Commit this transaction. We do not retry this transaction since it
+ // should not conflict with any other keys. So if it fails, it is a
+ // non-retriable error.
+ err := tx.Commit()
+ if verror.ErrorID(err) == store.ErrConcurrentTransaction.ID {
+ // Note: This might be triggered with memstore until it handles
+ // transactions in a more fine-grained fashion.
+ vlog.Fatalf("sync: recvAndProcessDeltas: encountered concurrent transaction")
+ }
+ if err == nil {
+ committed = true
+ }
+ return err
+}
+
+func (iSt *initiationState) processBlobRefs(ctx *context.T, m *interfaces.LogRecMetadata, valbuf []byte) error {
+ objid := m.ObjId
+ srcPeer := syncbaseIdToName(m.Id)
+
+ vlog.VI(4).Infof("sync: processBlobRefs: begin processing blob refs for objid %s", objid)
+ defer vlog.VI(4).Infof("sync: processBlobRefs: end processing blob refs for objid %s", objid)
+
+ if valbuf == nil {
+ return nil
+ }
+
+ var val *vdl.Value
+ if err := vom.Decode(valbuf, &val); err != nil {
+ return err
+ }
+
+ brs := make(map[nosql.BlobRef]struct{})
+ if err := extractBlobRefs(val, brs); err != nil {
+ return err
+ }
+ sgIds := make(sgSet)
+ for br := range brs {
+ for p, sgs := range iSt.sgPfxs {
+ if strings.HasPrefix(extractAppKey(objid), p) {
+ for sg := range sgs {
+ sgIds[sg] = struct{}{}
+ }
+ }
+ }
+ vlog.VI(4).Infof("sync: processBlobRefs: Found blobref %v peer %v, source %v, sgs %v", br, iSt.peer, srcPeer, sgIds)
+ info := &blobLocInfo{peer: iSt.peer, source: srcPeer, sgIds: sgIds}
+ if err := iSt.sync.addBlobLocInfo(ctx, br, info); err != nil {
+ return err
+ }
+ }
+
+ return nil
+}
+
+// TODO(hpucha): Handle blobrefs part of list, map, any.
+func extractBlobRefs(val *vdl.Value, brs map[nosql.BlobRef]struct{}) error {
+ if val == nil {
+ return nil
+ }
+ switch val.Kind() {
+ case vdl.String:
+ // Could be a BlobRef.
+ var br nosql.BlobRef
+ if val.Type() == vdl.TypeOf(br) {
+ brs[nosql.BlobRef(val.RawString())] = struct{}{}
+ }
+ case vdl.Struct:
+ for i := 0; i < val.Type().NumField(); i++ {
+ v := val.StructField(i)
+ if err := extractBlobRefs(v, brs); err != nil {
+ return err
+ }
+ }
+ }
+ return nil
+}
+
+// insertRecInLogDagAndDb adds a new log record to log and dag data structures,
+// and inserts the versioned value in the Database.
+func (iSt *initiationState) insertRecInLogDagAndDb(ctx *context.T, rec *localLogRec, batchId uint64, valbuf []byte, tx store.Transaction) error {
+ if err := putLogRec(ctx, tx, rec); err != nil {
+ return err
+ }
+
+ m := rec.Metadata
+ logKey := logRecKey(m.Id, m.Gen)
+
+ var err error
+ switch m.RecType {
+ case interfaces.NodeRec:
+ err = iSt.sync.addNode(ctx, tx, m.ObjId, m.CurVers, logKey, m.Delete, m.Parents, m.BatchId, iSt.dagGraft)
+ case interfaces.LinkRec:
+ err = iSt.sync.addParent(ctx, tx, m.ObjId, m.CurVers, m.Parents[0], iSt.dagGraft)
+ default:
+ err = verror.New(verror.ErrInternal, ctx, "unknown log record type")
+ }
+
+ if err != nil {
+ return err
+ }
+ // TODO(hpucha): Hack right now. Need to change Database's handling of
+ // deleted objects. Currently, the initiator needs to treat deletions
+ // specially since deletions do not get a version number or a special
+ // value in the Database.
+ if !rec.Metadata.Delete && rec.Metadata.RecType == interfaces.NodeRec {
+ return watchable.PutAtVersion(ctx, tx, []byte(m.ObjId), valbuf, []byte(m.CurVers))
+ }
+ return nil
+}
+
+// processUpdatedObjects processes all the updates received by the initiator,
+// one object at a time. Conflict detection and resolution is carried out after
+// the entire delta of log records is replayed, instead of incrementally after
+// each record/batch is replayed, to avoid repeating conflict resolution already
+// performed by other peers.
+//
+// For each updated object, we first check if the object has any conflicts,
+// resulting in three possibilities:
+//
+// * There is no conflict, and no updates are needed to the Database
+// (isConflict=false, newHead == oldHead). All changes received convey
+// information that still keeps the local head as the most recent version. This
+// occurs when conflicts are resolved by picking the existing local version.
+//
+// * There is no conflict, but a remote version is discovered that builds on the
+// local head (isConflict=false, newHead != oldHead). In this case, we generate
+// a Database update to simply update the Database to the latest value.
+//
+// * There is a conflict and we call into the app or use a well-known policy to
+// resolve the conflict, resulting in three possibilities: (a) conflict was
+// resolved by picking the local version. In this case, Database need not be
+// updated, but a link is added to record the choice. (b) conflict was resolved
+// by picking the remote version. In this case, Database is updated with the
+// remote version and a link is added as well. (c) conflict was resolved by
+// generating a new Database update. In this case, Database is updated with the
+// new version.
+//
+// We collect all the updates to the Database in a transaction. In addition, as
+// part of the same transaction, we update the log and dag state suitably (move
+// the head ptr of the object in the dag to the latest version, and create a new
+// log record reflecting conflict resolution if any). Finally, we update the
+// on-storage sync state. This transaction's commit can fail since preconditions
+// on the objects may have been violated. In that case, we fetch the latest
+// versions of the objects from the Database, recheck for conflicts, and repeat
+// the above steps until the transaction commits successfully. Upon commit, we
+// also update the in-memory sync state of the Database.
+func (iSt *initiationState) processUpdatedObjects(ctx *context.T) error {
+ // Note that the tx handle in the initiation state is cached only for the
+ // scope of this function, since different stages in the pipeline add to
+ // the same transaction.
+ committed := false
+ defer func() {
+ if !committed {
+ iSt.tx.Abort()
+ }
+ }()
+
+ for {
+ vlog.VI(3).Infof("sync: processUpdatedObjects: begin: %d objects updated", len(iSt.updObjects))
+
+ iSt.tx = iSt.st.NewTransaction()
+ watchable.SetTransactionFromSync(iSt.tx) // for echo-suppression
+
+ if count, err := iSt.detectConflicts(ctx); err != nil {
+ return err
+ } else {
+ vlog.VI(3).Infof("sync: processUpdatedObjects: %d conflicts detected", count)
+ }
+
+ if err := iSt.resolveConflicts(ctx); err != nil {
+ return err
+ }
+
+ err := iSt.updateDbAndSyncSt(ctx)
+ if err == nil {
+ err = iSt.tx.Commit()
+ }
+ if err == nil {
+ committed = true
+ // Update in-memory genvector since commit is successful.
+ if err := iSt.sync.putDbGenInfoRemote(ctx, iSt.appName, iSt.dbName, iSt.updLocal); err != nil {
+ vlog.Fatalf("sync: processUpdatedObjects: putting geninfo in memory failed for app %s db %s, err %v", iSt.appName, iSt.dbName, err)
+ }
+ vlog.VI(3).Info("sync: processUpdatedObjects: end: changes committed")
+ return nil
+ }
+ if verror.ErrorID(err) != store.ErrConcurrentTransaction.ID {
+ return err
+ }
+
+ // Either updateDbAndSyncSt() or tx.Commit() detected a
+ // concurrent transaction. Retry processing the remote updates.
+ //
+ // TODO(hpucha): Sleeping and retrying is a temporary
+ // solution. Next iteration will have coordination with watch
+ // thread to intelligently retry. Hence this value is not a
+ // config param.
+ vlog.VI(3).Info("sync: processUpdatedObjects: retry due to local mutations")
+ iSt.tx.Abort()
+ time.Sleep(1 * time.Second)
+ }
+}
+
+// detectConflicts iterates through all the updated objects to detect conflicts.
+func (iSt *initiationState) detectConflicts(ctx *context.T) (int, error) {
+ count := 0
+ for objid, confSt := range iSt.updObjects {
+ // Check if object has a conflict.
+ var err error
+ confSt.isConflict, confSt.newHead, confSt.oldHead, confSt.ancestor, err = hasConflict(ctx, iSt.tx, objid, iSt.dagGraft)
+ if err != nil {
+ return 0, err
+ }
+
+ if !confSt.isConflict {
+ if confSt.newHead == confSt.oldHead {
+ confSt.res = &conflictResolution{ty: pickLocal}
+ } else {
+ confSt.res = &conflictResolution{ty: pickRemote}
+ }
+ } else {
+ count++
+ }
+ }
+ return count, nil
+}
+
+// updateDbAndSyncSt updates the Database and, if that is successful, updates
+// the log, dag, and genvector data structures as needed.
+func (iSt *initiationState) updateDbAndSyncSt(ctx *context.T) error {
+ for objid, confSt := range iSt.updObjects {
+ // If the local version is picked, no further updates to the
+ // Database are needed. If the remote version is picked or if a
+ // new version is created, we put it in the Database.
+ if confSt.res.ty != pickLocal {
+
+ // TODO(hpucha): Hack right now. Need to change Database's
+ // handling of deleted objects.
+ oldVersDeleted := true
+ if confSt.oldHead != NoVersion {
+ oldDagNode, err := getNode(ctx, iSt.tx, objid, confSt.oldHead)
+ if err != nil {
+ return err
+ }
+ oldVersDeleted = oldDagNode.Deleted
+ }
+
+ var newVersion string
+ var newVersDeleted bool
+ switch confSt.res.ty {
+ case pickRemote:
+ newVersion = confSt.newHead
+ newDagNode, err := getNode(ctx, iSt.tx, objid, newVersion)
+ if err != nil {
+ return err
+ }
+ newVersDeleted = newDagNode.Deleted
+ case createNew:
+ newVersion = confSt.res.rec.Metadata.CurVers
+ newVersDeleted = confSt.res.rec.Metadata.Delete
+ }
+
+ // Skip delete followed by a delete.
+ if oldVersDeleted && newVersDeleted {
+ continue
+ }
+
+ if !oldVersDeleted {
+ // Read current version to enter it in the readset of the transaction.
+ version, err := watchable.GetVersion(ctx, iSt.tx, []byte(objid))
+ if err != nil {
+ return err
+ }
+ if string(version) != confSt.oldHead {
+ vlog.VI(4).Infof("sync: updateDbAndSyncSt: concurrent updates %s %s", version, confSt.oldHead)
+ return store.NewErrConcurrentTransaction(ctx)
+ }
+ } else {
+ // Ensure key doesn't exist.
+ if _, err := watchable.GetVersion(ctx, iSt.tx, []byte(objid)); verror.ErrorID(err) != store.ErrUnknownKey.ID {
+ return store.NewErrConcurrentTransaction(ctx)
+ }
+ }
+
+ if !newVersDeleted {
+ if confSt.res.ty == createNew {
+ vlog.VI(4).Infof("sync: updateDbAndSyncSt: PutAtVersion %s %s", objid, newVersion)
+ if err := watchable.PutAtVersion(ctx, iSt.tx, []byte(objid), confSt.res.val, []byte(newVersion)); err != nil {
+ return err
+ }
+ }
+ vlog.VI(4).Infof("sync: updateDbAndSyncSt: PutVersion %s %s", objid, newVersion)
+ if err := watchable.PutVersion(ctx, iSt.tx, []byte(objid), []byte(newVersion)); err != nil {
+ return err
+ }
+ } else {
+ vlog.VI(4).Infof("sync: updateDbAndSyncSt: Deleting obj %s", objid)
+ if err := iSt.tx.Delete([]byte(objid)); err != nil {
+ return err
+ }
+ }
+ }
+ // Always update sync state irrespective of local/remote/new
+ // versions being picked.
+ if err := iSt.updateLogAndDag(ctx, objid); err != nil {
+ return err
+ }
+ }
+
+ return iSt.updateSyncSt(ctx)
+}
+
+// updateLogAndDag updates the log and dag data structures.
+func (iSt *initiationState) updateLogAndDag(ctx *context.T, obj string) error {
+ confSt, ok := iSt.updObjects[obj]
+ if !ok {
+ return verror.New(verror.ErrInternal, ctx, "object state not found", obj)
+ }
+ var newVersion string
+
+ if !confSt.isConflict {
+ newVersion = confSt.newHead
+ } else {
+ // Object had a conflict. Create a log record to reflect resolution.
+ var rec *localLogRec
+
+ switch {
+ case confSt.res.ty == pickLocal:
+ // Local version was picked as the conflict resolution.
+ rec = iSt.createLocalLinkLogRec(ctx, obj, confSt.oldHead, confSt.newHead)
+ newVersion = confSt.oldHead
+ case confSt.res.ty == pickRemote:
+ // Remote version was picked as the conflict resolution.
+ rec = iSt.createLocalLinkLogRec(ctx, obj, confSt.newHead, confSt.oldHead)
+ newVersion = confSt.newHead
+ default:
+ // New version was created to resolve the conflict.
+ rec = confSt.res.rec
+ newVersion = confSt.res.rec.Metadata.CurVers
+ }
+
+ if err := putLogRec(ctx, iSt.tx, rec); err != nil {
+ return err
+ }
+
+ // Add a new DAG node.
+ var err error
+ m := rec.Metadata
+ switch m.RecType {
+ case interfaces.NodeRec:
+ err = iSt.sync.addNode(ctx, iSt.tx, obj, m.CurVers, logRecKey(m.Id, m.Gen), m.Delete, m.Parents, NoBatchId, nil)
+ case interfaces.LinkRec:
+ err = iSt.sync.addParent(ctx, iSt.tx, obj, m.CurVers, m.Parents[0], nil)
+ default:
+ return verror.New(verror.ErrInternal, ctx, "unknown log record type")
+ }
+ if err != nil {
+ return err
+ }
+ }
+
+ // Move the head. This should be idempotent. We may move head to the
+ // local head in some cases.
+ return moveHead(ctx, iSt.tx, obj, newVersion)
+}
+
+func (iSt *initiationState) createLocalLinkLogRec(ctx *context.T, obj, vers, par string) *localLogRec {
+ gen, pos := iSt.sync.reserveGenAndPosInDbLog(ctx, iSt.appName, iSt.dbName, 1)
+
+ vlog.VI(4).Infof("sync: createLocalLinkLogRec: obj %s vers %s par %s", obj, vers, par)
+
+ rec := &localLogRec{
+ Metadata: interfaces.LogRecMetadata{
+ Id: iSt.sync.id,
+ Gen: gen,
+ RecType: interfaces.LinkRec,
+
+ ObjId: obj,
+ CurVers: vers,
+ Parents: []string{par},
+ UpdTime: time.Now().UTC(),
+ BatchId: NoBatchId,
+ BatchCount: 1,
+ // TODO(hpucha): What is its batchid and count?
+ },
+ Pos: pos,
+ }
+ return rec
+}
+
+// updateSyncSt updates local sync state at the end of an initiator cycle.
+func (iSt *initiationState) updateSyncSt(ctx *context.T) error {
+ // Get the current local sync state.
+ dsInMem, err := iSt.sync.copyDbSyncStateInMem(ctx, iSt.appName, iSt.dbName)
+ if err != nil {
+ return err
+ }
+ ds := &dbSyncState{
+ Gen: dsInMem.gen,
+ CheckptGen: dsInMem.checkptGen,
+ GenVec: dsInMem.genvec,
+ }
+
+ // remote can be a subset of local.
+ for rpfx, respgv := range iSt.remote {
+ for lpfx, lpgv := range ds.GenVec {
+ if strings.HasPrefix(lpfx, rpfx) {
+ mergePrefixGenVectors(lpgv, respgv)
+ }
+ }
+ if _, ok := ds.GenVec[rpfx]; !ok {
+ ds.GenVec[rpfx] = respgv
+ }
+ }
+
+ iSt.updLocal = ds.GenVec
+ // Clean the genvector of any local state. Note that local state is held
+ // in gen/checkptGen in the sync state struct.
+ for _, pgv := range iSt.updLocal {
+ delete(pgv, iSt.sync.id)
+ }
+
+ // TODO(hpucha): Add knowledge compaction.
+
+ return putDbSyncState(ctx, iSt.tx, ds)
+}
+
+// mergePrefixGenVectors merges responder prefix genvector into local genvector.
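+// For illustration (hypothetical values): merging a local prefix genvector
+// {10:5, 11:2} with a responder prefix genvector {10:3, 11:7, 12:1} yields
+// {10:5, 11:7, 12:1}, i.e. the element-wise maximum per device id.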
+func mergePrefixGenVectors(lpgv, respgv interfaces.PrefixGenVector) {
+ for devid, rgen := range respgv {
+ gen, ok := lpgv[devid]
+ if !ok || gen < rgen {
+ lpgv[devid] = rgen
+ }
+ }
+}
+
+////////////////////////////////////////
+// Peer selection policies.
+
+// pickPeer picks a Syncbase to sync with.
+func (s *syncService) pickPeer(ctx *context.T) (string, error) {
+ switch peerSelectionPolicy {
+ case selectRandom:
+ members := s.getMembers(ctx)
+ // Remove myself from the set.
+ delete(members, s.name)
+ if len(members) == 0 {
+ return "", verror.New(verror.ErrInternal, ctx, "no useful peer")
+ }
+
+ // Pick a peer at random.
+ ind := randIntn(len(members))
+ for m := range members {
+ if ind == 0 {
+ return m, nil
+ }
+ ind--
+ }
+ return "", verror.New(verror.ErrInternal, ctx, "random selection didn't succeed")
+ default:
+ return "", verror.New(verror.ErrInternal, ctx, "unknown peer selection policy")
+ }
+}
diff --git a/services/syncbase/vsync/initiator_test.go b/services/syncbase/vsync/initiator_test.go
new file mode 100644
index 0000000..af09ce1
--- /dev/null
+++ b/services/syncbase/vsync/initiator_test.go
@@ -0,0 +1,477 @@
+// Copyright 2015 The Vanadium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// The initiator tests below are driven by replaying the state from the log
+// files (in testdata directory). These log files may mimic watching the
+// Database locally (addl commands in the log file) or obtaining log records and
+// generation vector from a remote peer (addr, genvec commands). The log files
+// contain the metadata of log records. The log files are only used to set up
+// the state. The tests verify that given a particular local state and a stream
+// of remote deltas, the initiator behaves as expected.
+
+package vsync
+
+import (
+ "fmt"
+ "reflect"
+ "testing"
+ "time"
+
+ "v.io/syncbase/v23/services/syncbase/nosql"
+ "v.io/syncbase/x/ref/services/syncbase/server/interfaces"
+ "v.io/syncbase/x/ref/services/syncbase/server/util"
+ "v.io/syncbase/x/ref/services/syncbase/server/watchable"
+ "v.io/v23/vdl"
+ "v.io/v23/vom"
+ _ "v.io/x/ref/runtime/factories/generic"
+)
+
+func TestExtractBlobRefs(t *testing.T) {
+ var tests [][]byte
+ br := nosql.BlobRef("123")
+
+ // BlobRef is the value.
+ buf0, err := vom.Encode(br)
+ if err != nil {
+ t.Fatalf("Encode(BlobRef) failed, err %v", err)
+ }
+ tests = append(tests, buf0)
+
+ // Struct contains BlobRef.
+ type test1Struct struct {
+ A int64
+ B string
+ C nosql.BlobRef
+ }
+ v1 := test1Struct{A: 10, B: "foo", C: br}
+ buf1, err := vom.Encode(v1)
+ if err != nil {
+ t.Fatalf("Encode(test1Struct) failed, err %v", err)
+ }
+ tests = append(tests, buf1)
+
+ // Nested struct contains BlobRef.
+ type test2Struct struct {
+ A int64
+ B string
+ C test1Struct
+ }
+ v2 := test2Struct{A: 10, B: "foo", C: v1}
+ buf2, err := vom.Encode(v2)
+ if err != nil {
+ t.Fatalf("Encode(test2Struct) failed, err %v", err)
+ }
+ tests = append(tests, buf2)
+
+ for i, buf := range tests {
+ var val *vdl.Value
+ if err := vom.Decode(buf, &val); err != nil {
+ t.Fatalf("Decode failed (test %d), err %v", i, err)
+ }
+
+ gotbrs := make(map[nosql.BlobRef]struct{})
+ if err := extractBlobRefs(val, gotbrs); err != nil {
+ t.Fatalf("extractBlobRefs failed (test %d), err %v", i, err)
+ }
+ wantbrs := map[nosql.BlobRef]struct{}{br: struct{}{}}
+ if !reflect.DeepEqual(gotbrs, wantbrs) {
+ t.Fatalf("Data mismatch in blobrefs (test %d), got %v, want %v", i, gotbrs, wantbrs)
+ }
+ }
+}
+
+// TestLogStreamRemoteOnly tests processing of a remote log stream. Commands are
+// in file testdata/remote-init-00.log.sync.
+func TestLogStreamRemoteOnly(t *testing.T) {
+ svc, iSt, cleanup := testInit(t, "", "remote-init-00.log.sync")
+ defer cleanup(t, svc)
+
+ // Check all log records.
+ objid := util.JoinKeyParts(util.RowPrefix, "foo1")
+ var gen uint64
+ var parents []string
+ for gen = 1; gen < 4; gen++ {
+ gotRec, err := getLogRec(nil, svc.St(), 11, gen)
+ if err != nil || gotRec == nil {
+ t.Fatalf("getLogRec can not find object 11 %d, err %v", gen, err)
+ }
+ vers := fmt.Sprintf("%d", gen)
+ wantRec := &localLogRec{
+ Metadata: interfaces.LogRecMetadata{
+ Id: 11,
+ Gen: gen,
+ RecType: interfaces.NodeRec,
+ ObjId: objid,
+ CurVers: vers,
+ Parents: parents,
+ UpdTime: constTime,
+ BatchCount: 1,
+ },
+ Pos: gen - 1,
+ }
+
+ if !reflect.DeepEqual(gotRec, wantRec) {
+ t.Fatalf("Data mismatch in log record got %v, want %v", gotRec, wantRec)
+ }
+ // Verify DAG state.
+ if _, err := getNode(nil, svc.St(), objid, vers); err != nil {
+ t.Fatalf("getNode can not find object %s vers %s in DAG, err %v", objid, vers, err)
+ }
+ // Verify Database state.
+ tx := svc.St().NewTransaction()
+ if _, err := watchable.GetAtVersion(nil, tx, []byte(objid), nil, []byte(vers)); err != nil {
+ t.Fatalf("GetAtVersion can not find object %s vers %s in Database, err %v", objid, vers, err)
+ }
+ tx.Abort()
+ parents = []string{vers}
+ }
+
+ // Verify conflict state.
+ if len(iSt.updObjects) != 1 {
+ t.Fatalf("Unexpected number of updated objects %d", len(iSt.updObjects))
+ }
+ st := iSt.updObjects[objid]
+ if st.isConflict {
+ t.Fatalf("Detected a conflict %v", st)
+ }
+ if st.newHead != "3" || st.oldHead != NoVersion {
+ t.Fatalf("Conflict detection didn't succeed %v", st)
+ }
+
+ // Verify genvec state.
+ wantVec := interfaces.GenVector{
+ "foo1": interfaces.PrefixGenVector{11: 3},
+ "bar": interfaces.PrefixGenVector{11: 0},
+ }
+ if !reflect.DeepEqual(iSt.updLocal, wantVec) {
+ t.Fatalf("Final local gen vec mismatch got %v, want %v", iSt.updLocal, wantVec)
+ }
+
+ // Verify DAG state.
+ if head, err := getHead(nil, svc.St(), objid); err != nil || head != "3" {
+ t.Fatalf("Invalid object %s head in DAG %v, err %v", objid, head, err)
+ }
+
+ // Verify Database state.
+ valbuf, err := svc.St().Get([]byte(objid), nil)
+ var val string
+ if err := vom.Decode(valbuf, &val); err != nil {
+ t.Fatalf("Value decode failed, err %v", err)
+ }
+ if err != nil || val != "abc" {
+ t.Fatalf("Invalid object %s in Database %v, err %v", objid, val, err)
+ }
+ tx := svc.St().NewTransaction()
+ version, err := watchable.GetVersion(nil, tx, []byte(objid))
+ if err != nil || string(version) != "3" {
+ t.Fatalf("Invalid object %s head in Database %v, err %v", objid, string(version), err)
+ }
+ tx.Abort()
+}
+
+// TestLogStreamNoConflict tests that a local and a remote log stream can be
+// correctly applied (when there are no conflicts). Commands are in files
+// testdata/<local-init-00.log.sync,remote-noconf-00.log.sync>.
+func TestLogStreamNoConflict(t *testing.T) {
+ svc, iSt, cleanup := testInit(t, "local-init-00.log.sync", "remote-noconf-00.log.sync")
+ defer cleanup(t, svc)
+
+ objid := util.JoinKeyParts(util.RowPrefix, "foo1")
+
+ // Check all log records.
+ var version uint64 = 1
+ var parents []string
+ for _, devid := range []uint64{10, 11} {
+ var gen uint64
+ for gen = 1; gen < 4; gen++ {
+ gotRec, err := getLogRec(nil, svc.St(), devid, gen)
+ if err != nil || gotRec == nil {
+ t.Fatalf("getLogRec can not find object %d:%d, err %v",
+ devid, gen, err)
+ }
+ vers := fmt.Sprintf("%d", version)
+ wantRec := &localLogRec{
+ Metadata: interfaces.LogRecMetadata{
+ Id: devid,
+ Gen: gen,
+ RecType: interfaces.NodeRec,
+ ObjId: objid,
+ CurVers: vers,
+ Parents: parents,
+ UpdTime: constTime,
+ BatchCount: 1,
+ },
+ Pos: gen - 1,
+ }
+
+ if !reflect.DeepEqual(gotRec, wantRec) {
+ t.Fatalf("Data mismatch in log record got %v, want %v", gotRec, wantRec)
+ }
+
+ // Verify DAG state.
+ if _, err := getNode(nil, svc.St(), objid, vers); err != nil {
+ t.Fatalf("getNode can not find object %s vers %s in DAG, err %v", objid, vers, err)
+ }
+ // Verify Database state.
+ tx := svc.St().NewTransaction()
+ if _, err := watchable.GetAtVersion(nil, tx, []byte(objid), nil, []byte(vers)); err != nil {
+ t.Fatalf("GetAtVersion can not find object %s vers %s in Database, err %v", objid, vers, err)
+ }
+ tx.Abort()
+ parents = []string{vers}
+ version++
+ }
+ }
+
+ // Verify conflict state.
+ if len(iSt.updObjects) != 1 {
+ t.Fatalf("Unexpected number of updated objects %d", len(iSt.updObjects))
+ }
+ st := iSt.updObjects[objid]
+ if st.isConflict {
+ t.Fatalf("Detected a conflict %v", st)
+ }
+ if st.newHead != "6" || st.oldHead != "3" {
+ t.Fatalf("Conflict detection didn't succeed %v", st)
+ }
+
+ // Verify genvec state.
+ wantVec := interfaces.GenVector{
+ "foo1": interfaces.PrefixGenVector{11: 3},
+ "bar": interfaces.PrefixGenVector{11: 0},
+ }
+ if !reflect.DeepEqual(iSt.updLocal, wantVec) {
+ t.Fatalf("Final local gen vec failed got %v, want %v", iSt.updLocal, wantVec)
+ }
+
+ // Verify DAG state.
+ if head, err := getHead(nil, svc.St(), objid); err != nil || head != "6" {
+ t.Fatalf("Invalid object %s head in DAG %v, err %v", objid, head, err)
+ }
+
+ // Verify Database state.
+ valbuf, err := svc.St().Get([]byte(objid), nil)
+ var val string
+ if err := vom.Decode(valbuf, &val); err != nil {
+ t.Fatalf("Value decode failed, err %v", err)
+ }
+ if err != nil || val != "abc" {
+ t.Fatalf("Invalid object %s in Database %v, err %v", objid, val, err)
+ }
+ tx := svc.St().NewTransaction()
+ versbuf, err := watchable.GetVersion(nil, tx, []byte(objid))
+ if err != nil || string(versbuf) != "6" {
+ t.Fatalf("Invalid object %s head in Database %v, err %v", objid, string(versbuf), err)
+ }
+ tx.Abort()
+}
+
+// TestLogStreamConflict tests that a local and a remote log stream can be
+// correctly applied when there are conflicts. Commands are in files
+// testdata/<local-init-00.log.sync,remote-conf-00.log.sync>.
+func TestLogStreamConflict(t *testing.T) {
+ svc, iSt, cleanup := testInit(t, "local-init-00.log.sync", "remote-conf-00.log.sync")
+ defer cleanup(t, svc)
+
+ objid := util.JoinKeyParts(util.RowPrefix, "foo1")
+
+ // Verify conflict state.
+ if len(iSt.updObjects) != 1 {
+ t.Fatalf("Unexpected number of updated objects %d", len(iSt.updObjects))
+ }
+ st := iSt.updObjects[objid]
+ if !st.isConflict {
+ t.Fatalf("Didn't detect a conflict %v", st)
+ }
+ if st.newHead != "6" || st.oldHead != "3" || st.ancestor != "2" {
+ t.Fatalf("Conflict detection didn't succeed %v", st)
+ }
+ if st.res.ty != pickRemote {
+ t.Fatalf("Conflict resolution did not pick remote: %v", st.res.ty)
+ }
+
+ // Verify DAG state.
+ if head, err := getHead(nil, svc.St(), objid); err != nil || head != "6" {
+ t.Fatalf("Invalid object %s head in DAG %v, err %v", objid, head, err)
+ }
+
+ // Verify Database state.
+ valbuf, err := svc.St().Get([]byte(objid), nil)
+ var val string
+ if err := vom.Decode(valbuf, &val); err != nil {
+ t.Fatalf("Value decode failed, err %v", err)
+ }
+ if err != nil || val != "abc" {
+ t.Fatalf("Invalid object %s in Database %v, err %v", objid, string(valbuf), err)
+ }
+ tx := svc.St().NewTransaction()
+ versbuf, err := watchable.GetVersion(nil, tx, []byte(objid))
+ if err != nil || string(versbuf) != "6" {
+ t.Fatalf("Invalid object %s head in Database %v, err %v", objid, string(versbuf), err)
+ }
+ tx.Abort()
+}
+
+// TestLogStreamConflictNoAncestor tests that a local and a remote log stream
+// can be correctly applied when there are conflicts from the start where the
+// two versions of an object have no common ancestor. Commands are in files
+// testdata/<local-init-00.log.sync,remote-conf-03.log.sync>.
+func TestLogStreamConflictNoAncestor(t *testing.T) {
+ svc, iSt, cleanup := testInit(t, "local-init-00.log.sync", "remote-conf-03.log.sync")
+ defer cleanup(t, svc)
+
+ objid := util.JoinKeyParts(util.RowPrefix, "foo1")
+
+ // Verify conflict state.
+ if len(iSt.updObjects) != 1 {
+ t.Fatalf("Unexpected number of updated objects %d", len(iSt.updObjects))
+ }
+ st := iSt.updObjects[objid]
+ if !st.isConflict {
+ t.Fatalf("Didn't detect a conflict %v", st)
+ }
+ if st.newHead != "6" || st.oldHead != "3" || st.ancestor != "" {
+ t.Fatalf("Conflict detection didn't succeed %v", st)
+ }
+ if st.res.ty != pickRemote {
+ t.Fatalf("Conflict resolution did not pick remote: %v", st.res.ty)
+ }
+
+ // Verify DAG state.
+ if head, err := getHead(nil, svc.St(), objid); err != nil || head != "6" {
+ t.Fatalf("Invalid object %s head in DAG %v, err %v", objid, head, err)
+ }
+
+ // Verify Database state.
+ valbuf, err := svc.St().Get([]byte(objid), nil)
+ var val string
+ if err := vom.Decode(valbuf, &val); err != nil {
+ t.Fatalf("Value decode failed, err %v", err)
+ }
+ if err != nil || val != "abc" {
+ t.Fatalf("Invalid object %s in Database %v, err %v", objid, string(valbuf), err)
+ }
+ tx := svc.St().NewTransaction()
+ versbuf, err := watchable.GetVersion(nil, tx, []byte(objid))
+ if err != nil || string(versbuf) != "6" {
+ t.Fatalf("Invalid object %s head in Database %v, err %v", objid, string(versbuf), err)
+ }
+ tx.Abort()
+}
+
+//////////////////////////////
+// Helpers.
+
+func testInit(t *testing.T, lfile, rfile string) (*mockService, *initiationState, func(*testing.T, *mockService)) {
+ // Set a large sync interval to prevent the initiator from running.
+ peerSyncInterval = 1 * time.Hour
+ conflictResolutionPolicy = useTime
+ svc := createService(t)
+ cleanup := destroyService
+ s := svc.sync
+ s.id = 10 // initiator
+
+ sgId1 := interfaces.GroupId(1234)
+ nullInfo := nosql.SyncGroupMemberInfo{}
+ sgInfo := sgMemberInfo{
+ sgId1: nullInfo,
+ }
+
+ sg1 := &interfaces.SyncGroup{
+ Name: "sg1",
+ Id: sgId1,
+ AppName: "mockapp",
+ DbName: "mockdb",
+ Creator: "mockCreator",
+ SpecVersion: "etag-0",
+ Spec: nosql.SyncGroupSpec{
+ Prefixes: []string{"foo", "bar"},
+ MountTables: []string{"1/2/3/4", "5/6/7/8"},
+ },
+ Joiners: map[string]nosql.SyncGroupMemberInfo{
+ "a": nullInfo,
+ "b": nullInfo,
+ },
+ }
+
+ tx := svc.St().NewTransaction()
+ if err := addSyncGroup(nil, tx, sg1); err != nil {
+ t.Fatalf("cannot add SyncGroup ID %d, err %v", sg1.Id, err)
+ }
+ if err := tx.Commit(); err != nil {
+ t.Fatalf("cannot commit adding SyncGroup ID %d, err %v", sg1.Id, err)
+ }
+
+ if lfile != "" {
+ replayLocalCommands(t, svc, lfile)
+ }
+
+ if rfile == "" {
+ return svc, nil, cleanup
+ }
+
+ gdb := appDbName("mockapp", "mockdb")
+ iSt, err := newInitiationState(nil, s, "b", gdb, sgInfo)
+ if err != nil {
+ t.Fatalf("newInitiationState failed with err %v", err)
+ }
+
+ testIfSgPfxsEqual(t, iSt.sgPfxs, sg1.Spec.Prefixes)
+ testIfMapArrEqual(t, iSt.mtTables, sg1.Spec.MountTables)
+
+ s.initDbSyncStateInMem(nil, "mockapp", "mockdb")
+
+ // Create local genvec so that it contains knowledge only about common prefixes.
+ if err := iSt.createLocalGenVec(nil); err != nil {
+ t.Fatalf("createLocalGenVec failed with err %v", err)
+ }
+
+ wantVec := interfaces.GenVector{
+ "foo": interfaces.PrefixGenVector{10: 0},
+ "bar": interfaces.PrefixGenVector{10: 0},
+ }
+ if !reflect.DeepEqual(iSt.local, wantVec) {
+ t.Fatalf("createLocalGenVec failed got %v, want %v", iSt.local, wantVec)
+ }
+
+ iSt.stream = createReplayStream(t, rfile)
+
+ if err := iSt.recvAndProcessDeltas(nil); err != nil {
+ t.Fatalf("recvAndProcessDeltas failed with err %v", err)
+ }
+
+ if err := iSt.processUpdatedObjects(nil); err != nil {
+ t.Fatalf("processUpdatedObjects failed with err %v", err)
+ }
+ return svc, iSt, cleanup
+}
+
+func testIfSgPfxsEqual(t *testing.T, m map[string]sgSet, a []string) {
+ aMap := arrToMap(a)
+
+ if len(aMap) != len(m) {
+ t.Fatalf("testIfSgPfxsEqual diff lengths, got %v want %v", aMap, m)
+ }
+
+ for p := range aMap {
+ if _, ok := m[p]; !ok {
+ t.Fatalf("testIfSgPfxsEqual want %v", p)
+ }
+ }
+}
+
+func testIfMapArrEqual(t *testing.T, m map[string]struct{}, a []string) {
+ aMap := arrToMap(a)
+ if !reflect.DeepEqual(m, aMap) {
+ t.Fatalf("testIfMapArrEqual failed map %v, arr %v", m, aMap)
+ }
+}
+
+func arrToMap(a []string) map[string]struct{} {
+ m := make(map[string]struct{})
+ for _, s := range a {
+ m[s] = struct{}{}
+ }
+ return m
+}
diff --git a/services/syncbase/vsync/replay_test.go b/services/syncbase/vsync/replay_test.go
new file mode 100644
index 0000000..03d6dc6
--- /dev/null
+++ b/services/syncbase/vsync/replay_test.go
@@ -0,0 +1,399 @@
+// Copyright 2015 The Vanadium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package vsync
+
+// This file eases the setup of sync test scenarios: parseSyncCommands parses a
+// sync command file and returns the commands to execute, and
+// dagReplayCommands() replays the parsed commands at the DAG API level.
+
+import (
+ "bufio"
+ "container/list"
+ "fmt"
+ "os"
+ "strconv"
+ "strings"
+ "testing"
+ "time"
+
+ "v.io/syncbase/x/ref/services/syncbase/server/interfaces"
+ "v.io/syncbase/x/ref/services/syncbase/server/util"
+ "v.io/syncbase/x/ref/services/syncbase/server/watchable"
+ "v.io/v23/context"
+ "v.io/v23/vom"
+)
+
+const (
+ addLocal = iota
+ addRemote
+ linkLocal
+ linkRemote
+ genvec
+)
+
+var (
+ constTime = time.Date(2009, time.November, 10, 23, 0, 0, 0, time.UTC)
+)
+
+type syncCommand struct {
+ cmd int
+ oid string
+ version string
+ parents []string
+ logrec string
+ deleted bool
+ batchId uint64
+ batchCount uint64
+ genVec interfaces.GenVector
+}
+
+// parseSyncCommands parses a sync test file and returns its commands.
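+//
+// For illustration (hypothetical lines; the actual state used by the tests
+// lives in the files under testdata/):
+//   addr|foo1|2|1||<logrec-key>|0|1|false
+//     i.e. cmd|oid|version|parent1|parent2|logrec|batchId|batchCount|deleted
+//   genvec|foo1|10:1,11:3|bar|11:0
+//     i.e. genvec followed by alternating prefix and "dev:gen" pairs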
+func parseSyncCommands(file string) ([]syncCommand, error) {
+ cmds := []syncCommand{}
+
+ sf, err := os.Open("testdata/" + file)
+ if err != nil {
+ return nil, err
+ }
+ defer sf.Close()
+
+ scanner := bufio.NewScanner(sf)
+ lineno := 0
+ for scanner.Scan() {
+ lineno++
+ line := strings.TrimSpace(scanner.Text())
+ if line == "" || line[0] == '#' {
+ continue
+ }
+
+ args := strings.Split(line, "|")
+ nargs := len(args)
+
+ switch args[0] {
+ case "addl", "addr":
+ expNargs := 9
+ if nargs != expNargs {
+ return nil, fmt.Errorf("%s:%d: need %d args instead of %d",
+ file, lineno, expNargs, nargs)
+ }
+ var parents []string
+ for i := 3; i <= 4; i++ {
+ if args[i] != "" {
+ parents = append(parents, args[i])
+ }
+ }
+
+ batchId, err := strconv.ParseUint(args[6], 10, 64)
+ if err != nil {
+ return nil, fmt.Errorf("%s:%d: invalid batchId: %s", file, lineno, args[6])
+ }
+ batchCount, err := strconv.ParseUint(args[7], 10, 64)
+ if err != nil {
+ return nil, fmt.Errorf("%s:%d: invalid batch count: %s", file, lineno, args[7])
+ }
+ del, err := strconv.ParseBool(args[8])
+ if err != nil {
+ return nil, fmt.Errorf("%s:%d: invalid deleted bit: %s", file, lineno, args[8])
+ }
+ cmd := syncCommand{
+ oid: args[1],
+ version: args[2],
+ parents: parents,
+ logrec: args[5],
+ batchId: batchId,
+ batchCount: batchCount,
+ deleted: del,
+ }
+ if args[0] == "addl" {
+ cmd.cmd = addLocal
+ } else {
+ cmd.cmd = addRemote
+ }
+ cmds = append(cmds, cmd)
+
+ case "linkl", "linkr":
+ expNargs := 6
+ if nargs != expNargs {
+ return nil, fmt.Errorf("%s:%d: need %d args instead of %d",
+ file, lineno, expNargs, nargs)
+ }
+
+ if args[3] == "" {
+ return nil, fmt.Errorf("%s:%d: parent version not specified", file, lineno)
+ }
+ if args[4] != "" {
+ return nil, fmt.Errorf("%s:%d: cannot specify a 2nd parent: %s",
+ file, lineno, args[4])
+ }
+
+ cmd := syncCommand{
+ oid: args[1],
+ version: args[2],
+ parents: []string{args[3]},
+ logrec: args[5],
+ }
+ if args[0] == "linkl" {
+ cmd.cmd = linkLocal
+ } else {
+ cmd.cmd = linkRemote
+ }
+ cmds = append(cmds, cmd)
+
+ case "genvec":
+ cmd := syncCommand{
+ cmd: genvec,
+ genVec: make(interfaces.GenVector),
+ }
+ for i := 1; i < len(args); i = i + 2 {
+ pfx := args[i]
+ genVec := make(interfaces.PrefixGenVector)
+ for _, elem := range strings.Split(args[i+1], ",") {
+ kv := strings.Split(elem, ":")
+ if len(kv) != 2 {
+ return nil, fmt.Errorf("%s:%d: invalid gen vector key/val: %s", file, lineno, elem)
+ }
+ dev, err := strconv.ParseUint(kv[0], 10, 64)
+ if err != nil {
+ return nil, fmt.Errorf("%s:%d: invalid devid: %s", file, lineno, args[i+1])
+ }
+ gen, err := strconv.ParseUint(kv[1], 10, 64)
+ if err != nil {
+ return nil, fmt.Errorf("%s:%d: invalid gen: %s", file, lineno, args[i+1])
+ }
+ genVec[dev] = gen
+ }
+ cmd.genVec[pfx] = genVec
+ }
+ cmds = append(cmds, cmd)
+
+ default:
+ return nil, fmt.Errorf("%s:%d: invalid operation: %s", file, lineno, args[0])
+ }
+ }
+
+ err = scanner.Err()
+ return cmds, err
+}
+
+// dagReplayCommands parses a sync test file and replays its commands, updating
+// the DAG structures associated with the sync service.
+func (s *syncService) dagReplayCommands(ctx *context.T, syncfile string) (graftMap, error) {
+ cmds, err := parseSyncCommands(syncfile)
+ if err != nil {
+ return nil, err
+ }
+
+ st := s.sv.St()
+ graft := newGraft()
+
+ for _, cmd := range cmds {
+ tx := st.NewTransaction()
+
+ switch cmd.cmd {
+ case addLocal:
+ err = s.addNode(ctx, tx, cmd.oid, cmd.version, cmd.logrec,
+ cmd.deleted, cmd.parents, NoBatchId, nil)
+ if err != nil {
+ return nil, fmt.Errorf("cannot add local node %s:%s: %v",
+ cmd.oid, cmd.version, err)
+ }
+
+ if err = moveHead(ctx, tx, cmd.oid, cmd.version); err != nil {
+ return nil, fmt.Errorf("cannot move head to %s:%s: %v",
+ cmd.oid, cmd.version, err)
+ }
+
+ case addRemote:
+ err = s.addNode(ctx, tx, cmd.oid, cmd.version, cmd.logrec,
+ cmd.deleted, cmd.parents, NoBatchId, graft)
+ if err != nil {
+ return nil, fmt.Errorf("cannot add remote node %s:%s: %v",
+ cmd.oid, cmd.version, err)
+ }
+
+ case linkLocal:
+ if err = s.addParent(ctx, tx, cmd.oid, cmd.version, cmd.parents[0], nil); err != nil {
+ return nil, fmt.Errorf("cannot add local parent %s to node %s:%s: %v",
+ cmd.parents[0], cmd.oid, cmd.version, err)
+ }
+
+ case linkRemote:
+ if err = s.addParent(ctx, tx, cmd.oid, cmd.version, cmd.parents[0], graft); err != nil {
+ return nil, fmt.Errorf("cannot add remote parent %s to node %s:%s: %v",
+ cmd.parents[0], cmd.oid, cmd.version, err)
+ }
+ }
+
+ tx.Commit()
+ }
+
+ return graft, nil
+}
+
+// dummyStream emulates a stream of log records received over RPC.
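+// It implements both the receive and the send halves of the GetDeltas call so
+// that tests can substitute it for the real RPC stream (see createReplayStream
+// and testInit).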
+type dummyStream struct {
+ l *list.List
+ entry interfaces.DeltaResp
+}
+
+func newStream() *dummyStream {
+ ds := &dummyStream{
+ l: list.New(),
+ }
+ return ds
+}
+
+func (ds *dummyStream) add(entry interfaces.DeltaResp) {
+ ds.l.PushBack(entry)
+}
+
+func (ds *dummyStream) Advance() bool {
+ if ds.l.Len() > 0 {
+ ds.entry = ds.l.Remove(ds.l.Front()).(interfaces.DeltaResp)
+ return true
+ }
+ return false
+}
+
+func (ds *dummyStream) Value() interfaces.DeltaResp {
+ return ds.entry
+}
+
+func (ds *dummyStream) RecvStream() interface {
+ Advance() bool
+ Value() interfaces.DeltaResp
+ Err() error
+} {
+ return ds
+}
+
+func (*dummyStream) Err() error { return nil }
+
+func (ds *dummyStream) Finish() error {
+ return nil
+}
+
+func (ds *dummyStream) Cancel() {
+}
+
+func (ds *dummyStream) SendStream() interface {
+ Send(item interfaces.DeltaReq) error
+ Close() error
+} {
+ return ds
+}
+
+func (ds *dummyStream) Send(item interfaces.DeltaReq) error {
+ return nil
+}
+
+func (ds *dummyStream) Close() error {
+ return nil
+}
+
+// replayLocalCommands replays local log records parsed from the input file.
+func replayLocalCommands(t *testing.T, s *mockService, syncfile string) {
+ cmds, err := parseSyncCommands(syncfile)
+ if err != nil {
+ t.Fatalf("parseSyncCommands failed with err %v", err)
+ }
+
+ tx := s.St().NewTransaction()
+ var pos uint64
+ for _, cmd := range cmds {
+ switch cmd.cmd {
+ case addLocal:
+ rec := &localLogRec{
+ Metadata: createMetadata(t, interfaces.NodeRec, cmd),
+ Pos: pos,
+ }
+ err = s.sync.processLocalLogRec(nil, tx, rec)
+ if err != nil {
+ t.Fatalf("processLocalLogRec failed with err %v", err)
+ }
+
+ // Add to Store.
+ err = watchable.PutVersion(nil, tx, []byte(rec.Metadata.ObjId), []byte(rec.Metadata.CurVers))
+ if err != nil {
+ t.Fatalf("PutVersion failed with err %v", err)
+ }
+ err = watchable.PutAtVersion(nil, tx, []byte(rec.Metadata.ObjId), []byte("abc"), []byte(rec.Metadata.CurVers))
+ if err != nil {
+ t.Fatalf("PutAtVersion failed with err %v", err)
+ }
+
+ default:
+ t.Fatalf("replayLocalCommands failed with unknown command %v", cmd)
+ }
+ pos++
+ }
+ if err := tx.Commit(); err != nil {
+ t.Fatalf("cannot commit local log records %s, err %v", syncfile, err)
+ }
+}
+
+// createReplayStream creates a dummy stream of log records parsed from the input file.
+func createReplayStream(t *testing.T, syncfile string) *dummyStream {
+ cmds, err := parseSyncCommands(syncfile)
+ if err != nil {
+ t.Fatalf("parseSyncCommands failed with err %v", err)
+ }
+
+ stream := newStream()
+ start := interfaces.DeltaRespStart{true}
+ stream.add(start)
+
+ for _, cmd := range cmds {
+ var ty byte
+ switch cmd.cmd {
+ case genvec:
+ gv := interfaces.DeltaRespRespVec{cmd.genVec}
+ stream.add(gv)
+ continue
+ case addRemote:
+ ty = interfaces.NodeRec
+ case linkRemote:
+ ty = interfaces.LinkRec
+ default:
+ t.Fatalf("createReplayStream unknown command %v", cmd)
+ }
+
+ var val string = "abc"
+ valbuf, err := vom.Encode(val)
+ if err != nil {
+ t.Fatalf("createReplayStream encode failed, err %v", err)
+ }
+
+ rec := interfaces.DeltaRespRec{interfaces.LogRec{
+ Metadata: createMetadata(t, ty, cmd),
+ Value: valbuf,
+ }}
+
+ stream.add(rec)
+ }
+ fin := interfaces.DeltaRespFinish{true}
+ stream.add(fin)
+ return stream
+}
+
+func createMetadata(t *testing.T, ty byte, cmd syncCommand) interfaces.LogRecMetadata {
+ id, gen, err := splitLogRecKey(nil, cmd.logrec)
+ if err != nil {
+ t.Fatalf("createReplayStream splitLogRecKey failed, key %s, err %v", cmd.logrec, gen)
+ }
+ m := interfaces.LogRecMetadata{
+ Id: id,
+ Gen: gen,
+ RecType: ty,
+ ObjId: util.JoinKeyParts(util.RowPrefix, cmd.oid),
+ CurVers: cmd.version,
+ Parents: cmd.parents,
+ UpdTime: constTime,
+ Delete: cmd.deleted,
+ BatchId: cmd.batchId,
+ BatchCount: cmd.batchCount,
+ }
+ return m
+}
diff --git a/services/syncbase/vsync/responder.go b/services/syncbase/vsync/responder.go
new file mode 100644
index 0000000..c417eca
--- /dev/null
+++ b/services/syncbase/vsync/responder.go
@@ -0,0 +1,516 @@
+// Copyright 2015 The Vanadium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package vsync
+
+import (
+ "container/heap"
+ "sort"
+ "strings"
+
+ wire "v.io/syncbase/v23/services/syncbase/nosql"
+ "v.io/syncbase/x/ref/services/syncbase/server/interfaces"
+ "v.io/syncbase/x/ref/services/syncbase/server/watchable"
+ "v.io/syncbase/x/ref/services/syncbase/store"
+ "v.io/v23/context"
+ "v.io/v23/verror"
+ "v.io/x/lib/vlog"
+)
+
+// GetDeltas implements the responder side of the GetDeltas RPC.
+func (s *syncService) GetDeltas(ctx *context.T, call interfaces.SyncGetDeltasServerCall, initiator string) error {
+ vlog.VI(2).Infof("sync: GetDeltas: begin: from initiator %s", initiator)
+ defer vlog.VI(2).Infof("sync: GetDeltas: end: from initiator %s", initiator)
+
+ recvr := call.RecvStream()
+ for recvr.Advance() {
+ req := recvr.Value()
+ // Errors are ignored here: if one Database fails for any reason, it is
+ // fine to continue to the next one. In fact, the failure may be
+ // legitimate; for example, the responder is no longer part of the
+ // requested SyncGroups, the app/db has been locally deleted, or a
+ // permission change has denied access.
+ rSt := newResponderState(ctx, call, s, req, initiator)
+ rSt.sendDeltasPerDatabase(ctx)
+ }
+
+ // TODO(hpucha): Is there a need to call finish or some such?
+ return recvr.Err()
+}
+
+// responderState is state accumulated per Database by the responder during an
+// initiation round.
+type responderState struct {
+ req interfaces.DeltaReq
+ call interfaces.SyncGetDeltasServerCall // Stream handle for the GetDeltas RPC.
+ initiator string
+ errState error // Captures the error from the first two phases of the responder.
+ sync *syncService
+ st store.Store // Store handle to the Database.
+ diff genRangeVector
+ outVec interfaces.GenVector
+}
+
+func newResponderState(ctx *context.T, call interfaces.SyncGetDeltasServerCall, sync *syncService, req interfaces.DeltaReq, initiator string) *responderState {
+ rSt := &responderState{call: call, sync: sync, req: req, initiator: initiator}
+ return rSt
+}
+
+// sendDeltasPerDatabase sends to an initiator all the missing generations
+// corresponding to the prefixes requested for this Database, and a genvector
+// summarizing the knowledge transferred from the responder to the
+// initiator. This happens in three phases:
+//
+// In the first phase, the initiator is checked against the SyncGroup ACLs of
+// all the SyncGroups it is requesting, and only those prefixes that belong to
+// allowed SyncGroups are carried forward.
+//
+// In the second phase, for a given set of nested prefixes from the initiator,
+// the shortest prefix in that set is extracted. The initiator's prefix
+// genvector for this shortest prefix represents the lower bound on its
+// knowledge for the entire set of nested prefixes. This prefix genvector
+// (representing the lower bound) is diffed with all the responder prefix
+// genvectors corresponding to same or deeper prefixes compared to the initiator
+// prefix. This diff produces a bound on the missing knowledge. For example, say
+// the initiator is interested in prefixes {foo, foobar}, where each prefix is
+// associated with a prefix genvector. Since the initiator strictly has as much
+// or more knowledge for prefix "foobar" as it has for prefix "foo", "foo"'s
+// prefix genvector is chosen as the lower bound for the initiator's
+// knowledge. Similarly, say the responder has knowledge on prefixes {f,
+// foobarX, foobarY, bar}. The responder diffs the prefix genvectors for
+// prefixes f, foobarX and foobarY with the initiator's prefix genvector to
+// compute a bound on the missing generations (all the responder's prefixes that
+// match "foo"; note that since the responder doesn't have a prefix genvector at
+// "foo", its knowledge at "f" applies to "foo").
+//
+// Since the second phase outputs an aggressive calculation of missing
+// generations containing more generation entries than strictly needed by the
+// initiator, in the third phase, each missing generation is sent to the
+// initiator only if the initiator is eligible for it and is not aware of
+// it. The generations are sent to the initiator in the same order as the
+// responder learned them so that the initiator can reconstruct the DAG for the
+// objects by learning older nodes first.
+func (rSt *responderState) sendDeltasPerDatabase(ctx *context.T) error {
+ // TODO(rdaoud): for such vlog.VI() calls where the function name is
+ // embedded, consider using a helper function to auto-fill it instead
+ // (see http://goo.gl/mEa4L0) but only incur that overhead when the
+ // logging level specified is enabled.
+ vlog.VI(3).Infof("sync: sendDeltasPerDatabase: %s, %s: sgids %v, genvec %v",
+ rSt.req.AppName, rSt.req.DbName, rSt.req.SgIds, rSt.req.InitVec)
+
+ // Phase 1 of sendDeltas: Authorize the initiator and respond to the
+ // caller only for the SyncGroups that allow access.
+ rSt.authorizeAndFilterSyncGroups(ctx)
+
+ // Phase 2 of sendDeltas: diff contains the bound on the
+ // generations missing from the initiator per device.
+ rSt.computeDeltaBound(ctx)
+
+ // Phase 3 of sendDeltas: Process the diff, filtering out records that
+ // are not needed, and send the remainder on the wire ordered.
+ return rSt.filterAndSendDeltas(ctx)
+}
+
+// authorizeAndFilterSyncGroups authorizes the initiator against the requested
+// SyncGroups and filters the initiator's prefixes to only include those from
+// allowed SyncGroups (phase 1 of sendDeltas).
+func (rSt *responderState) authorizeAndFilterSyncGroups(ctx *context.T) {
+ rSt.st, rSt.errState = rSt.sync.getDbStore(ctx, nil, rSt.req.AppName, rSt.req.DbName)
+ if rSt.errState != nil {
+ return
+ }
+
+ allowedPfxs := make(map[string]struct{})
+ for sgid := range rSt.req.SgIds {
+ // Check permissions for the SyncGroup.
+ var sg *interfaces.SyncGroup
+ sg, rSt.errState = getSyncGroupById(ctx, rSt.st, sgid)
+ if rSt.errState != nil {
+ return
+ }
+ rSt.errState = authorize(ctx, rSt.call.Security(), sg)
+ if verror.ErrorID(rSt.errState) == verror.ErrNoAccess.ID {
+ continue
+ } else if rSt.errState != nil {
+ return
+ }
+
+ for _, p := range sg.Spec.Prefixes {
+ allowedPfxs[p] = struct{}{}
+ }
+
+ // Add the initiator to the SyncGroup membership if not already
+ // in it. It is a temporary solution until SyncGroup metadata
+ // is synchronized peer to peer.
+ // TODO(rdaoud): remove this when SyncGroups are synced.
+ rSt.addInitiatorToSyncGroup(ctx, sgid)
+ }
+
+ // Filter the initiator's prefixes to what is allowed.
+ for pfx := range rSt.req.InitVec {
+ if _, ok := allowedPfxs[pfx]; ok {
+ continue
+ }
+ allowed := false
+ for p := range allowedPfxs {
+ if strings.HasPrefix(pfx, p) {
+ allowed = true
+ }
+ }
+
+ if !allowed {
+ delete(rSt.req.InitVec, pfx)
+ }
+ }
+ return
+}
+
+// addInitiatorToSyncGroup adds the request initiator to the membership of the
+// given SyncGroup if the initiator is not already a member. It is a temporary
+// solution until SyncGroup metadata starts being synchronized, at which time
+// peers will learn of new members through mutations of the SyncGroup metadata
+// by the SyncGroup administrators.
+// Note: the joiner metadata is fake because the responder does not have it.
+func (rSt *responderState) addInitiatorToSyncGroup(ctx *context.T, gid interfaces.GroupId) {
+ if rSt.initiator == "" {
+ return
+ }
+
+ err := store.RunInTransaction(rSt.st, func(tx store.Transaction) error {
+ sg, err := getSyncGroupById(ctx, tx, gid)
+ if err != nil {
+ return err
+ }
+
+ // If the initiator is already a member of the SyncGroup, abort
+ // the transaction with a special error code.
+ if _, ok := sg.Joiners[rSt.initiator]; ok {
+ return verror.New(verror.ErrExist, ctx, "member already in SyncGroup")
+ }
+
+ vlog.VI(4).Infof("sync: addInitiatorToSyncGroup: add %s to sgid %d", rSt.initiator, gid)
+ sg.Joiners[rSt.initiator] = wire.SyncGroupMemberInfo{SyncPriority: 1}
+ return setSGDataEntry(ctx, tx, gid, sg)
+ })
+
+ if err != nil && verror.ErrorID(err) != verror.ErrExist.ID {
+ vlog.Errorf("sync: addInitiatorToSyncGroup: initiator %s, sgid %d: %v", rSt.initiator, gid, err)
+ }
+}
+
+// computeDeltaBound computes the bound on missing generations across all
+// requested prefixes (phase 2 of sendDeltas).
+func (rSt *responderState) computeDeltaBound(ctx *context.T) {
+ // Check error from phase 1.
+ if rSt.errState != nil {
+ return
+ }
+
+ if len(rSt.req.InitVec) == 0 {
+ rSt.errState = verror.New(verror.ErrInternal, ctx, "empty initiator generation vector")
+ return
+ }
+
+ var respVec interfaces.GenVector
+ var respGen uint64
+ respVec, respGen, rSt.errState = rSt.sync.copyDbGenInfo(ctx, rSt.req.AppName, rSt.req.DbName)
+ if rSt.errState != nil {
+ return
+ }
+ respPfxs := extractAndSortPrefixes(respVec)
+ initPfxs := extractAndSortPrefixes(rSt.req.InitVec)
+
+ rSt.outVec = make(interfaces.GenVector)
+ rSt.diff = make(genRangeVector)
+ pfx := initPfxs[0]
+
+ for _, p := range initPfxs {
+ if strings.HasPrefix(p, pfx) && p != pfx {
+ continue
+ }
+
+ // Process this prefix as this is the start of a new set of
+ // nested prefixes.
+ pfx = p
+
+ // Lower bound on initiator's knowledge for this prefix set.
+ initpgv := rSt.req.InitVec[pfx]
+
+ // Find the relevant responder prefixes and add the corresponding knowledge.
+ var respgv interfaces.PrefixGenVector
+ var rpStart string
+ for _, rp := range respPfxs {
+ if !strings.HasPrefix(rp, pfx) && !strings.HasPrefix(pfx, rp) {
+ // No relationship with pfx.
+ continue
+ }
+
+ if strings.HasPrefix(pfx, rp) {
+ // If rp is a prefix of pfx, remember it because
+ // it may be a potential starting point for the
+ // responder's knowledge. The actual starting
+ // point is the deepest prefix where rp is a
+ // prefix of pfx.
+ //
+ // Say the initiator is looking for "foo", and
+ // the responder has knowledge for "f" and "fo",
+ // the responder's starting point will be the
+ // prefix genvector for "fo". Similarly, if the
+ // responder has knowledge for "foo", the
+ // starting point will be the prefix genvector
+ // for "foo".
+ rpStart = rp
+ } else {
+ // If pfx is a prefix of rp, this knowledge must
+ // be definitely sent to the initiator. Diff the
+ // prefix genvectors to adjust the delta bound and
+ // include in outVec.
+ respgv = respVec[rp]
+ rSt.diffPrefixGenVectors(respgv, initpgv)
+ rSt.outVec[rp] = respgv
+ }
+ }
+
+ // Deal with the starting point.
+ if rpStart == "" {
+ // No matching prefixes for pfx were found.
+ respgv = make(interfaces.PrefixGenVector)
+ respgv[rSt.sync.id] = respGen
+ } else {
+ respgv = respVec[rpStart]
+ }
+ rSt.diffPrefixGenVectors(respgv, initpgv)
+ rSt.outVec[pfx] = respgv
+ }
+
+ vlog.VI(3).Infof("sync: computeDeltaBound: %s, %s: diff %v, outvec %v",
+ rSt.req.AppName, rSt.req.DbName, rSt.diff, rSt.outVec)
+ return
+}
+
+// filterAndSendDeltas filters the computed delta to remove records already
+// known by the initiator, and sends the resulting records to the initiator
+// (phase 3 of sendDeltas).
+func (rSt *responderState) filterAndSendDeltas(ctx *context.T) error {
+ // Always send a start and finish response so that the initiator can
+ // move on to the next Database.
+ //
+ // TODO(hpucha): Although ok for now to call SendStream once per
+ // Database, would like to make this implementation agnostic.
+ sender := rSt.call.SendStream()
+ sender.Send(interfaces.DeltaRespStart{true})
+ defer sender.Send(interfaces.DeltaRespFinish{true})
+
+ // Check error from phase 2.
+ if rSt.errState != nil {
+ return rSt.errState
+ }
+
+ // First two phases were successful. So now on to phase 3. We now visit
+ // every log record in the generation range as obtained from phase 1 in
+ // their log order. We use a heap to incrementally sort the log records
+ // as per their position in the log.
+ //
+ // Init the min heap, one entry per device in the diff.
+ mh := make(minHeap, 0, len(rSt.diff))
+ for dev, r := range rSt.diff {
+ r.cur = r.min
+ rec, err := getNextLogRec(ctx, rSt.st, dev, r)
+ if err != nil {
+ return err
+ }
+ if rec != nil {
+ mh = append(mh, rec)
+ } else {
+ delete(rSt.diff, dev)
+ }
+ }
+ heap.Init(&mh)
+
+ // Process the log records in order.
+ initPfxs := extractAndSortPrefixes(rSt.req.InitVec)
+ for mh.Len() > 0 {
+ rec := heap.Pop(&mh).(*localLogRec)
+
+ if !filterLogRec(rec, rSt.req.InitVec, initPfxs) {
+ // Send on the wire.
+ wireRec, err := makeWireLogRec(ctx, rSt.st, rec)
+ if err != nil {
+ return err
+ }
+ sender.Send(interfaces.DeltaRespRec{*wireRec})
+ }
+
+ // Add a new record from the same device if not done.
+ dev := rec.Metadata.Id
+ rec, err := getNextLogRec(ctx, rSt.st, dev, rSt.diff[dev])
+ if err != nil {
+ return err
+ }
+ if rec != nil {
+ heap.Push(&mh, rec)
+ } else {
+ delete(rSt.diff, dev)
+ }
+ }
+
+ sender.Send(interfaces.DeltaRespRespVec{rSt.outVec})
+ return nil
+}
+
+// genRange represents a range of generations (min and max inclusive).
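+// cur tracks the next generation to fetch while iterating over the range (see
+// getNextLogRec).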
+type genRange struct {
+ min uint64
+ max uint64
+ cur uint64
+}
+
+type genRangeVector map[uint64]*genRange
+
+// diffPrefixGenVectors diffs two generation vectors, belonging to the responder
+// and the initiator, and updates the range of generations per device known to
+// the responder but not known to the initiator. "gens" (generation range) is
+// passed in as an input argument so that it can be incrementally updated as the
+// range of missing generations grows when different responder prefix genvectors
+// are used to compute the diff.
+//
+// For example: Generation vector for responder is say RVec = {A:10, B:5, C:1},
+// Generation vector for initiator is say IVec = {A:5, B:10, D:2}. Diffing these
+// two vectors returns: {A:[6-10], C:[1-1]}.
+//
+// TODO(hpucha): Add reclaimVec for GCing.
+func (rSt *responderState) diffPrefixGenVectors(respPVec, initPVec interfaces.PrefixGenVector) {
+ // Compute missing generations for devices that are in both initiator's and responder's vectors.
+ for devid, gen := range initPVec {
+ rgen, ok := respPVec[devid]
+ if ok {
+ updateDevRange(devid, rgen, gen, rSt.diff)
+ }
+ }
+
+ // Compute missing generations for devices not in initiator's vector but in responder's vector.
+ for devid, rgen := range respPVec {
+ if _, ok := initPVec[devid]; !ok {
+ updateDevRange(devid, rgen, 0, rSt.diff)
+ }
+ }
+}
+
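+// updateDevRange extends the range of generations missing from the initiator
+// for the given device. For illustration (hypothetical values): if the
+// initiator knows up to gen 5 and the responder up to gen 10 for a device, the
+// range becomes [6, 10]; a later diff showing responder gen 12 widens it to
+// [6, 12].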
+func updateDevRange(devid, rgen, gen uint64, gens genRangeVector) {
+ if gen < rgen {
+ // Need to include all generations in the interval [gen+1,rgen], gen+1 and rgen inclusive.
+ if r, ok := gens[devid]; !ok {
+ gens[devid] = &genRange{min: gen + 1, max: rgen}
+ } else {
+ if gen+1 < r.min {
+ r.min = gen + 1
+ }
+ if rgen > r.max {
+ r.max = rgen
+ }
+ }
+ }
+}
+
+func extractAndSortPrefixes(vec interfaces.GenVector) []string {
+ pfxs := make([]string, len(vec))
+ i := 0
+ for p := range vec {
+ pfxs[i] = p
+ i++
+ }
+ sort.Strings(pfxs)
+ return pfxs
+}
+
+// TODO(hpucha): This can be optimized using a scan instead of "gets" in a for
+// loop.
+func getNextLogRec(ctx *context.T, st store.Store, dev uint64, r *genRange) (*localLogRec, error) {
+ for i := r.cur; i <= r.max; i++ {
+ rec, err := getLogRec(ctx, st, dev, i)
+ if err == nil {
+ r.cur = i + 1
+ return rec, nil
+ }
+ if verror.ErrorID(err) != verror.ErrNoExist.ID {
+ return nil, err
+ }
+ }
+ return nil, nil
+}
+
+// Note: initPfxs is sorted.
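+// For illustration (hypothetical values): with initVec {"foo": {11: 5}}, a
+// record from device 11 for row key "foo1" is filtered if its gen is <= 5 and
+// sent otherwise; a record whose key matches no initiator prefix is always
+// filtered.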
+func filterLogRec(rec *localLogRec, initVec interfaces.GenVector, initPfxs []string) bool {
+ // The key starts with one of the store's reserved prefixes for managed
+ // namespaces (e.g. $row, $perms). Remove that prefix before comparing
+ // it with the SyncGroup prefixes, which are defined by the application.
+ key := extractAppKey(rec.Metadata.ObjId)
+
+ filter := true
+ var maxGen uint64
+ for _, p := range initPfxs {
+ if strings.HasPrefix(key, p) {
+ // Do not filter. Initiator is interested in this
+ // prefix.
+ filter = false
+
+ // Track if the initiator knows of this record.
+ gen := initVec[p][rec.Metadata.Id]
+ if maxGen < gen {
+ maxGen = gen
+ }
+ }
+ }
+
+ // Filter this record if the initiator already has it.
+ if maxGen >= rec.Metadata.Gen {
+ filter = true
+ }
+
+ return filter
+}
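+
+// As a small sketch (not lifted from the tests verbatim): with initPfxs =
+// ["foo"], a record from device 11 whose app key starts with "foo" is kept
+// when its Gen exceeds the initiator's known generation (5 here) and filtered
+// otherwise:
+//
+//    initVec := interfaces.GenVector{"foo": interfaces.PrefixGenVector{11: 5}}
+//    rec := &localLogRec{Metadata: interfaces.LogRecMetadata{
+//        Id: 11, Gen: 7, ObjId: makeRowKey("foobar~k1")}}
+//    keep := !filterLogRec(rec, initVec, []string{"foo"}) // true; false if Gen were 4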
+
+// makeWireLogRec builds the wire-format log record corresponding to a given
+// local sync log record.
+func makeWireLogRec(ctx *context.T, st store.Store, rec *localLogRec) (*interfaces.LogRec, error) {
+ // Get the object value at the required version.
+ key, version := rec.Metadata.ObjId, rec.Metadata.CurVers
+ var value []byte
+ if !rec.Metadata.Delete {
+ var err error
+ value, err = watchable.GetAtVersion(ctx, st, []byte(key), nil, []byte(version))
+ if err != nil {
+ return nil, err
+ }
+ }
+
+ wireRec := &interfaces.LogRec{Metadata: rec.Metadata, Value: value}
+ return wireRec, nil
+}
+
+// minHeap implements heap.Interface and holds local log records, ordered by
+// their local log position (Pos).
+type minHeap []*localLogRec
+
+func (mh minHeap) Len() int { return len(mh) }
+
+func (mh minHeap) Less(i, j int) bool {
+ return mh[i].Pos < mh[j].Pos
+}
+
+func (mh minHeap) Swap(i, j int) {
+ mh[i], mh[j] = mh[j], mh[i]
+}
+
+func (mh *minHeap) Push(x interface{}) {
+ item := x.(*localLogRec)
+ *mh = append(*mh, item)
+}
+
+func (mh *minHeap) Pop() interface{} {
+ old := *mh
+ n := len(old)
+ item := old[n-1]
+ *mh = old[0 : n-1]
+ return item
+}
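+
+// A minimal usage sketch: records pushed in any order pop back in increasing
+// Pos order, which is how filterAndSendDeltas interleaves the per-device log
+// streams above.
+//
+//    mh := minHeap{}
+//    heap.Push(&mh, &localLogRec{Pos: 7})
+//    heap.Push(&mh, &localLogRec{Pos: 3})
+//    first := heap.Pop(&mh).(*localLogRec) // first.Pos == 3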
diff --git a/services/syncbase/vsync/responder_test.go b/services/syncbase/vsync/responder_test.go
new file mode 100644
index 0000000..986f9b3
--- /dev/null
+++ b/services/syncbase/vsync/responder_test.go
@@ -0,0 +1,520 @@
+// Copyright 2015 The Vanadium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package vsync
+
+import (
+ "fmt"
+ "math/rand"
+ "reflect"
+ "testing"
+ "time"
+
+ "v.io/syncbase/x/ref/services/syncbase/server/interfaces"
+ "v.io/syncbase/x/ref/services/syncbase/server/watchable"
+ "v.io/v23/naming"
+ "v.io/v23/rpc"
+ "v.io/v23/security"
+ _ "v.io/x/ref/runtime/factories/generic"
+)
+
+// TestDiffPrefixGenVectors tests diffing prefix gen vectors.
+func TestDiffPrefixGenVectors(t *testing.T) {
+ svc := createService(t)
+ defer destroyService(t, svc)
+ s := svc.sync
+ s.id = 10 // Responder. Initiator is id 11.
+
+ tests := []struct {
+ respPVec, initPVec interfaces.PrefixGenVector
+ genDiffIn genRangeVector
+ genDiffWant genRangeVector
+ }{
+ { // responder and initiator are at identical vectors.
+ respPVec: interfaces.PrefixGenVector{10: 1, 11: 10, 12: 20, 13: 2},
+ initPVec: interfaces.PrefixGenVector{10: 1, 11: 10, 12: 20, 13: 2},
+ genDiffIn: make(genRangeVector),
+ },
+ { // responder and initiator are at identical vectors.
+ respPVec: interfaces.PrefixGenVector{10: 0},
+ initPVec: interfaces.PrefixGenVector{10: 0},
+ genDiffIn: make(genRangeVector),
+ },
+ { // responder has no updates.
+ respPVec: interfaces.PrefixGenVector{10: 0},
+ initPVec: interfaces.PrefixGenVector{10: 5, 11: 10, 12: 20, 13: 8},
+ genDiffIn: make(genRangeVector),
+ },
+ { // responder and initiator have no updates.
+ respPVec: interfaces.PrefixGenVector{10: 0},
+ initPVec: interfaces.PrefixGenVector{11: 0},
+ genDiffIn: make(genRangeVector),
+ },
+ { // responder is staler than initiator.
+ respPVec: interfaces.PrefixGenVector{10: 1, 11: 10, 12: 20, 13: 2},
+ initPVec: interfaces.PrefixGenVector{10: 1, 11: 10, 12: 20, 13: 8, 14: 5},
+ genDiffIn: make(genRangeVector),
+ },
+ { // responder is more up-to-date than initiator for local updates.
+ respPVec: interfaces.PrefixGenVector{10: 5, 11: 10, 12: 20, 13: 2},
+ initPVec: interfaces.PrefixGenVector{10: 1, 11: 10, 12: 20, 13: 2},
+ genDiffIn: make(genRangeVector),
+ genDiffWant: genRangeVector{10: &genRange{min: 2, max: 5}},
+ },
+ { // responder is fresher than initiator for local updates and one device.
+ respPVec: interfaces.PrefixGenVector{10: 5, 11: 10, 12: 22, 13: 2},
+ initPVec: interfaces.PrefixGenVector{10: 1, 11: 10, 12: 20, 13: 2, 14: 40},
+ genDiffIn: make(genRangeVector),
+ genDiffWant: genRangeVector{
+ 10: &genRange{min: 2, max: 5},
+ 12: &genRange{min: 21, max: 22},
+ },
+ },
+ { // responder is fresher than initiator in all but one device.
+ respPVec: interfaces.PrefixGenVector{10: 1, 11: 2, 12: 3, 13: 4},
+ initPVec: interfaces.PrefixGenVector{10: 0, 11: 2, 12: 0},
+ genDiffIn: make(genRangeVector),
+ genDiffWant: genRangeVector{
+ 10: &genRange{min: 1, max: 1},
+ 12: &genRange{min: 1, max: 3},
+ 13: &genRange{min: 1, max: 4},
+ },
+ },
+ { // initiator has no updates.
+ respPVec: interfaces.PrefixGenVector{10: 1, 11: 2, 12: 3, 13: 4},
+ initPVec: interfaces.PrefixGenVector{},
+ genDiffIn: make(genRangeVector),
+ genDiffWant: genRangeVector{
+ 10: &genRange{min: 1, max: 1},
+ 11: &genRange{min: 1, max: 2},
+ 12: &genRange{min: 1, max: 3},
+ 13: &genRange{min: 1, max: 4},
+ },
+ },
+ { // initiator has no updates, pre-existing diff.
+ respPVec: interfaces.PrefixGenVector{10: 1, 11: 2, 12: 3, 13: 4},
+ initPVec: interfaces.PrefixGenVector{13: 1},
+ genDiffIn: genRangeVector{
+ 10: &genRange{min: 5, max: 20},
+ 13: &genRange{min: 1, max: 3},
+ },
+ genDiffWant: genRangeVector{
+ 10: &genRange{min: 1, max: 20},
+ 11: &genRange{min: 1, max: 2},
+ 12: &genRange{min: 1, max: 3},
+ 13: &genRange{min: 1, max: 4},
+ },
+ },
+ }
+
+ for _, test := range tests {
+ want := test.genDiffWant
+ got := test.genDiffIn
+ rSt := newResponderState(nil, nil, s, interfaces.DeltaReq{}, "fakeInitiator")
+ rSt.diff = got
+ rSt.diffPrefixGenVectors(test.respPVec, test.initPVec)
+ checkEqualDevRanges(t, got, want)
+ }
+}
+
+// TestSendDeltas tests the computation of the delta bound (computeDeltaBound)
+// and whether the log records on the wire are correctly ordered (phases 2 and 3 of
+// SendDeltas).
+func TestSendDeltas(t *testing.T) {
+ appName := "mockapp"
+ dbName := "mockdb"
+
+ tests := []struct {
+ respVec, initVec, outVec interfaces.GenVector
+ respGen uint64
+ genDiff genRangeVector
+ keyPfxs []string
+ }{
+ { // Identical prefixes, local and remote updates.
+ respVec: interfaces.GenVector{
+ "foo": interfaces.PrefixGenVector{12: 8},
+ "foobar": interfaces.PrefixGenVector{12: 10},
+ },
+ initVec: interfaces.GenVector{
+ "foo": interfaces.PrefixGenVector{11: 5},
+ "foobar": interfaces.PrefixGenVector{11: 5},
+ },
+ respGen: 5,
+ outVec: interfaces.GenVector{
+ "foo": interfaces.PrefixGenVector{10: 5, 12: 8},
+ "foobar": interfaces.PrefixGenVector{10: 5, 12: 10},
+ },
+ genDiff: genRangeVector{
+ 10: &genRange{min: 1, max: 5},
+ 12: &genRange{min: 1, max: 10},
+ },
+ keyPfxs: []string{"baz", "wombat", "f", "foo", "foobar", ""},
+ },
+ { // Identical prefixes, local and remote updates.
+ respVec: interfaces.GenVector{
+ "bar": interfaces.PrefixGenVector{12: 20},
+ "foo": interfaces.PrefixGenVector{12: 8},
+ "foobar": interfaces.PrefixGenVector{12: 10},
+ },
+ initVec: interfaces.GenVector{
+ "foo": interfaces.PrefixGenVector{11: 5},
+ "foobar": interfaces.PrefixGenVector{11: 5, 12: 10},
+ "bar": interfaces.PrefixGenVector{10: 5, 11: 5, 12: 5},
+ },
+ respGen: 5,
+ outVec: interfaces.GenVector{
+ "foo": interfaces.PrefixGenVector{10: 5, 12: 8},
+ "foobar": interfaces.PrefixGenVector{10: 5, 12: 10},
+ "bar": interfaces.PrefixGenVector{10: 5, 12: 20},
+ },
+ genDiff: genRangeVector{
+ 10: &genRange{min: 1, max: 5},
+ 12: &genRange{min: 1, max: 20},
+ },
+ keyPfxs: []string{"baz", "wombat", "f", "foo", "foobar", "bar", "barbaz", ""},
+ },
+ { // Non-identical prefixes, local only updates.
+ initVec: interfaces.GenVector{
+ "foo": interfaces.PrefixGenVector{11: 5},
+ "foobar": interfaces.PrefixGenVector{11: 5},
+ },
+ respGen: 5,
+ outVec: interfaces.GenVector{
+ "foo": interfaces.PrefixGenVector{10: 5},
+ },
+ genDiff: genRangeVector{
+ 10: &genRange{min: 1, max: 5},
+ },
+ keyPfxs: []string{"baz", "wombat", "f", "foo", "foobar", "", "fo", "fooxyz"},
+ },
+ { // Non-identical prefixes, local and remote updates.
+ respVec: interfaces.GenVector{
+ "f": interfaces.PrefixGenVector{12: 5, 13: 5},
+ "foo": interfaces.PrefixGenVector{12: 10, 13: 10},
+ "foobar": interfaces.PrefixGenVector{12: 20, 13: 20},
+ },
+ initVec: interfaces.GenVector{
+ "foo": interfaces.PrefixGenVector{11: 5, 12: 1},
+ },
+ respGen: 5,
+ outVec: interfaces.GenVector{
+ "foo": interfaces.PrefixGenVector{10: 5, 12: 10, 13: 10},
+ "foobar": interfaces.PrefixGenVector{10: 5, 12: 20, 13: 20},
+ },
+ genDiff: genRangeVector{
+ 10: &genRange{min: 1, max: 5},
+ 12: &genRange{min: 2, max: 20},
+ 13: &genRange{min: 1, max: 20},
+ },
+ keyPfxs: []string{"baz", "wombat", "f", "foo", "foobar", "", "fo", "fooxyz"},
+ },
+ { // Non-identical prefixes, local and remote updates.
+ respVec: interfaces.GenVector{
+ "foobar": interfaces.PrefixGenVector{12: 20, 13: 20},
+ },
+ initVec: interfaces.GenVector{
+ "foo": interfaces.PrefixGenVector{11: 5, 12: 1},
+ },
+ respGen: 5,
+ outVec: interfaces.GenVector{
+ "foo": interfaces.PrefixGenVector{10: 5},
+ "foobar": interfaces.PrefixGenVector{10: 5, 12: 20, 13: 20},
+ },
+ genDiff: genRangeVector{
+ 10: &genRange{min: 1, max: 5},
+ 12: &genRange{min: 2, max: 20},
+ 13: &genRange{min: 1, max: 20},
+ },
+ keyPfxs: []string{"baz", "wombat", "f", "foo", "foobar", "", "fo", "fooxyz"},
+ },
+ { // Non-identical prefixes, local and remote updates.
+ respVec: interfaces.GenVector{
+ "f": interfaces.PrefixGenVector{12: 20, 13: 20},
+ },
+ initVec: interfaces.GenVector{
+ "foo": interfaces.PrefixGenVector{11: 5, 12: 1},
+ },
+ respGen: 5,
+ outVec: interfaces.GenVector{
+ "foo": interfaces.PrefixGenVector{10: 5, 12: 20, 13: 20},
+ },
+ genDiff: genRangeVector{
+ 10: &genRange{min: 1, max: 5},
+ 12: &genRange{min: 2, max: 20},
+ 13: &genRange{min: 1, max: 20},
+ },
+ keyPfxs: []string{"baz", "wombat", "f", "foo", "foobar", "", "fo", "fooxyz"},
+ },
+ { // Non-identical interleaving prefixes.
+ respVec: interfaces.GenVector{
+ "f": interfaces.PrefixGenVector{12: 20, 13: 10},
+ "foo": interfaces.PrefixGenVector{12: 30, 13: 20},
+ "foobar": interfaces.PrefixGenVector{12: 40, 13: 30},
+ },
+ initVec: interfaces.GenVector{
+ "fo": interfaces.PrefixGenVector{11: 5, 12: 1},
+ "foob": interfaces.PrefixGenVector{11: 5, 12: 10},
+ "foobarxyz": interfaces.PrefixGenVector{11: 5, 12: 20},
+ },
+ respGen: 5,
+ outVec: interfaces.GenVector{
+ "fo": interfaces.PrefixGenVector{10: 5, 12: 20, 13: 10},
+ "foo": interfaces.PrefixGenVector{10: 5, 12: 30, 13: 20},
+ "foobar": interfaces.PrefixGenVector{10: 5, 12: 40, 13: 30},
+ },
+ genDiff: genRangeVector{
+ 10: &genRange{min: 1, max: 5},
+ 12: &genRange{min: 2, max: 40},
+ 13: &genRange{min: 1, max: 30},
+ },
+ keyPfxs: []string{"baz", "wombat", "f", "foo", "foobar", "", "fo", "foob", "foobarxyz", "fooxyz"},
+ },
+ { // Non-identical interleaving prefixes.
+ respVec: interfaces.GenVector{
+ "fo": interfaces.PrefixGenVector{12: 20, 13: 10},
+ "foob": interfaces.PrefixGenVector{12: 30, 13: 20},
+ "foobarxyz": interfaces.PrefixGenVector{12: 40, 13: 30},
+ },
+ initVec: interfaces.GenVector{
+ "f": interfaces.PrefixGenVector{11: 5, 12: 1},
+ "foo": interfaces.PrefixGenVector{11: 5, 12: 10},
+ "foobar": interfaces.PrefixGenVector{11: 5, 12: 20},
+ },
+ respGen: 5,
+ outVec: interfaces.GenVector{
+ "f": interfaces.PrefixGenVector{10: 5},
+ "fo": interfaces.PrefixGenVector{10: 5, 12: 20, 13: 10},
+ "foob": interfaces.PrefixGenVector{10: 5, 12: 30, 13: 20},
+ "foobarxyz": interfaces.PrefixGenVector{10: 5, 12: 40, 13: 30},
+ },
+ genDiff: genRangeVector{
+ 10: &genRange{min: 1, max: 5},
+ 12: &genRange{min: 2, max: 40},
+ 13: &genRange{min: 1, max: 30},
+ },
+ keyPfxs: []string{"baz", "wombat", "f", "foo", "foobar", "", "fo", "foob", "foobarxyz", "fooxyz"},
+ },
+ { // Non-identical sibling prefixes.
+ respVec: interfaces.GenVector{
+ "foo": interfaces.PrefixGenVector{12: 20, 13: 10},
+ "foobarabc": interfaces.PrefixGenVector{12: 40, 13: 30},
+ "foobarxyz": interfaces.PrefixGenVector{12: 30, 13: 20},
+ },
+ initVec: interfaces.GenVector{
+ "foo": interfaces.PrefixGenVector{11: 5, 12: 1},
+ },
+ respGen: 5,
+ outVec: interfaces.GenVector{
+ "foo": interfaces.PrefixGenVector{10: 5, 12: 20, 13: 10},
+ "foobarabc": interfaces.PrefixGenVector{10: 5, 12: 40, 13: 30},
+ "foobarxyz": interfaces.PrefixGenVector{10: 5, 12: 30, 13: 20},
+ },
+ genDiff: genRangeVector{
+ 10: &genRange{min: 1, max: 5},
+ 12: &genRange{min: 2, max: 40},
+ 13: &genRange{min: 1, max: 30},
+ },
+ keyPfxs: []string{"baz", "wombat", "f", "foo", "foobar", "", "foobarabc", "foobarxyz", "foobar123", "fooxyz"},
+ },
+ { // Non-identical prefixes, local and remote updates.
+ respVec: interfaces.GenVector{
+ "barbaz": interfaces.PrefixGenVector{12: 18},
+ "f": interfaces.PrefixGenVector{12: 30, 13: 5},
+ "foobar": interfaces.PrefixGenVector{12: 30, 13: 8},
+ },
+ initVec: interfaces.GenVector{
+ "foo": interfaces.PrefixGenVector{11: 5, 12: 5},
+ "foobar": interfaces.PrefixGenVector{11: 5, 12: 5},
+ "bar": interfaces.PrefixGenVector{10: 5, 11: 5, 12: 5},
+ },
+ respGen: 5,
+ outVec: interfaces.GenVector{
+ "foo": interfaces.PrefixGenVector{10: 5, 12: 30, 13: 5},
+ "foobar": interfaces.PrefixGenVector{10: 5, 12: 30, 13: 8},
+ "bar": interfaces.PrefixGenVector{10: 5},
+ "barbaz": interfaces.PrefixGenVector{10: 5, 12: 18},
+ },
+ genDiff: genRangeVector{
+ 10: &genRange{min: 1, max: 5},
+ 12: &genRange{min: 6, max: 30},
+ 13: &genRange{min: 1, max: 8},
+ },
+ keyPfxs: []string{"baz", "wombat", "f", "foo", "foobar", "bar", "barbaz", ""},
+ },
+ }
+
+ for i, test := range tests {
+ svc := createService(t)
+ s := svc.sync
+ s.id = 10 // Responder.
+
+ wantDiff, wantVec := test.genDiff, test.outVec
+ s.syncState[appDbName(appName, dbName)] = &dbSyncStateInMem{gen: test.respGen, checkptGen: test.respGen, genvec: test.respVec}
+
+ req := interfaces.DeltaReq{AppName: appName, DbName: dbName, InitVec: test.initVec}
+ rSt := newResponderState(nil, nil, s, req, "fakeInitiator")
+
+ rSt.computeDeltaBound(nil)
+ if rSt.errState != nil || !reflect.DeepEqual(rSt.outVec, wantVec) {
+ t.Fatalf("computeDeltaBound failed (I: %v), (R: %v, %v), got %v, want %v err %v", test.initVec, test.respGen, test.respVec, rSt.outVec, wantVec, rSt.errState)
+ }
+ checkEqualDevRanges(t, rSt.diff, wantDiff)
+
+ ////////////////////////////////////////
+ // Test sending deltas.
+
+ // Insert some log records to bootstrap testing below.
+ tRng := rand.New(rand.NewSource(int64(i)))
+ var wantRecs []*localLogRec
+ st := svc.St()
+ tx := st.NewTransaction()
+ objKeyPfxs := test.keyPfxs
+ j := 0
+ for id, r := range wantDiff {
+ pos := uint64(tRng.Intn(50) + 100*j)
+ for k := r.min; k <= r.max; k++ {
+ opfx := objKeyPfxs[tRng.Intn(len(objKeyPfxs))]
+ // Create holes in the log records.
+ if opfx == "" {
+ continue
+ }
+ okey := makeRowKey(fmt.Sprintf("%s~%x", opfx, tRng.Int()))
+ vers := fmt.Sprintf("%x", tRng.Int())
+ rec := &localLogRec{
+ Metadata: interfaces.LogRecMetadata{Id: id, Gen: k, ObjId: okey, CurVers: vers, UpdTime: time.Now().UTC()},
+ Pos: pos + k,
+ }
+ if err := putLogRec(nil, tx, rec); err != nil {
+ t.Fatalf("putLogRec(%d:%d) failed rec %v err %v", id, k, rec, err)
+ }
+ value := fmt.Sprintf("value_%s", okey)
+ if err := watchable.PutAtVersion(nil, tx, []byte(okey), []byte(value), []byte(vers)); err != nil {
+ t.Fatalf("PutAtVersion(%d:%d) failed rec %v value %s: err %v", id, k, rec, value, err)
+ }
+
+ initPfxs := extractAndSortPrefixes(test.initVec)
+ if !filterLogRec(rec, test.initVec, initPfxs) {
+ wantRecs = append(wantRecs, rec)
+ }
+ }
+ j++
+ }
+ if err := tx.Commit(); err != nil {
+ t.Fatalf("cannot commit putting log rec, err %v", err)
+ }
+
+ d := &dummyResponder{}
+ rSt.call = d
+ rSt.st, rSt.errState = rSt.sync.getDbStore(nil, nil, rSt.req.AppName, rSt.req.DbName)
+ if rSt.errState != nil {
+ t.Fatalf("filterAndSendDeltas failed to get store handle for app/db %v %v", rSt.req.AppName, rSt.req.DbName)
+ }
+ err := rSt.filterAndSendDeltas(nil)
+ if err != nil {
+ t.Fatalf("filterAndSendDeltas failed (I: %v), (R: %v, %v) err %v", test.initVec, test.respGen, test.respVec, err)
+ }
+ d.diffLogRecs(t, wantRecs, wantVec)
+
+ destroyService(t, svc)
+ }
+}
+
+//////////////////////////////
+// Helpers
+
+type dummyResponder struct {
+ start, finish int
+ gotRecs []*localLogRec
+ outVec interfaces.GenVector
+}
+
+func (d *dummyResponder) RecvStream() interface {
+ Advance() bool
+ Value() interfaces.DeltaReq
+ Err() error
+} {
+ return d
+}
+
+func (d *dummyResponder) Advance() bool {
+ return false
+}
+
+func (d *dummyResponder) Value() interfaces.DeltaReq {
+ return interfaces.DeltaReq{}
+}
+
+func (d *dummyResponder) Err() error { return nil }
+
+func (d *dummyResponder) SendStream() interface {
+ Send(item interfaces.DeltaResp) error
+} {
+ return d
+}
+
+func (d *dummyResponder) Send(item interfaces.DeltaResp) error {
+ switch v := item.(type) {
+ case interfaces.DeltaRespStart:
+ d.start++
+ case interfaces.DeltaRespFinish:
+ d.finish++
+ case interfaces.DeltaRespRespVec:
+ d.outVec = v.Value
+ case interfaces.DeltaRespRec:
+ d.gotRecs = append(d.gotRecs, &localLogRec{Metadata: v.Value.Metadata})
+ }
+ return nil
+}
+
+func (d *dummyResponder) Security() security.Call {
+ return nil
+}
+
+func (d *dummyResponder) Suffix() string {
+ return ""
+}
+
+func (d *dummyResponder) LocalEndpoint() naming.Endpoint {
+ return nil
+}
+
+func (d *dummyResponder) RemoteEndpoint() naming.Endpoint {
+ return nil
+}
+
+func (d *dummyResponder) GrantedBlessings() security.Blessings {
+ return security.Blessings{}
+}
+
+func (d *dummyResponder) Server() rpc.Server {
+ return nil
+}
+
+func (d *dummyResponder) diffLogRecs(t *testing.T, wantRecs []*localLogRec, wantVec interfaces.GenVector) {
+ if d.start != 1 || d.finish != 1 {
+ t.Fatalf("diffLogRecs incorrect start/finish records (%v, %v)", d.start, d.finish)
+ }
+ if len(d.gotRecs) != len(wantRecs) {
+ t.Fatalf("diffLogRecs failed, gotLen %v, wantLen %v\n", len(d.gotRecs), len(wantRecs))
+ }
+ for i, rec := range d.gotRecs {
+ if !reflect.DeepEqual(rec.Metadata, wantRecs[i].Metadata) {
+ t.Fatalf("diffLogRecs failed, i %v, got %v, want %v\n", i, rec.Metadata, wantRecs[i].Metadata)
+ }
+ }
+ if !reflect.DeepEqual(d.outVec, wantVec) {
+ t.Fatalf("diffLogRecs failed genvector, got %v, want %v\n", d.outVec, wantVec)
+ }
+}
+
+func checkEqualDevRanges(t *testing.T, s1, s2 genRangeVector) {
+ if len(s1) != len(s2) {
+ t.Fatalf("len(s1): %v != len(s2): %v", len(s1), len(s2))
+ }
+ for d1, r1 := range s1 {
+ if r2, ok := s2[d1]; !ok || !reflect.DeepEqual(r1, r2) {
+ t.Fatalf("Dev %v: r1 %v != r2 %v", d1, r1, r2)
+ }
+ }
+}
diff --git a/services/syncbase/vsync/sync.go b/services/syncbase/vsync/sync.go
new file mode 100644
index 0000000..fd65c66
--- /dev/null
+++ b/services/syncbase/vsync/sync.go
@@ -0,0 +1,197 @@
+// Copyright 2015 The Vanadium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package vsync
+
+// Package vsync provides sync functionality for Syncbase. The sync service
+// serves incoming GetDeltas requests and contacts other peers to get deltas
+// from them. When it receives a GetDeltas request, it diffs the incoming
+// generation vector against the local generation vector and sends back the
+// missing generations. When it receives log records in response to a GetDeltas
+// request, it replays those log records to catch up with the sender.
+import (
+ "fmt"
+ "math/rand"
+ "path"
+ "sync"
+ "time"
+
+ "v.io/syncbase/v23/services/syncbase/nosql"
+ blob "v.io/syncbase/x/ref/services/syncbase/localblobstore"
+ fsblob "v.io/syncbase/x/ref/services/syncbase/localblobstore/fs_cablobstore"
+ "v.io/syncbase/x/ref/services/syncbase/server/interfaces"
+ "v.io/syncbase/x/ref/services/syncbase/server/util"
+ "v.io/syncbase/x/ref/services/syncbase/store"
+ "v.io/v23/context"
+ "v.io/v23/rpc"
+ "v.io/v23/verror"
+)
+
+// syncService contains the metadata for the sync module.
+type syncService struct {
+ // TODO(hpucha): see if "v.io/v23/uniqueid" is a better fit. It is 128 bits.
+ id uint64 // globally unique id for this instance of Syncbase.
+ name string // name derived from the global id.
+ sv interfaces.Service
+ server rpc.Server
+
+ // High-level lock to serialize the watcher and the initiator. This lock is
+ // needed to handle the following cases: (a) When the initiator is
+ // cutting a local generation, it waits for the watcher to commit the
+ // latest local changes before including them in the checkpoint. (b)
+ // When the initiator is receiving updates, it reads the latest head of
+ // an object as per the DAG state in order to construct the in-memory
+ // graft map used for conflict detection. At the same time, if a watcher
+ // is processing local updates, it may move the object head. Hence the
+ // initiator and the watcher contend on the DAG head of an object. Instead
+ // of retrying a transaction, which would cause the entire delta to be
+ // replayed, we use pessimistic locking to serialize the initiator and
+ // the watcher.
+ //
+ // TODO(hpucha): This is a temporary hack.
+ thLock sync.RWMutex
+
+ // State to coordinate shutdown of spawned goroutines.
+ pending sync.WaitGroup
+ closed chan struct{}
+
+ // TODO(hpucha): Other global names to advertise to enable Syncbase
+ // discovery. For example, every Syncbase must be reachable under
+ // <mttable>/<syncbaseid> for p2p sync. This is the name advertised
+ // during SyncGroup join. In addition, a Syncbase might also be
+ // accepting "publish SyncGroup requests", and might use a more
+ // human-readable name such as <mttable>/<idp>/<sgserver>. All these
+ // names must be advertised in the appropriate mount tables.
+
+ // In-memory sync membership info aggregated across databases.
+ allMembers *memberView
+ allMembersLock sync.RWMutex
+
+ // In-memory sync state per Database. This state is populated at
+ // startup, and periodically persisted by the initiator.
+ syncState map[string]*dbSyncStateInMem
+ syncStateLock sync.Mutex // lock to protect access to the sync state.
+
+ // In-memory tracking of batches during their construction.
+ // The sync Initiator and Watcher build batches incrementally here
+ // and then persist them in DAG batch entries. The mutex guards
+ // access to the batch set.
+ batchesLock sync.Mutex
+ batches batchSet
+
+ // Metadata related to blob handling.
+ bst blob.BlobStore // local blob store associated with this Syncbase.
+ blobDirectory map[nosql.BlobRef]*blobLocInfo // directory structure containing blob location information.
+ blobDirLock sync.RWMutex // lock to synchronize access to the blob directory information.
+}
+
+// syncDatabase contains the metadata for syncing a database. This struct is
+// used as a receiver to hand off the app-initiated SyncGroup calls that arrive
+// against a nosql.Database to the sync module.
+type syncDatabase struct {
+ db interfaces.Database
+ sync interfaces.SyncServerMethods
+}
+
+var (
+ rng = rand.New(rand.NewSource(time.Now().UTC().UnixNano()))
+ rngLock sync.Mutex
+ _ interfaces.SyncServerMethods = (*syncService)(nil)
+)
+
+// rand64 generates an unsigned 64-bit pseudo-random number.
+func rand64() uint64 {
+ rngLock.Lock()
+ defer rngLock.Unlock()
+ return (uint64(rng.Int63()) << 1) | uint64(rng.Int63n(2))
+}
+
+// randIntn mimics rand.Intn (generates a non-negative pseudo-random number in [0,n)).
+func randIntn(n int) int {
+ rngLock.Lock()
+ defer rngLock.Unlock()
+ return rng.Intn(n)
+}
+
+// New creates a new sync module.
+//
+// Concurrency: sync initializes two goroutines at startup: a "watcher" and an
+// "initiator". The "watcher" thread is responsible for watching the store for
+// changes to its objects. The "initiator" thread is responsible for
+// periodically contacting peers to fetch changes from them. In addition, the
+// sync module responds to incoming RPCs from remote sync modules.
+func New(ctx *context.T, call rpc.ServerCall, sv interfaces.Service, server rpc.Server, rootDir string) (*syncService, error) {
+ s := &syncService{
+ sv: sv,
+ server: server,
+ batches: make(batchSet),
+ }
+
+ data := &syncData{}
+ if err := store.RunInTransaction(sv.St(), func(tx store.Transaction) error {
+ if err := util.Get(ctx, sv.St(), s.stKey(), data); err != nil {
+ if verror.ErrorID(err) != verror.ErrNoExist.ID {
+ return err
+ }
+ // First invocation of vsync.New().
+ // TODO(sadovsky): Maybe move guid generation and storage to serviceData.
+ data.Id = rand64()
+ return util.Put(ctx, tx, s.stKey(), data)
+ }
+ return nil
+ }); err != nil {
+ return nil, err
+ }
+
+ // data.Id is now guaranteed to be initialized.
+ s.id = data.Id
+ s.name = syncbaseIdToName(s.id)
+
+ // Initialize in-memory state for the sync module before starting any threads.
+ if err := s.initSync(ctx); err != nil {
+ return nil, verror.New(verror.ErrInternal, ctx, err)
+ }
+
+ // Open a blob store.
+ var err error
+ s.bst, err = fsblob.Create(ctx, path.Join(rootDir, "blobs"))
+ if err != nil {
+ return nil, err
+ }
+ s.blobDirectory = make(map[nosql.BlobRef]*blobLocInfo)
+
+ // Channel to propagate close event to all threads.
+ s.closed = make(chan struct{})
+ s.pending.Add(2)
+
+ // Start watcher thread to watch for updates to local store.
+ go s.watchStore(ctx)
+
+ // Start initiator thread to periodically get deltas from peers.
+ go s.syncer(ctx)
+
+ return s, nil
+}
+
+// Close cleans up sync state.
+// TODO(hpucha): Hook it up to server shutdown of syncbased.
+func (s *syncService) Close() {
+ s.bst.Close()
+ close(s.closed)
+ s.pending.Wait()
+}
+
+func syncbaseIdToName(id uint64) string {
+ return fmt.Sprintf("%x", id)
+}
+
+func NewSyncDatabase(db interfaces.Database) *syncDatabase {
+ return &syncDatabase{db: db, sync: db.App().Service().Sync()}
+}
+
+func (s *syncService) stKey() string {
+ return util.SyncPrefix
+}
diff --git a/services/syncbase/vsync/sync_state.go b/services/syncbase/vsync/sync_state.go
new file mode 100644
index 0000000..8195559
--- /dev/null
+++ b/services/syncbase/vsync/sync_state.go
@@ -0,0 +1,349 @@
+// Copyright 2015 The Vanadium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package vsync
+
+// New log records are created when objects in the local store are created,
+// updated or deleted. Local log records are also replayed to keep the
+// per-object dags consistent with the local store state. Sync module assigns
+// each log record created within a Database a unique sequence number, called
+// the generation number. Locally on each device, the position of each log
+// record is also recorded relative to other local and remote log records.
+//
+// When a device receives a request to send log records, it first computes the
+// missing generations between itself and the incoming request on a per-prefix
+// basis. It then sends all the log records belonging to the missing generations
+// in the order they occur locally (using the local log position). A device that
+// receives log records over the network replays all the records received from
+// another device in a single batch. Each replayed log record adds a new version
+// to the dag of the object contained in the log record. At the end of replaying
+// all the log records, conflict detection and resolution is carried out for all
+// the objects learned during this iteration. Conflict detection and resolution
+// is carried out after a batch of log records are replayed, instead of
+// incrementally after each record is replayed, to avoid repeating conflict
+// resolution already performed by other devices.
+//
+// The sync module tracks the current generation number and the current local
+// log position for each Database. It also tracks the current
+// generation vector for a Database. Log records are indexed such that they can
+// be selectively retrieved from the store for any missing generation from any
+// device.
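+//
+// As a concrete illustration of the tracked state (shapes taken from the
+// tests in sync_state_test.go), a Database's generation vector maps a synced
+// prefix to a per-device map of the latest known generation:
+//
+//    gv := interfaces.GenVector{
+//        "mocktbl/foo": interfaces.PrefixGenVector{
+//            1: 2, 3: 4, 5: 6, // device id -> generation
+//        },
+//    }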
+
+import (
+ "fmt"
+ "strconv"
+
+ "v.io/syncbase/x/ref/services/syncbase/server/interfaces"
+ "v.io/syncbase/x/ref/services/syncbase/server/util"
+ "v.io/syncbase/x/ref/services/syncbase/store"
+ "v.io/v23/context"
+ "v.io/v23/verror"
+)
+
+// dbSyncStateInMem represents the in-memory sync state of a Database.
+type dbSyncStateInMem struct {
+ gen uint64
+ pos uint64
+
+ checkptGen uint64
+ genvec interfaces.GenVector // Note: Generation vector contains state from remote devices only.
+}
+
+// initSync initializes the sync module during startup. It scans all the
+// databases across all apps to initialize the following:
+// a) the in-memory sync state of each Database, consisting of the current
+// generation number, log position, and generation vector.
+// b) the watcher map of prefixes currently being synced.
+// c) the republishing of names in mount tables for all SyncGroups.
+//
+// TODO(hpucha): This is incomplete. Flesh this out further.
+func (s *syncService) initSync(ctx *context.T) error {
+ s.syncStateLock.Lock()
+ defer s.syncStateLock.Unlock()
+
+ var errFinal error
+ s.syncState = make(map[string]*dbSyncStateInMem)
+
+ s.forEachDatabaseStore(ctx, func(appName, dbName string, st store.Store) bool {
+ // Scan the SyncGroups, skipping those not yet being watched.
+ forEachSyncGroup(st, func(sg *interfaces.SyncGroup) bool {
+ // TODO(rdaoud): only use SyncGroups that have been
+ // marked as "watchable" by the sync watcher thread.
+ // This is to handle the case of a SyncGroup being
+ // created but Syncbase restarting before the watcher
+ // processed the SyncGroupOp entry in the watch queue.
+ // It should not sync that SyncGroup's data after a
+ // restart, but should wait until the watcher processes
+ // the entry, as it would have without a restart.
+ for _, prefix := range sg.Spec.Prefixes {
+ incrWatchPrefix(appName, dbName, prefix)
+ }
+ return false
+ })
+
+ if false {
+ // Fetch the sync state.
+ ds, err := getDbSyncState(ctx, st)
+ if err != nil && verror.ErrorID(err) != verror.ErrNoExist.ID {
+ errFinal = err
+ return false
+ }
+ var scanStart, scanLimit []byte
+ // Figure out what to scan among local log records.
+ if verror.ErrorID(err) == verror.ErrNoExist.ID {
+ scanStart, scanLimit = util.ScanPrefixArgs(logRecsPerDeviceScanPrefix(s.id), "")
+ } else {
+ scanStart, scanLimit = util.ScanPrefixArgs(logRecKey(s.id, ds.Gen), "")
+ }
+ var maxpos uint64
+ var dbName string
+ // Scan local log records to find the most recent one.
+ st.Scan(scanStart, scanLimit)
+ // Scan remote log records using the persisted GenVector.
+ s.syncState[dbName] = &dbSyncStateInMem{pos: maxpos + 1}
+ }
+
+ return false
+ })
+
+ return errFinal
+}
+
+// reserveGenAndPosInDbLog reserves a chunk of generation numbers and log
+// positions in a Database's log. Used when local updates result in log
+// entries.
+func (s *syncService) reserveGenAndPosInDbLog(ctx *context.T, appName, dbName string, count uint64) (uint64, uint64) {
+ return s.reserveGenAndPosInternal(appName, dbName, count, count)
+}
+
+// reservePosInDbLog reserves a chunk of log positions in a Database's log. Used
+// when remote log records are received.
+func (s *syncService) reservePosInDbLog(ctx *context.T, appName, dbName string, count uint64) uint64 {
+ _, pos := s.reserveGenAndPosInternal(appName, dbName, 0, count)
+ return pos
+}
+
+func (s *syncService) reserveGenAndPosInternal(appName, dbName string, genCount, posCount uint64) (uint64, uint64) {
+ s.syncStateLock.Lock()
+ defer s.syncStateLock.Unlock()
+
+ name := appDbName(appName, dbName)
+ ds, ok := s.syncState[name]
+ if !ok {
+ ds = &dbSyncStateInMem{gen: 1}
+ s.syncState[name] = ds
+ }
+
+ gen := ds.gen
+ pos := ds.pos
+
+ ds.gen += genCount
+ ds.pos += posCount
+
+ return gen, pos
+}
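+
+// For example (mirroring TestReserveGenAndPos), the first reservation for a
+// fresh Database returns the starting values and advances the in-memory
+// counters by the requested counts:
+//
+//    gen, pos := s.reserveGenAndPosInternal("mockapp", "mockdb", 5, 10)
+//    // gen == 1, pos == 0; a second identical call returns gen == 6, pos == 10.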
+
+// checkptLocalGen freezes the local generation number for the responder's use.
+func (s *syncService) checkptLocalGen(ctx *context.T, appName, dbName string) error {
+ s.syncStateLock.Lock()
+ defer s.syncStateLock.Unlock()
+
+ name := appDbName(appName, dbName)
+ ds, ok := s.syncState[name]
+ if !ok {
+ return verror.New(verror.ErrInternal, ctx, "db state not found", name)
+ }
+
+ // The frozen generation is the last generation number used, i.e. one
+ // below the next available one to use.
+ ds.checkptGen = ds.gen - 1
+ return nil
+}
+
+// initDbSyncStateInMem initializes the in-memory sync state of the Database if needed.
+func (s *syncService) initDbSyncStateInMem(ctx *context.T, appName, dbName string) {
+ s.syncStateLock.Lock()
+ defer s.syncStateLock.Unlock()
+
+ name := appDbName(appName, dbName)
+ if s.syncState[name] == nil {
+ s.syncState[name] = &dbSyncStateInMem{gen: 1}
+ }
+}
+
+// copyDbSyncStateInMem returns a copy of the current in-memory sync state of the Database.
+func (s *syncService) copyDbSyncStateInMem(ctx *context.T, appName, dbName string) (*dbSyncStateInMem, error) {
+ s.syncStateLock.Lock()
+ defer s.syncStateLock.Unlock()
+
+ name := appDbName(appName, dbName)
+ ds, ok := s.syncState[name]
+ if !ok {
+ return nil, verror.New(verror.ErrInternal, ctx, "db state not found", name)
+ }
+
+ dsCopy := &dbSyncStateInMem{
+ gen: ds.gen,
+ pos: ds.pos,
+ checkptGen: ds.checkptGen,
+ }
+
+ dsCopy.genvec = copyGenVec(ds.genvec)
+
+ return dsCopy, nil
+}
+
+// copyDbGenInfo returns a copy of the current generation information of the Database.
+func (s *syncService) copyDbGenInfo(ctx *context.T, appName, dbName string) (interfaces.GenVector, uint64, error) {
+ s.syncStateLock.Lock()
+ defer s.syncStateLock.Unlock()
+
+ name := appDbName(appName, dbName)
+ ds, ok := s.syncState[name]
+ if !ok {
+ return nil, 0, verror.New(verror.ErrInternal, ctx, "db state not found", name)
+ }
+
+ genvec := copyGenVec(ds.genvec)
+
+ // Add local generation information to the genvec.
+ for _, gv := range genvec {
+ gv[s.id] = ds.checkptGen
+ }
+
+ return genvec, ds.checkptGen, nil
+}
+
+// putDbGenInfoRemote puts the current remote generation information of the Database.
+func (s *syncService) putDbGenInfoRemote(ctx *context.T, appName, dbName string, genvec interfaces.GenVector) error {
+ s.syncStateLock.Lock()
+ defer s.syncStateLock.Unlock()
+
+ name := appDbName(appName, dbName)
+ ds, ok := s.syncState[name]
+ if !ok {
+ return verror.New(verror.ErrInternal, ctx, "db state not found", name)
+ }
+
+ ds.genvec = copyGenVec(genvec)
+
+ return nil
+}
+
+// appDbName combines the app and db names to return a globally unique name for
+// a Database. This relies on the fact that the app name is globally unique and
+// the db name is unique within the scope of the app.
+func appDbName(appName, dbName string) string {
+ return util.JoinKeyParts(appName, dbName)
+}
+
+// splitAppDbName is the inverse of appDbName and returns app and db name from a
+// globally unique name for a Database.
+func splitAppDbName(ctx *context.T, name string) (string, string, error) {
+ parts := util.SplitKeyParts(name)
+ if len(parts) != 2 {
+ return "", "", verror.New(verror.ErrInternal, ctx, "invalid appDbName", name)
+ }
+ return parts[0], parts[1], nil
+}
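+
+// For example, assuming the ":" separator implied by the log record keys used
+// in the tests:
+//
+//    name := appDbName("mockapp", "mockdb")  // "mockapp:mockdb"
+//    app, db, _ := splitAppDbName(ctx, name) // app == "mockapp", db == "mockdb"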
+
+func copyGenVec(in interfaces.GenVector) interfaces.GenVector {
+ genvec := make(interfaces.GenVector)
+ for p, inpgv := range in {
+ pgv := make(interfaces.PrefixGenVector)
+ for id, gen := range inpgv {
+ pgv[id] = gen
+ }
+ genvec[p] = pgv
+ }
+ return genvec
+}
+
+////////////////////////////////////////////////////////////
+// Low-level utility functions to access sync state.
+
+// dbSyncStateKey returns the key used to access the sync state of a Database.
+func dbSyncStateKey() string {
+ return util.JoinKeyParts(util.SyncPrefix, dbssPrefix)
+}
+
+// putDbSyncState persists the sync state object for a given Database.
+func putDbSyncState(ctx *context.T, tx store.Transaction, ds *dbSyncState) error {
+ return util.Put(ctx, tx, dbSyncStateKey(), ds)
+}
+
+// getDbSyncState retrieves the sync state object for a given Database.
+func getDbSyncState(ctx *context.T, st store.StoreReader) (*dbSyncState, error) {
+ var ds dbSyncState
+ if err := util.Get(ctx, st, dbSyncStateKey(), &ds); err != nil {
+ return nil, err
+ }
+ return &ds, nil
+}
+
+////////////////////////////////////////////////////////////
+// Low-level utility functions to access log records.
+
+// logRecsPerDeviceScanPrefix returns the prefix used to scan log records for a particular device.
+func logRecsPerDeviceScanPrefix(id uint64) string {
+ // Encode the device id in decimal, matching logRecKey, so that scans with
+ // this prefix match the stored log record keys.
+ return util.JoinKeyParts(util.SyncPrefix, logPrefix, fmt.Sprintf("%d", id))
+}
+
+// logRecKey returns the key used to access a specific log record.
+func logRecKey(id, gen uint64) string {
+ return util.JoinKeyParts(util.SyncPrefix, logPrefix, fmt.Sprintf("%d", id), fmt.Sprintf("%016x", gen))
+}
+
+// splitLogRecKey is the inverse of logRecKey and returns device id and generation number.
+func splitLogRecKey(ctx *context.T, key string) (uint64, uint64, error) {
+ parts := util.SplitKeyParts(key)
+ verr := verror.New(verror.ErrInternal, ctx, "invalid logreckey", key)
+ if len(parts) != 4 {
+ return 0, 0, verr
+ }
+ if parts[0] != util.SyncPrefix || parts[1] != logPrefix {
+ return 0, 0, verr
+ }
+ id, err := strconv.ParseUint(parts[2], 10, 64)
+ if err != nil {
+ return 0, 0, verr
+ }
+ gen, err := strconv.ParseUint(parts[3], 16, 64)
+ if err != nil {
+ return 0, 0, verr
+ }
+ return id, gen, nil
+}
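+
+// For example (matching the round-trip cases in TestLogRecKeyUtils, and
+// assuming the "$sync" and "log" prefix literals used in that test's invalid
+// key fixtures), the device id is encoded in decimal and the generation as
+// 16 hex digits:
+//
+//    k := logRecKey(10, 20)               // "$sync:log:10:0000000000000014"
+//    id, gen, _ := splitLogRecKey(nil, k) // id == 10, gen == 20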
+
+// hasLogRec returns true if the log record for (devid, gen) exists.
+func hasLogRec(st store.StoreReader, id, gen uint64) (bool, error) {
+ // TODO(hpucha): optimize to avoid the unneeded fetch/decode of the data.
+ var rec localLogRec
+ if err := util.Get(nil, st, logRecKey(id, gen), &rec); err != nil {
+ if verror.ErrorID(err) == verror.ErrNoExist.ID {
+ err = nil
+ }
+ return false, err
+ }
+ return true, nil
+}
+
+// putLogRec stores the log record.
+func putLogRec(ctx *context.T, tx store.Transaction, rec *localLogRec) error {
+ return util.Put(ctx, tx, logRecKey(rec.Metadata.Id, rec.Metadata.Gen), rec)
+}
+
+// getLogRec retrieves the log record for a given (devid, gen).
+func getLogRec(ctx *context.T, st store.StoreReader, id, gen uint64) (*localLogRec, error) {
+ var rec localLogRec
+ if err := util.Get(ctx, st, logRecKey(id, gen), &rec); err != nil {
+ return nil, err
+ }
+ return &rec, nil
+}
+
+// delLogRec deletes the log record for a given (devid, gen).
+func delLogRec(ctx *context.T, tx store.Transaction, id, gen uint64) error {
+ return util.Delete(ctx, tx, logRecKey(id, gen))
+}
diff --git a/services/syncbase/vsync/sync_state_test.go b/services/syncbase/vsync/sync_state_test.go
new file mode 100644
index 0000000..7e9302b
--- /dev/null
+++ b/services/syncbase/vsync/sync_state_test.go
@@ -0,0 +1,174 @@
+// Copyright 2015 The Vanadium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package vsync
+
+import (
+ "reflect"
+ "testing"
+ "time"
+
+ "v.io/syncbase/x/ref/services/syncbase/server/interfaces"
+ "v.io/syncbase/x/ref/services/syncbase/store"
+ _ "v.io/x/ref/runtime/factories/generic"
+)
+
+// Tests for sync state management and storage in Syncbase.
+
+// TestReserveGenAndPos tests reserving generation numbers and log positions in a
+// Database log.
+func TestReserveGenAndPos(t *testing.T) {
+ svc := createService(t)
+ defer destroyService(t, svc)
+ s := svc.sync
+
+ var wantGen, wantPos uint64 = 1, 0
+ for i := 0; i < 5; i++ {
+ gotGen, gotPos := s.reserveGenAndPosInternal("mockapp", "mockdb", 5, 10)
+ if gotGen != wantGen || gotPos != wantPos {
+ t.Fatalf("reserveGenAndPosInternal failed, gotGen %v wantGen %v, gotPos %v wantPos %v", gotGen, wantGen, gotPos, wantPos)
+ }
+ wantGen += 5
+ wantPos += 10
+
+ name := appDbName("mockapp", "mockdb")
+ if s.syncState[name].gen != wantGen || s.syncState[name].pos != wantPos {
+ t.Fatalf("reserveGenAndPosInternal failed, gotGen %v wantGen %v, gotPos %v wantPos %v", s.syncState[name].gen, wantGen, s.syncState[name].pos, wantPos)
+ }
+ }
+}
+
+// TestPutGetDbSyncState tests setting and getting sync metadata.
+func TestPutGetDbSyncState(t *testing.T) {
+ svc := createService(t)
+ defer destroyService(t, svc)
+ st := svc.St()
+
+ checkDbSyncState(t, st, false, nil)
+
+ gv := interfaces.GenVector{
+ "mocktbl/foo": interfaces.PrefixGenVector{
+ 1: 2, 3: 4, 5: 6,
+ },
+ }
+
+ tx := st.NewTransaction()
+ wantSt := &dbSyncState{Gen: 40, GenVec: gv}
+ if err := putDbSyncState(nil, tx, wantSt); err != nil {
+ t.Fatalf("putDbSyncState failed, err %v", err)
+ }
+ if err := tx.Commit(); err != nil {
+ t.Fatalf("cannot commit putting db sync state, err %v", err)
+ }
+
+ checkDbSyncState(t, st, true, wantSt)
+}
+
+// TestPutGetDelLogRec tests setting, getting, and deleting a log record.
+func TestPutGetDelLogRec(t *testing.T) {
+ svc := createService(t)
+ defer destroyService(t, svc)
+ st := svc.St()
+
+ var id uint64 = 10
+ var gen uint64 = 100
+
+ checkLogRec(t, st, id, gen, false, nil)
+
+ tx := st.NewTransaction()
+ wantRec := &localLogRec{
+ Metadata: interfaces.LogRecMetadata{
+ Id: id,
+ Gen: gen,
+ RecType: interfaces.NodeRec,
+ ObjId: "foo",
+ CurVers: "3",
+ Parents: []string{"1", "2"},
+ UpdTime: time.Now().UTC(),
+ Delete: false,
+ BatchId: 10000,
+ BatchCount: 1,
+ },
+ Pos: 10,
+ }
+ if err := putLogRec(nil, tx, wantRec); err != nil {
+ t.Fatalf("putLogRec(%d:%d) failed err %v", id, gen, err)
+ }
+ if err := tx.Commit(); err != nil {
+ t.Fatalf("cannot commit putting log rec, err %v", err)
+ }
+
+ checkLogRec(t, st, id, gen, true, wantRec)
+
+ tx = st.NewTransaction()
+ if err := delLogRec(nil, tx, id, gen); err != nil {
+ t.Fatalf("delLogRec(%d:%d) failed err %v", id, gen, err)
+ }
+ if err := tx.Commit(); err != nil {
+ t.Fatalf("cannot commit deleting log rec, err %v", err)
+ }
+
+ checkLogRec(t, st, id, gen, false, nil)
+}
+
+func TestLogRecKeyUtils(t *testing.T) {
+ invalid := []string{"$sync:aa:bb", "log:aa:bb", "$sync:log:aa:xx", "$sync:log:x:bb"}
+
+ for _, k := range invalid {
+ if _, _, err := splitLogRecKey(nil, k); err == nil {
+ t.Fatalf("splitting log rec key didn't fail %q", k)
+ }
+ }
+
+ valid := []struct {
+ id uint64
+ gen uint64
+ }{
+ {10, 20},
+ {190, 540},
+ {9999, 999999},
+ }
+
+ for _, v := range valid {
+ gotId, gotGen, err := splitLogRecKey(nil, logRecKey(v.id, v.gen))
+ if gotId != v.id || gotGen != v.gen || err != nil {
+ t.Fatalf("failed key conversion id got %v want %v, gen got %v want %v, err %v", gotId, v.id, gotGen, v.gen, err)
+ }
+ }
+}
+
+//////////////////////////////
+// Helpers
+
+// TODO(hpucha): Look into using v.io/syncbase/v23/syncbase/testutil.Fatalf()
+// for getting the stack trace. Right now cannot import the package due to a
+// cycle.
+
+func checkDbSyncState(t *testing.T, st store.StoreReader, exists bool, wantSt *dbSyncState) {
+ gotSt, err := getDbSyncState(nil, st)
+
+ if (!exists && err == nil) || (exists && err != nil) {
+ t.Fatalf("getDbSyncState failed, exists %v err %v", exists, err)
+ }
+
+ if !reflect.DeepEqual(gotSt, wantSt) {
+ t.Fatalf("getDbSyncState() failed, got %v, want %v", gotSt, wantSt)
+ }
+}
+
+func checkLogRec(t *testing.T, st store.StoreReader, id, gen uint64, exists bool, wantRec *localLogRec) {
+ gotRec, err := getLogRec(nil, st, id, gen)
+
+ if (!exists && err == nil) || (exists && err != nil) {
+ t.Fatalf("getLogRec(%d:%d) failed, exists %v err %v", id, gen, exists, err)
+ }
+
+ if !reflect.DeepEqual(gotRec, wantRec) {
+ t.Fatalf("getLogRec(%d:%d) failed, got %v, want %v", id, gen, gotRec, wantRec)
+ }
+
+ if ok, err := hasLogRec(st, id, gen); err != nil || ok != exists {
+ t.Fatalf("hasLogRec(%d:%d) failed, want %v", id, gen, exists)
+ }
+}
diff --git a/services/syncbase/vsync/syncgroup.go b/services/syncbase/vsync/syncgroup.go
new file mode 100644
index 0000000..905a319
--- /dev/null
+++ b/services/syncbase/vsync/syncgroup.go
@@ -0,0 +1,895 @@
+// Copyright 2015 The Vanadium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package vsync
+
+// SyncGroup management and storage in Syncbase. Handles the lifecycle
+// of SyncGroups (create, join, leave, etc.) and their persistence as
+// sync metadata in the application databases. Provides helper functions
+// to the higher levels of sync (Initiator, Watcher) to get membership
+// information and map key/value changes to their matching SyncGroups.
+
+// TODO(hpucha): Add high level commentary about the logic behind create/join
+// etc.
+
+import (
+ "fmt"
+ "strings"
+ "time"
+
+ wire "v.io/syncbase/v23/services/syncbase/nosql"
+ "v.io/syncbase/x/ref/services/syncbase/server/interfaces"
+ "v.io/syncbase/x/ref/services/syncbase/server/util"
+ "v.io/syncbase/x/ref/services/syncbase/server/watchable"
+ "v.io/syncbase/x/ref/services/syncbase/store"
+ "v.io/v23/context"
+ "v.io/v23/naming"
+ "v.io/v23/rpc"
+ "v.io/v23/security"
+ "v.io/v23/security/access"
+ "v.io/v23/verror"
+ "v.io/v23/vom"
+ "v.io/x/lib/vlog"
+)
+
+var (
+ // memberViewTTL is the shelf-life of the aggregate view of SyncGroup members.
+ memberViewTTL = 2 * time.Second
+)
+
+////////////////////////////////////////////////////////////
+// SyncGroup management internal to Syncbase.
+
+// memberView holds an aggregated view of all SyncGroup members across
+// databases. The view is not coherent: it is refreshed according to a
+// configured TTL rather than immediately when SyncGroup membership is updated
+// in the various databases. It is needed by the sync Initiator, which must select
+// a peer to contact from a global view of all SyncGroup members gathered from
+// all databases. This is why a slightly stale view is acceptable.
+// The members are identified by their Vanadium names (map keys).
+type memberView struct {
+ expiration time.Time
+ members map[string]*memberInfo
+}
+
+// memberInfo holds the member metadata for each SyncGroup this member belongs
+// to within each App/Database (i.e. global database name). It's a mapping of
+// global DB names to sets of SyncGroup member information.
+type memberInfo struct {
+ db2sg map[string]sgMemberInfo
+}
+
+// sgMemberInfo maps SyncGroups to their member metadata.
+type sgMemberInfo map[interfaces.GroupId]wire.SyncGroupMemberInfo
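+
+// As an illustration of the nesting populated by refreshMembersIfExpired
+// below, a joiner's metadata for one SyncGroup in one database is addressed
+// as:
+//
+//    view.members[joiner].db2sg[appDbName(app, db)][sg.Id] = sg.Joiners[joiner]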
+
+// newSyncGroupVersion generates a random SyncGroup version ("etag").
+func newSyncGroupVersion() string {
+ return fmt.Sprintf("%x", rand64())
+}
+
+// newSyncGroupId generates a random SyncGroup ID.
+func newSyncGroupId() interfaces.GroupId {
+ id := interfaces.NoGroupId
+ for id == interfaces.NoGroupId {
+ id = interfaces.GroupId(rand64())
+ }
+ return id
+}
+
+// verifySyncGroup checks that a SyncGroup struct is well-formed.
+func verifySyncGroup(ctx *context.T, sg *interfaces.SyncGroup) error {
+ if sg == nil {
+ return verror.New(verror.ErrBadArg, ctx, "group information not specified")
+ }
+ if sg.Name == "" {
+ return verror.New(verror.ErrBadArg, ctx, "group name not specified")
+ }
+ if sg.AppName == "" {
+ return verror.New(verror.ErrBadArg, ctx, "app name not specified")
+ }
+ if sg.DbName == "" {
+ return verror.New(verror.ErrBadArg, ctx, "db name not specified")
+ }
+ if sg.Creator == "" {
+ return verror.New(verror.ErrBadArg, ctx, "creator id not specified")
+ }
+ if sg.Id == interfaces.NoGroupId {
+ return verror.New(verror.ErrBadArg, ctx, "group id not specified")
+ }
+ if sg.SpecVersion == "" {
+ return verror.New(verror.ErrBadArg, ctx, "group version not specified")
+ }
+ if len(sg.Joiners) == 0 {
+ return verror.New(verror.ErrBadArg, ctx, "group has no joiners")
+ }
+ if len(sg.Spec.Prefixes) == 0 {
+ return verror.New(verror.ErrBadArg, ctx, "group has no prefixes specified")
+ }
+ return nil
+}
+
+// addSyncGroup adds a new SyncGroup given its information.
+func addSyncGroup(ctx *context.T, tx store.Transaction, sg *interfaces.SyncGroup) error {
+ // Verify SyncGroup before storing it since it may have been received
+ // from a remote peer.
+ if err := verifySyncGroup(ctx, sg); err != nil {
+ return err
+ }
+
+ if ok, err := hasSGDataEntry(tx, sg.Id); err != nil {
+ return err
+ } else if ok {
+ return verror.New(verror.ErrExist, ctx, "group id already exists")
+ }
+ if ok, err := hasSGNameEntry(tx, sg.Name); err != nil {
+ return err
+ } else if ok {
+ return verror.New(verror.ErrExist, ctx, "group name already exists")
+ }
+
+ // Add the group name and data entries.
+ if err := setSGNameEntry(ctx, tx, sg.Name, sg.Id); err != nil {
+ return err
+ }
+ if err := setSGDataEntry(ctx, tx, sg.Id, sg); err != nil {
+ return err
+ }
+
+ return nil
+}
+
+// getSyncGroupId retrieves the SyncGroup ID given its name.
+func getSyncGroupId(ctx *context.T, st store.StoreReader, name string) (interfaces.GroupId, error) {
+ return getSGNameEntry(ctx, st, name)
+}
+
+// getSyncGroupName retrieves the SyncGroup name given its ID.
+func getSyncGroupName(ctx *context.T, st store.StoreReader, gid interfaces.GroupId) (string, error) {
+ sg, err := getSyncGroupById(ctx, st, gid)
+ if err != nil {
+ return "", err
+ }
+ return sg.Name, nil
+}
+
+// getSyncGroupById retrieves the SyncGroup given its ID.
+func getSyncGroupById(ctx *context.T, st store.StoreReader, gid interfaces.GroupId) (*interfaces.SyncGroup, error) {
+ return getSGDataEntry(ctx, st, gid)
+}
+
+// getSyncGroupByName retrieves the SyncGroup given its name.
+func getSyncGroupByName(ctx *context.T, st store.StoreReader, name string) (*interfaces.SyncGroup, error) {
+ gid, err := getSyncGroupId(ctx, st, name)
+ if err != nil {
+ return nil, err
+ }
+ return getSyncGroupById(ctx, st, gid)
+}
+
+// delSyncGroupById deletes the SyncGroup given its ID.
+func delSyncGroupById(ctx *context.T, tx store.Transaction, gid interfaces.GroupId) error {
+ sg, err := getSyncGroupById(ctx, tx, gid)
+ if err != nil {
+ return err
+ }
+ if err = delSGNameEntry(ctx, tx, sg.Name); err != nil {
+ return err
+ }
+ return delSGDataEntry(ctx, tx, sg.Id)
+}
+
+// delSyncGroupByName deletes the SyncGroup given its name.
+func delSyncGroupByName(ctx *context.T, tx store.Transaction, name string) error {
+ gid, err := getSyncGroupId(ctx, tx, name)
+ if err != nil {
+ return err
+ }
+ return delSyncGroupById(ctx, tx, gid)
+}
+
+// refreshMembersIfExpired updates the aggregate view of SyncGroup members
+// across databases if the view has expired.
+// TODO(rdaoud): track dirty apps/dbs since the last refresh and incrementally
+// update the membership view for them instead of always scanning all of them.
+func (s *syncService) refreshMembersIfExpired(ctx *context.T) {
+ view := s.allMembers
+ if view == nil {
+ // The zero value of time.Time is before "now" and is treated as expired
+ // below.
+ view = &memberView{expiration: time.Time{}, members: nil}
+ s.allMembers = view
+ }
+
+ if time.Now().Before(view.expiration) {
+ return
+ }
+
+ // Create a new aggregate view of SyncGroup members across all app databases.
+ newMembers := make(map[string]*memberInfo)
+
+ s.forEachDatabaseStore(ctx, func(appName, dbName string, st store.Store) bool {
+ // For each database, fetch its SyncGroup data entries by scanning their
+ // prefix range. Use a database snapshot for the scan.
+ sn := st.NewSnapshot()
+ defer sn.Abort()
+ name := appDbName(appName, dbName)
+
+ forEachSyncGroup(sn, func(sg *interfaces.SyncGroup) bool {
+ // Add all members of this SyncGroup to the membership view.
+ // A member's info is different across SyncGroups, so gather all of them.
+ for member, info := range sg.Joiners {
+ if _, ok := newMembers[member]; !ok {
+ newMembers[member] = &memberInfo{db2sg: make(map[string]sgMemberInfo)}
+ }
+ if _, ok := newMembers[member].db2sg[name]; !ok {
+ newMembers[member].db2sg[name] = make(sgMemberInfo)
+ }
+ newMembers[member].db2sg[name][sg.Id] = info
+ }
+ return false
+ })
+ return false
+ })
+
+ view.members = newMembers
+ view.expiration = time.Now().Add(memberViewTTL)
+}
+
+// forEachSyncGroup iterates over all SyncGroups in the Database and invokes
+// the callback function on each one. The callback returns a "done" flag to
+// make forEachSyncGroup() stop the iteration early; otherwise the function
+// loops across all SyncGroups in the Database.
+func forEachSyncGroup(st store.StoreReader, callback func(*interfaces.SyncGroup) bool) {
+ scanStart, scanLimit := util.ScanPrefixArgs(sgDataKeyScanPrefix, "")
+ stream := st.Scan(scanStart, scanLimit)
+ for stream.Advance() {
+ var sg interfaces.SyncGroup
+ if vom.Decode(stream.Value(nil), &sg) != nil {
+ vlog.Errorf("sync: forEachSyncGroup: invalid SyncGroup value for key %s", string(stream.Key(nil)))
+ continue
+ }
+
+ if callback(&sg) {
+ break // done, early exit
+ }
+ }
+
+ if err := stream.Err(); err != nil {
+ vlog.Errorf("sync: forEachSyncGroup: scan stream error: %v", err)
+ }
+}
+
+// getMembers returns all SyncGroup members and the count of SyncGroups each one
+// joined.
+func (s *syncService) getMembers(ctx *context.T) map[string]uint32 {
+ s.allMembersLock.Lock()
+ defer s.allMembersLock.Unlock()
+
+ s.refreshMembersIfExpired(ctx)
+
+ members := make(map[string]uint32)
+ for member, info := range s.allMembers.members {
+ count := 0
+ for _, sgmi := range info.db2sg {
+ count += len(sgmi)
+ }
+ members[member] = uint32(count)
+ }
+
+ return members
+}
+
+// copyMemberInfo returns a copy of the info for the requested peer.
+func (s *syncService) copyMemberInfo(ctx *context.T, member string) *memberInfo {
+ s.allMembersLock.RLock()
+ defer s.allMembersLock.RUnlock()
+
+ info, ok := s.allMembers.members[member]
+ if !ok {
+ return nil
+ }
+
+ // Make a copy.
+ infoCopy := &memberInfo{make(map[string]sgMemberInfo)}
+ for gdbName, sgInfo := range info.db2sg {
+ infoCopy.db2sg[gdbName] = make(sgMemberInfo)
+ for gid, mi := range sgInfo {
+ infoCopy.db2sg[gdbName][gid] = mi
+ }
+ }
+
+ return infoCopy
+}
+
+// Low-level utility functions to access DB entries without tracking their
+// relationships.
+// Use the functions above to manipulate SyncGroups.
+
+var (
+ // sgDataKeyScanPrefix is the prefix used to scan SyncGroup data entries.
+ sgDataKeyScanPrefix = util.JoinKeyParts(util.SyncPrefix, sgPrefix, "d")
+
+ // sgNameKeyScanPrefix is the prefix used to scan SyncGroup name entries.
+ sgNameKeyScanPrefix = util.JoinKeyParts(util.SyncPrefix, sgPrefix, "n")
+)
+
+// sgDataKey returns the key used to access the SyncGroup data entry.
+func sgDataKey(gid interfaces.GroupId) string {
+ return util.JoinKeyParts(util.SyncPrefix, sgPrefix, "d", fmt.Sprintf("%d", gid))
+}
+
+// sgNameKey returns the key used to access the SyncGroup name entry.
+func sgNameKey(name string) string {
+ return util.JoinKeyParts(util.SyncPrefix, sgPrefix, "n", name)
+}
+
+// splitSgNameKey is the inverse of sgNameKey and returns the SyncGroup name.
+func splitSgNameKey(ctx *context.T, key string) (string, error) {
+ prefix := util.JoinKeyParts(util.SyncPrefix, sgPrefix, "n", "")
+
+ // Note that the actual SyncGroup name may contain ":" as a separator.
+ if !strings.HasPrefix(key, prefix) {
+ return "", verror.New(verror.ErrInternal, ctx, "invalid sgNamekey", key)
+ }
+ return strings.TrimPrefix(key, prefix), nil
+}
+
+// hasSGDataEntry returns true if the SyncGroup data entry exists.
+func hasSGDataEntry(sntx store.SnapshotOrTransaction, gid interfaces.GroupId) (bool, error) {
+ // TODO(rdaoud): optimize to avoid the unneeded fetch/decode of the data.
+ var sg interfaces.SyncGroup
+ if err := util.Get(nil, sntx, sgDataKey(gid), &sg); err != nil {
+ if verror.ErrorID(err) == verror.ErrNoExist.ID {
+ err = nil
+ }
+ return false, err
+ }
+ return true, nil
+}
+
+// hasSGNameEntry returns true if the SyncGroup name entry exists.
+func hasSGNameEntry(sntx store.SnapshotOrTransaction, name string) (bool, error) {
+ // TODO(rdaoud): optimize to avoid the unneeded fetch/decode of the data.
+ var gid interfaces.GroupId
+ if err := util.Get(nil, sntx, sgNameKey(name), &gid); err != nil {
+ if verror.ErrorID(err) == verror.ErrNoExist.ID {
+ err = nil
+ }
+ return false, err
+ }
+ return true, nil
+}
+
+// setSGDataEntry stores the SyncGroup data entry.
+func setSGDataEntry(ctx *context.T, tx store.Transaction, gid interfaces.GroupId, sg *interfaces.SyncGroup) error {
+ return util.Put(ctx, tx, sgDataKey(gid), sg)
+}
+
+// setSGNameEntry stores the SyncGroup name entry.
+func setSGNameEntry(ctx *context.T, tx store.Transaction, name string, gid interfaces.GroupId) error {
+ return util.Put(ctx, tx, sgNameKey(name), gid)
+}
+
+// getSGDataEntry retrieves the SyncGroup data for a given group ID.
+func getSGDataEntry(ctx *context.T, st store.StoreReader, gid interfaces.GroupId) (*interfaces.SyncGroup, error) {
+ var sg interfaces.SyncGroup
+ if err := util.Get(ctx, st, sgDataKey(gid), &sg); err != nil {
+ return nil, err
+ }
+ return &sg, nil
+}
+
+// getSGNameEntry retrieves the SyncGroup name to ID mapping.
+func getSGNameEntry(ctx *context.T, st store.StoreReader, name string) (interfaces.GroupId, error) {
+ var gid interfaces.GroupId
+ if err := util.Get(ctx, st, sgNameKey(name), &gid); err != nil {
+ return gid, err
+ }
+ return gid, nil
+}
+
+// delSGDataEntry deletes the SyncGroup data entry.
+func delSGDataEntry(ctx *context.T, tx store.Transaction, gid interfaces.GroupId) error {
+ return util.Delete(ctx, tx, sgDataKey(gid))
+}
+
+// delSGNameEntry deletes the SyncGroup name to ID mapping.
+func delSGNameEntry(ctx *context.T, tx store.Transaction, name string) error {
+ return util.Delete(ctx, tx, sgNameKey(name))
+}
+
+////////////////////////////////////////////////////////////
+// SyncGroup methods between Client and Syncbase.
+
+// TODO(hpucha): Pass blessings along.
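+// CreateSyncGroup creates the SyncGroup locally in a single transaction
+// (checking Database permissions, storing the SyncGroup with self as the only
+// joiner, and bootstrapping its prefixes for the sync watcher), then attempts
+// to publish it at the chosen server and in the mount tables.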
+func (sd *syncDatabase) CreateSyncGroup(ctx *context.T, call rpc.ServerCall, sgName string, spec wire.SyncGroupSpec, myInfo wire.SyncGroupMemberInfo) error {
+ vlog.VI(2).Infof("sync: CreateSyncGroup: begin: %s", sgName)
+ defer vlog.VI(2).Infof("sync: CreateSyncGroup: end: %s", sgName)
+
+ err := store.RunInTransaction(sd.db.St(), func(tx store.Transaction) error {
+ // Check permissions on Database.
+ if err := sd.db.CheckPermsInternal(ctx, call, tx); err != nil {
+ return err
+ }
+
+ // TODO(hpucha): Check prefix ACLs on all SG prefixes.
+ // This may need another method on util.Database interface.
+
+ // TODO(hpucha): Do some SG ACL checking. Check creator
+ // has Admin privilege.
+
+ // Get this Syncbase's sync module handle.
+ ss := sd.sync.(*syncService)
+
+ // Instantiate sg. Add self as joiner.
+ sg := &interfaces.SyncGroup{
+ Id: newSyncGroupId(),
+ Name: sgName,
+ SpecVersion: newSyncGroupVersion(),
+ Spec: spec,
+ Creator: ss.name,
+ AppName: sd.db.App().Name(),
+ DbName: sd.db.Name(),
+ Status: interfaces.SyncGroupStatusPublishPending,
+ Joiners: map[string]wire.SyncGroupMemberInfo{ss.name: myInfo},
+ }
+
+ if err := addSyncGroup(ctx, tx, sg); err != nil {
+ return err
+ }
+
+ // TODO(hpucha): Bootstrap DAG/Genvector etc for syncing the SG metadata.
+
+ // Take a snapshot of the data to bootstrap the SyncGroup.
+ return sd.bootstrapSyncGroup(ctx, tx, spec.Prefixes)
+ })
+
+ if err != nil {
+ return err
+ }
+
+ // Local SG create succeeded. Publish the SG at the chosen server.
+ sd.publishSyncGroup(ctx, call, sgName)
+
+ // Publish at the chosen mount table and in the neighborhood.
+ sd.publishInMountTables(ctx, call, spec)
+
+ return nil
+}
+
+// TODO(hpucha): Pass blessings along.
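+// JoinSyncGroup first checks whether the SyncGroup is already known locally,
+// in which case the join is idempotent and the existing spec is returned.
+// Otherwise it contacts a SyncGroup Admin to join, verifies that the
+// SyncGroup belongs to this app/database, stores the SyncGroup locally, and
+// bootstraps its prefixes for the sync watcher.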
+func (sd *syncDatabase) JoinSyncGroup(ctx *context.T, call rpc.ServerCall, sgName string, myInfo wire.SyncGroupMemberInfo) (wire.SyncGroupSpec, error) {
+ vlog.VI(2).Infof("sync: JoinSyncGroup: begin: %s", sgName)
+ defer vlog.VI(2).Infof("sync: JoinSyncGroup: end: %s", sgName)
+
+ var sgErr error
+ var sg *interfaces.SyncGroup
+ nullSpec := wire.SyncGroupSpec{}
+
+ err := store.RunInTransaction(sd.db.St(), func(tx store.Transaction) error {
+ // Check permissions on Database.
+ if err := sd.db.CheckPermsInternal(ctx, call, tx); err != nil {
+ return err
+ }
+
+ // Check if SyncGroup already exists.
+ sg, sgErr = getSyncGroupByName(ctx, tx, sgName)
+ if sgErr != nil {
+ return sgErr
+ }
+
+ // SyncGroup already exists. Possibilities include created
+ // locally, already joined locally or published at the device as
+ // a result of SyncGroup creation on a different device.
+ //
+ // TODO(hpucha): Handle the above cases. If the SG was published
+ // locally, but not joined, we need to bootstrap the DAG and
+ // watcher. If multiple joins are done locally, we may want to
+ // ref count the SG state and track the leaves accordingly. So
+ // we may need to add some local state for each SyncGroup.
+
+ // Check SG ACL.
+ return authorize(ctx, call.Security(), sg)
+ })
+
+ // The presented blessing is allowed to make this Syncbase instance join
+ // the specified SyncGroup, but this Syncbase instance has in fact
+ // already joined the SyncGroup. Join is idempotent, so we simply return
+ // the spec to indicate success.
+ if err == nil {
+ return sg.Spec, nil
+ }
+
+ // Join is not allowed (possibilities include Database permissions check
+ // failed, SG ACL check failed or error during fetching SG information).
+ if verror.ErrorID(sgErr) != verror.ErrNoExist.ID {
+ return nullSpec, err
+ }
+
+ // Brand new join.
+
+ // Get this Syncbase's sync module handle.
+ ss := sd.sync.(*syncService)
+
+ // Contact a SyncGroup Admin to join the SyncGroup.
+ sg = &interfaces.SyncGroup{}
+ *sg, err = sd.joinSyncGroupAtAdmin(ctx, call, sgName, ss.name, myInfo)
+ if err != nil {
+ return nullSpec, err
+ }
+
+ // Verify that the app/db combination is valid for this SyncGroup.
+ if sg.AppName != sd.db.App().Name() || sg.DbName != sd.db.Name() {
+ return nullSpec, verror.New(verror.ErrBadArg, ctx, "bad app/db with syncgroup")
+ }
+
+ err = store.RunInTransaction(sd.db.St(), func(tx store.Transaction) error {
+
+ // TODO(hpucha): Bootstrap DAG/Genvector etc for syncing the SG metadata.
+
+ // TODO(hpucha): Get SG Deltas from Admin device.
+
+ if err := addSyncGroup(ctx, tx, sg); err != nil {
+ return err
+ }
+
+ // Take a snapshot of the data to bootstrap the SyncGroup.
+ return sd.bootstrapSyncGroup(ctx, tx, sg.Spec.Prefixes)
+ })
+
+ if err != nil {
+ return nullSpec, err
+ }
+
+ // Publish at the chosen mount table and in the neighborhood.
+ sd.publishInMountTables(ctx, call, sg.Spec)
+
+ return sg.Spec, nil
+}
+
+func (sd *syncDatabase) GetSyncGroupNames(ctx *context.T, call rpc.ServerCall) ([]string, error) {
+ var sgNames []string
+
+ vlog.VI(2).Infof("sync: GetSyncGroupNames: begin")
+ defer vlog.VI(2).Infof("sync: GetSyncGroupNames: end: %v", sgNames)
+
+ sn := sd.db.St().NewSnapshot()
+ defer sn.Abort()
+
+ // Check permissions on Database.
+ if err := sd.db.CheckPermsInternal(ctx, call, sn); err != nil {
+ return nil, err
+ }
+
+ // Scan all the SyncGroup names found in the Database.
+ scanStart, scanLimit := util.ScanPrefixArgs(sgNameKeyScanPrefix, "")
+ stream := sn.Scan(scanStart, scanLimit)
+ var key []byte
+ for stream.Advance() {
+ sgName, err := splitSgNameKey(ctx, string(stream.Key(key)))
+ if err != nil {
+ return nil, err
+ }
+ sgNames = append(sgNames, sgName)
+ }
+
+ if err := stream.Err(); err != nil {
+ return nil, err
+ }
+
+ return sgNames, nil
+}
+
+func (sd *syncDatabase) GetSyncGroupSpec(ctx *context.T, call rpc.ServerCall, sgName string) (wire.SyncGroupSpec, string, error) {
+ var spec wire.SyncGroupSpec
+
+ vlog.VI(2).Infof("sync: GetSyncGroupSpec: begin %s", sgName)
+ defer vlog.VI(2).Infof("sync: GetSyncGroupSpec: end: %s spec %v", sgName, spec)
+
+ sn := sd.db.St().NewSnapshot()
+ defer sn.Abort()
+
+ // Check permissions on Database.
+ if err := sd.db.CheckPermsInternal(ctx, call, sn); err != nil {
+ return spec, "", err
+ }
+
+ // Get the SyncGroup information.
+ sg, err := getSyncGroupByName(ctx, sn, sgName)
+ if err != nil {
+ return spec, "", err
+ }
+ // TODO(hpucha): Check SyncGroup ACL.
+
+ spec = sg.Spec
+ return spec, sg.SpecVersion, nil
+}
+
+func (sd *syncDatabase) GetSyncGroupMembers(ctx *context.T, call rpc.ServerCall, sgName string) (map[string]wire.SyncGroupMemberInfo, error) {
+ var members map[string]wire.SyncGroupMemberInfo
+
+ vlog.VI(2).Infof("sync: GetSyncGroupMembers: begin %s", sgName)
+ defer vlog.VI(2).Infof("sync: GetSyncGroupMembers: end: %s members %v", sgName, members)
+
+ sn := sd.db.St().NewSnapshot()
+ defer sn.Abort()
+
+ // Check permissions on Database.
+ if err := sd.db.CheckPermsInternal(ctx, call, sn); err != nil {
+ return members, err
+ }
+
+ // Get the SyncGroup information.
+ sg, err := getSyncGroupByName(ctx, sn, sgName)
+ if err != nil {
+ return members, err
+ }
+
+ // TODO(hpucha): Check SyncGroup ACL.
+
+ members = sg.Joiners
+ return members, nil
+}
+
+// TODO(hpucha): Enable syncing syncgroup metadata.
+func (sd *syncDatabase) SetSyncGroupSpec(ctx *context.T, call rpc.ServerCall, sgName string, spec wire.SyncGroupSpec, version string) error {
+ vlog.VI(2).Infof("sync: SetSyncGroupSpec: begin %s %v %s", sgName, spec, version)
+ defer vlog.VI(2).Infof("sync: SetSyncGroupSpec: end: %s", sgName)
+
+ err := store.RunInTransaction(sd.db.St(), func(tx store.Transaction) error {
+ // Check permissions on Database.
+ if err := sd.db.CheckPermsInternal(ctx, call, tx); err != nil {
+ return err
+ }
+
+ sg, err := getSyncGroupByName(ctx, tx, sgName)
+ if err != nil {
+ return err
+ }
+
+ // TODO(hpucha): Check SyncGroup ACL. Perform version checking.
+
+ sg.Spec = spec
+ return setSGDataEntry(ctx, tx, sg.Id, sg)
+ })
+ return err
+}
+
+//////////////////////////////
+// Helper functions
+
+// TODO(hpucha): Call this periodically until we are able to contact the remote peer.
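+// publishSyncGroup publishes the SyncGroup at the remote server named by the
+// SyncGroup name. A nil reply means the publish succeeded; an ErrExist reply
+// means the name is already claimed by a different SyncGroup and the local
+// status is persisted as PublishRejected; any other error is treated as a
+// temporary failure.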
+func (sd *syncDatabase) publishSyncGroup(ctx *context.T, call rpc.ServerCall, sgName string) error {
+ sg, err := getSyncGroupByName(ctx, sd.db.St(), sgName)
+ if err != nil {
+ return err
+ }
+
+ if sg.Status != interfaces.SyncGroupStatusPublishPending {
+ return nil
+ }
+
+ c := interfaces.SyncClient(sgName)
+ err = c.PublishSyncGroup(ctx, *sg)
+
+ // Publish failed temporarily. Retry later.
+ // TODO(hpucha): Is there an RPC error that we can check here?
+ if err != nil && verror.ErrorID(err) != verror.ErrExist.ID {
+ return err
+ }
+
+ // Publish succeeded.
+ if err == nil {
+ // TODO(hpucha): Get SG Deltas from publisher. Obtaining the
+ // new version from the publisher prevents SG conflicts.
+ return err
+ }
+
+ // Publish rejected. Persist that to avoid retrying in the
+ // future and to remember the split universe scenario.
+ err = store.RunInTransaction(sd.db.St(), func(tx store.Transaction) error {
+ // Ensure SG still exists.
+ sg, err := getSyncGroupByName(ctx, tx, sgName)
+ if err != nil {
+ return err
+ }
+
+ sg.Status = interfaces.SyncGroupStatusPublishRejected
+ return setSGDataEntry(ctx, tx, sg.Id, sg)
+ })
+ return err
+}
+
+// bootstrapSyncGroup inserts into the transaction log a SyncGroup operation
+// and a set of Snapshot operations to notify the sync watcher of the SyncGroup
+// prefixes to start accepting, and of the initial state of existing store keys
+// that match these prefixes (both data and permission keys).
+// TODO(rdaoud): this operation scans the managed keys of the database and can
+// be time consuming. Consider doing it asynchronously and letting the server
+// reply to the client earlier. However it must happen within the scope of this
+// transaction (and its snapshot view).
+func (sd *syncDatabase) bootstrapSyncGroup(ctx *context.T, tx store.Transaction, prefixes []string) error {
+ if len(prefixes) == 0 {
+ return verror.New(verror.ErrInternal, ctx, "no prefixes specified")
+ }
+
+ // Get the store options to retrieve the list of managed key prefixes.
+ opts, err := watchable.GetOptions(sd.db.St())
+ if err != nil {
+ return err
+ }
+ if len(opts.ManagedPrefixes) == 0 {
+ return verror.New(verror.ErrInternal, ctx, "store has no managed prefixes")
+ }
+
+ // Notify the watcher of the SyncGroup prefixes to start accepting.
+ if err := watchable.AddSyncGroupOp(ctx, tx, prefixes, false); err != nil {
+ return err
+ }
+
+ // Loop over the store managed key prefixes (e.g. data and permissions).
+ // For each one, scan the ranges of the given SyncGroup prefixes. For
+ // each matching key, insert a snapshot operation in the log. Scanning
+ // is done over the version entries to retrieve the matching keys and
+ // their version numbers (the key values). Remove the version prefix
+ // from the key used in the snapshot operation.
+ // TODO(rdaoud): for SyncGroup prefixes, there should be a separation
+ // between their representation at the client (a list of (db, prefix)
+ // tuples) and internally as strings that match the store's key format.
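+ // For example (assuming ":" separators), a version entry keyed
+ // "<VersionPrefix>:$row:foo1" yields a snapshot operation for the key
+ // "$row:foo1", with the entry's value carrying foo1's current version.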
+ for _, mp := range opts.ManagedPrefixes {
+ for _, p := range prefixes {
+ start, limit := util.ScanPrefixArgs(util.JoinKeyParts(util.VersionPrefix, mp), p)
+ stream := tx.Scan(start, limit)
+ for stream.Advance() {
+ k, v := stream.Key(nil), stream.Value(nil)
+ parts := util.SplitKeyParts(string(k))
+ if len(parts) < 2 {
+ vlog.Fatalf("sync: bootstrapSyncGroup: invalid version key %s", string(k))
+
+ }
+ key := []byte(util.JoinKeyParts(parts[1:]...))
+ if err := watchable.AddSyncSnapshotOp(ctx, tx, key, v); err != nil {
+ return err
+ }
+ }
+ if err := stream.Err(); err != nil {
+ return err
+ }
+ }
+ }
+ return nil
+}
+
+func (sd *syncDatabase) publishInMountTables(ctx *context.T, call rpc.ServerCall, spec wire.SyncGroupSpec) error {
+ // Get this Syncbase's sync module handle.
+ ss := sd.sync.(*syncService)
+
+ for _, mt := range spec.MountTables {
+ name := naming.Join(mt, ss.name)
+ // TODO(hpucha): Is this add idempotent? Appears to be from code.
+ // Confirm that it is ok to use absolute names here.
+ if err := ss.server.AddName(name); err != nil {
+ return err
+ }
+ }
+
+ // TODO(hpucha): Do we have to publish in neighborhood explicitly?
+
+ return nil
+}
+
+func (sd *syncDatabase) joinSyncGroupAtAdmin(ctx *context.T, call rpc.ServerCall, sgName, name string, myInfo wire.SyncGroupMemberInfo) (interfaces.SyncGroup, error) {
+ c := interfaces.SyncClient(sgName)
+ return c.JoinSyncGroupAtAdmin(ctx, sgName, name, myInfo)
+
+ // TODO(hpucha): Try to join using an Admin on neighborhood if the publisher is not reachable.
+}
+
+func authorize(ctx *context.T, call security.Call, sg *interfaces.SyncGroup) error {
+ auth := access.TypicalTagTypePermissionsAuthorizer(sg.Spec.Perms)
+ if err := auth.Authorize(ctx, call); err != nil {
+ return verror.New(verror.ErrNoAccess, ctx, err)
+ }
+ return nil
+}
+
+////////////////////////////////////////////////////////////
+// Methods for SyncGroup create/join between Syncbases.
+
+func (s *syncService) PublishSyncGroup(ctx *context.T, call rpc.ServerCall, sg interfaces.SyncGroup) error {
+ st, err := s.getDbStore(ctx, call, sg.AppName, sg.DbName)
+ if err != nil {
+ return err
+ }
+
+ err = store.RunInTransaction(st, func(tx store.Transaction) error {
+ localSG, err := getSyncGroupByName(ctx, tx, sg.Name)
+
+ if err != nil && verror.ErrorID(err) != verror.ErrNoExist.ID {
+ return err
+ }
+
+ // SG name already claimed.
+ if err == nil && localSG.Id != sg.Id {
+ return verror.New(verror.ErrExist, ctx, sg.Name)
+ }
+
+ // TODO(hpucha): Bootstrap DAG/Genvector etc for syncing the SG
+ // metadata if needed.
+ //
+ // TODO(hpucha): Catch up on SG versions so far.
+
+ // SG already published. Update if needed.
+ if err == nil && localSG.Id == sg.Id {
+ if localSG.Status == interfaces.SyncGroupStatusPublishPending {
+ localSG.Status = interfaces.SyncGroupStatusRunning
+ return setSGDataEntry(ctx, tx, localSG.Id, localSG)
+ }
+ return nil
+ }
+
+ // Publish the SyncGroup.
+
+ // TODO(hpucha): Use some ACL check to allow/deny publishing.
+ // TODO(hpucha): Ensure node is on Admin ACL.
+
+ // TODO(hpucha): Default priority?
+ sg.Joiners[s.name] = wire.SyncGroupMemberInfo{}
+ sg.Status = interfaces.SyncGroupStatusRunning
+ return addSyncGroup(ctx, tx, &sg)
+ })
+
+ return err
+}
+
+func (s *syncService) JoinSyncGroupAtAdmin(ctx *context.T, call rpc.ServerCall, sgName, joinerName string, joinerInfo wire.SyncGroupMemberInfo) (interfaces.SyncGroup, error) {
+ var dbSt store.Store
+ var gid interfaces.GroupId
+ var err error
+
+ // Find the database store for this SyncGroup.
+ //
+ // TODO(hpucha): At a high level, we have yet to decide if the SG name
+ // is stand-alone or is derived from the app/db namespace, based on the
+ // feedback from app developers (see discussion in SyncGroup API
+ // doc). If we decide to keep the SG name as stand-alone, this scan can
+ // be optimized by a lazy cache of sgname to <app, db> info.
+ s.forEachDatabaseStore(ctx, func(appName, dbName string, st store.Store) bool {
+ if gid, err = getSyncGroupId(ctx, st, sgName); err == nil {
+ // Found the SyncGroup being looked for.
+ dbSt = st
+ return true
+ }
+ return false
+ })
+
+ // SyncGroup not found.
+ if err != nil {
+ return interfaces.SyncGroup{}, verror.New(verror.ErrNoExist, ctx, "SyncGroup not found", sgName)
+ }
+
+ var sg *interfaces.SyncGroup
+ err = store.RunInTransaction(dbSt, func(tx store.Transaction) error {
+ var err error
+ sg, err = getSyncGroupById(ctx, tx, gid)
+ if err != nil {
+ return err
+ }
+
+ // Check SG ACL.
+ if err := authorize(ctx, call.Security(), sg); err != nil {
+ return err
+ }
+
+ // Add to joiner list.
+ sg.Joiners[joinerName] = joinerInfo
+ return setSGDataEntry(ctx, tx, sg.Id, sg)
+ })
+
+ if err != nil {
+ return interfaces.SyncGroup{}, err
+ }
+ return *sg, nil
+}
diff --git a/services/syncbase/vsync/syncgroup_test.go b/services/syncbase/vsync/syncgroup_test.go
new file mode 100644
index 0000000..6c87ee7
--- /dev/null
+++ b/services/syncbase/vsync/syncgroup_test.go
@@ -0,0 +1,493 @@
+// Copyright 2015 The Vanadium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package vsync
+
+// Tests for SyncGroup management and storage in Syncbase.
+
+import (
+ "reflect"
+ "testing"
+ "time"
+
+ "v.io/syncbase/v23/services/syncbase/nosql"
+ "v.io/syncbase/x/ref/services/syncbase/server/interfaces"
+ "v.io/syncbase/x/ref/services/syncbase/store"
+ _ "v.io/x/ref/runtime/factories/generic"
+)
+
+// checkSGStats verifies SyncGroup stats.
+func checkSGStats(t *testing.T, svc *mockService, which string, numSG, numMembers int) {
+ memberViewTTL = 0 // Always recompute the SyncGroup membership view.
+ svc.sync.refreshMembersIfExpired(nil)
+
+ view := svc.sync.allMembers
+ if num := len(view.members); num != numMembers {
+ t.Errorf("num-members (%s): got %v instead of %v", which, num, numMembers)
+ }
+
+ sgids := make(map[interfaces.GroupId]bool)
+ for _, info := range view.members {
+ for _, sgmi := range info.db2sg {
+ for gid := range sgmi {
+ sgids[gid] = true
+ }
+ }
+ }
+
+ if num := len(sgids); num != numSG {
+ t.Errorf("num-syncgroups (%s): got %v instead of %v", which, num, numSG)
+ }
+}
+
+// TestAddSyncGroup tests adding SyncGroups.
+func TestAddSyncGroup(t *testing.T) {
+ // Set a large value to prevent the initiator from running. Since this
+ // test adds a fake SyncGroup, if the initiator runs, it will attempt
+ // to initiate using this fake and partial SyncGroup data.
+ peerSyncInterval = 1 * time.Hour
+ svc := createService(t)
+ defer destroyService(t, svc)
+ st := svc.St()
+
+ checkSGStats(t, svc, "add-1", 0, 0)
+
+ // Add a SyncGroup.
+
+ sgName := "foobar"
+ sgId := interfaces.GroupId(1234)
+
+ sg := &interfaces.SyncGroup{
+ Name: sgName,
+ Id: sgId,
+ AppName: "mockApp",
+ DbName: "mockDB",
+ Creator: "mockCreator",
+ SpecVersion: "etag-0",
+ Spec: nosql.SyncGroupSpec{
+ Prefixes: []string{"foo", "bar"},
+ },
+ Joiners: map[string]nosql.SyncGroupMemberInfo{
+ "phone": nosql.SyncGroupMemberInfo{SyncPriority: 10},
+ "tablet": nosql.SyncGroupMemberInfo{SyncPriority: 25},
+ "cloud": nosql.SyncGroupMemberInfo{SyncPriority: 1},
+ },
+ }
+
+ tx := st.NewTransaction()
+ if err := addSyncGroup(nil, tx, sg); err != nil {
+ t.Errorf("cannot add SyncGroup ID %d: %v", sg.Id, err)
+ }
+ if err := tx.Commit(); err != nil {
+ t.Errorf("cannot commit adding SyncGroup ID %d: %v", sg.Id, err)
+ }
+
+ // Verify SyncGroup ID, name, and data.
+
+ if id, err := getSyncGroupId(nil, st, sgName); err != nil || id != sgId {
+ t.Errorf("cannot get ID of SyncGroup %s: got %d instead of %d; err: %v", sgName, id, sgId, err)
+ }
+ if name, err := getSyncGroupName(nil, st, sgId); err != nil || name != sgName {
+ t.Errorf("cannot get name of SyncGroup %d: got %s instead of %s; err: %v",
+ sgId, name, sgName, err)
+ }
+
+ sgOut, err := getSyncGroupById(nil, st, sgId)
+ if err != nil {
+ t.Errorf("cannot get SyncGroup by ID %d: %v", sgId, err)
+ }
+ if !reflect.DeepEqual(sgOut, sg) {
+ t.Errorf("invalid SyncGroup data for group ID %d: got %v instead of %v", sgId, sgOut, sg)
+ }
+
+ sgOut, err = getSyncGroupByName(nil, st, sgName)
+ if err != nil {
+ t.Errorf("cannot get SyncGroup by Name %s: %v", sgName, err)
+ }
+ if !reflect.DeepEqual(sgOut, sg) {
+ t.Errorf("invalid SyncGroup data for group name %s: got %v instead of %v", sgName, sgOut, sg)
+ }
+
+ // Verify membership data.
+
+ expMembers := map[string]uint32{"phone": 1, "tablet": 1, "cloud": 1}
+
+ members := svc.sync.getMembers(nil)
+ if !reflect.DeepEqual(members, expMembers) {
+ t.Errorf("invalid SyncGroup members: got %v instead of %v", members, expMembers)
+ }
+
+ view := svc.sync.allMembers
+ for mm := range members {
+ mi := view.members[mm]
+ if mi == nil {
+ t.Errorf("cannot get info for SyncGroup member %s", mm)
+ }
+ if len(mi.db2sg) != 1 {
+ t.Errorf("invalid info for SyncGroup member %s: %v", mm, mi)
+ }
+ var sgmi sgMemberInfo
+ for _, v := range mi.db2sg {
+ sgmi = v
+ break
+ }
+ if len(sgmi) != 1 {
+ t.Errorf("invalid member info for SyncGroup member %s: %v", mm, sgmi)
+ }
+ expJoinerInfo := sg.Joiners[mm]
+ joinerInfo := sgmi[sgId]
+ if !reflect.DeepEqual(joinerInfo, expJoinerInfo) {
+ t.Errorf("invalid Info for SyncGroup member %s in group ID %d: got %v instead of %v",
+ mm, sgId, joinerInfo, expJoinerInfo)
+ }
+ }
+
+ checkSGStats(t, svc, "add-2", 1, 3)
+
+ // Adding a SyncGroup for a pre-existing group ID or name should fail.
+
+ sg.Name = "another-name"
+
+ tx = st.NewTransaction()
+ if err = addSyncGroup(nil, tx, sg); err == nil {
+ t.Errorf("re-adding SyncGroup %d did not fail", sgId)
+ }
+ tx.Abort()
+
+ sg.Name = sgName
+ sg.Id = interfaces.GroupId(5555)
+
+ tx = st.NewTransaction()
+ if err = addSyncGroup(nil, tx, sg); err == nil {
+ t.Errorf("adding SyncGroup %s with a different ID did not fail", sgName)
+ }
+ tx.Abort()
+
+ checkSGStats(t, svc, "add-3", 1, 3)
+
+ // Fetching a non-existing SyncGroup by ID or name should fail.
+
+ badName := "not-available"
+ badId := interfaces.GroupId(999)
+ if id, err := getSyncGroupId(nil, st, badName); err == nil {
+ t.Errorf("found non-existing SyncGroup %s: got ID %d", badName, id)
+ }
+ if name, err := getSyncGroupName(nil, st, badId); err == nil {
+ t.Errorf("found non-existing SyncGroup %d: got name %s", badId, name)
+ }
+ if sg, err := getSyncGroupByName(nil, st, badName); err == nil {
+ t.Errorf("found non-existing SyncGroup %s: got %v", badName, sg)
+ }
+ if sg, err := getSyncGroupById(nil, st, badId); err == nil {
+ t.Errorf("found non-existing SyncGroup %d: got %v", badId, sg)
+ }
+}
+
+// TestInvalidAddSyncGroup tests that adding invalid SyncGroups fails.
+func TestInvalidAddSyncGroup(t *testing.T) {
+ // Set a large value to prevent the threads from firing.
+ peerSyncInterval = 1 * time.Hour
+ svc := createService(t)
+ defer destroyService(t, svc)
+ st := svc.St()
+
+ checkBadAddSyncGroup := func(t *testing.T, st store.Store, sg *interfaces.SyncGroup, msg string) {
+ tx := st.NewTransaction()
+ if err := addSyncGroup(nil, tx, sg); err == nil {
+ t.Errorf("checkBadAddSyncGroup: adding bad SyncGroup (%s) did not fail", msg)
+ }
+ tx.Abort()
+ }
+
+ checkBadAddSyncGroup(t, st, nil, "nil SG")
+
+ sg := &interfaces.SyncGroup{Id: 1234}
+ checkBadAddSyncGroup(t, st, sg, "SG w/o name")
+
+ sg = &interfaces.SyncGroup{Name: "foobar"}
+ checkBadAddSyncGroup(t, st, sg, "SG w/o Id")
+
+ sg.Id = 1234
+ checkBadAddSyncGroup(t, st, sg, "SG w/o Version")
+
+ sg.SpecVersion = "v1"
+ checkBadAddSyncGroup(t, st, sg, "SG w/o Joiners")
+
+ sg.Joiners = map[string]nosql.SyncGroupMemberInfo{
+ "phone": nosql.SyncGroupMemberInfo{SyncPriority: 10},
+ }
+ checkBadAddSyncGroup(t, st, sg, "SG w/o Prefixes")
+}
+
+// TestDeleteSyncGroup tests deleting a SyncGroup.
+func TestDeleteSyncGroup(t *testing.T) {
+ // Set a large value to prevent the threads from firing.
+ peerSyncInterval = 1 * time.Hour
+ svc := createService(t)
+ defer destroyService(t, svc)
+ st := svc.St()
+
+ sgName := "foobar"
+ sgId := interfaces.GroupId(1234)
+
+ // Delete non-existing SyncGroups.
+
+ tx := st.NewTransaction()
+ if err := delSyncGroupById(nil, tx, sgId); err == nil {
+ t.Errorf("deleting a non-existing SyncGroup ID did not fail")
+ }
+ if err := delSyncGroupByName(nil, tx, sgName); err == nil {
+ t.Errorf("deleting a non-existing SyncGroup name did not fail")
+ }
+ tx.Abort()
+
+ checkSGStats(t, svc, "del-1", 0, 0)
+
+ // Create the SyncGroup to delete later.
+
+ sg := &interfaces.SyncGroup{
+ Name: sgName,
+ Id: sgId,
+ AppName: "mockApp",
+ DbName: "mockDB",
+ Creator: "mockCreator",
+ SpecVersion: "etag-0",
+ Spec: nosql.SyncGroupSpec{
+ Prefixes: []string{"foo", "bar"},
+ },
+ Joiners: map[string]nosql.SyncGroupMemberInfo{
+ "phone": nosql.SyncGroupMemberInfo{SyncPriority: 10},
+ "tablet": nosql.SyncGroupMemberInfo{SyncPriority: 25},
+ "cloud": nosql.SyncGroupMemberInfo{SyncPriority: 1},
+ },
+ }
+
+ tx = st.NewTransaction()
+ if err := addSyncGroup(nil, tx, sg); err != nil {
+ t.Errorf("creating SyncGroup ID %d failed: %v", sgId, err)
+ }
+ if err := tx.Commit(); err != nil {
+ t.Errorf("cannot commit adding SyncGroup ID %d: %v", sgId, err)
+ }
+
+ checkSGStats(t, svc, "del-2", 1, 3)
+
+ // Delete it by ID.
+
+ tx = st.NewTransaction()
+ if err := delSyncGroupById(nil, tx, sgId); err != nil {
+ t.Errorf("deleting SyncGroup ID %d failed: %v", sgId, err)
+ }
+ if err := tx.Commit(); err != nil {
+ t.Errorf("cannot commit deleting SyncGroup ID %d: %v", sgId, err)
+ }
+
+ checkSGStats(t, svc, "del-3", 0, 0)
+
+ // Create it again then delete it by name.
+
+ tx = st.NewTransaction()
+ if err := addSyncGroup(nil, tx, sg); err != nil {
+ t.Errorf("creating SyncGroup ID %d after delete failed: %v", sgId, err)
+ }
+ if err := tx.Commit(); err != nil {
+ t.Errorf("cannot commit adding SyncGroup ID %d after delete: %v", sgId, err)
+ }
+
+ checkSGStats(t, svc, "del-4", 1, 3)
+
+ tx = st.NewTransaction()
+ if err := delSyncGroupByName(nil, tx, sgName); err != nil {
+ t.Errorf("deleting SyncGroup name %s failed: %v", sgName, err)
+ }
+ if err := tx.Commit(); err != nil {
+ t.Errorf("cannot commit deleting SyncGroup name %s: %v", sgName, err)
+ }
+
+ checkSGStats(t, svc, "del-5", 0, 0)
+}
+
+// TestMultiSyncGroups tests creating multiple SyncGroups.
+func TestMultiSyncGroups(t *testing.T) {
+ // Set a large value to prevent the threads from firing.
+ peerSyncInterval = 1 * time.Hour
+ svc := createService(t)
+ defer destroyService(t, svc)
+ st := svc.St()
+
+ sgName1, sgName2 := "foo", "bar"
+ sgId1, sgId2 := interfaces.GroupId(1234), interfaces.GroupId(8888)
+
+ // Add two SyncGroups.
+
+ sg1 := &interfaces.SyncGroup{
+ Name: sgName1,
+ Id: sgId1,
+ AppName: "mockApp",
+ DbName: "mockDB",
+ Creator: "mockCreator",
+ SpecVersion: "etag-1",
+ Spec: nosql.SyncGroupSpec{
+ Prefixes: []string{"foo"},
+ },
+ Joiners: map[string]nosql.SyncGroupMemberInfo{
+ "phone": nosql.SyncGroupMemberInfo{SyncPriority: 10},
+ "tablet": nosql.SyncGroupMemberInfo{SyncPriority: 25},
+ "cloud": nosql.SyncGroupMemberInfo{SyncPriority: 1},
+ },
+ }
+ sg2 := &interfaces.SyncGroup{
+ Name: sgName2,
+ Id: sgId2,
+ AppName: "mockApp",
+ DbName: "mockDB",
+ Creator: "mockCreator",
+ SpecVersion: "etag-2",
+ Spec: nosql.SyncGroupSpec{
+ Prefixes: []string{"bar"},
+ },
+ Joiners: map[string]nosql.SyncGroupMemberInfo{
+ "tablet": nosql.SyncGroupMemberInfo{SyncPriority: 111},
+ "door": nosql.SyncGroupMemberInfo{SyncPriority: 33},
+ "lamp": nosql.SyncGroupMemberInfo{SyncPriority: 9},
+ },
+ }
+
+ tx := st.NewTransaction()
+ if err := addSyncGroup(nil, tx, sg1); err != nil {
+ t.Errorf("creating SyncGroup ID %d failed: %v", sgId1, err)
+ }
+ if err := tx.Commit(); err != nil {
+ t.Errorf("cannot commit adding SyncGroup ID %d: %v", sgId1, err)
+ }
+
+ checkSGStats(t, svc, "multi-1", 1, 3)
+
+ tx = st.NewTransaction()
+ if err := addSyncGroup(nil, tx, sg2); err != nil {
+ t.Errorf("creating SyncGroup ID %d failed: %v", sgId2, err)
+ }
+ if err := tx.Commit(); err != nil {
+ t.Errorf("cannot commit adding SyncGroup ID %d: %v", sgId2, err)
+ }
+
+ checkSGStats(t, svc, "multi-2", 2, 5)
+
+ // Verify membership data.
+
+ expMembers := map[string]uint32{"phone": 1, "tablet": 2, "cloud": 1, "door": 1, "lamp": 1}
+
+ members := svc.sync.getMembers(nil)
+ if !reflect.DeepEqual(members, expMembers) {
+ t.Errorf("invalid SyncGroup members: got %v instead of %v", members, expMembers)
+ }
+
+ expMemberInfo := map[string]*memberInfo{
+ "phone": &memberInfo{
+ db2sg: map[string]sgMemberInfo{
+ "mockapp:mockdb": sgMemberInfo{
+ sgId1: sg1.Joiners["phone"],
+ },
+ },
+ },
+ "tablet": &memberInfo{
+ db2sg: map[string]sgMemberInfo{
+ "mockapp:mockdb": sgMemberInfo{
+ sgId1: sg1.Joiners["tablet"],
+ sgId2: sg2.Joiners["tablet"],
+ },
+ },
+ },
+ "cloud": &memberInfo{
+ db2sg: map[string]sgMemberInfo{
+ "mockapp:mockdb": sgMemberInfo{
+ sgId1: sg1.Joiners["cloud"],
+ },
+ },
+ },
+ "door": &memberInfo{
+ db2sg: map[string]sgMemberInfo{
+ "mockapp:mockdb": sgMemberInfo{
+ sgId2: sg2.Joiners["door"],
+ },
+ },
+ },
+ "lamp": &memberInfo{
+ db2sg: map[string]sgMemberInfo{
+ "mockapp:mockdb": sgMemberInfo{
+ sgId2: sg2.Joiners["lamp"],
+ },
+ },
+ },
+ }
+
+ view := svc.sync.allMembers
+ for mm := range members {
+ mi := view.members[mm]
+ if mi == nil {
+ t.Errorf("cannot get info for SyncGroup member %s", mm)
+ }
+ expInfo := expMemberInfo[mm]
+ if !reflect.DeepEqual(mi, expInfo) {
+ t.Errorf("invalid Info for SyncGroup member %s: got %v instead of %v", mm, mi, expInfo)
+ }
+ }
+
+ // Delete the 1st SyncGroup.
+
+ tx = st.NewTransaction()
+ if err := delSyncGroupById(nil, tx, sgId1); err != nil {
+ t.Errorf("deleting SyncGroup ID %d failed: %v", sgId1, err)
+ }
+ if err := tx.Commit(); err != nil {
+ t.Errorf("cannot commit deleting SyncGroup ID %d: %v", sgId1, err)
+ }
+
+ checkSGStats(t, svc, "multi-3", 1, 3)
+
+ // Verify SyncGroup membership data.
+
+ expMembers = map[string]uint32{"tablet": 1, "door": 1, "lamp": 1}
+
+ members = svc.sync.getMembers(nil)
+ if !reflect.DeepEqual(members, expMembers) {
+ t.Errorf("invalid SyncGroup members: got %v instead of %v", members, expMembers)
+ }
+
+ expMemberInfo = map[string]*memberInfo{
+ "tablet": &memberInfo{
+ db2sg: map[string]sgMemberInfo{
+ "mockapp:mockdb": sgMemberInfo{
+ sgId2: sg2.Joiners["tablet"],
+ },
+ },
+ },
+ "door": &memberInfo{
+ db2sg: map[string]sgMemberInfo{
+ "mockapp:mockdb": sgMemberInfo{
+ sgId2: sg2.Joiners["door"],
+ },
+ },
+ },
+ "lamp": &memberInfo{
+ db2sg: map[string]sgMemberInfo{
+ "mockapp:mockdb": sgMemberInfo{
+ sgId2: sg2.Joiners["lamp"],
+ },
+ },
+ },
+ }
+
+ view = svc.sync.allMembers
+ for mm := range members {
+ mi := view.members[mm]
+ if mi == nil {
+ t.Errorf("cannot get info for SyncGroup member %s", mm)
+ }
+ expInfo := expMemberInfo[mm]
+ if !reflect.DeepEqual(mi, expInfo) {
+ t.Errorf("invalid Info for SyncGroup member %s: got %v instead of %v", mm, mi, expInfo)
+ }
+ }
+}
diff --git a/services/syncbase/vsync/test_util.go b/services/syncbase/vsync/test_util.go
new file mode 100644
index 0000000..8328fda
--- /dev/null
+++ b/services/syncbase/vsync/test_util.go
@@ -0,0 +1,154 @@
+// Copyright 2015 The Vanadium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package vsync
+
+// Utilities for testing sync.
+
+import (
+ "fmt"
+ "os"
+ "path"
+ "testing"
+ "time"
+
+ wire "v.io/syncbase/v23/services/syncbase/nosql"
+ "v.io/syncbase/x/ref/services/syncbase/clock"
+ "v.io/syncbase/x/ref/services/syncbase/server/interfaces"
+ "v.io/syncbase/x/ref/services/syncbase/server/util"
+ "v.io/syncbase/x/ref/services/syncbase/server/watchable"
+ "v.io/syncbase/x/ref/services/syncbase/store"
+ "v.io/v23/context"
+ "v.io/v23/rpc"
+ "v.io/v23/security/access"
+ "v.io/v23/verror"
+ "v.io/x/ref/test"
+)
+
+// mockService emulates a Syncbase service that includes store and sync.
+// It is used to access a mock application.
+type mockService struct {
+ engine string
+ dir string
+ st store.Store
+ sync *syncService
+ shutdown func()
+}
+
+func (s *mockService) St() store.Store {
+ return s.st
+}
+
+func (s *mockService) Sync() interfaces.SyncServerMethods {
+ return s.sync
+}
+
+func (s *mockService) App(ctx *context.T, call rpc.ServerCall, appName string) (interfaces.App, error) {
+ return &mockApp{st: s.st}, nil
+}
+
+func (s *mockService) AppNames(ctx *context.T, call rpc.ServerCall) ([]string, error) {
+ return []string{"mockapp"}, nil
+}
+
+// mockApp emulates a Syncbase App. It is used to access a mock database.
+type mockApp struct {
+ st store.Store
+}
+
+func (a *mockApp) NoSQLDatabase(ctx *context.T, call rpc.ServerCall, dbName string) (interfaces.Database, error) {
+ return &mockDatabase{st: a.st}, nil
+}
+
+func (a *mockApp) NoSQLDatabaseNames(ctx *context.T, call rpc.ServerCall) ([]string, error) {
+ return []string{"mockdb"}, nil
+}
+
+func (a *mockApp) CreateNoSQLDatabase(ctx *context.T, call rpc.ServerCall, dbName string, perms access.Permissions, metadata *wire.SchemaMetadata) error {
+ return verror.NewErrNotImplemented(ctx)
+}
+
+func (a *mockApp) DeleteNoSQLDatabase(ctx *context.T, call rpc.ServerCall, dbName string) error {
+ return verror.NewErrNotImplemented(ctx)
+}
+
+func (a *mockApp) SetDatabasePerms(ctx *context.T, call rpc.ServerCall, dbName string, perms access.Permissions, version string) error {
+ return verror.NewErrNotImplemented(ctx)
+}
+
+func (a *mockApp) Service() interfaces.Service {
+ return nil
+}
+
+func (a *mockApp) Name() string {
+ return "mockapp"
+}
+
+// mockDatabase emulates a Syncbase Database. It is used to test sync functionality.
+type mockDatabase struct {
+ st store.Store
+}
+
+func (d *mockDatabase) St() store.Store {
+ return d.st
+}
+
+func (d *mockDatabase) CheckPermsInternal(ctx *context.T, call rpc.ServerCall, st store.StoreReader) error {
+ return verror.NewErrNotImplemented(ctx)
+}
+
+func (d *mockDatabase) SetPermsInternal(ctx *context.T, call rpc.ServerCall, perms access.Permissions, version string) error {
+ return verror.NewErrNotImplemented(ctx)
+}
+
+func (d *mockDatabase) Name() string {
+ return "mockdb"
+}
+
+func (d *mockDatabase) App() interfaces.App {
+ return nil
+}
+
+// createService creates a mock Syncbase service used for testing sync functionality.
+func createService(t *testing.T) *mockService {
+ ctx, shutdown := test.V23Init()
+ engine := "leveldb"
+ opts := util.OpenOptions{CreateIfMissing: true, ErrorIfExists: false}
+ dir := fmt.Sprintf("%s/vsync_test_%d_%d", os.TempDir(), os.Getpid(), time.Now().UnixNano())
+
+ st, err := util.OpenStore(engine, path.Join(dir, engine), opts)
+ if err != nil {
+ t.Fatalf("cannot create store %s (%s): %v", engine, dir, err)
+ }
+ vclock := clock.NewVClock(st)
+ st, err = watchable.Wrap(st, vclock, &watchable.Options{
+ ManagedPrefixes: []string{util.RowPrefix, util.PermsPrefix},
+ })
+ if err != nil {
+ t.Fatalf("cannot wrap store %s (%s): %v", engine, dir, err)
+ }
+
+ s := &mockService{
+ st: st,
+ engine: engine,
+ dir: dir,
+ shutdown: shutdown,
+ }
+ if s.sync, err = New(ctx, nil, s, nil, dir); err != nil {
+ util.DestroyStore(engine, dir)
+ t.Fatalf("cannot create sync service: %v", err)
+ }
+ return s
+}
+
+// destroyService cleans up the mock Syncbase service.
+func destroyService(t *testing.T, s *mockService) {
+ defer s.shutdown()
+ defer s.sync.Close()
+ if err := util.DestroyStore(s.engine, s.dir); err != nil {
+ t.Fatalf("cannot destroy store %s (%s): %v", s.engine, s.dir, err)
+ }
+}
+
+// makeRowKey returns the database row key for a given application key.
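+// For example, makeRowKey("foo1") would return "$row:foo1", assuming
+// util.RowPrefix is "$row" and ":" is the key-part separator.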
+func makeRowKey(key string) string {
+ return util.JoinKeyParts(util.RowPrefix, key)
+}
diff --git a/services/syncbase/vsync/testdata/local-init-00.log.sync b/services/syncbase/vsync/testdata/local-init-00.log.sync
new file mode 100644
index 0000000..7435348
--- /dev/null
+++ b/services/syncbase/vsync/testdata/local-init-00.log.sync
@@ -0,0 +1,6 @@
+# Create an object locally and update it twice (linked-list).
+# The format is: <cmd>|<objid>|<version>|<parent1>|<parent2>|<logrec>|<txid>|<txcount>|<deleted>
+
+addl|foo1|1|||$sync:log:10:1|0|1|false
+addl|foo1|2|1||$sync:log:10:2|0|1|false
+addl|foo1|3|2||$sync:log:10:3|0|1|false
diff --git a/services/syncbase/vsync/testdata/local-init-01.sync b/services/syncbase/vsync/testdata/local-init-01.sync
new file mode 100644
index 0000000..86e24de
--- /dev/null
+++ b/services/syncbase/vsync/testdata/local-init-01.sync
@@ -0,0 +1,12 @@
+# Create an object DAG locally with branches and resolved conflicts.
+# The format is: <cmd>|<objid>|<version>|<parent1>|<parent2>|<logrec>|<txid>|<txcount>|<deleted>
+
+addl|1234|1|||logrec-00|0|1|false
+addl|1234|2|1||logrec-01|0|1|false
+addl|1234|3|2||logrec-02|0|1|false
+addl|1234|4|2||logrec-03|0|1|false
+addl|1234|5|3|4|logrec-04|0|1|false
+addl|1234|6|5||logrec-05|0|1|false
+addl|1234|7|2||logrec-06|0|1|false
+addl|1234|8|6|7|logrec-07|0|1|false
+addl|1234|9|8||logrec-08|0|1|false
diff --git a/services/syncbase/vsync/testdata/local-init-02.sync b/services/syncbase/vsync/testdata/local-init-02.sync
new file mode 100644
index 0000000..cb60a79
--- /dev/null
+++ b/services/syncbase/vsync/testdata/local-init-02.sync
@@ -0,0 +1,10 @@
+# Create DAGs for 3 objects locally.
+# The format is: <cmd>|<objid>|<version>|<parent1>|<parent2>|<logrec>|<txid>|<txcount>|<deleted>
+
+addl|1234|1|||logrec-a-01|0|1|false
+addl|1234|2|1||logrec-a-02|0|1|false
+
+addl|6789|1|||logrec-b-01|0|1|false
+addl|6789|2|1||logrec-b-02|0|1|false
+
+addl|2222|1|||logrec-c-01|0|1|false
diff --git a/services/syncbase/vsync/testdata/local-init-03.sync b/services/syncbase/vsync/testdata/local-init-03.sync
new file mode 100644
index 0000000..202a752
--- /dev/null
+++ b/services/syncbase/vsync/testdata/local-init-03.sync
@@ -0,0 +1,10 @@
+# The format is: <cmd>|<objid>|<version>|<parent1>|<parent2>|<logrec>|<txid>|<txcount>|<deleted>
+
+addl|1234|1|||logrec-01|0|1|false
+addl|1234|2|1||logrec-02|0|1|false
+addl|1234|3|1||logrec-03|0|1|false
+addl|1234|4|2||logrec-04|0|1|false
+addl|1234|5|2||logrec-05|0|1|true
+addl|1234|6|4|5|logrec-06|0|1|false
+addl|1234|7|3|5|logrec-07|0|1|false
+addl|1234|8|6|7|logrec-08|0|1|false
diff --git a/services/syncbase/vsync/testdata/local-resolve-00.sync b/services/syncbase/vsync/testdata/local-resolve-00.sync
new file mode 100644
index 0000000..1666cf0
--- /dev/null
+++ b/services/syncbase/vsync/testdata/local-resolve-00.sync
@@ -0,0 +1,4 @@
+# Create an object locally and update it twice (linked-list).
+# The format is: <cmd>|<objid>|<version>|<parent1>|<parent2>|<logrec>|<txid>|<txcount>|<deleted>
+
+addl|foo1|7|3|6|logrec-06|0|1|false
diff --git a/services/syncbase/vsync/testdata/remote-conf-00.log.sync b/services/syncbase/vsync/testdata/remote-conf-00.log.sync
new file mode 100644
index 0000000..060cf0c
--- /dev/null
+++ b/services/syncbase/vsync/testdata/remote-conf-00.log.sync
@@ -0,0 +1,8 @@
+# Update an object remotely three times triggering one conflict after
+# it was created locally up to v3 (i.e. assume the remote sync received
+# it from the local sync at v2, then updated separately).
+# The format is: <cmd>|<objid>|<version>|<parent1>|<parent2>|<logrec>|<txid>|<txcount>|<deleted>
+
+addr|foo1|4|2||$sync:log:11:1|0|1|false
+addr|foo1|5|4||$sync:log:11:2|0|1|false
+addr|foo1|6|5||$sync:log:11:3|0|1|false
diff --git a/services/syncbase/vsync/testdata/remote-conf-01.log.sync b/services/syncbase/vsync/testdata/remote-conf-01.log.sync
new file mode 100644
index 0000000..2053157
--- /dev/null
+++ b/services/syncbase/vsync/testdata/remote-conf-01.log.sync
@@ -0,0 +1,10 @@
+# Update an object remotely three times triggering a conflict with
+# 2 graft points: v1 and v4. This assumes that the remote sync got
+# v1, made its own conflicting v4 that it resolved into v5 (against v2)
+# then made a v6 change. When the local sync gets back this info it
+# sees 2 graft points: v1-v4 and v2-v5.
+# The format is: <cmd>|<objid>|<version>|<parent1>|<parent2>|<logrec>|<txid>|<txcount>|<deleted>
+
+addr|foo1|4|1||$sync:log:12:1|0|1|false
+addr|foo1|5|2|4|$sync:log:11:1|0|1|false
+addr|foo1|6|5||$sync:log:11:2|0|1|false
diff --git a/services/syncbase/vsync/testdata/remote-conf-03.log.sync b/services/syncbase/vsync/testdata/remote-conf-03.log.sync
new file mode 100644
index 0000000..673405e
--- /dev/null
+++ b/services/syncbase/vsync/testdata/remote-conf-03.log.sync
@@ -0,0 +1,6 @@
+# Create the same object remotely from scratch and update it twice (linked-list).
+# The format is: <cmd>|<objid>|<version>|<parent1>|<parent2>|<logrec>|<txid>|<txcount>|<deleted>
+
+addr|foo1|4|||$sync:log:11:1|0|1|false
+addr|foo1|5|4||$sync:log:11:2|0|1|false
+addr|foo1|6|5||$sync:log:11:3|0|1|false
diff --git a/services/syncbase/vsync/testdata/remote-conf-link.log.sync b/services/syncbase/vsync/testdata/remote-conf-link.log.sync
new file mode 100644
index 0000000..bdf0331
--- /dev/null
+++ b/services/syncbase/vsync/testdata/remote-conf-link.log.sync
@@ -0,0 +1,5 @@
+# Update an object remotely, detect conflict, and bless the local version.
+# The format is: <cmd>|<objid>|<version>|<parent1>|<parent2>|<logrec>|<txid>|<txcount>|<deleted>
+
+addr|foo1|4|1||$sync:log:11:1|0|1|false
+linkr|foo1|4|2||$sync:log:11:2
diff --git a/services/syncbase/vsync/testdata/remote-init-00.log.sync b/services/syncbase/vsync/testdata/remote-init-00.log.sync
new file mode 100644
index 0000000..2546c47
--- /dev/null
+++ b/services/syncbase/vsync/testdata/remote-init-00.log.sync
@@ -0,0 +1,7 @@
+# Create an object remotely and update it twice (linked-list).
+# The format is: <cmd>|<objid>|<version>|<parent1>|<parent2>|<logrec>|<txid>|<txcount>|<deleted>
+
+addr|foo1|1|||$sync:log:11:1|0|1|false
+addr|foo1|2|1||$sync:log:11:2|0|1|false
+addr|foo1|3|2||$sync:log:11:3|0|1|false
+genvec|foo1|10:0,11:3|bar|11:0
\ No newline at end of file
diff --git a/services/syncbase/vsync/testdata/remote-noconf-00.log.sync b/services/syncbase/vsync/testdata/remote-noconf-00.log.sync
new file mode 100644
index 0000000..6adf5dd
--- /dev/null
+++ b/services/syncbase/vsync/testdata/remote-noconf-00.log.sync
@@ -0,0 +1,9 @@
+# Update an object remotely three times without triggering a conflict
+# after it was created locally up to v3 (i.e. assume the remote sync
+# received it from the local sync first, then updated it).
+# The format is: <cmd>|<objid>|<version>|<parent1>|<parent2>|<logrec>|<txid>|<txcount>|<deleted>
+
+addr|foo1|4|3||$sync:log:11:1|0|1|false
+addr|foo1|5|4||$sync:log:11:2|0|1|false
+addr|foo1|6|5||$sync:log:11:3|0|1|false
+genvec|foo1|10:0,11:3|bar|11:0
\ No newline at end of file
diff --git a/services/syncbase/vsync/testdata/remote-noconf-link-00.log.sync b/services/syncbase/vsync/testdata/remote-noconf-link-00.log.sync
new file mode 100644
index 0000000..a06bec5
--- /dev/null
+++ b/services/syncbase/vsync/testdata/remote-noconf-link-00.log.sync
@@ -0,0 +1,5 @@
+# Update an object remotely, detect conflict, and bless the remote version.
+# The format is: <cmd>|<objid>|<version>|<parent1>|<parent2>|<logrec>|<txid>|<txcount>|<deleted>
+
+addr|foo1|4|1||$sync:log:11:1|0|1|false
+linkr|foo1|2|4||$sync:log:11:2
diff --git a/services/syncbase/vsync/testdata/remote-noconf-link-01.log.sync b/services/syncbase/vsync/testdata/remote-noconf-link-01.log.sync
new file mode 100644
index 0000000..1271e23
--- /dev/null
+++ b/services/syncbase/vsync/testdata/remote-noconf-link-01.log.sync
@@ -0,0 +1,5 @@
+# Update an object remotely, detect conflict, and bless the local version.
+# The format is: <cmd>|<objid>|<version>|<parent1>|<parent2>|<logrec>|<txid>|<txcount>|<deleted>
+
+addr|foo1|4|1||$sync:log:11:1|0|1|false
+linkr|foo1|4|3||$sync:log:11:2
diff --git a/services/syncbase/vsync/testdata/remote-noconf-link-02.log.sync b/services/syncbase/vsync/testdata/remote-noconf-link-02.log.sync
new file mode 100644
index 0000000..890d2bc
--- /dev/null
+++ b/services/syncbase/vsync/testdata/remote-noconf-link-02.log.sync
@@ -0,0 +1,6 @@
+# Update an object remotely, detect conflict, and bless the remote version, and continue updating.
+# The format is: <cmd>|<objid>|<version>|<parent1>|<parent2>|<logrec>|<txid>|<txcount>|<deleted>
+
+addr|foo1|4|1||$sync:log:11:1|0|1|false
+linkr|foo1|3|4||$sync:log:11:2
+addr|foo1|5|3||$sync:log:11:3|0|1|false
diff --git a/services/syncbase/vsync/testdata/remote-noconf-link-repeat.log.sync b/services/syncbase/vsync/testdata/remote-noconf-link-repeat.log.sync
new file mode 100644
index 0000000..31e85a9
--- /dev/null
+++ b/services/syncbase/vsync/testdata/remote-noconf-link-repeat.log.sync
@@ -0,0 +1,4 @@
+# Resolve the same conflict on two different devices.
+# The format is: <cmd>|<objid>|<version>|<parent1>|<parent2>|<logrec>|<txid>|<txcount>|<deleted>
+
+linkr|foo1|3|4||$sync:log:12:1
\ No newline at end of file
diff --git a/services/syncbase/vsync/types.vdl b/services/syncbase/vsync/types.vdl
new file mode 100644
index 0000000..f44468e
--- /dev/null
+++ b/services/syncbase/vsync/types.vdl
@@ -0,0 +1,37 @@
+// Copyright 2015 The Vanadium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package vsync
+
+import (
+ "v.io/syncbase/x/ref/services/syncbase/server/interfaces"
+)
+
+// Key prefixes for sync data structures. All these prefixes are prepended with
+// util.SyncPrefix.
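+// For example, assuming "$sync" as util.SyncPrefix and ":" as the key-part
+// separator, log records live under "$sync:log:..." and SyncGroup entries
+// under "$sync:sg:...".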
+const (
+ logPrefix = "log"
+ dbssPrefix = "dbss"
+ dagPrefix = "dag"
+ sgPrefix = "sg"
+)
+
+// syncData represents the persistent state of the sync module.
+type syncData struct {
+ Id uint64
+}
+
+// dbSyncState represents the persistent sync state of a Database.
+type dbSyncState struct {
+ Gen uint64 // local generation number incremented on every local update.
+ CheckptGen uint64 // local generation number advertised to remote peers (used by the responder).
+ GenVec interfaces.GenVector // generation vector capturing the locally-known generations of remote peers.
+}
+
+// localLogRec represents the persistent local state of a log record. Metadata
+// is synced across peers, while pos is local-only.
+type localLogRec struct {
+ Metadata interfaces.LogRecMetadata
+ Pos uint64 // position in the Database log.
+}
diff --git a/services/syncbase/vsync/types.vdl.go b/services/syncbase/vsync/types.vdl.go
new file mode 100644
index 0000000..e9b1d7d
--- /dev/null
+++ b/services/syncbase/vsync/types.vdl.go
@@ -0,0 +1,64 @@
+// Copyright 2015 The Vanadium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// This file was auto-generated by the vanadium vdl tool.
+// Source: types.vdl
+
+package vsync
+
+import (
+ // VDL system imports
+ "v.io/v23/vdl"
+
+ // VDL user imports
+ "v.io/syncbase/x/ref/services/syncbase/server/interfaces"
+)
+
+// syncData represents the persistent state of the sync module.
+type syncData struct {
+ Id uint64
+}
+
+func (syncData) __VDLReflect(struct {
+ Name string `vdl:"v.io/syncbase/x/ref/services/syncbase/vsync.syncData"`
+}) {
+}
+
+// dbSyncState represents the persistent sync state of a Database.
+type dbSyncState struct {
+ Gen uint64 // local generation number incremented on every local update.
+ CheckptGen uint64 // local generation number advertised to remote peers (used by the responder).
+ GenVec interfaces.GenVector // generation vector capturing the locally-known generations of remote peers.
+}
+
+func (dbSyncState) __VDLReflect(struct {
+ Name string `vdl:"v.io/syncbase/x/ref/services/syncbase/vsync.dbSyncState"`
+}) {
+}
+
+// localLogRec represents the persistent local state of a log record. Metadata
+// is synced across peers, while pos is local-only.
+type localLogRec struct {
+ Metadata interfaces.LogRecMetadata
+ Pos uint64 // position in the Database log.
+}
+
+func (localLogRec) __VDLReflect(struct {
+ Name string `vdl:"v.io/syncbase/x/ref/services/syncbase/vsync.localLogRec"`
+}) {
+}
+
+func init() {
+ vdl.Register((*syncData)(nil))
+ vdl.Register((*dbSyncState)(nil))
+ vdl.Register((*localLogRec)(nil))
+}
+
+const logPrefix = "log"
+
+const dbssPrefix = "dbss"
+
+const dagPrefix = "dag"
+
+const sgPrefix = "sg"
diff --git a/services/syncbase/vsync/util.go b/services/syncbase/vsync/util.go
new file mode 100644
index 0000000..2f15cf8
--- /dev/null
+++ b/services/syncbase/vsync/util.go
@@ -0,0 +1,98 @@
+// Copyright 2015 The Vanadium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package vsync
+
+// Sync utility functions
+
+import (
+ "time"
+
+ "v.io/syncbase/x/ref/services/syncbase/server/util"
+ "v.io/syncbase/x/ref/services/syncbase/store"
+ "v.io/v23/context"
+ "v.io/v23/rpc"
+ "v.io/x/lib/vlog"
+)
+
+const (
+ nanoPerSec = int64(1000000000)
+)
+
+// forEachDatabaseStore iterates over all Databases in all Apps within the
+// service and invokes the callback function on each database. The callback
+// returns a "done" flag to make forEachDatabaseStore() stop the iteration
+// earlier; otherwise the function loops across all databases of all apps.
+func (s *syncService) forEachDatabaseStore(ctx *context.T, callback func(string, string, store.Store) bool) {
+ // Get the apps and iterate over them.
+ // TODO(rdaoud): use a "privileged call" parameter instead of nil (here and
+ // elsewhere).
+ appNames, err := s.sv.AppNames(ctx, nil)
+ if err != nil {
+ vlog.Errorf("sync: forEachDatabaseStore: cannot get all app names: %v", err)
+ return
+ }
+
+ for _, a := range appNames {
+ // For each app, get its databases and iterate over them.
+ app, err := s.sv.App(ctx, nil, a)
+ if err != nil {
+ vlog.Errorf("sync: forEachDatabaseStore: cannot get app %s: %v", a, err)
+ continue
+ }
+ dbNames, err := app.NoSQLDatabaseNames(ctx, nil)
+ if err != nil {
+ vlog.Errorf("sync: forEachDatabaseStore: cannot get all db names for app %s: %v", a, err)
+ continue
+ }
+
+ for _, d := range dbNames {
+ // For each database, get its Store and invoke the callback.
+ db, err := app.NoSQLDatabase(ctx, nil, d)
+ if err != nil {
+ vlog.Errorf("sync: forEachDatabaseStore: cannot get db %s for app %s: %v", d, a, err)
+ continue
+ }
+
+ if callback(a, d, db.St()) {
+ return // done, early exit
+ }
+ }
+ }
+}
+
+// getDbStore gets the store handle to the database.
+func (s *syncService) getDbStore(ctx *context.T, call rpc.ServerCall, appName, dbName string) (store.Store, error) {
+ app, err := s.sv.App(ctx, call, appName)
+ if err != nil {
+ return nil, err
+ }
+ db, err := app.NoSQLDatabase(ctx, call, dbName)
+ if err != nil {
+ return nil, err
+ }
+ return db.St(), nil
+}
+
+// unixNanoToTime converts a Unix timestamp in nanoseconds to a Time object.
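+// For example, unixNanoToTime(1500000000123456789) is equivalent to
+// time.Unix(1500000000, 123456789).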
+func unixNanoToTime(timestamp int64) time.Time {
+ if timestamp < 0 {
+ vlog.Fatalf("sync: unixNanoToTime: invalid timestamp %d", timestamp)
+ }
+ return time.Unix(timestamp/nanoPerSec, timestamp%nanoPerSec)
+}
+
+// extractAppKey extracts the app key from the key sent over the wire between
+// two Syncbases. The on-wire key starts with one of the store's reserved
+// prefixes for managed namespaces (e.g. $row, $perms). This function removes
+// that prefix and returns the application component of the key. This is
+// typically done before comparing keys with the SyncGroup prefixes, which are
+// defined by the application.
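+// For example (assuming ":" separators), the on-wire key "$row:foo1" maps to
+// the application key "foo1".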
+func extractAppKey(key string) string {
+ parts := util.SplitKeyParts(key)
+ if len(parts) < 2 {
+ vlog.Fatalf("sync: extractAppKey: invalid entry key %s", key)
+ }
+ return util.JoinKeyParts(parts[1:]...)
+}
diff --git a/services/syncbase/vsync/watcher.go b/services/syncbase/vsync/watcher.go
new file mode 100644
index 0000000..ae69dc6
--- /dev/null
+++ b/services/syncbase/vsync/watcher.go
@@ -0,0 +1,436 @@
+// Copyright 2015 The Vanadium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package vsync
+
+// Syncbase Watcher is a goroutine that listens to local Database updates from
+// applications and modifies sync metadata (e.g. DAG and local log records).
+// The coupling between Syncbase storage and sync is loose, via asynchronous
+// listening by the Watcher, to unblock the application operations as soon as
+// possible, and offload the sync metadata update to the Watcher. When the
+// application mutates objects in a Database, additional entries are written
+// to a log queue, persisted in the same Database. This queue is read by the
+// sync Watcher to learn of the changes.
+
+import (
+ "strings"
+ "time"
+
+ "v.io/syncbase/x/ref/services/syncbase/server/interfaces"
+ "v.io/syncbase/x/ref/services/syncbase/server/util"
+ "v.io/syncbase/x/ref/services/syncbase/server/watchable"
+ "v.io/syncbase/x/ref/services/syncbase/store"
+ "v.io/v23/context"
+ "v.io/v23/services/watch"
+ "v.io/v23/verror"
+ "v.io/x/lib/vlog"
+)
+
+var (
+ // watchPollInterval is the duration between consecutive watch polling
+ // events across all app databases. Every watch event loops across all
+ // app databases and fetches from each one at most one batch update
+ // (transaction) to process.
+ // TODO(rdaoud): add a channel between store and watch to get change
+ // notifications instead of using a polling solution.
+ watchPollInterval = 100 * time.Millisecond
+
+ // watchPrefixes is an in-memory cache of SyncGroup prefixes for each
+ // app database. It is filled at startup from persisted SyncGroup data
+ // and updated at runtime when SyncGroups are joined or left. It is
+ // not guarded by a mutex because only the watcher goroutine uses it
+ // beyond the startup phase (before any sync goroutines are started).
+ // The map keys are the appdb names (globally unique).
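+ // For example, after joining a SyncGroup with prefixes "foo" and "bar" in
+ // a database whose appdb name is "mockapp:mockdb", the entry would be
+ // watchPrefixes["mockapp:mockdb"] == sgPrefixes{"foo": 1, "bar": 1}.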
+ watchPrefixes = make(map[string]sgPrefixes)
+)
+
+// sgPrefixes tracks SyncGroup prefixes being synced in a database and their
+// counts.
+type sgPrefixes map[string]uint32
+
+// watchStore processes updates obtained by watching the store. This is the
+// sync watcher goroutine that learns about store updates asynchronously by
+// reading log records that track object mutation histories in each database.
+// For each batch mutation, the watcher updates the sync DAG and log records.
+// When an application makes a single non-transactional put, it is represented
+// as a batch of one log record. Thus the watcher only deals with batches.
+func (s *syncService) watchStore(ctx *context.T) {
+ defer s.pending.Done()
+
+ ticker := time.NewTicker(watchPollInterval)
+ defer ticker.Stop()
+
+ ctx, cancel := context.WithCancel(ctx)
+ defer cancel()
+
+ for {
+ select {
+ case <-s.closed:
+ vlog.VI(1).Info("sync: watchStore: channel closed, stop watching and exit")
+ return
+
+ case <-ticker.C:
+ s.processStoreUpdates(ctx)
+ }
+ }
+}
+
+// processStoreUpdates fetches updates from all databases and processes them.
+// To maintain fairness among databases, it processes one batch update from
+// each database, in a round-robin manner, until there are no further updates
+// from any database.
+func (s *syncService) processStoreUpdates(ctx *context.T) {
+ for {
+ total, active := 0, 0
+ s.forEachDatabaseStore(ctx, func(appName, dbName string, st store.Store) bool {
+ if s.processDatabase(ctx, appName, dbName, st) {
+ active++
+ }
+ total++
+ return false
+ })
+
+ vlog.VI(2).Infof("sync: processStoreUpdates: %d/%d databases had updates", active, total)
+ if active == 0 {
+ break
+ }
+ }
+}
+
+// processDatabase fetches from the given database at most one new batch update
+// (transaction) and processes it. The one-batch limit prevents one database
+// from starving others. A batch is stored as a contiguous set of log records
+// ending with one record having the "continued" flag set to false. The call
+// returns true if a new batch update was processed.
+func (s *syncService) processDatabase(ctx *context.T, appName, dbName string, st store.Store) bool {
+ s.thLock.Lock()
+ defer s.thLock.Unlock()
+
+ vlog.VI(2).Infof("sync: processDatabase: begin: %s, %s", appName, dbName)
+ defer vlog.VI(2).Infof("sync: processDatabase: end: %s, %s", appName, dbName)
+
+ resMark, err := getResMark(ctx, st)
+ if err != nil {
+ if verror.ErrorID(err) != verror.ErrNoExist.ID {
+ vlog.Errorf("sync: processDatabase: %s, %s: cannot get resMark: %v", appName, dbName, err)
+ return false
+ }
+ resMark = watchable.MakeResumeMarker(0)
+ }
+
+ // Initialize Database sync state if needed.
+ s.initDbSyncStateInMem(ctx, appName, dbName)
+
+ // Get a batch of watch log entries, if any, after this resume marker.
+ logs, nextResmark, err := watchable.ReadBatchFromLog(st, resMark)
+ if err != nil {
+ vlog.Fatalf("sync: processDatabase: %s, %s: cannot get watch log batch: %v", appName, dbName, verror.DebugString(err))
+ }
+ if logs != nil {
+ s.processWatchLogBatch(ctx, appName, dbName, st, logs, nextResmark)
+ return true
+ }
+ return false
+}
+
+// processWatchLogBatch parses the given batch of watch log records, updates the
+// watchable SyncGroup prefixes, uses the prefixes to filter the batch to the
+// subset of syncable records, and transactionally applies these updates to the
+// sync metadata (DAG & log records) and updates the watch resume marker.
+func (s *syncService) processWatchLogBatch(ctx *context.T, appName, dbName string, st store.Store, logs []*watchable.LogEntry, resMark watch.ResumeMarker) {
+ if len(logs) == 0 {
+ return
+ }
+
+ // If the first log entry is a SyncGroup prefix operation, then this is
+ // a SyncGroup snapshot and not an application batch. In this case,
+ // handle the SyncGroup prefix changes by updating the watch prefixes,
+ // exclude the first entry from the batch, and tell the batch processing
+ // below not to assign it a batch ID in the DAG.
+ appBatch := true
+ if processSyncGroupLogRecord(appName, dbName, logs[0]) {
+ appBatch = false
+ logs = logs[1:]
+ }
+
+ // Filter out the log entries for keys not part of any SyncGroup. Also
+ // ignore log entries made by sync itself (echo suppression).
+ totalCount := uint64(len(logs))
+ appdb := appDbName(appName, dbName)
+
+ i := 0
+ for _, entry := range logs {
+ if !entry.FromSync && syncable(appdb, entry) {
+ logs[i] = entry
+ i++
+ }
+ }
+ logs = logs[:i]
+ vlog.VI(3).Infof("sync: processWatchLogBatch: %s, %s: sg snap %t, syncable %d, total %d",
+ appName, dbName, !appBatch, len(logs), totalCount)
+
+ // Transactional processing of the batch: convert these syncable log
+ // records to a batch of sync log records, filling their parent versions
+ // from the DAG head nodes.
+ err := store.RunInTransaction(st, func(tx store.Transaction) error {
+ batch := make([]*localLogRec, 0, len(logs))
+ for _, entry := range logs {
+ if rec, err := convertLogRecord(ctx, tx, entry); err != nil {
+ return err
+ } else if rec != nil {
+ batch = append(batch, rec)
+ }
+ }
+
+ if err := s.processBatch(ctx, appName, dbName, batch, appBatch, totalCount, tx); err != nil {
+ return err
+ }
+ return setResMark(ctx, tx, resMark)
+ })
+
+ if err != nil {
+ // TODO(rdaoud): don't crash, quarantine this app database.
+ vlog.Fatalf("sync: processWatchLogBatch:: %s, %s: watcher cannot process batch: %v", appName, dbName, err)
+ }
+}
+
+// processBatch applies a single batch of changes (object mutations) received
+// from watching a particular Database.
+func (s *syncService) processBatch(ctx *context.T, appName, dbName string, batch []*localLogRec, appBatch bool, totalCount uint64, tx store.Transaction) error {
+ count := uint64(len(batch))
+ if count == 0 {
+ return nil
+ }
+
+ // If an application batch has more than one mutation, start a batch for it.
+ batchId := NoBatchId
+ if appBatch && totalCount > 1 {
+ batchId = s.startBatch(ctx, tx, batchId)
+ if batchId == NoBatchId {
+ return verror.New(verror.ErrInternal, ctx, "failed to generate batch ID")
+ }
+ }
+
+ gen, pos := s.reserveGenAndPosInDbLog(ctx, appName, dbName, count)
+
+ vlog.VI(3).Infof("sync: processBatch: %s, %s: len %d, total %d, btid %x, gen %d, pos %d",
+ appName, dbName, count, totalCount, batchId, gen, pos)
+
+ for _, rec := range batch {
+ // Update the log record. Portions of the record Metadata must
+ // already be filled.
+ rec.Metadata.Id = s.id
+ rec.Metadata.Gen = gen
+ rec.Metadata.RecType = interfaces.NodeRec
+
+ rec.Metadata.BatchId = batchId
+ rec.Metadata.BatchCount = totalCount
+
+ rec.Pos = pos
+
+ gen++
+ pos++
+
+ if err := s.processLocalLogRec(ctx, tx, rec); err != nil {
+ return verror.New(verror.ErrInternal, ctx, err)
+ }
+ }
+
+ // End the batch if any.
+ if batchId != NoBatchId {
+ if err := s.endBatch(ctx, tx, batchId, totalCount); err != nil {
+ return err
+ }
+ }
+
+ return nil
+}
+
+// processLocalLogRec processes a local log record by adding it to the Database
+// and suitably updating the DAG metadata.
+func (s *syncService) processLocalLogRec(ctx *context.T, tx store.Transaction, rec *localLogRec) error {
+ // Insert the new log record into the log.
+ if err := putLogRec(ctx, tx, rec); err != nil {
+ return err
+ }
+
+ m := rec.Metadata
+ logKey := logRecKey(m.Id, m.Gen)
+
+ // Insert the new log record into dag.
+ if err := s.addNode(ctx, tx, m.ObjId, m.CurVers, logKey, m.Delete, m.Parents, m.BatchId, nil); err != nil {
+ return err
+ }
+
+ // Move the head.
+ return moveHead(ctx, tx, m.ObjId, m.CurVers)
+}
+
+// incrWatchPrefix increments (or sets) a SyncGroup prefix for an app database
+// in the watch prefix cache.
+func incrWatchPrefix(appName, dbName, prefix string) {
+ name := appDbName(appName, dbName)
+ if pfxs := watchPrefixes[name]; pfxs != nil {
+ pfxs[prefix]++ // a missing prefix is auto-initialized to 0, so this sets it to 1
+ } else {
+ watchPrefixes[name] = sgPrefixes{prefix: 1}
+ }
+}
+
+// decrWatchPrefix decrements (or unsets) a SyncGroup prefix for an app database
+// in the watch prefix cache.
+func decrWatchPrefix(appName, dbName, prefix string) {
+ name := appDbName(appName, dbName)
+ if pfxs := watchPrefixes[name]; pfxs != nil {
+ if pfxs[prefix] > 1 {
+ pfxs[prefix]--
+ } else if len(pfxs) > 1 {
+ delete(pfxs, prefix)
+ } else {
+ delete(watchPrefixes, name)
+ }
+ }
+}
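+
+// For illustration of the decrement cases above (hypothetical state): given
+//
+//   watchPrefixes["a1:d1"] = sgPrefixes{"foo": 2, "bar": 1}
+//
+// decrementing "foo" leaves {"foo": 1, "bar": 1}, decrementing "bar" then
+// removes the "bar" entry, and decrementing "foo" once more drops the whole
+// "a1:d1" entry from the cache.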
+
+// convertLogRecord converts a store log entry to a sync log record. It fills
+// the previous object version (parent) by fetching its current DAG head if it
+// has one. For a delete, it generates a new object version because the store
+// does not version a deletion.
+// TODO(rdaoud): change Syncbase to store and version a deleted object to
+// simplify the store-to-sync interaction. A deleted key would still have a
+// version and its value entry would encode the "deleted" flag, either in the
+// key or probably in a value wrapper that would contain other metadata.
+func convertLogRecord(ctx *context.T, tx store.Transaction, logEnt *watchable.LogEntry) (*localLogRec, error) {
+ var rec *localLogRec
+ timestamp := logEnt.CommitTimestamp
+
+ switch op := logEnt.Op.(type) {
+ case watchable.OpGet:
+ // TODO(rdaoud): save read-set in sync.
+
+ case watchable.OpScan:
+ // TODO(rdaoud): save scan-set in sync.
+
+ case watchable.OpPut:
+ rec = newLocalLogRec(ctx, tx, op.Value.Key, op.Value.Version, false, timestamp)
+
+ case watchable.OpSyncSnapshot:
+ // Create records for object versions not already in the DAG.
+ // Duplicates can appear here in cases of nested SyncGroups or
+ // peer SyncGroups.
+ if ok, err := hasNode(ctx, tx, string(op.Value.Key), string(op.Value.Version)); err != nil {
+ return nil, err
+ } else if !ok {
+ rec = newLocalLogRec(ctx, tx, op.Value.Key, op.Value.Version, false, timestamp)
+ }
+
+ case watchable.OpDelete:
+ rec = newLocalLogRec(ctx, tx, op.Value.Key, watchable.NewVersion(), true, timestamp)
+
+ case watchable.OpSyncGroup:
+ vlog.Errorf("sync: convertLogRecord: watch LogEntry for SyncGroup should not be converted: %v", logEnt)
+ return nil, verror.New(verror.ErrInternal, ctx, "cannot convert a watch log OpSyncGroup entry")
+
+ default:
+ vlog.Errorf("sync: convertLogRecord: invalid watch LogEntry: %v", logEnt)
+ return nil, verror.New(verror.ErrInternal, ctx, "cannot convert unknown watch log entry")
+ }
+
+ return rec, nil
+}
+
+// newLocalLogRec creates a local sync log record given its information: key,
+// version, deletion flag, and timestamp. It retrieves the current DAG head
+// for the key (if one exists) to use as its parent (previous) version.
+func newLocalLogRec(ctx *context.T, tx store.Transaction, key, version []byte, deleted bool, timestamp int64) *localLogRec {
+ rec := localLogRec{}
+ oid := string(key)
+
+ rec.Metadata.ObjId = oid
+ rec.Metadata.CurVers = string(version)
+ rec.Metadata.Delete = deleted
+ if head, err := getHead(ctx, tx, oid); err == nil {
+ rec.Metadata.Parents = []string{head}
+ } else if deleted || (verror.ErrorID(err) != verror.ErrNoExist.ID) {
+ vlog.Fatalf("sync: newLocalLogRec: cannot getHead to convert log record for %s: %v", oid, err)
+ }
+ rec.Metadata.UpdTime = unixNanoToTime(timestamp)
+ return &rec
+}
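+
+// For illustration, matching the DAG checks in watcher_test.go: if object
+// "foo" currently has DAG head version "333" and a new put writes version
+// "1", the resulting record has CurVers "1" and Parents ["333"]; a put on an
+// object with no DAG head yet gets an empty parent list.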
+
+// processSyncGroupLogRecord checks if the log entry is a SyncGroup update and,
+// if it is, updates the watch prefixes for the app database and returns true.
+// Otherwise it returns false with no other changes.
+func processSyncGroupLogRecord(appName, dbName string, logEnt *watchable.LogEntry) bool {
+ switch op := logEnt.Op.(type) {
+ case watchable.OpSyncGroup:
+ remove := op.Value.Remove
+ for _, prefix := range op.Value.Prefixes {
+ if remove {
+ decrWatchPrefix(appName, dbName, prefix)
+ } else {
+ incrWatchPrefix(appName, dbName, prefix)
+ }
+ }
+ vlog.VI(3).Infof("sync: processSyncGroupLogRecord: %s, %s: remove %t, prefixes: %q",
+ appName, dbName, remove, op.Value.Prefixes)
+ return true
+
+ default:
+ return false
+ }
+}
+
+// syncable returns true if the given log entry falls within the scope of a
+// SyncGroup prefix for the given app database, and thus should be synced.
+// It is used to pre-filter the batch of log entries before sync processing.
+func syncable(appdb string, logEnt *watchable.LogEntry) bool {
+ var key string
+ switch op := logEnt.Op.(type) {
+ case watchable.OpPut:
+ key = string(op.Value.Key)
+ case watchable.OpDelete:
+ key = string(op.Value.Key)
+ case watchable.OpSyncSnapshot:
+ key = string(op.Value.Key)
+ default:
+ return false
+ }
+
+ // The key starts with one of the store's reserved prefixes for managed
+ // namespaces (e.g. $row or $perm). Remove that prefix before comparing
+ // it with the SyncGroup prefixes, which are defined by the application.
+ parts := util.SplitKeyParts(key)
+ if len(parts) < 2 {
+ vlog.Fatalf("sync: syncable: %s: invalid entry key %s: %v", appdb, key, logEnt)
+ }
+ key = util.JoinKeyParts(parts[1:]...)
+
+ for prefix := range watchPrefixes[appdb] {
+ if strings.HasPrefix(key, prefix) {
+ return true
+ }
+ }
+ return false
+}
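+
+// For illustration (hypothetical key, assuming ":" separates key parts): a
+// store key such as "$row:foo123" is split into ["$row", "foo123"], the
+// reserved "$row" part is dropped, and "foo123" is matched against the
+// SyncGroup prefixes, so a prefix of "foo" makes the entry syncable.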
+
+// resMarkKey returns the key used to access the watcher resume marker.
+func resMarkKey() string {
+ return util.JoinKeyParts(util.SyncPrefix, "w", "rm")
+}
+
+// setResMark stores the watcher resume marker for a database.
+func setResMark(ctx *context.T, tx store.Transaction, resMark watch.ResumeMarker) error {
+ return util.Put(ctx, tx, resMarkKey(), resMark)
+}
+
+// getResMark retrieves the watcher resume marker for a database.
+func getResMark(ctx *context.T, st store.StoreReader) (watch.ResumeMarker, error) {
+ var resMark watch.ResumeMarker
+ key := resMarkKey()
+ if err := util.Get(ctx, st, key, &resMark); err != nil {
+ return nil, err
+ }
+ return resMark, nil
+}
diff --git a/services/syncbase/vsync/watcher_test.go b/services/syncbase/vsync/watcher_test.go
new file mode 100644
index 0000000..be77b94
--- /dev/null
+++ b/services/syncbase/vsync/watcher_test.go
@@ -0,0 +1,317 @@
+// Copyright 2015 The Vanadium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package vsync
+
+// Tests for the sync watcher in Syncbase.
+
+import (
+ "bytes"
+ "reflect"
+ "testing"
+ "time"
+
+ "v.io/syncbase/x/ref/services/syncbase/server/util"
+ "v.io/syncbase/x/ref/services/syncbase/server/watchable"
+ "v.io/v23/vom"
+ _ "v.io/x/ref/runtime/factories/generic"
+)
+
+// TestSetResmark tests setting and getting a resume marker.
+func TestSetResmark(t *testing.T) {
+ svc := createService(t)
+ defer destroyService(t, svc)
+ st := svc.St()
+
+ resmark, err := getResMark(nil, st)
+ if err == nil || resmark != nil {
+ t.Errorf("found non-existent resume marker: %s, %v", resmark, err)
+ }
+
+ wantResmark := watchable.MakeResumeMarker(1234567890)
+ tx := st.NewTransaction()
+ if err := setResMark(nil, tx, wantResmark); err != nil {
+ t.Errorf("cannot set resume marker: %v", err)
+ }
+ if err := tx.Commit(); err != nil {
+ t.Fatalf("cannot commit transaction: %v", err)
+ }
+
+ resmark, err = getResMark(nil, st)
+ if err != nil {
+ t.Errorf("cannot get new resume marker: %v", err)
+ }
+ if !bytes.Equal(resmark, wantResmark) {
+ t.Errorf("invalid new resume: got %s instead of %s", resmark, wantResmark)
+ }
+}
+
+// TestWatchPrefixes tests setting and updating the watch prefixes map.
+func TestWatchPrefixes(t *testing.T) {
+ watchPollInterval = time.Millisecond
+ svc := createService(t)
+ defer destroyService(t, svc)
+
+ if len(watchPrefixes) != 0 {
+ t.Errorf("watch prefixes not empty: %v", watchPrefixes)
+ }
+
+ watchPrefixOps := []struct {
+ appName, dbName, key string
+ incr bool
+ }{
+ {"app1", "db1", "foo", true},
+ {"app1", "db1", "bar", true},
+ {"app2", "db1", "xyz", true},
+ {"app3", "db1", "haha", true},
+ {"app1", "db1", "foo", true},
+ {"app1", "db1", "foo", true},
+ {"app1", "db1", "foo", false},
+ {"app2", "db1", "ttt", true},
+ {"app2", "db1", "ttt", true},
+ {"app2", "db1", "ttt", false},
+ {"app2", "db1", "ttt", false},
+ {"app2", "db2", "qwerty", true},
+ {"app3", "db1", "haha", true},
+ {"app2", "db2", "qwerty", false},
+ {"app3", "db1", "haha", false},
+ }
+
+ for _, op := range watchPrefixOps {
+ if op.incr {
+ incrWatchPrefix(op.appName, op.dbName, op.key)
+ } else {
+ decrWatchPrefix(op.appName, op.dbName, op.key)
+ }
+ }
+
+ expPrefixes := map[string]sgPrefixes{
+ "app1:db1": sgPrefixes{"foo": 2, "bar": 1},
+ "app2:db1": sgPrefixes{"xyz": 1},
+ "app3:db1": sgPrefixes{"haha": 1},
+ }
+ if !reflect.DeepEqual(watchPrefixes, expPrefixes) {
+ t.Errorf("invalid watch prefixes: got %v instead of %v", watchPrefixes, expPrefixes)
+ }
+
+ checkSyncableTests := []struct {
+ appName, dbName, key string
+ result bool
+ }{
+ {"app1", "db1", "foo", true},
+ {"app1", "db1", "foobar", true},
+ {"app1", "db1", "bar", true},
+ {"app1", "db1", "bar123", true},
+ {"app1", "db1", "f", false},
+ {"app1", "db1", "ba", false},
+ {"app1", "db1", "xyz", false},
+ {"app1", "db555", "foo", false},
+ {"app555", "db1", "foo", false},
+ {"app2", "db1", "xyz123", true},
+ {"app2", "db1", "ttt123", false},
+ {"app2", "db2", "qwerty", false},
+ {"app3", "db1", "hahahoho", true},
+ {"app3", "db1", "hoho", false},
+ {"app3", "db1", "h", false},
+ }
+
+ for _, test := range checkSyncableTests {
+ log := &watchable.LogEntry{
+ Op: watchable.OpPut{
+ Value: watchable.PutOp{Key: []byte(makeRowKey(test.key))},
+ },
+ }
+ res := syncable(appDbName(test.appName, test.dbName), log)
+ if res != test.result {
+ t.Errorf("checkSyncable: invalid output: %s, %s, %s: got %t instead of %t",
+ test.appName, test.dbName, test.key, res, test.result)
+ }
+ }
+}
+
+// newLog creates a Put or Delete watch log entry.
+func newLog(key, version string, delete bool) *watchable.LogEntry {
+ k, v := []byte(key), []byte(version)
+ log := &watchable.LogEntry{}
+ if delete {
+ log.Op = watchable.OpDelete{Value: watchable.DeleteOp{Key: k}}
+ } else {
+ log.Op = watchable.OpPut{Value: watchable.PutOp{Key: k, Version: v}}
+ }
+ return log
+}
+
+// newSGLog creates a SyncGroup watch log entry.
+func newSGLog(prefixes []string, remove bool) *watchable.LogEntry {
+ return &watchable.LogEntry{
+ Op: watchable.OpSyncGroup{
+ Value: watchable.SyncGroupOp{Prefixes: prefixes, Remove: remove},
+ },
+ }
+}
+
+// TestProcessWatchLogBatch tests the processing of a batch of log records.
+func TestProcessWatchLogBatch(t *testing.T) {
+ svc := createService(t)
+ defer destroyService(t, svc)
+ st := svc.St()
+ s := svc.sync
+
+ app, db := "mockapp", "mockdb"
+ fooKey := makeRowKey("foo")
+ barKey := makeRowKey("bar")
+ fooxyzKey := makeRowKey("fooxyz")
+
+ // An empty batch of logs does not fail.
+ s.processWatchLogBatch(nil, app, db, st, nil, nil)
+
+ // Non-syncable logs.
+ batch := []*watchable.LogEntry{
+ newLog(fooKey, "123", false),
+ newLog(barKey, "555", false),
+ }
+
+ resmark := watchable.MakeResumeMarker(1234)
+ s.processWatchLogBatch(nil, app, db, st, batch, resmark)
+
+ if res, err := getResMark(nil, st); err != nil || !bytes.Equal(res, resmark) {
+ t.Errorf("invalid resume marker after batch processing: got %s instead of %s (err: %v)", res, resmark, err)
+ }
+ if ok, err := hasNode(nil, st, fooKey, "123"); err != nil || ok {
+ t.Error("hasNode() found DAG entry for non-syncable log on foo")
+ }
+ if ok, err := hasNode(nil, st, barKey, "555"); err != nil || ok {
+ t.Error("hasNode() found DAG entry for non-syncable log on bar")
+ }
+
+ // Partially syncable logs.
+ batch = []*watchable.LogEntry{
+ newSGLog([]string{"f", "x"}, false),
+ newLog(fooKey, "333", false),
+ newLog(fooxyzKey, "444", false),
+ newLog(barKey, "222", false),
+ }
+
+ resmark = watchable.MakeResumeMarker(3456)
+ s.processWatchLogBatch(nil, app, db, st, batch, resmark)
+
+ if res, err := getResMark(nil, st); err != nil || !bytes.Equal(res, resmark) {
+ t.Errorf("invalid resume marker after batch processing: got %s instead of %s (err: %v)", res, resmark, err)
+ }
+ if head, err := getHead(nil, st, fooKey); err != nil || head != "333" {
+ t.Errorf("wrong head for foo: got %s, want 333 (err: %v)", head, err)
+ }
+ node, err := getNode(nil, st, fooKey, "333")
+ if err != nil {
+ t.Errorf("getNode() did not find foo: %v", err)
+ }
+ if node.Level != 0 || node.Parents != nil || node.Logrec == "" || node.BatchId != NoBatchId {
+ t.Errorf("invalid DAG node for foo: %v", node)
+ }
+ node2, err := getNode(nil, st, fooxyzKey, "444")
+ if err != nil {
+ t.Errorf("getNode() did not find fooxyz: %v", err)
+ }
+ if node2.Level != 0 || node2.Parents != nil || node2.Logrec == "" || node2.BatchId != NoBatchId {
+ t.Errorf("invalid DAG node for fooxyz: %v", node2)
+ }
+ if ok, err := hasNode(nil, st, barKey, "222"); err != nil || ok {
+ t.Error("hasNode() found DAG entry for non-syncable log on bar")
+ }
+
+ // More partially syncable logs updating existing ones.
+ batch = []*watchable.LogEntry{
+ newLog(fooKey, "1", false),
+ newLog(fooxyzKey, "", true),
+ newLog(barKey, "7", false),
+ }
+
+ resmark = watchable.MakeResumeMarker(7890)
+ s.processWatchLogBatch(nil, app, db, st, batch, resmark)
+
+ if res, err := getResMark(nil, st); err != nil || !bytes.Equal(res, resmark) {
+ t.Errorf("invalid resume marker after batch processing: got %s instead of %s (err: %v)", res, resmark, err)
+ }
+ if head, err := getHead(nil, st, fooKey); err != nil || head != "1" {
+ t.Errorf("wrong head for foo: got %s, want 1 (err: %v)", head, err)
+ }
+ node, err = getNode(nil, st, fooKey, "1")
+ if err != nil {
+ t.Errorf("getNode() did not find foo: %v", err)
+ }
+ expParents := []string{"333"}
+ if node.Level != 1 || !reflect.DeepEqual(node.Parents, expParents) ||
+ node.Logrec == "" || node.BatchId == NoBatchId {
+ t.Errorf("invalid DAG node for foo: %v", node)
+ }
+ head2, err := getHead(nil, st, fooxyzKey)
+ if err != nil {
+ t.Errorf("getHead() did not find fooxyz: %v", err)
+ }
+ node2, err = getNode(nil, st, fooxyzKey, head2)
+ if err != nil {
+ t.Errorf("getNode() did not find fooxyz: %v", err)
+ }
+ expParents = []string{"444"}
+ if node2.Level != 1 || !reflect.DeepEqual(node2.Parents, expParents) ||
+ node2.Logrec == "" || node2.BatchId == NoBatchId {
+ t.Errorf("invalid DAG node for fooxyz: %v", node2)
+ }
+ if ok, err := hasNode(nil, st, barKey, "7"); err != nil || ok {
+ t.Error("hasNode() found DAG entry for non-syncable log on bar")
+ }
+
+ // Back to non-syncable logs (remove "f" prefix).
+ batch = []*watchable.LogEntry{
+ newSGLog([]string{"f"}, true),
+ newLog(fooKey, "99", false),
+ newLog(fooxyzKey, "888", true),
+ newLog(barKey, "007", false),
+ }
+
+ resmark = watchable.MakeResumeMarker(20212223)
+ s.processWatchLogBatch(nil, app, db, st, batch, resmark)
+
+ if res, err := getResMark(nil, st); err != nil || !bytes.Equal(res, resmark) {
+ t.Errorf("invalid resume marker after batch processing: got %s instead of %s (err: %v)", res, resmark, err)
+ }
+ // No changes to "foo": its head should still be "1" from the previous batch.
+ if head, err := getHead(nil, st, fooKey); err != nil || head != "1" {
+ t.Errorf("wrong head for foo: got %s, want 1 (err: %v)", head, err)
+ }
+ if node, err := getNode(nil, st, fooKey, "99"); err == nil {
+ t.Errorf("getNode() should not have found foo @ 99: %v", node)
+ }
+ if node, err := getNode(nil, st, fooxyzKey, "888"); err == nil {
+ t.Errorf("getNode() should not have found fooxyz @ 888: %v", node)
+ }
+ if ok, err := hasNode(nil, st, barKey, "007"); err != nil || ok {
+ t.Error("hasNode() found DAG entry for non-syncable log on bar")
+ }
+
+ // Scan the batch records and verify that only 1 DAG batch is stored,
+ // with a total count of 3 and a map of 2 syncable entries. Of the four
+ // batches processed above, the 1st has no syncable keys at all, the 2nd,
+ // while containing syncable keys, is a SyncGroup snapshot that does not
+ // get assigned a batch ID, the 3rd is an application batch with 3 keys
+ // of which 2 are syncable (and gets the batch ID), and the 4th is again
+ // a SyncGroup snapshot.
+ count := 0
+ start, limit := util.ScanPrefixArgs(util.JoinKeyParts(util.SyncPrefix, "dag", "b"), "")
+ stream := st.Scan(start, limit)
+ for stream.Advance() {
+ count++
+ key := string(stream.Key(nil))
+ var info batchInfo
+ if err := vom.Decode(stream.Value(nil), &info); err != nil {
+ t.Errorf("cannot decode batch %s: %v", key, err)
+ }
+ if info.Count != 3 {
+ t.Errorf("wrong total count in batch %s: got %d instead of 3", key, info.Count)
+ }
+ if n := len(info.Objects); n != 2 {
+ t.Errorf("wrong object count in batch %s: got %d instead of 2", key, n)
+ }
+ }
+ if count != 1 {
+ t.Errorf("wrong count of batches: got %d instead of 2", count)
+ }
+}