services/mounttable/btmtd: mounttable on bigtable This is a proof of concept for implementing a mounttable server on top of Cloud Bigtable. Everything works except: - Accounting and quotas - Exported stats Most of the original mounttabled tests pass with this new implementation. See README.md for more details. Change-Id: I93637a47f2ed7dec474f832d88d29eb1431293c5

commit: e45e6291a5d7f32ce8150fe747a2e0e3c2276ffb [log] [tgz]
author: Robin Thellend <rthellend@google.com> Thu Jun 09 20:40:46 2016 -0700
committer: Robin Thellend <rthellend@google.com> Thu Jun 09 20:40:46 2016 -0700
tree: 2c9a0787c998055789d9cab285cd7e0cc0765848
parent: 0c9beb763c1d15d7991b521c754930c81ce8b271 [diff]
diff --git a/cmd/mounttable/doc.go b/cmd/mounttable/doc.go
index 0bd4ca8..935802c 100644
--- a/cmd/mounttable/doc.go
+++ b/cmd/mounttable/doc.go

@@ -87,7 +87,7 @@
 Mounts a server <name> onto a mount table
 
 Usage:
-   mounttable mount [flags] <mount name> <name> <ttl> [M|R]
+   mounttable mount [flags] <mount name> <name> <ttl> [L|M|R]
 
 <mount name> is a mount name on a mount table.
 
@@ -96,8 +96,8 @@
 <ttl> is the TTL of the new entry. It is a decimal number followed by a unit
 suffix (s, m, h). A value of 0s represents an infinite duration.
 
-[M|R] are mount options. M indicates that <name> is a mounttable. R indicates
-that existing entries should be removed.
+[L|M|R] are mount options. L indicates that <name> is a leaf. M indicates that
+<name> is a mounttable. R indicates that existing entries should be removed.
 
 Mounttable unmount
 

diff --git a/cmd/mounttable/impl.go b/cmd/mounttable/impl.go
index 25a0c80..cf1e781 100644
--- a/cmd/mounttable/impl.go
+++ b/cmd/mounttable/impl.go

@@ -89,7 +89,7 @@
 	Name:     "mount",
 	Short:    "Mounts a server <name> onto a mount table",
 	Long:     "Mounts a server <name> onto a mount table",
-	ArgsName: "<mount name> <name> <ttl> [M|R]",
+	ArgsName: "<mount name> <name> <ttl> [L|M|R]",
 	ArgsLong: `
 <mount name> is a mount name on a mount table.
 
@@ -98,8 +98,8 @@
 <ttl> is the TTL of the new entry. It is a decimal number followed by a unit
 suffix (s, m, h). A value of 0s represents an infinite duration.
 
-[M|R] are mount options. M indicates that <name> is a mounttable. R indicates
-that existing entries should be removed.
+[L|M|R] are mount options. L indicates that <name> is a leaf. M indicates that
+<name> is a mounttable. R indicates that existing entries should be removed.
 `,
 }
 
@@ -123,6 +123,8 @@
 	if got >= 4 {
 		for _, c := range args[3] {
 			switch c {
+			case 'L':
+				flags |= naming.MountFlag(naming.Leaf)
 			case 'M':
 				flags |= naming.MountFlag(naming.MT)
 			case 'R':

diff --git a/services/mounttable/btmtd/README.md b/services/mounttable/btmtd/README.md
new file mode 100644
index 0000000..d08ce9a
--- /dev/null
+++ b/services/mounttable/btmtd/README.md

@@ -0,0 +1,110 @@
+# Mounttable on Cloud Bigtable
+
+This package and its sub-packages contain a [mounttable server] implementation
+that uses Google's [Cloud Bigtable] service for storage. It is fast and scalable
+to millions of nodes and, with enough replicas, millions of requests per second.
+
+## Schema
+
+Bigtable is not a relational database. Each table has only one key, and all
+operations are atomic only at the row level. There is no way to mutate multiple
+rows together atomically.
+
+See the [Overview of Cloud Bigtable] for more information.
+
+Our table has one row per node. The row key is a hash of the node name followed
+by the name itself. This spreads rows evenly across all tablet servers with no
+risk of name collision.
+
+The table has three column families:
+
+   * Metadata `m`: used to store information about the row:
+      * Version `v`: The version changes every time the row is mutated. It is
+        used to detect conflicts related to concurrent access.
+      * Timestamp `t`: The cell's timestamp is the node creation time.
+      * Sticky `s`: When this column is present, the node is not automatically
+        garbage collected.
+      * Permissions `p`: The [access.Permissions] of the node.
+   * Servers `s`: Each mounted server has its own column. The column name is the
+     server's address. The value contains the [mount flags]. The timestamp is
+     the mount deadline.
+   * Children `c`: Each child has its own column. The column name is the name of
+     the child, without the path. The timestamp is the child creation time.
+
+Example:
+
+| Key              | Version | Timestamp | Sticky | Permissions  | Mounted Server...           | Child...  |
+| ---              | ---     | ---       | ---    | ---          | ---                         | ---       |
+| 540f1a56/        | 54321   | (ts)      | 1      | {"Admin":... |                             | foo (ts1) |
+| 1234abcd/foo     | 123     | (ts1)     |        | {"Admin":... |                             | bar (ts2) |
+| 46d523e3/foo/bar | 5436    | (ts2)     |        | {"Admin":... | /example.com:123 (deadline) |           |
+
+## Mutations
+
+All operations use optimistic concurrency control. If a conflicting
+change happens during a mutation, the whole operation is restarted.
+
+  * Mutation on N
+    * Get Node N
+    * Check caller's permissions
+    * Apply mutation on N if node version hasn't changed
+
+If the node version changed, it means that another mutation was applied between
+the time when we retrieved the node and when we tried to apply our mutation.
+When that happens, we restart to whole operation, starting with retrieving the
+node again.
+
+### Mounting / Unmounting a server
+
+Mounting a server consists of adding a new cell to the node's row. The column
+family is `s`, the column name is the address of the server, the timestamp is
+the mount deadline, and the value contains the [mount flags].
+
+Unmounting a server consists of deleting the server's column.
+
+### Adding / Removing a child node
+
+Adding or removing a node requires two mutations: one on the parent, one on the
+child.
+
+When adding a node, we first add it to the parent, and then create a new row
+with the same timestamp.
+
+When deleting a node, we first delete the row, and then delete the child column
+on the parent.
+
+If the server process dies between the two mutations, it will leave the parent
+with a reference to a child row that doesn't exist. As a consequence, the parent
+will never be seen as "empty" and will not be automatically garbage collected.
+This will be corrected when:
+
+  * the child is re-created, or
+  * the parent is forcibly deleted.
+
+## Hot rows & caching
+
+Some nodes are expected to be accessed significantly more than others, e.g. the
+root node and its immediate children are traversed more often than nodes that
+are further down the tree. The bigtable rows associated with these nodes are
+"hotter" which can lead to traffic imbalance and poor performance.
+
+This problem is alleviated with a small cache in the bigtable client. High
+frequency or concurrent requests for the same rows can be bundled together to
+reduce both latency and bigtable load at the same time.
+
+## Opportunistic garbage collection
+
+A node can be garbage-collected when it has no children, no mounted servers, and
+hasn't been marked as _sticky_. A node is _sticky_ when someone explicitly
+called [SetPermissions] on it.
+
+The garbage collection happens opportunistically. When a mounttable server
+accessed a node that is eligible for garbage collection while processing a
+request, this node is removed before the ongoing request completes.
+
+[Cloud Bigtable]: https://cloud.google.com/bigtable/docs/
+[Overview of Cloud Bigtable]: https://cloud.google.com/bigtable/docs/api-overview
+[mounttable server]: https://github.com/vanadium/go.v23/blob/master/services/mounttable/service.vdl
+[SetPermissions]: https://github.com/vanadium/go.v23/blob/master/services/permissions/service.vdl#L58
+[access.Permissions]: https://github.com/vanadium/go.v23/blob/master/security/access/types.vdl#L130
+[mount flags]: https://github.com/vanadium/go.v23/blob/master/naming/types.vdl#L9

diff --git a/services/mounttable/btmtd/doc.go b/services/mounttable/btmtd/doc.go
new file mode 100644
index 0000000..3882d49
--- /dev/null
+++ b/services/mounttable/btmtd/doc.go

@@ -0,0 +1,94 @@
+// Copyright 2015 The Vanadium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// This file was auto-generated via go generate.
+// DO NOT UPDATE MANUALLY
+
+/*
+Runs the mounttable service.
+
+Usage:
+   btmtd [flags]
+   btmtd [flags] <command>
+
+The btmtd commands are:
+   setup       Creates and sets up the table
+   destroy     Destroy the table
+   dump        Dump the table
+   help        Display help for commands or topics
+
+The btmtd flags are:
+ -cluster=
+   The Cloud Bigtable cluster name
+ -in-memory-test=false
+   If true, use an in-memory bigtable server (for testing only)
+ -key-file=
+   The file that contains the Google Cloud JSON credentials to use
+ -permissions-file=
+   The file that contains the initial node permissions.
+ -project=
+   The Google Cloud project of the Cloud Bigtable cluster
+ -table=mounttable
+   The name of the table to use
+ -zone=
+   The Google Cloud zone of the Cloud Bigtable cluster
+
+The global flags are:
+ -alsologtostderr=true
+   log to standard error as well as files
+ -log_backtrace_at=:0
+   when logging hits line file:N, emit a stack trace
+ -log_dir=
+   if non-empty, write log files to this directory
+ -logtostderr=false
+   log to standard error instead of files
+ -max_stack_buf_size=4292608
+   max size in bytes of the buffer to use for logging stack traces
+ -metadata=<just specify -metadata to activate>
+   Displays metadata for the program and exits.
+ -stderrthreshold=2
+   logs at or above this threshold go to stderr
+ -time=false
+   Dump timing information to stderr before exiting the program.
+ -v=0
+   log level for V logs
+ -v23.credentials=
+   directory to use for storing security credentials
+ -v23.i18n-catalogue=
+   18n catalogue files to load, comma separated
+ -v23.namespace.root=[/(dev.v.io:r:vprod:service:mounttabled)@ns.dev.v.io:8101]
+   local namespace root; can be repeated to provided multiple roots
+ -v23.permissions.file=map[]
+   specify a perms file as <name>:<permsfile>
+ -v23.permissions.literal=
+   explicitly specify the runtime perms as a JSON-encoded access.Permissions.
+   Overrides all --v23.permissions.file flags.
+ -v23.proxy=
+   object name of proxy service to use to export services across network
+   boundaries
+ -v23.tcp.address=
+   address to listen on
+ -v23.tcp.protocol=wsh
+   protocol to listen with
+ -v23.vtrace.cache-size=1024
+   The number of vtrace traces to store in memory.
+ -v23.vtrace.collect-regexp=
+   Spans and annotations that match this regular expression will trigger trace
+   collection.
+ -v23.vtrace.dump-on-shutdown=true
+   If true, dump all stored traces on runtime shutdown.
+ -v23.vtrace.sample-rate=0
+   Rate (from 0.0 to 1.0) to sample vtrace traces.
+ -v23.vtrace.v=0
+   The verbosity level of the log messages to be captured in traces
+ -vmodule=
+   comma-separated list of globpattern=N settings for filename-filtered logging
+   (without the .go suffix).  E.g. foo/bar/baz.go is matched by patterns baz or
+   *az or b* but not by bar/baz or baz.go or az or b.*
+ -vpath=
+   comma-separated list of regexppattern=N settings for file pathname-filtered
+   logging (without the .go suffix).  E.g. foo/bar/baz.go is matched by patterns
+   foo/bar/baz or fo.*az or oo/ba or b.z but not by foo/bar/baz.go or fo*az
+*/
+package main

diff --git a/services/mounttable/btmtd/internal/bt.go b/services/mounttable/btmtd/internal/bt.go
new file mode 100644
index 0000000..140a7a1
--- /dev/null
+++ b/services/mounttable/btmtd/internal/bt.go

@@ -0,0 +1,298 @@
+// Copyright 2016 The Vanadium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package internal
+
+import (
+	"encoding/json"
+	"fmt"
+	"io/ioutil"
+	"math/rand"
+	"strconv"
+	"strings"
+	"time"
+
+	netcontext "golang.org/x/net/context"
+	"golang.org/x/oauth2"
+	"golang.org/x/oauth2/google"
+	"google.golang.org/cloud"
+	"google.golang.org/cloud/bigtable"
+	"google.golang.org/cloud/bigtable/bttest"
+	"google.golang.org/grpc"
+	"google.golang.org/grpc/codes"
+
+	"v.io/v23/context"
+	"v.io/v23/security"
+	"v.io/v23/security/access"
+	v23mt "v.io/v23/services/mounttable"
+
+	"v.io/x/ref/lib/timekeeper"
+)
+
+const (
+	metadataFamily = "m"
+	serversFamily  = "s"
+	childrenFamily = "c"
+
+	versionColumn     = "v"
+	permissionsColumn = "p"
+	stickyColumn      = "s"
+	timestampColumn   = "t"
+)
+
+// NewBigTable returns a BigTable object that abstracts some aspects of the
+// Cloud Bigtable API.
+func NewBigTable(keyFile, project, zone, cluster, tableName string) (*BigTable, error) {
+	ctx := netcontext.Background()
+	tk, err := getTokenSource(ctx, bigtable.Scope, keyFile)
+	if err != nil {
+		return nil, err
+	}
+	client, err := bigtable.NewClient(ctx, project, zone, cluster, cloud.WithTokenSource(tk))
+	if err != nil {
+		return nil, err
+	}
+
+	return &BigTable{
+		tableName: tableName,
+		cache:     &rowCache{},
+		tbl:       client.Open(tableName),
+		createAdminClient: func() (*bigtable.AdminClient, error) {
+			return bigtable.NewAdminClient(ctx, project, zone, cluster, cloud.WithTokenSource(tk))
+
+		},
+	}, nil
+}
+
+// NewTestBigTable returns a BigTable object that is connected to an in-memory
+// fake bigtable cluster.
+func NewTestBigTable(tableName string) (*BigTable, func(), error) {
+	srv, err := bttest.NewServer("127.0.0.1:0")
+	if err != nil {
+		return nil, nil, err
+	}
+	ctx := netcontext.Background()
+	conn, err := grpc.Dial(srv.Addr, grpc.WithInsecure())
+	if err != nil {
+		return nil, nil, err
+	}
+	client, err := bigtable.NewClient(ctx, "", "", "", cloud.WithBaseGRPC(conn))
+	if err != nil {
+		return nil, nil, err
+	}
+
+	return &BigTable{
+		tableName: tableName,
+		testMode:  true,
+		cache:     &rowCache{},
+		tbl:       client.Open(tableName),
+		createAdminClient: func() (*bigtable.AdminClient, error) {
+			conn, err := grpc.Dial(srv.Addr, grpc.WithInsecure())
+			if err != nil {
+				return nil, err
+			}
+			return bigtable.NewAdminClient(ctx, "", "", "", cloud.WithBaseGRPC(conn))
+		},
+	}, func() { srv.Close() }, nil
+}
+
+type BigTable struct {
+	tableName         string
+	testMode          bool
+	tbl               *bigtable.Table
+	cache             *rowCache
+	createAdminClient func() (*bigtable.AdminClient, error)
+}
+
+// SetupTable creates the table, column families, and GC policies.
+func (b *BigTable) SetupTable(ctx *context.T, permissionsFile string) error {
+	bctx, cancel := btctx(ctx)
+	defer cancel()
+
+	client, err := b.createAdminClient()
+	if err != nil {
+		return err
+	}
+	defer client.Close()
+
+	if err := client.CreateTable(bctx, b.tableName); err != nil {
+		return err
+	}
+
+	families := []struct {
+		name     string
+		gcPolicy bigtable.GCPolicy
+	}{
+		{serversFamily, bigtable.UnionPolicy(bigtable.MaxVersionsPolicy(1), bigtable.MaxAgePolicy(time.Second))},
+		{metadataFamily, bigtable.MaxVersionsPolicy(1)},
+		{childrenFamily, bigtable.MaxVersionsPolicy(1)},
+	}
+	for _, f := range families {
+		if err := client.CreateColumnFamily(bctx, b.tableName, f.name); err != nil {
+			return err
+		}
+		if err := client.SetGCPolicy(bctx, b.tableName, f.name, f.gcPolicy); err != nil {
+			return err
+		}
+	}
+
+	if permissionsFile != "" {
+		return createNodesFromFile(ctx, b, permissionsFile)
+	}
+	perms := make(access.Permissions)
+	perms.Add(security.AllPrincipals, string(v23mt.Admin))
+	return b.createRow(ctx, "", perms, b.now())
+}
+
+func (b *BigTable) timeFloor(t bigtable.Timestamp) bigtable.Timestamp {
+	// The bigtable server expects millisecond granularity, but
+	// bigtable.Now() returns a timestamp with microsecond granularity.
+	//
+	// https://github.com/GoogleCloudPlatform/gcloud-golang/blob/master/bigtable/bttest/inmem.go#L734
+	// https://github.com/GoogleCloudPlatform/gcloud-golang/blob/master/bigtable/bigtable.go#L531
+	return (t / 1000) * 1000
+}
+
+func (b *BigTable) timeNext(t bigtable.Timestamp) bigtable.Timestamp {
+	return (t/1000 + 1) * 1000
+}
+
+func (b *BigTable) now() bigtable.Timestamp {
+	return b.timeFloor(bigtable.Now())
+}
+
+func (b *BigTable) time(t time.Time) bigtable.Timestamp {
+	return b.timeFloor(bigtable.Time(t))
+}
+
+// DeleteTable deletes the table.
+func (b *BigTable) DeleteTable(ctx *context.T) error {
+	bctx, cancel := btctx(ctx)
+	defer cancel()
+
+	client, err := b.createAdminClient()
+	if err != nil {
+		return err
+	}
+	defer client.Close()
+	return client.DeleteTable(bctx, b.tableName)
+}
+
+// DumpTable prints all the mounttable nodes stored in the bigtable.
+func (b *BigTable) DumpTable(ctx *context.T) error {
+	bctx, cancel := btctx(ctx)
+	defer cancel()
+
+	clock := timekeeper.RealTime()
+	return b.tbl.ReadRows(bctx, bigtable.InfiniteRange(""),
+		func(row bigtable.Row) bool {
+			n := nodeFromRow(ctx, b, row, clock)
+			if n.name == "" {
+				n.name = "(root)"
+			}
+			fmt.Printf("%s version: %s", n.name, n.version)
+			if n.sticky {
+				fmt.Printf(" sticky")
+			}
+			fmt.Printf(" perms: %s", n.permissions)
+			if len(n.servers) > 0 {
+				fmt.Printf(" servers:")
+				for _, s := range n.servers {
+					delta := s.Deadline.Time.Sub(clock.Now())
+					fmt.Printf(" [%s %ds]", s.Server, int(delta.Seconds()))
+				}
+				fmt.Printf(" flags: %+v", n.mountFlags)
+			}
+			if len(n.children) > 0 {
+				fmt.Printf(" children: [%s]", strings.Join(n.children, " "))
+			}
+			fmt.Println()
+			return true
+		},
+		bigtable.RowFilter(bigtable.LatestNFilter(1)),
+	)
+}
+
+func (b *BigTable) CountRows(ctx *context.T) (int, error) {
+	bctx, cancel := btctx(ctx)
+	defer cancel()
+
+	count := 0
+	if err := b.tbl.ReadRows(bctx, bigtable.InfiniteRange(""),
+		func(row bigtable.Row) bool {
+			count++
+			return true
+		},
+		bigtable.RowFilter(bigtable.LatestNFilter(1)),
+	); err != nil {
+		return 0, err
+	}
+	return count, nil
+}
+
+func getTokenSource(ctx netcontext.Context, scope, keyFile string) (oauth2.TokenSource, error) {
+	if len(keyFile) == 0 {
+		return google.DefaultTokenSource(ctx, scope)
+	}
+	data, err := ioutil.ReadFile(keyFile)
+	if err != nil {
+		return nil, err
+	}
+	config, err := google.JWTConfigFromJSON(data, scope)
+	if err != nil {
+		return nil, err
+	}
+	return config.TokenSource(ctx), nil
+}
+
+func btctx(ctx *context.T) (netcontext.Context, func()) {
+	deadline, hasDeadline := ctx.Deadline()
+	now := time.Now()
+	if !hasDeadline || deadline.Sub(now) < time.Minute {
+		deadline = now.Add(time.Minute)
+	}
+	return netcontext.WithDeadline(netcontext.Background(), deadline)
+}
+
+func (b *BigTable) apply(ctx *context.T, row string, m *bigtable.Mutation, opts ...bigtable.ApplyOption) error {
+	bctx, cancel := btctx(ctx)
+	defer cancel()
+	// The local cache entry for this row is invalidated after each
+	// mutation, whether it succeeds or not.
+	// If it succeeds, the row has changed and the cached data is stale.
+	// If it fails, it's likely because of a concurrent mutation by another
+	// server.
+	// Either way, we can't used the cached version anymore.
+	defer b.cache.invalidate(row)
+	return b.tbl.Apply(bctx, row, m, opts...)
+}
+
+func (b *BigTable) readRow(ctx *context.T, key string, opts ...bigtable.ReadOption) (bigtable.Row, error) {
+	row, err := b.cache.getRefresh(key,
+		func() (bigtable.Row, error) {
+			bctx, cancel := btctx(ctx)
+			defer cancel()
+			return b.tbl.ReadRow(bctx, key, opts...)
+		},
+	)
+	if grpc.Code(err) == codes.DeadlineExceeded {
+		ctx.Errorf("Received DeadlineExceeded for %s", key)
+		if b.testMode {
+			panic("DeadlineExceeded from testserver")
+		}
+	}
+	return row, err
+}
+
+func (b *BigTable) createRow(ctx *context.T, name string, perms access.Permissions, ts bigtable.Timestamp) error {
+	jsonPerms, err := json.Marshal(perms)
+	if err != nil {
+		return err
+	}
+	mut := bigtable.NewMutation()
+	mut.Set(metadataFamily, timestampColumn, ts, []byte{1})
+	mut.Set(metadataFamily, permissionsColumn, bigtable.ServerTime, jsonPerms)
+	mut.Set(metadataFamily, versionColumn, bigtable.ServerTime, []byte(strconv.FormatUint(uint64(rand.Uint32()), 10)))
+	return b.apply(ctx, rowKey(name), mut)
+}

diff --git a/services/mounttable/btmtd/internal/cache.go b/services/mounttable/btmtd/internal/cache.go
new file mode 100644
index 0000000..1664217
--- /dev/null
+++ b/services/mounttable/btmtd/internal/cache.go

@@ -0,0 +1,86 @@
+// Copyright 2016 The Vanadium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package internal
+
+import (
+	"sync"
+	"time"
+
+	"github.com/golang/groupcache/lru"
+	"google.golang.org/cloud/bigtable"
+)
+
+// The purpose of this cache is to minimize the impact of "hot" rows, i.e.
+// rows that get accessed significantly more often than others.
+//
+// For example, rows associated to the root node and its immediate children are
+// expected to be hotter than rows associated with nodes further down the
+// tree.
+//
+// The cache also makes it so that concurrent read requests for the same row
+// are bundled together and only one read request is sent.
+//
+// The cacheTimeout controls how often rows can be refetched, which directly
+// affects how quickly other servers see mutations from this server. A short
+// timeout means that mutations propagate faster.
+//
+// With the current value, the bigtable server will see at most 1 request per
+// node per server per second, with the assumption that the server see at
+// most 'cacheSize' nodes per second.
+
+const (
+	cacheSize    = 1000
+	cacheTimeout = time.Second
+)
+
+type rowCache struct {
+	// mu guards cache insertions and lookups.
+	mu    sync.Mutex
+	cache *lru.Cache
+}
+
+type cacheEntry struct {
+	// mu guards 'row' and 'updated', and prevents concurrent rpc requests
+	// for the same row. It is locked when the refresh rpc is running.
+	mu      sync.Mutex
+	row     bigtable.Row
+	updated time.Time
+}
+
+func (c *rowCache) getRefresh(key string, getRow func() (bigtable.Row, error)) (bigtable.Row, error) {
+	c.mu.Lock()
+	if c.cache == nil {
+		c.cache = lru.New(cacheSize)
+	}
+	entry, ok := c.cache.Get(key)
+	if !ok {
+		entry = &cacheEntry{}
+		c.cache.Add(key, entry)
+	}
+	c.mu.Unlock()
+
+	e := entry.(*cacheEntry)
+	e.mu.Lock()
+	defer e.mu.Unlock()
+
+	if clock.Now().Sub(e.updated) < cacheTimeout {
+		return e.row, nil
+	}
+	var err error
+	if e.row, err = getRow(); err != nil || e.row.Key() == "" {
+		return e.row, err
+	}
+	e.updated = clock.Now()
+	return e.row, nil
+}
+
+func (c *rowCache) invalidate(key string) {
+	c.mu.Lock()
+	defer c.mu.Unlock()
+	if c.cache == nil {
+		return
+	}
+	c.cache.Remove(key)
+}

diff --git a/services/mounttable/btmtd/internal/cache_test.go b/services/mounttable/btmtd/internal/cache_test.go
new file mode 100644
index 0000000..be79e23
--- /dev/null
+++ b/services/mounttable/btmtd/internal/cache_test.go

@@ -0,0 +1,97 @@
+// Copyright 2016 The Vanadium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package internal
+
+import (
+	"sync"
+	"testing"
+	"time"
+
+	"google.golang.org/cloud/bigtable"
+
+	"v.io/x/ref/test/timekeeper"
+)
+
+func TestRowCache(t *testing.T) {
+	clock := timekeeper.NewManualTime()
+	clock.AdvanceTime(2 * cacheTimeout)
+	SetClock(clock)
+	rowCache := &rowCache{}
+
+	testcases := []struct {
+		key             string
+		returnedVersion string
+		expectedVersion string
+		expectedCalls   int
+		skip            time.Duration
+	}{
+		// Time 0
+		{"one", "100", "100", 1, 0},
+		{"two", "200", "200", 1, cacheTimeout/2 + 1},
+		// Time 0.5 sec
+		{"one", "XXX", "100", 0, 0},
+		{"two", "XXX", "200", 0, cacheTimeout/2 + 1},
+		// Time 1 sec
+		{"one", "101", "101", 1, 0},
+		{"two", "201", "201", 1, cacheTimeout/2 + 1},
+		// Time 1.5 sec
+		{"one", "XXX", "101", 0, 0},
+		{"two", "XXX", "201", 0, cacheTimeout/2 + 1},
+		// Time 2 sec
+		{"one", "102", "102", 1, 0},
+		{"two", "202", "202", 1, cacheTimeout/2 + 1},
+	}
+
+	for i, tc := range testcases {
+		calls := 0
+		getRow := func() (bigtable.Row, error) {
+			calls++
+			row := bigtable.Row{
+				metadataFamily: []bigtable.ReadItem{
+					{
+						Row:    tc.key,
+						Column: versionColumn,
+						Value:  []byte(tc.returnedVersion),
+					},
+				},
+			}
+			return row, nil
+		}
+		const N = 5
+		var wg sync.WaitGroup
+		wg.Add(N)
+		for x := 0; x < N; x++ {
+			go func() {
+				defer wg.Done()
+				row, err := rowCache.getRefresh(tc.key, getRow)
+				if err != nil {
+					t.Errorf("Unexpected error for #%d: %v", i, err)
+					return
+				}
+				if got := row.Key(); got != tc.key {
+					t.Errorf("Unexpected key for #%d. Got %q, expected %q", i, got, tc.key)
+				}
+				meta := row[metadataFamily]
+				if meta == nil {
+					t.Errorf("Missing metadata family for #%d", i)
+					return
+				}
+				if len(meta) != 1 {
+					t.Errorf("Unexpected number of cells for #%d: %v", i, meta)
+					return
+				}
+				if got := string(meta[0].Value); got != tc.expectedVersion {
+					t.Errorf("Unexpected version for #%d. Got %q, expected %q", i, got, tc.expectedVersion)
+				}
+			}()
+		}
+		wg.Wait()
+		if got := calls; got != tc.expectedCalls {
+			t.Errorf("Unexpected function call count for #%d. Got %d, expected %d", i, got, tc.expectedCalls)
+		}
+		clock.AdvanceTime(tc.skip)
+	}
+
+}

diff --git a/services/mounttable/btmtd/internal/collectionserver_test.go b/services/mounttable/btmtd/internal/collectionserver_test.go
new file mode 100644
index 0000000..17b7396
--- /dev/null
+++ b/services/mounttable/btmtd/internal/collectionserver_test.go

@@ -0,0 +1,59 @@
+// Copyright 2015 The Vanadium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package internal_test
+
+import (
+	"sync"
+
+	"v.io/v23/context"
+	"v.io/v23/naming"
+	"v.io/v23/rpc"
+	"v.io/v23/security"
+	"v.io/v23/verror"
+)
+
+// collectionServer is a very simple collection server implementation for testing, with sufficient debugging to help
+// when there are problems.
+type collectionServer struct {
+	sync.Mutex
+	contents map[string][]byte
+}
+type rpcContext struct {
+	name string
+	*collectionServer
+}
+
+var instance collectionServer
+
+func newCollectionServer() *collectionServer {
+	return &collectionServer{contents: make(map[string][]byte)}
+}
+
+// Lookup implements rpc.Dispatcher.Lookup.
+func (s *collectionServer) Lookup(_ *context.T, name string) (interface{}, security.Authorizer, error) {
+	rpcc := &rpcContext{name: name, collectionServer: s}
+	return rpcc, security.AllowEveryone(), nil
+}
+
+// Export implements CollectionServerMethods.Export.
+func (c *rpcContext) Export(ctx *context.T, _ rpc.ServerCall, val string, overwrite bool) error {
+	c.Lock()
+	defer c.Unlock()
+	if b := c.contents[c.name]; overwrite || b == nil {
+		c.contents[c.name] = []byte(val)
+		return nil
+	}
+	return verror.New(naming.ErrNameExists, ctx, c.name)
+}
+
+// Lookup implements CollectionServerMethods.Lookup.
+func (c *rpcContext) Lookup(ctx *context.T, _ rpc.ServerCall) ([]byte, error) {
+	c.Lock()
+	defer c.Unlock()
+	if val := c.contents[c.name]; val != nil {
+		return val, nil
+	}
+	return nil, verror.New(naming.ErrNoSuchName, ctx, c.name)
+}

diff --git a/services/mounttable/btmtd/internal/concurrency_test.go b/services/mounttable/btmtd/internal/concurrency_test.go
new file mode 100644
index 0000000..fc0a819
--- /dev/null
+++ b/services/mounttable/btmtd/internal/concurrency_test.go

@@ -0,0 +1,138 @@
+// Copyright 2016 The Vanadium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package internal_test
+
+import (
+	"fmt"
+	"math/rand"
+	"sync"
+	"testing"
+	"time"
+
+	"v.io/v23/naming"
+	"v.io/v23/verror"
+)
+
+func TestConcurrentMountUnmount(t *testing.T) {
+	rootCtx, _, _, shutdown := initTest()
+	defer shutdown()
+
+	stop, mtAddr, bt, _ := newMT(t, "", rootCtx)
+	defer stop()
+
+	var wg sync.WaitGroup
+	busyMountUnmount := func(name string) {
+		defer wg.Done()
+		for i := 0; i < 250; i++ {
+			n := fmt.Sprintf("%s/%d", name, rand.Intn(10))
+			doMount(t, rootCtx, mtAddr, n, "/example.com:12345", true)
+			if _, err := resolve(rootCtx, naming.JoinAddressName(mtAddr, n)); err != nil {
+				t.Errorf("resolve(%q) failed: %v", n, err)
+			}
+			doUnmount(t, rootCtx, mtAddr, n, "", true)
+		}
+	}
+
+	for i := 1; i <= 5; i++ {
+		wg.Add(3)
+		go busyMountUnmount(fmt.Sprintf("a/%d", i))
+		go busyMountUnmount(fmt.Sprintf("a/b/%d", i))
+		go busyMountUnmount(fmt.Sprintf("a/c/d/e/%d", i))
+	}
+	wg.Wait()
+
+	checkMatch(t, []string{}, doGlob(t, rootCtx, mtAddr, "", "*"))
+
+	count, err := bt.CountRows(rootCtx)
+	if err != nil {
+		t.Errorf("bt.CountRows failed: %v", err)
+	}
+	if expected := 1; count != expected {
+		t.Errorf("Unexpected number of rows. Got %d, expected %d", count, expected)
+		bt.DumpTable(rootCtx)
+	}
+}
+
+func TestConcurrentMountDelete(t *testing.T) {
+	rootCtx, _, _, shutdown := initTest()
+	defer shutdown()
+
+	stop, mtAddr, bt, _ := newMT(t, "", rootCtx)
+	defer stop()
+
+	var wg sync.WaitGroup
+	busyMount := func(name string) {
+		defer wg.Done()
+		for i := 0; i < 250; i++ {
+			doMount(t, rootCtx, mtAddr, name, "/example.com:12345", true)
+		}
+	}
+	busyDelete := func(name string) {
+		defer wg.Done()
+		for i := 0; i < 250; i++ {
+			doDeleteSubtree(t, rootCtx, mtAddr, name, true)
+		}
+	}
+
+	for i := 1; i <= 5; i++ {
+		wg.Add(4)
+		go busyMount(fmt.Sprintf("a/%d", i))
+		go busyDelete(fmt.Sprintf("a/%d", i))
+		go busyMount(fmt.Sprintf("b/%d/c/d/e/f/g/h/i", i))
+		go busyDelete(fmt.Sprintf("b/%d", i))
+	}
+	wg.Wait()
+
+	doDeleteSubtree(t, rootCtx, mtAddr, "a", true)
+	doDeleteSubtree(t, rootCtx, mtAddr, "b", true)
+
+	count, err := bt.CountRows(rootCtx)
+	if err != nil {
+		t.Errorf("bt.CountRows failed: %v", err)
+	}
+	if expected := 1; count != expected {
+		t.Errorf("Unexpected number of rows. Got %d, expected %d", count, expected)
+		bt.DumpTable(rootCtx)
+	}
+}
+
+func TestConcurrentExpiry(t *testing.T) {
+	rootCtx, _, _, shutdown := initTest()
+	defer shutdown()
+
+	stop, mtAddr, bt, clock := newMT(t, "", rootCtx)
+	defer stop()
+
+	const N = 100
+	name := func(i int) string { return fmt.Sprintf("a/b/c/d/e/f/g/h/i/j/k/l/%05d/%05d", i, i) }
+	for i := 0; i < N; i++ {
+		doMount(t, rootCtx, mtAddr, name(i), "/example.com:12345", true)
+	}
+	clock.AdvanceTime(time.Duration(ttlSecs+4) * time.Second)
+
+	var wg sync.WaitGroup
+	concurrentResolve := func(name string) {
+		defer wg.Done()
+		if _, err := resolve(rootCtx, naming.JoinAddressName(mtAddr, name)); verror.ErrorID(err) != naming.ErrNoSuchName.ID {
+			t.Errorf("resolve(%q) returned unexpected error: %v", name, err)
+		}
+	}
+
+	for i := 0; i < N; i++ {
+		wg.Add(2)
+		go concurrentResolve(name(i))
+		go concurrentResolve(name(i))
+	}
+	wg.Wait()
+
+	count, err := bt.CountRows(rootCtx)
+	if err != nil {
+		t.Errorf("bt.CountRows failed: %v", err)
+	}
+	if expected := 1; count != expected {
+		t.Errorf("Unexpected number of rows. Got %d, expected %d", count, expected)
+		bt.DumpTable(rootCtx)
+	}
+}

diff --git a/services/mounttable/btmtd/internal/dispatcher.go b/services/mounttable/btmtd/internal/dispatcher.go
new file mode 100644
index 0000000..3ac4168
--- /dev/null
+++ b/services/mounttable/btmtd/internal/dispatcher.go

@@ -0,0 +1,40 @@
+// Copyright 2016 The Vanadium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package internal
+
+import (
+	"path"
+
+	"v.io/v23/context"
+	"v.io/v23/rpc"
+	"v.io/v23/security"
+	"v.io/v23/security/access"
+	v23mt "v.io/v23/services/mounttable"
+
+	"v.io/x/ref/lib/timekeeper"
+)
+
+var clock = timekeeper.RealTime()
+
+// For testing only.
+func SetClock(c timekeeper.TimeKeeper) {
+	clock = c
+}
+
+func NewDispatcher(bt *BigTable, globalAcl *access.AccessList) rpc.Dispatcher {
+	return &dispatcher{bt, globalAcl}
+}
+
+type dispatcher struct {
+	bt        *BigTable
+	globalAcl *access.AccessList
+}
+
+func (d *dispatcher) Lookup(ctx *context.T, suffix string) (interface{}, security.Authorizer, error) {
+	if suffix != "" {
+		suffix = path.Clean(suffix)
+	}
+	return v23mt.MountTableServer(&mounttable{suffix, d.globalAcl, d.bt}), security.AllowEveryone(), nil
+}

diff --git a/services/mounttable/btmtd/internal/mounttable.go b/services/mounttable/btmtd/internal/mounttable.go
new file mode 100644
index 0000000..c744974
--- /dev/null
+++ b/services/mounttable/btmtd/internal/mounttable.go

@@ -0,0 +1,425 @@
+// Copyright 2016 The Vanadium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package internal
+
+import (
+	"math"
+	"math/rand"
+	"path"
+	"reflect"
+	"strings"
+	"time"
+
+	"v.io/v23/context"
+	"v.io/v23/glob"
+	"v.io/v23/naming"
+	"v.io/v23/rpc"
+	"v.io/v23/security"
+	"v.io/v23/security/access"
+	v23mt "v.io/v23/services/mounttable"
+	"v.io/v23/verror"
+)
+
+const pkgPath = "v.io/x/ref/services/mounttable/btmtd/internal"
+
+var (
+	errMalformedAddress = verror.Register(pkgPath+".errMalformedAddress", verror.NoRetry, "{1:}{2:} malformed address {3} for mounted server {4}{:_}")
+	errNotEmpty         = verror.Register(pkgPath+".errNotEmpty", verror.NoRetry, "{1:}{2:} cannot delete {3}: has children{:_}")
+	errConcurrentAccess = verror.Register(pkgPath+".errConcurrentAccess", verror.RetryRefetch, "{1:}{2:} concurrent access caused conflict{:_}")
+)
+
+func shouldAbort(ctx *context.T) error {
+	select {
+	case <-ctx.Done():
+		return verror.NewErrAborted(ctx)
+	default:
+		return nil
+	}
+}
+
+func op(ctx *context.T, f func() error) (err error) {
+	for {
+		if err = shouldAbort(ctx); err != nil {
+			return
+		}
+		if err = f(); verror.ErrorID(err) == errConcurrentAccess.ID {
+			ctx.Infof("Concurrent access conflict detected: %v", err)
+			time.Sleep(time.Duration(rand.Intn(50)) * time.Millisecond)
+			continue
+		}
+		return
+	}
+}
+
+type mounttable struct {
+	suffix    string
+	globalAcl *access.AccessList
+	bt        *BigTable
+}
+
+// Mount a server onto the name in the receiver.
+func (mt *mounttable) Mount(ctx *context.T, call rpc.ServerCall, server string, ttl uint32, flags naming.MountFlag) error {
+	ctx.Infof("%q.Mount(%q, %d, %v)", mt.suffix, server, ttl, flags)
+	if ttl == 0 {
+		ttl = math.MaxUint32
+	}
+	// Make sure the server address is reasonable.
+	ep := server
+	if naming.Rooted(server) {
+		ep, _ = naming.SplitAddressName(server)
+	}
+	if _, err := naming.ParseEndpoint(ep); err != nil {
+		return verror.New(errMalformedAddress, ctx, ep, server)
+	}
+
+	return op(ctx, func() error {
+		n, err := mt.accessNodeCreate(ctx, mt.suffix, call.Security())
+		if err != nil {
+			return err
+		}
+		if err := mt.authorize(ctx, call.Security(), n.permissions, v23mt.Mount, v23mt.Admin); err != nil {
+			return err
+		}
+		return n.mount(ctx, server, clock.Now().Add(time.Duration(ttl)*time.Second), flags)
+	})
+}
+
+// Unmount removes servers from the name in the receiver. If server is specified, only that
+// server is removed.
+func (mt *mounttable) Unmount(ctx *context.T, call rpc.ServerCall, server string) error {
+	ctx.Infof("%q.Unmount(%q)", mt.suffix, server)
+
+	return op(ctx, func() error {
+		n, err := mt.accessNodeRead(ctx, mt.suffix, call.Security())
+		if err != nil {
+			return err
+		}
+		if err := mt.authorize(ctx, call.Security(), n.permissions, v23mt.Mount, v23mt.Admin); err != nil {
+			return err
+		}
+		return n.unmount(ctx, server)
+	})
+}
+
+// Delete removes the receiver.  If the receiver has children, it will not
+// be removed unless deleteSubtree is true in which case the whole subtree is
+// removed.
+func (mt *mounttable) Delete(ctx *context.T, call rpc.ServerCall, deleteSubtree bool) error {
+	ctx.Infof("%q.Delete(%v)", mt.suffix, deleteSubtree)
+
+	if mt.suffix == "" {
+		return verror.New(verror.ErrNoAccess, ctx, "")
+	}
+
+	return op(ctx, func() error {
+		n, err := mt.accessNodeRead(ctx, mt.suffix, call.Security())
+		if verror.ErrorID(err) == naming.ErrNoSuchName.ID {
+			return nil
+		}
+		if err != nil {
+			return err
+		}
+		// Caller must have Admin access on either the node itself or
+		// its parent.
+		if err := mt.authorize(ctx, call.Security(), n.permissions, v23mt.Admin); err != nil {
+			parent, _ := path.Split(mt.suffix)
+			if p, _ := mt.accessNodeRead(ctx, parent, call.Security()); p == nil {
+				return err
+			} else if err2 := mt.authorize(ctx, call.Security(), p.permissions, v23mt.Admin); err2 != nil {
+				return err
+			}
+		}
+		return n.deleteAndGC(ctx, deleteSubtree)
+	})
+}
+
+// ResolveStep takes the next step in resolving a name.  Returns the next
+// servers to query and the suffix at those servers.
+func (mt *mounttable) ResolveStep(ctx *context.T, call rpc.ServerCall) (entry naming.MountEntry, err error) {
+	ctx.Infof("%q.ResolveStep()", mt.suffix)
+
+	n, remainder, err := mt.accessNodeWithServers(ctx, mt.suffix, call.Security())
+	if err != nil {
+		return entry, err
+	}
+	if n == nil {
+		entry.Name = mt.suffix
+		err = verror.New(naming.ErrNoSuchName, ctx, mt.suffix)
+		return
+	}
+	entry.Name = remainder
+	entry.Servers = make([]naming.MountedServer, len(n.servers))
+	for i, s := range n.servers {
+		entry.Servers[i] = s
+		entry.ServesMountTable = n.mountFlags.MT
+		entry.IsLeaf = n.mountFlags.Leaf
+	}
+	return entry, nil
+}
+
+// Glob__ finds matches in the namespace.  If we reach a mount point before
+// matching the whole pattern, return that mount point.
+func (mt *mounttable) Glob__(ctx *context.T, call rpc.GlobServerCall, g *glob.Glob) error {
+	ctx.Infof("%q.Glob__(%q)", mt.suffix, g)
+
+	n, remainder, err := mt.accessNodeWithServers(ctx, mt.suffix, call.Security())
+	if err != nil {
+		return err
+	}
+	if n != nil {
+		me := naming.MountEntry{
+			Name:             "",
+			Servers:          make([]naming.MountedServer, len(n.servers)),
+			ServesMountTable: n.mountFlags.MT,
+			IsLeaf:           n.mountFlags.Leaf,
+		}
+		for i, s := range n.servers {
+			me.Servers[i].Server = naming.Join(s.Server, remainder)
+			me.Servers[i].Deadline = s.Deadline
+		}
+		call.SendStream().Send(naming.GlobReplyEntry{Value: me})
+		return nil
+	}
+
+	type gState struct {
+		name string
+		g    *glob.Glob
+	}
+	queue := []gState{gState{"", g}}
+	for len(queue) != 0 {
+		if err := shouldAbort(ctx); err != nil {
+			return err
+		}
+		state := queue[0]
+		queue = queue[1:]
+
+		n, err := mt.accessNodeRead(ctx, naming.Join(mt.suffix, state.name), call.Security())
+		if err != nil {
+			ctx.VI(2).Infof("caller doesn't have access to %s", state.name)
+			continue
+		}
+		if n == nil {
+			continue
+		}
+		if len(n.servers) > 0 {
+			me := naming.MountEntry{
+				Name:             state.name,
+				Servers:          make([]naming.MountedServer, len(n.servers)),
+				ServesMountTable: n.mountFlags.MT,
+				IsLeaf:           n.mountFlags.Leaf,
+			}
+			for i, s := range n.servers {
+				me.Servers[i] = s
+			}
+			call.SendStream().Send(naming.GlobReplyEntry{Value: me})
+			continue
+		}
+		if state.g.Len() == 0 {
+			call.SendStream().Send(naming.GlobReplyEntry{Value: naming.MountEntry{Name: state.name}})
+		}
+		if state.g.Empty() {
+			continue
+		}
+		matcher, left := state.g.Head(), state.g.Tail()
+		for _, child := range n.children {
+			if matcher.Match(child) {
+				// TODO(rthellend): We should protect against large query results
+				// that could blow up memory usage.
+				queue = append(queue, gState{naming.Join(state.name, child), left})
+			}
+		}
+	}
+	return nil
+}
+
+// SetPermissions replaces the current Permissions for an object.  version
+// allows for optional, optimistic concurrency control.  If non-empty,
+// version's value must come from GetPermissions.  If any client has
+// successfully called SetPermissions in the meantime, the version will be
+// stale and SetPermissions will fail.  If empty, SetPermissions performs an
+// unconditional update.
+func (mt *mounttable) SetPermissions(ctx *context.T, call rpc.ServerCall, perms access.Permissions, version string) error {
+	ctx.Infof("%q.SetPermissions(%#v, %q)", mt.suffix, perms, version)
+
+	// If the user is trying to set permissions on a node such that the
+	// user will no longer have admin access, add the user as Admin.
+	if err := mt.authorize(ctx, call.Security(), perms, v23mt.Admin); err != nil {
+		blessings, _ := security.RemoteBlessingNames(ctx, call.Security())
+		for _, b := range blessings {
+			perms.Add(security.BlessingPattern(b), string(v23mt.Admin))
+		}
+	}
+	perms.Normalize()
+
+	return op(ctx, func() error {
+		n, err := mt.accessNodeCreate(ctx, mt.suffix, call.Security())
+		if err != nil {
+			return err
+		}
+		if err := mt.authorize(ctx, call.Security(), n.permissions, v23mt.Admin); err != nil {
+			return err
+		}
+		if version != "" && n.version != version {
+			return verror.NewErrBadVersion(ctx)
+		}
+		return n.setPermissions(ctx, perms)
+	})
+}
+
+// GetPermissions returns the complete, current Permissions for an object. The
+// returned version can be passed to a subsequent call to SetPermissions for
+// optimistic concurrency control. A successful call to SetPermissions will
+// invalidate version, and the client must call GetPermissions again to get
+// the current version.
+func (mt *mounttable) GetPermissions(ctx *context.T, call rpc.ServerCall) (perms access.Permissions, version string, err error) {
+	ctx.Infof("%q.GetPermissions()", mt.suffix)
+
+	var n *mtNode
+	if n, err = mt.accessNodeRead(ctx, mt.suffix, call.Security()); err != nil {
+		return
+	}
+	return n.permissions, n.version, nil
+}
+
+func (mt *mounttable) authorize(ctx *context.T, call security.Call, perms access.Permissions, tags ...v23mt.Tag) error {
+	if l, r := call.LocalBlessings().PublicKey(), call.RemoteBlessings().PublicKey(); l != nil && r != nil && reflect.DeepEqual(l, r) {
+		return nil
+	}
+	blessings, invalid := security.RemoteBlessingNames(ctx, call)
+	for _, tag := range tags {
+		if acl, exists := perms[string(tag)]; exists && acl.Includes(blessings...) {
+			return nil
+		}
+	}
+	if mt.globalAcl != nil {
+		if mt.globalAcl.Includes(blessings...) {
+			return nil
+		}
+	}
+	return access.NewErrNoPermissions(ctx, blessings, invalid, string(tags[0]))
+}
+
+// accessNode returns the requested mounttable node after verifying that the
+// caller is authorized to access it. Missing nodes are created if 'create' is
+// true and the caller has 'Create' permission on the parent.
+//
+// If withServers is true, returns the first node within 'name' that has
+// mounted servers, and the remaining part is returned as a string.
+//
+// Example:
+//   a -> b -> c
+// If b has a mounted server,
+//  accessNode(.., "a/b", false, true, ..) returns (b, "")
+//  accessNode(.., "a/b/c", false, true, ..) returns (b, "c")
+//  accessNode(.., "a/b", false, false, ..) returns (b, "")
+//  accessNode(.., "a/b/c", false, false, ..) returns (c, "")
+func (mt *mounttable) accessNode(ctx *context.T, name string, create, withServers bool, call security.Call) (*mtNode, string, error) {
+start:
+	var (
+		n, parent *mtNode
+		current   string
+		elems     = append([]string{""}, strings.Split(name, "/")...)
+	)
+
+	for len(elems) > 0 {
+		if err := shouldAbort(ctx); err != nil {
+			return nil, "", err
+		}
+
+		elem := elems[0]
+		elems = elems[1:]
+		current = naming.Join(current, elem)
+
+		var err error
+		n, err = getNode(ctx, mt.bt, current)
+
+		switch {
+		case err != nil:
+			return nil, "", err
+		case n == nil:
+			if parent == nil {
+				return nil, "", verror.New(naming.ErrNoSuchNameRoot, ctx, current)
+			}
+			if !create {
+				return nil, "", verror.New(naming.ErrNoSuchName, ctx, current)
+			}
+			if err := mt.authorize(ctx, call, parent.permissions, v23mt.Create, v23mt.Admin); err != nil {
+				return nil, "", err
+			}
+			var perms access.Permissions
+			if perms = templatePermissions(parent.permissions, elem); perms == nil {
+				perms = parent.permissions.Copy()
+				if current == name {
+					names, _ := security.RemoteBlessingNames(ctx, call)
+					for _, n := range names {
+						perms.Add(security.BlessingPattern(n), string(v23mt.Admin))
+					}
+				}
+			}
+			perms.Normalize()
+			n, err = parent.createChild(ctx, elem, perms)
+			if err != nil {
+				return nil, "", err
+			}
+		default:
+			deleted, err := n.gc(ctx)
+			if deleted || verror.ErrorID(err) == errConcurrentAccess.ID {
+				goto start
+			}
+			if err != nil {
+				return nil, "", err
+			}
+		}
+		if err := mt.authorize(ctx, call, n.permissions, v23mt.Resolve, v23mt.Read, v23mt.Admin); err != nil {
+			return nil, "", err
+		}
+		if withServers && len(n.servers) > 0 {
+			return n, naming.Join(elems...), nil
+		}
+		parent = n
+	}
+	if withServers {
+		return nil, "", nil
+	}
+	return n, "", nil
+}
+
+func (mt *mounttable) accessNodeRead(ctx *context.T, name string, call security.Call) (*mtNode, error) {
+	n, _, err := mt.accessNode(ctx, name, false, false, call)
+	return n, err
+}
+
+func (mt *mounttable) accessNodeCreate(ctx *context.T, name string, call security.Call) (*mtNode, error) {
+	n, _, err := mt.accessNode(ctx, name, true, false, call)
+	return n, err
+}
+
+func (mt *mounttable) accessNodeWithServers(ctx *context.T, name string, call security.Call) (*mtNode, string, error) {
+	return mt.accessNode(ctx, name, false, true, call)
+}
+
+func templatePermissions(perms access.Permissions, child string) access.Permissions {
+	var templatePerms access.Permissions
+	for tag, acl := range perms {
+		if !strings.HasPrefix(tag, "%%/") {
+			continue
+		}
+		tmpAcl := access.AccessList{
+			In:    make([]security.BlessingPattern, len(acl.In)),
+			NotIn: make([]string, len(acl.NotIn)),
+		}
+		for i, b := range acl.In {
+			tmpAcl.In[i] = security.BlessingPattern(strings.Replace(string(b), "%%", child, -1))
+		}
+		for i, b := range acl.NotIn {
+			tmpAcl.NotIn[i] = strings.Replace(b, "%%", child, -1)
+		}
+		if templatePerms == nil {
+			templatePerms = make(access.Permissions)
+		}
+		templatePerms[strings.TrimPrefix(tag, "%%/")] = tmpAcl
+	}
+	return templatePerms
+}

diff --git a/services/mounttable/btmtd/internal/mounttable_test.go b/services/mounttable/btmtd/internal/mounttable_test.go
new file mode 100644
index 0000000..e591686
--- /dev/null
+++ b/services/mounttable/btmtd/internal/mounttable_test.go

@@ -0,0 +1,764 @@
+// Copyright 2015 The Vanadium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package internal_test
+
+import (
+	"errors"
+	"io"
+	"reflect"
+	"runtime/debug"
+	"sort"
+	"testing"
+	"time"
+
+	"v.io/v23"
+	"v.io/v23/context"
+	"v.io/v23/naming"
+	"v.io/v23/options"
+	"v.io/v23/rpc"
+	"v.io/v23/security"
+	"v.io/v23/security/access"
+	_ "v.io/x/ref/runtime/factories/roaming"
+	"v.io/x/ref/services/debug/debuglib"
+	"v.io/x/ref/services/mounttable/btmtd/internal"
+	"v.io/x/ref/test"
+	"v.io/x/ref/test/testutil"
+	"v.io/x/ref/test/timekeeper"
+)
+
+// Simulate different processes with different runtimes.
+// rootCtx is the one running the mounttable service.
+const ttlSecs = 60 * 60
+
+func boom(t *testing.T, f string, v ...interface{}) {
+	t.Logf(f, v...)
+	t.Fatal(string(debug.Stack()))
+}
+
+func doMount(t *testing.T, ctx *context.T, ep, suffix, service string, shouldSucceed bool) {
+	name := naming.JoinAddressName(ep, suffix)
+	client := v23.GetClient(ctx)
+	if err := client.Call(ctx, name, "Mount", []interface{}{service, uint32(ttlSecs), 0}, nil, options.Preresolved{}); err != nil {
+		if !shouldSucceed {
+			return
+		}
+		boom(t, "Failed to Mount %s onto %s: %s", service, name, err)
+	}
+}
+
+func doUnmount(t *testing.T, ctx *context.T, ep, suffix, service string, shouldSucceed bool) {
+	name := naming.JoinAddressName(ep, suffix)
+	client := v23.GetClient(ctx)
+	if err := client.Call(ctx, name, "Unmount", []interface{}{service}, nil, options.Preresolved{}); err != nil {
+		if !shouldSucceed {
+			return
+		}
+		boom(t, "Failed to Unmount %s off of %s: %s", service, name, err)
+	}
+}
+
+func doGetPermissions(t *testing.T, ctx *context.T, ep, suffix string, shouldSucceed bool) (perms access.Permissions, version string) {
+	name := naming.JoinAddressName(ep, suffix)
+	client := v23.GetClient(ctx)
+	if err := client.Call(ctx, name, "GetPermissions", nil, []interface{}{&perms, &version}, options.Preresolved{}); err != nil {
+		if !shouldSucceed {
+			return
+		}
+		boom(t, "Failed to GetPermissions %s: %s", name, err)
+	}
+	return
+}
+
+func doSetPermissions(t *testing.T, ctx *context.T, ep, suffix string, perms access.Permissions, version string, shouldSucceed bool) {
+	name := naming.JoinAddressName(ep, suffix)
+	client := v23.GetClient(ctx)
+	if err := client.Call(ctx, name, "SetPermissions", []interface{}{perms, version}, nil, options.Preresolved{}); err != nil {
+		if !shouldSucceed {
+			return
+		}
+		boom(t, "Failed to SetPermissions %s: %s", name, err)
+	}
+}
+
+func doDeleteNode(t *testing.T, ctx *context.T, ep, suffix string, shouldSucceed bool) {
+	name := naming.JoinAddressName(ep, suffix)
+	client := v23.GetClient(ctx)
+	if err := client.Call(ctx, name, "Delete", []interface{}{false}, nil, options.Preresolved{}); err != nil {
+		if !shouldSucceed {
+			return
+		}
+		boom(t, "Failed to Delete node %s: %s", name, err)
+	}
+}
+
+func doDeleteSubtree(t *testing.T, ctx *context.T, ep, suffix string, shouldSucceed bool) {
+	name := naming.JoinAddressName(ep, suffix)
+	client := v23.GetClient(ctx)
+	if err := client.Call(ctx, name, "Delete", []interface{}{true}, nil, options.Preresolved{}); err != nil {
+		if !shouldSucceed {
+			return
+		}
+		boom(t, "Failed to Delete subtree %s: %s", name, err)
+	}
+}
+
+func mountentry2names(e *naming.MountEntry) []string {
+	names := make([]string, len(e.Servers))
+	for idx, s := range e.Servers {
+		names[idx] = naming.JoinAddressName(s.Server, e.Name)
+	}
+	return names
+}
+
+func resolve(ctx *context.T, name string) (*naming.MountEntry, error) {
+	// Resolve the name one level.
+	var entry naming.MountEntry
+	client := v23.GetClient(ctx)
+	if err := client.Call(ctx, name, "ResolveStep", nil, []interface{}{&entry}, options.Preresolved{}); err != nil {
+		return nil, err
+	}
+	if len(entry.Servers) < 1 {
+		return nil, errors.New("resolve returned no servers")
+	}
+	return &entry, nil
+}
+
+func export(t *testing.T, ctx *context.T, name, contents string) {
+	// Resolve the name.
+	resolved, err := resolve(ctx, name)
+	if err != nil {
+		boom(t, "Failed to Export.Resolve %s: %s", name, err)
+	}
+	// Export the value.
+	client := v23.GetClient(ctx)
+	if err := client.Call(ctx, mountentry2names(resolved)[0], "Export", []interface{}{contents, true}, nil, options.Preresolved{resolved}); err != nil {
+		boom(t, "Failed to Export.Call %s to %s: %s", name, contents, err)
+	}
+}
+
+func checkContents(t *testing.T, ctx *context.T, name, expected string, shouldSucceed bool) {
+	// Resolve the name.
+	resolved, err := resolve(ctx, name)
+	if err != nil {
+		if !shouldSucceed {
+			return
+		}
+		boom(t, "Failed to Resolve %s: %s", name, err)
+	}
+	// Look up the value.
+	client := v23.GetClient(ctx)
+	call, err := client.StartCall(ctx, mountentry2names(resolved)[0], "Lookup", nil, options.Preresolved{resolved})
+	if err != nil {
+		if shouldSucceed {
+			boom(t, "Failed Lookup.StartCall %s: %s", name, err)
+		}
+		return
+	}
+	var contents []byte
+	if err := call.Finish(&contents); err != nil {
+		if shouldSucceed {
+			boom(t, "Failed to Lookup %s: %s", name, err)
+		}
+		return
+	}
+	if string(contents) != expected {
+		boom(t, "Lookup %s, expected %q, got %q", name, expected, contents)
+	}
+	if !shouldSucceed {
+		boom(t, "Lookup %s, expected failure, got %q", name, contents)
+	}
+}
+
+func newMT(t *testing.T, permsFile string, rootCtx *context.T) (func(), string, *internal.BigTable, timekeeper.ManualTime) {
+	reservedDisp := debuglib.NewDispatcher(nil)
+	ctx := v23.WithReservedNameDispatcher(rootCtx, reservedDisp)
+
+	bt, shutdownBT, err := internal.NewTestBigTable("mounttable")
+	if err != nil {
+		boom(t, "NewTestBigTable: %s", err)
+	}
+	if err := bt.SetupTable(ctx, permsFile); err != nil {
+		boom(t, "bt.SetupTable: %s", err)
+	}
+
+	// Set clock slightly in the future to avoid triggering bigtable's GC.
+	clock := timekeeper.NewManualTime()
+	now := time.Now().UTC().Add(time.Hour)
+	for {
+		delta := now.Sub(clock.Now())
+		if delta < time.Second {
+			break
+		}
+		clock.AdvanceTime(delta)
+	}
+	// Add mount table service.
+	internal.SetClock(clock)
+	mt := internal.NewDispatcher(bt, nil)
+
+	// Start serving on a loopback address.
+	ctx, cancel := context.WithCancel(ctx)
+	ctx, server, err := v23.WithNewDispatchingServer(ctx, "", mt, options.ServesMountTable(true))
+	if err != nil {
+		boom(t, "r.NewServer: %s", err)
+	}
+
+	estr := server.Status().Endpoints[0].String()
+	t.Logf("endpoint %s", estr)
+	return func() {
+		cancel()
+		<-server.Closed()
+		shutdownBT()
+	}, estr, bt, clock
+}
+
+func newCollection(t *testing.T, rootCtx *context.T) (func(), string) {
+	// Start serving a collection service on a loopback address.  This
+	// is just a service we can mount and test against.
+	ctx, cancel := context.WithCancel(rootCtx)
+	_, server, err := v23.WithNewDispatchingServer(ctx, "collection", newCollectionServer())
+	if err != nil {
+		boom(t, "r.NewServer: %s", err)
+	}
+	estr := server.Status().Endpoints[0].String()
+	t.Logf("endpoint %s", estr)
+	return func() {
+		cancel()
+		<-server.Closed()
+	}, estr
+}
+
+func TestMountTable(t *testing.T) {
+	rootCtx, aliceCtx, bobCtx, shutdown := initTest()
+	defer shutdown()
+
+	stop, mtAddr, _, clock := newMT(t, "testdata/test.perms", rootCtx)
+	defer stop()
+	stop, collectionAddr := newCollection(t, rootCtx)
+	defer stop()
+
+	collectionName := naming.JoinAddressName(collectionAddr, "collection")
+
+	// Mount the collection server into the mount table.
+	rootCtx.Infof("Mount the collection server into the mount table.")
+	doMount(t, rootCtx, mtAddr, "stuff", collectionName, true)
+
+	// Create a few objects and make sure we can read them.
+	rootCtx.Infof("Create a few objects.")
+	export(t, rootCtx, naming.JoinAddressName(mtAddr, "stuff/the/rain"), "the rain")
+	export(t, rootCtx, naming.JoinAddressName(mtAddr, "stuff/in/spain"), "in spain")
+	export(t, rootCtx, naming.JoinAddressName(mtAddr, "stuff/falls"), "falls mainly on the plain")
+	rootCtx.Infof("Make sure we can read them.")
+	checkContents(t, rootCtx, naming.JoinAddressName(mtAddr, "stuff/the/rain"), "the rain", true)
+	checkContents(t, rootCtx, naming.JoinAddressName(mtAddr, "stuff/in/spain"), "in spain", true)
+	checkContents(t, rootCtx, naming.JoinAddressName(mtAddr, "stuff/falls"), "falls mainly on the plain", true)
+	checkContents(t, rootCtx, naming.JoinAddressName(mtAddr, "/stuff/falls"), "falls mainly on the plain", true)
+	checkContents(t, rootCtx, naming.JoinAddressName(mtAddr, "stuff/nonexistant"), "falls mainly on the plain", false)
+	checkContents(t, bobCtx, naming.JoinAddressName(mtAddr, "stuff/the/rain"), "the rain", true)
+	checkContents(t, aliceCtx, naming.JoinAddressName(mtAddr, "stuff/the/rain"), "the rain", false)
+
+	// Test name element too long.
+	rootCtx.Infof("Name element too long.")
+	doMount(t, rootCtx, mtAddr, "a/abcdefghijklmnopqrstuvwxyz0123456789abcdefghijklmnopqrstuvwxyz0123456789abcdefghijklmnopqrstuvwxyz0123456789abcdefghijklmnopqrstuvwxyz0123456789abcdefghijklmnopqrstuvwxyz0123456789abcdefghijklmnopqrstuvwxyz0123456789abcdefghijklmnopqrstuvwxyz0123456789abcdefghijklmnopqrstuvwxyz0123456789abcdefghijklmnopqrstuvwxyz0123456789abcdefghijklmnopqrstuvwxyz0123456789abcdefghijklmnopqrstuvwxyz0123456789abcdefghijklmnopqrstuvwxyz0123456789abcdefghijklmnopqrstuvwxyz0123456789abcdefghijklmnopqrstuvwxyz0123456789abcdefghijklmnop", collectionName, false)
+
+	// Test multiple mounts.
+	rootCtx.Infof("Multiple mounts.")
+	doMount(t, rootCtx, mtAddr, "a/b", collectionName, true)
+	doMount(t, rootCtx, mtAddr, "x/y", collectionName, true)
+	doMount(t, rootCtx, mtAddr, "alpha//beta", collectionName, true)
+	rootCtx.Infof("Make sure we can read them.")
+	checkContents(t, rootCtx, naming.JoinAddressName(mtAddr, "stuff/falls"), "falls mainly on the plain", true)
+	checkContents(t, rootCtx, naming.JoinAddressName(mtAddr, "a/b/falls"), "falls mainly on the plain", true)
+	checkContents(t, rootCtx, naming.JoinAddressName(mtAddr, "x/y/falls"), "falls mainly on the plain", true)
+	checkContents(t, rootCtx, naming.JoinAddressName(mtAddr, "alpha/beta/falls"), "falls mainly on the plain", true)
+	checkContents(t, aliceCtx, naming.JoinAddressName(mtAddr, "a/b/falls"), "falls mainly on the plain", true)
+	checkContents(t, bobCtx, naming.JoinAddressName(mtAddr, "a/b/falls"), "falls mainly on the plain", false)
+
+	// Test getting/setting AccessLists.
+	perms, version := doGetPermissions(t, rootCtx, mtAddr, "stuff", true)
+	doSetPermissions(t, rootCtx, mtAddr, "stuff", perms, "xyzzy", false) // bad version
+	doSetPermissions(t, rootCtx, mtAddr, "stuff", perms, version, true)  // correct version
+	_, nversion := doGetPermissions(t, rootCtx, mtAddr, "stuff", true)
+	if nversion == version {
+		boom(t, "version didn't change after SetPermissions: %s", nversion)
+	}
+	doSetPermissions(t, rootCtx, mtAddr, "stuff", perms, "", true) // no version
+
+	// Bob should be able to create nodes under the mounttable root but not alice.
+	doSetPermissions(t, aliceCtx, mtAddr, "onlybob", perms, "", false)
+	doSetPermissions(t, bobCtx, mtAddr, "onlybob", perms, "", true)
+
+	// Test that setting Permissions to permissions that don't include the the setter's
+	// blessings in Admin, automatically add their Blessings to Admin to prevent
+	// locking everyone out.
+	perms, _ = doGetPermissions(t, bobCtx, mtAddr, "onlybob", true)
+	noRootPerms := perms.Copy()
+	noRootPerms.Clear("bob", "Admin")
+	doSetPermissions(t, bobCtx, mtAddr, "onlybob", noRootPerms, "", true)
+	// This should succeed, because "bob" should automatically be added to "Admin"
+	// even though he cleared himself from "Admin".
+	doSetPermissions(t, bobCtx, mtAddr, "onlybob", perms, "", true)
+	// Test that adding a non-standard perms is normalized when retrieved.
+	admin := perms["Admin"]
+	admin.In = []security.BlessingPattern{"bob", "bob"}
+	perms["Admin"] = admin
+	doSetPermissions(t, bobCtx, mtAddr, "onlybob", perms, "", true)
+	perms, _ = doGetPermissions(t, bobCtx, mtAddr, "onlybob", true)
+	if got, want := perms["Admin"].In, []security.BlessingPattern{"bob"}; !reflect.DeepEqual(got, want) {
+		boom(t, "got %v, want %v", got, want)
+	}
+
+	// Test generic unmount.
+	rootCtx.Info("Test generic unmount.")
+	doUnmount(t, rootCtx, mtAddr, "a/b", "", true)
+	checkContents(t, rootCtx, naming.JoinAddressName(mtAddr, "a/b/falls"), "falls mainly on the plain", false)
+
+	// Test specific unmount.
+	rootCtx.Info("Test specific unmount.")
+	doMount(t, rootCtx, mtAddr, "a/b", collectionName, true)
+	doUnmount(t, rootCtx, mtAddr, "a/b", collectionName, true)
+	checkContents(t, rootCtx, naming.JoinAddressName(mtAddr, "a/b/falls"), "falls mainly on the plain", false)
+
+	// Try timing out a mount.
+	rootCtx.Info("Try timing out a mount.")
+	doMount(t, rootCtx, mtAddr, "stuffWithTTL", collectionName, true)
+	checkContents(t, rootCtx, naming.JoinAddressName(mtAddr, "stuffWithTTL/the/rain"), "the rain", true)
+	clock.AdvanceTime(time.Duration(ttlSecs+4) * time.Second)
+	checkContents(t, rootCtx, naming.JoinAddressName(mtAddr, "stuffWithTTL/the/rain"), "the rain", false)
+
+	// Test unauthorized mount.
+	rootCtx.Info("Test unauthorized mount.")
+	doMount(t, bobCtx, mtAddr, "/a/b", collectionName, false)
+	doMount(t, aliceCtx, mtAddr, "/a/b", collectionName, false)
+
+	doUnmount(t, bobCtx, mtAddr, "x/y", collectionName, false)
+}
+
+func doGlobX(t *testing.T, ctx *context.T, ep, suffix, pattern string, joinServer bool) []string {
+	name := naming.JoinAddressName(ep, suffix)
+	client := v23.GetClient(ctx)
+	call, err := client.StartCall(ctx, name, rpc.GlobMethod, []interface{}{pattern}, options.Preresolved{})
+	if err != nil {
+		boom(t, "Glob.StartCall %s %s: %s", name, pattern, err)
+	}
+	var reply []string
+	for {
+		var gr naming.GlobReply
+		err := call.Recv(&gr)
+		if err == io.EOF {
+			break
+		}
+		if err != nil {
+			boom(t, "Glob.StartCall %s: %s", name, pattern, err)
+		}
+		switch v := gr.(type) {
+		case naming.GlobReplyEntry:
+			if joinServer && len(v.Value.Servers) > 0 {
+				reply = append(reply, naming.JoinAddressName(v.Value.Servers[0].Server, v.Value.Name))
+			} else {
+				reply = append(reply, v.Value.Name)
+			}
+		}
+	}
+	if err := call.Finish(); err != nil {
+		boom(t, "Glob.Finish %s: %s", name, pattern, err)
+	}
+	return reply
+}
+
+func doGlob(t *testing.T, ctx *context.T, ep, suffix, pattern string) []string {
+	return doGlobX(t, ctx, ep, suffix, pattern, false)
+}
+
+// checkMatch verified that the two slices contain the same string items, albeit
+// not necessarily in the same order.  Item repetitions are allowed, but their
+// numbers need to match as well.
+func checkMatch(t *testing.T, want []string, got []string) {
+	if len(want) == 0 && len(got) == 0 {
+		return
+	}
+	w := sort.StringSlice(want)
+	w.Sort()
+	g := sort.StringSlice(got)
+	g.Sort()
+	if !reflect.DeepEqual(w, g) {
+		boom(t, "Glob expected %v got %v", want, got)
+	}
+}
+
+// checkExists makes sure a name exists (or not).
+func checkExists(t *testing.T, ctx *context.T, ep, suffix string, shouldSucceed bool) {
+	x := doGlobX(t, ctx, ep, "", suffix, false)
+	if len(x) != 1 || x[0] != suffix {
+		if shouldSucceed {
+			boom(t, "Failed to find %s", suffix)
+		}
+		return
+	}
+	if !shouldSucceed {
+		boom(t, "%s exists but shouldn't", suffix)
+	}
+}
+
+func TestGlob(t *testing.T) {
+	rootCtx, shutdown := test.V23InitWithMounttable()
+	defer shutdown()
+
+	stop, estr, _, _ := newMT(t, "", rootCtx)
+	defer stop()
+
+	// set up a mount space
+	fakeServer := naming.JoinAddressName(estr, "quux")
+	doMount(t, rootCtx, estr, "one/bright/day", fakeServer, true)
+	doMount(t, rootCtx, estr, "in/the/middle", fakeServer, true)
+	doMount(t, rootCtx, estr, "of/the/night", fakeServer, true)
+
+	// Try various globs.
+	tests := []struct {
+		in       string
+		expected []string
+	}{
+		{"*", []string{"one", "in", "of"}},
+		{"...", []string{"", "one", "in", "of", "one/bright", "in/the", "of/the", "one/bright/day", "in/the/middle", "of/the/night"}},
+		{"*/...", []string{"one", "in", "of", "one/bright", "in/the", "of/the", "one/bright/day", "in/the/middle", "of/the/night"}},
+		{"one/...", []string{"one", "one/bright", "one/bright/day"}},
+		{"of/the/night/two/dead/boys", []string{"of/the/night"}},
+		{"*/the", []string{"in/the", "of/the"}},
+		{"*/the/...", []string{"in/the", "of/the", "in/the/middle", "of/the/night"}},
+		{"o*", []string{"one", "of"}},
+		{"", []string{""}},
+	}
+	for _, test := range tests {
+		out := doGlob(t, rootCtx, estr, "", test.in)
+		checkMatch(t, test.expected, out)
+	}
+
+	// Test Glob on a name that is under a mounted server. The result should the
+	// the address the mounted server with the extra suffix.
+	{
+		results := doGlobX(t, rootCtx, estr, "of/the/night/two/dead/boys/got/up/to/fight", "*", true)
+		if len(results) != 1 {
+			boom(t, "Unexpected number of results. Got %v, want 1", len(results))
+		}
+		_, suffix := naming.SplitAddressName(results[0])
+		if expected := "quux/two/dead/boys/got/up/to/fight"; suffix != expected {
+			boom(t, "Unexpected suffix. Got %v, want %v", suffix, expected)
+		}
+	}
+}
+
+type fakeServerCall struct {
+	sendCount int
+}
+
+func (fakeServerCall) Security() security.Call              { return security.NewCall(&security.CallParams{}) }
+func (fakeServerCall) Suffix() string                       { return "" }
+func (fakeServerCall) LocalEndpoint() naming.Endpoint       { return naming.Endpoint{} }
+func (fakeServerCall) RemoteEndpoint() naming.Endpoint      { return naming.Endpoint{} }
+func (fakeServerCall) GrantedBlessings() security.Blessings { return security.Blessings{} }
+func (fakeServerCall) Server() rpc.Server                   { return nil }
+func (c *fakeServerCall) SendStream() interface {
+	Send(naming.GlobReply) error
+} {
+	return c
+}
+func (c *fakeServerCall) Send(reply naming.GlobReply) error {
+	c.sendCount++
+	return nil
+}
+
+/*
+func TestGlobAborts(t *testing.T) {
+	ctx, shutdown := test.V23InitWithMounttable()
+	defer shutdown()
+
+	mt, err := mounttablelib.NewMountTableDispatcher(ctx, "", "", "")
+	if err != nil {
+		t.Fatal(err)
+	}
+
+	mount := func(name string) error {
+		invoker, _, _ := mt.Lookup(ctx, name)
+		server := naming.FormatEndpoint("tcp", name)
+		return invoker.(mounttable.MountTableServerStub).Mount(ctx, fakeServerCall{}, server, 0, 0)
+	}
+	// Mount 125 entries: 5 "directories" with 25 entries each.
+	for i := 0; i < 5; i++ {
+		for j := 0; j < 25; j++ {
+			if err := mount(fmt.Sprintf("%d/%d", i, j)); err != nil {
+				t.Fatalf("%v (%d, %d)", err, i, j)
+			}
+		}
+	}
+
+	glob := func(ctx *context.T) (int, error) {
+		root, _, _ := mt.Lookup(ctx, "")
+		g, _ := glob.Parse("...")
+		fCall := &fakeServerCall{}
+		root.(rpc.Globber).Globber().AllGlobber.Glob__(ctx, fCall, g)
+		return fCall.sendCount, nil
+	}
+
+	got, err := glob(ctx)
+	if err != nil {
+		t.Fatal(err)
+	}
+	if want := 5 + 125 + 1; got != want { // 5 "directories", 125 entries, 1 root entry
+		t.Errorf("Got %d want %d", got, want)
+	}
+	canceled, cancel := context.WithCancel(ctx)
+	cancel()
+	if got, err = glob(canceled); err != nil {
+		t.Fatal(err)
+	}
+	if got != 0 {
+		t.Errorf("Glob returned entries even though the context was cancelled first (returned %d)", got)
+	}
+}
+*/
+
+func TestAccessListTemplate(t *testing.T) {
+	rootCtx, aliceCtx, bobCtx, shutdown := initTest()
+	defer shutdown()
+
+	stop, estr, _, _ := newMT(t, "testdata/test.perms", rootCtx)
+	defer stop()
+	fakeServer := naming.JoinAddressName(estr, "quux")
+
+	// Noone should be able to mount on someone else's names.
+	doMount(t, aliceCtx, estr, "users/ted", fakeServer, false)
+	doMount(t, bobCtx, estr, "users/carol", fakeServer, false)
+	doMount(t, rootCtx, estr, "users/george", fakeServer, false)
+
+	// Anyone should be able to mount on their own names.
+	doMount(t, aliceCtx, estr, "users/alice", fakeServer, true)
+	doMount(t, bobCtx, estr, "users/bob", fakeServer, true)
+	doMount(t, rootCtx, estr, "users/root", fakeServer, true)
+
+	// Make sure the counter works.
+	doUnmount(t, aliceCtx, estr, "users/alice", "", true)
+	doUnmount(t, bobCtx, estr, "users/bob", "", true)
+	doUnmount(t, rootCtx, estr, "users/root", "", true)
+	perms := access.Permissions{"Admin": access.AccessList{In: []security.BlessingPattern{security.AllPrincipals}}}
+	doSetPermissions(t, aliceCtx, estr, "users/alice/a/b/c/d", perms, "", true)
+	doSetPermissions(t, aliceCtx, estr, "users/alice/a/b/c/d", perms, "", true)
+
+	/*
+		// Do we obey limits?
+		for i := 0; i < mounttablelib.DefaultMaxNodesPerUser()-5; i++ {
+			node := fmt.Sprintf("users/alice/a/b/c/d/%d", i)
+			doSetPermissions(t, aliceCtx, estr, node, perms, "", true)
+		}
+		doSetPermissions(t, aliceCtx, estr, "users/alice/a/b/c/d/straw", perms, "", false)
+
+		// See if the stats numbers are correct.
+		testcases := []struct {
+			key      string
+			expected interface{}
+		}{
+			{"alice", int64(mounttablelib.DefaultMaxNodesPerUser())},
+			{"bob", int64(0)},
+			{"root", int64(0)},
+			{conventions.ServerUser, int64(3)},
+		}
+		for _, tc := range testcases {
+			name := "testAccessListTemplate/num-nodes-per-user/" + tc.key
+			got, err := libstats.Value(name)
+			if err != nil {
+				t.Errorf("unexpected error getting map entry for %s: %s", name, err)
+			}
+			if got != tc.expected {
+				t.Errorf("unexpected getting map entry for %s. Got %v, want %v", name, got, tc.expected)
+			}
+		}
+	*/
+}
+
+func TestGlobAccessLists(t *testing.T) {
+	rootCtx, aliceCtx, bobCtx, shutdown := initTest()
+	defer shutdown()
+
+	stop, estr, _, _ := newMT(t, "testdata/test.perms", rootCtx)
+	defer stop()
+
+	// set up a mount space
+	fakeServer := naming.JoinAddressName(estr, "quux")
+	doMount(t, aliceCtx, estr, "one/bright/day", fakeServer, false) // Fails because alice can't mount there.
+	doMount(t, bobCtx, estr, "one/bright/day", fakeServer, true)
+	doMount(t, rootCtx, estr, "a/b/c", fakeServer, true)
+
+	// Try various globs.
+	tests := []struct {
+		ctx      *context.T
+		in       string
+		expected []string
+	}{
+		{rootCtx, "*", []string{"one", "a", "stuff", "users"}},
+		{aliceCtx, "*", []string{"one", "a", "users"}},
+		{bobCtx, "*", []string{"one", "stuff", "users"}},
+		// bob, alice, and root have different visibility to the space.
+		{rootCtx, "*/...", []string{"one", "a", "one/bright", "a/b", "one/bright/day", "a/b/c", "stuff", "users"}},
+		{aliceCtx, "*/...", []string{"one", "a", "one/bright", "a/b", "one/bright/day", "a/b/c", "users"}},
+		{bobCtx, "*/...", []string{"one", "one/bright", "one/bright/day", "stuff", "users"}},
+	}
+	for _, test := range tests {
+		out := doGlob(t, test.ctx, estr, "", test.in)
+		checkMatch(t, test.expected, out)
+	}
+}
+
+func TestCleanup(t *testing.T) {
+	rootCtx, shutdown := test.V23InitWithMounttable()
+	defer shutdown()
+
+	stop, estr, _, _ := newMT(t, "", rootCtx)
+	defer stop()
+
+	// Set up one mount.
+	fakeServer := naming.JoinAddressName(estr, "quux")
+	doMount(t, rootCtx, estr, "one/bright/day", fakeServer, true)
+	checkMatch(t, []string{"one", "one/bright", "one/bright/day"}, doGlob(t, rootCtx, estr, "", "*/..."))
+
+	// After the unmount nothing should be left
+	doUnmount(t, rootCtx, estr, "one/bright/day", "", true)
+	checkMatch(t, nil, doGlob(t, rootCtx, estr, "", "one"))
+	checkMatch(t, nil, doGlob(t, rootCtx, estr, "", "*/..."))
+
+	// Set up a mount, then set the AccessList.
+	doMount(t, rootCtx, estr, "one/bright/day", fakeServer, true)
+	checkMatch(t, []string{"one", "one/bright", "one/bright/day"}, doGlob(t, rootCtx, estr, "", "*/..."))
+	perms := access.Permissions{"Read": access.AccessList{In: []security.BlessingPattern{security.AllPrincipals}}}
+	doSetPermissions(t, rootCtx, estr, "one/bright", perms, "", true)
+
+	// After the unmount we should still have everything above the AccessList.
+	doUnmount(t, rootCtx, estr, "one/bright/day", "", true)
+	checkMatch(t, []string{"one", "one/bright"}, doGlob(t, rootCtx, estr, "", "*/..."))
+}
+
+func TestDelete(t *testing.T) {
+	rootCtx, aliceCtx, bobCtx, shutdown := initTest()
+	defer shutdown()
+
+	stop, estr, _, _ := newMT(t, "testdata/test.perms", rootCtx)
+	defer stop()
+
+	// set up a mount space
+	fakeServer := naming.JoinAddressName(estr, "quux")
+	doMount(t, bobCtx, estr, "one/bright/day", fakeServer, true)
+	doMount(t, rootCtx, estr, "a/b/c", fakeServer, true)
+
+	// It shouldn't be possible to delete anything with children unless explicitly requested.
+	doDeleteNode(t, rootCtx, estr, "a/b", false)
+	checkExists(t, rootCtx, estr, "a/b", true)
+	doDeleteSubtree(t, rootCtx, estr, "a/b", true)
+	checkExists(t, rootCtx, estr, "a/b", false)
+
+	// Alice shouldn't be able to delete what bob created but bob and root should.
+	doDeleteNode(t, aliceCtx, estr, "one/bright/day", false)
+	checkExists(t, rootCtx, estr, "one/bright/day", true)
+	doDeleteNode(t, rootCtx, estr, "one/bright/day", true)
+	checkExists(t, rootCtx, estr, "one/bright/day", false)
+	doDeleteNode(t, bobCtx, estr, "one/bright", true)
+	checkExists(t, rootCtx, estr, "one/bright", false)
+
+	// Make sure directory admin can delete directory children.
+	perms := access.Permissions{"Admin": access.AccessList{In: []security.BlessingPattern{"bob"}}}
+	doSetPermissions(t, bobCtx, estr, "hoohaa", perms, "", false)
+	doDeleteNode(t, rootCtx, estr, "hoohaa", true)
+	checkExists(t, rootCtx, estr, "hoohaa", false)
+}
+
+func TestServerFormat(t *testing.T) {
+	rootCtx, shutdown := test.V23InitWithMounttable()
+	defer shutdown()
+
+	stop, estr, _, _ := newMT(t, "", rootCtx)
+	defer stop()
+
+	doMount(t, rootCtx, estr, "endpoint", naming.JoinAddressName(estr, "life/on/the/mississippi"), true)
+	doMount(t, rootCtx, estr, "hostport", "/atrampabroad:8000", true)
+	doMount(t, rootCtx, estr, "invalid/not/rooted", "atrampabroad:8000", false)
+	doMount(t, rootCtx, estr, "invalid/no/port", "/atrampabroad", false)
+	doMount(t, rootCtx, estr, "invalid/endpoint", "/@following the equator:8000@@@", false)
+}
+
+func TestExpiry(t *testing.T) {
+	rootCtx, shutdown := test.V23InitWithMounttable()
+	defer shutdown()
+
+	stop, estr, _, clock := newMT(t, "", rootCtx)
+	defer stop()
+	stop, collectionAddr := newCollection(t, rootCtx)
+	defer stop()
+
+	collectionName := naming.JoinAddressName(collectionAddr, "collection")
+
+	doMount(t, rootCtx, estr, "a1/b1", collectionName, true)
+	doMount(t, rootCtx, estr, "a1/b2", collectionName, true)
+	doMount(t, rootCtx, estr, "a2/b1", collectionName, true)
+	doMount(t, rootCtx, estr, "a2/b2/c", collectionName, true)
+
+	checkMatch(t, []string{"a1/b1", "a2/b1"}, doGlob(t, rootCtx, estr, "", "*/b1/..."))
+	clock.AdvanceTime(time.Duration(ttlSecs/2) * time.Second)
+	checkMatch(t, []string{"a1/b1", "a2/b1"}, doGlob(t, rootCtx, estr, "", "*/b1/..."))
+	checkMatch(t, []string{"c"}, doGlob(t, rootCtx, estr, "a2/b2", "*"))
+	// Refresh only a1/b1.  All the other mounts will expire upon the next
+	// ft advance.
+	doMount(t, rootCtx, estr, "a1/b1", collectionName, true)
+	clock.AdvanceTime(time.Duration(ttlSecs/2+4) * time.Second)
+	resolve(rootCtx, naming.JoinAddressName(estr, "a2/b2/c"))
+	resolve(rootCtx, naming.JoinAddressName(estr, "a2/b1"))
+	checkMatch(t, []string{"a1", "a1/b1"}, doGlob(t, rootCtx, estr, "", "*/..."))
+	checkMatch(t, []string{"a1/b1"}, doGlob(t, rootCtx, estr, "", "*/b1/..."))
+}
+
+func TestIntermediateNodesCreatedFromConfig(t *testing.T) {
+	rootCtx, _, _, shutdown := initTest()
+	defer shutdown()
+
+	stop, estr, _, _ := newMT(t, "testdata/intermediate.perms", rootCtx)
+	defer stop()
+
+	// x and x/y should have the same permissions at the root.
+	rootPerms, _ := doGetPermissions(t, rootCtx, estr, "", true)
+	if perms, _ := doGetPermissions(t, rootCtx, estr, "x", true); !reflect.DeepEqual(rootPerms, perms) {
+		boom(t, "for x got %v, want %v", perms, rootPerms)
+	}
+	if perms, _ := doGetPermissions(t, rootCtx, estr, "x/y", true); !reflect.DeepEqual(rootPerms, perms) {
+		boom(t, "for x/y got %v, want %v", perms, rootPerms)
+	}
+	if perms, _ := doGetPermissions(t, rootCtx, estr, "x/y/z", true); reflect.DeepEqual(rootPerms, perms) {
+		boom(t, "for x/y/z got %v, don't want %v", perms, rootPerms)
+	}
+}
+
+func initTest() (rootCtx *context.T, aliceCtx *context.T, bobCtx *context.T, shutdown v23.Shutdown) {
+	ctx, shutdown := test.V23InitWithMounttable()
+	var err error
+	if rootCtx, err = v23.WithPrincipal(ctx, testutil.NewPrincipal("root")); err != nil {
+		panic("failed to set root principal")
+	}
+	if aliceCtx, err = v23.WithPrincipal(ctx, testutil.NewPrincipal("alice")); err != nil {
+		panic("failed to set alice principal")
+	}
+	if bobCtx, err = v23.WithPrincipal(ctx, testutil.NewPrincipal("bob")); err != nil {
+		panic("failed to set bob principal")
+	}
+	for _, r := range []*context.T{rootCtx, aliceCtx, bobCtx} {
+		// A hack to set the namespace roots to a value that won't work.
+		v23.GetNamespace(r).SetRoots()
+		// And have all principals recognize each others blessings.
+		p1 := v23.GetPrincipal(r)
+		for _, other := range []*context.T{rootCtx, aliceCtx, bobCtx} {
+			// testutil.NewPrincipal has already setup each
+			// principal to use the same blessing for both server
+			// and client activities.
+			bother, _ := v23.GetPrincipal(other).BlessingStore().Default()
+			if err := security.AddToRoots(p1, bother); err != nil {
+				panic(err)
+			}
+		}
+	}
+	return rootCtx, aliceCtx, bobCtx, shutdown
+}

diff --git a/services/mounttable/btmtd/internal/node.go b/services/mounttable/btmtd/internal/node.go
new file mode 100644
index 0000000..d26aa3a
--- /dev/null
+++ b/services/mounttable/btmtd/internal/node.go

@@ -0,0 +1,392 @@
+// Copyright 2016 The Vanadium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package internal
+
+import (
+	"encoding/hex"
+	"encoding/json"
+	"hash/fnv"
+	"io/ioutil"
+	"path"
+	"sort"
+	"strconv"
+	"strings"
+	"time"
+
+	"google.golang.org/cloud/bigtable"
+
+	"v.io/v23"
+	"v.io/v23/context"
+	"v.io/v23/naming"
+	"v.io/v23/security/access"
+	vdltime "v.io/v23/vdlroot/time"
+	"v.io/v23/verror"
+
+	"v.io/x/ref/lib/timekeeper"
+)
+
+type mtNode struct {
+	bt           *BigTable
+	name         string
+	sticky       bool
+	creationTime bigtable.Timestamp
+	permissions  access.Permissions
+	version      string
+	mountFlags   mtFlags
+	servers      []naming.MountedServer
+	children     []string
+}
+
+type mtFlags struct {
+	MT   bool `json:"mt,omitempty"`
+	Leaf bool `json:"leaf,omitempty"`
+}
+
+func longTimeout(ctx *context.T) (*context.T, func()) {
+	return context.WithTimeout(v23.GetBackgroundContext(ctx), time.Hour)
+}
+
+func getNode(ctx *context.T, bt *BigTable, name string) (*mtNode, error) {
+	row, err := bt.readRow(ctx, rowKey(name), bigtable.RowFilter(bigtable.LatestNFilter(1)))
+	if err != nil {
+		return nil, err
+	}
+	return nodeFromRow(ctx, bt, row, clock), nil
+}
+
+func rowKey(name string) string {
+	// The row key is a hash of the node name followed by the node name
+	// itself.
+	// This spreads the rows evenly across the tablet servers to avoid
+	// traffic imbalance.
+	name = naming.Clean("/" + name)
+	h := fnv.New32()
+	h.Write([]byte(name))
+	return hex.EncodeToString(h.Sum(nil)) + name
+}
+
+func nodeFromRow(ctx *context.T, bt *BigTable, row bigtable.Row, clock timekeeper.TimeKeeper) *mtNode {
+	const offset = 9 // 32-bit value in hex + '/'
+	name := row.Key()
+	if len(name) < offset {
+		return nil
+	}
+	n := &mtNode{
+		bt:   bt,
+		name: name[offset:],
+	}
+	for _, i := range row[metadataFamily] {
+		col := strings.TrimPrefix(i.Column, metadataFamily+":")
+		switch col {
+		case stickyColumn:
+			n.sticky = true
+		case versionColumn:
+			n.version = string(i.Value)
+		case permissionsColumn:
+			if err := json.Unmarshal(i.Value, &n.permissions); err != nil {
+				ctx.Errorf("Failed to decode permissions for %s", name)
+				return nil
+			}
+		case timestampColumn:
+			n.creationTime = i.Timestamp
+		}
+	}
+	n.servers = make([]naming.MountedServer, 0, len(row[serversFamily]))
+	for _, i := range row[serversFamily] {
+		deadline := i.Timestamp.Time()
+		if deadline.Before(clock.Now()) {
+			continue
+		}
+		if err := json.Unmarshal(i.Value, &n.mountFlags); err != nil {
+			ctx.Errorf("Failed to decode mount flags for %s", name)
+			return nil
+		}
+		n.servers = append(n.servers, naming.MountedServer{
+			Server:   i.Column[2:],
+			Deadline: vdltime.Deadline{deadline},
+		})
+	}
+	n.children = make([]string, 0, len(row[childrenFamily]))
+	for _, i := range row[childrenFamily] {
+		child := strings.TrimPrefix(i.Column, childrenFamily+":")
+		n.children = append(n.children, child)
+	}
+	return n
+}
+
+func (n *mtNode) createChild(ctx *context.T, child string, perms access.Permissions) (*mtNode, error) {
+	ts := n.bt.now()
+	mut := bigtable.NewMutation()
+	mut.Set(childrenFamily, child, ts, []byte{1})
+	if err := n.mutate(ctx, mut, false); err != nil {
+		return nil, err
+	}
+
+	// If the current process dies right here, it will leave the parent with
+	// a reference to a child row that doesn't exist. This means that the
+	// parent will never be seen as "empty" and will not be garbage
+	// collected. This will be corrected when:
+	//  - the child is created again, or
+	//  - the parent is forcibly deleted with Delete().
+
+	childName := naming.Join(n.name, child)
+	longCtx, cancel := longTimeout(ctx)
+	defer cancel()
+	if err := n.bt.createRow(longCtx, childName, perms, ts); err != nil {
+		return nil, err
+	}
+	n, err := getNode(ctx, n.bt, childName)
+	if err != nil {
+		return nil, err
+	}
+	if n == nil {
+		return nil, verror.New(errConcurrentAccess, ctx, childName)
+	}
+	return n, nil
+}
+
+func (n *mtNode) mount(ctx *context.T, server string, deadline time.Time, flags naming.MountFlag) error {
+	mut := bigtable.NewMutation()
+	if flags&naming.Replace != 0 {
+		mut.DeleteCellsInFamily(serversFamily)
+	}
+	f := mtFlags{
+		MT:   flags&naming.MT != 0,
+		Leaf: flags&naming.Leaf != 0,
+	}
+	jsonValue, err := json.Marshal(f)
+	if err != nil {
+		return err
+	}
+	mut.Set(serversFamily, server, n.bt.time(deadline), jsonValue)
+	if err := n.mutate(ctx, mut, false); err != nil {
+		return err
+	}
+	if flags&naming.Replace != 0 {
+		n.servers = nil
+	}
+	return nil
+}
+
+func (n *mtNode) unmount(ctx *context.T, server string) error {
+	mut := bigtable.NewMutation()
+	if server == "" {
+		// HACK ALERT
+		// The bttest server doesn't support DeleteCellsInFamily
+		if !n.bt.testMode {
+			mut.DeleteCellsInFamily(serversFamily)
+		} else {
+			for _, s := range n.servers {
+				mut.DeleteCellsInColumn(serversFamily, s.Server)
+			}
+		}
+	} else {
+		mut.DeleteCellsInColumn(serversFamily, server)
+	}
+	if err := n.mutate(ctx, mut, false); err != nil {
+		return err
+	}
+	if n, err := getNode(ctx, n.bt, n.name); err == nil {
+		n.gc(ctx)
+	}
+	return nil
+}
+
+func (n *mtNode) gc(ctx *context.T) (deletedAtLeastOne bool, err error) {
+	for n != nil && n.name != "" && !n.sticky && len(n.children) == 0 && len(n.servers) == 0 {
+		if err = n.delete(ctx, false); err != nil {
+			break
+		}
+		ctx.Infof("Deleted empty node %q", n.name)
+		deletedAtLeastOne = true
+		parent := path.Dir(n.name)
+		if parent == "." {
+			break
+		}
+		if n, err = getNode(ctx, n.bt, parent); err != nil {
+			break
+		}
+	}
+	return
+}
+
+func (n *mtNode) deleteAndGC(ctx *context.T, deleteSubtree bool) error {
+	if err := n.delete(ctx, deleteSubtree); err != nil {
+		return err
+	}
+	parentName, _ := path.Split(n.name)
+	if parent, err := getNode(ctx, n.bt, parentName); err == nil {
+		parent.gc(ctx)
+	}
+	return nil
+}
+
+func (n *mtNode) delete(ctx *context.T, deleteSubtree bool) error {
+	if !deleteSubtree && len(n.children) > 0 {
+		return verror.New(errNotEmpty, ctx, n.name)
+	}
+
+	// TODO(rthellend): This naive approach could be very expensive in
+	// terms of memory. A smarter, but slower, approach would be to walk
+	// the tree without holding on to all the node data.
+	for _, c := range n.children {
+		cn, err := getNode(ctx, n.bt, naming.Join(n.name, c))
+		if err != nil {
+			return err
+		}
+		if cn == nil {
+			// Node 'n' has a reference to a child that doesn't
+			// exist. It could be that it is being created or
+			// deleted concurrently. To be sure, we have to create
+			// it before deleting it.
+			if cn, err = n.createChild(ctx, c, n.permissions); err != nil {
+				return err
+			}
+		}
+		if err := cn.delete(ctx, true); err != nil {
+			return err
+		}
+	}
+
+	mut := bigtable.NewMutation()
+	mut.DeleteRow()
+	if err := n.mutate(ctx, mut, true); err != nil {
+		return err
+	}
+
+	// If the current process dies right here, it will leave the parent with
+	// a reference to a child row that no longer exists. This means that the
+	// parent will never be seen as "empty" and will not be garbage
+	// collected. This will be corrected when:
+	//  - the child is re-created, or
+	//  - the parent is forcibly deleted with Delete().
+
+	// Delete from parent node.
+	parent, child := path.Split(n.name)
+	mut = bigtable.NewMutation()
+	// DeleteTimestampRange deletes the cells whose timestamp is in the
+	// half open range [start,end). We need to delete the cell with
+	// timestamp n.creationTime (and any older ones).
+	mut.DeleteTimestampRange(childrenFamily, child, 0, n.bt.timeNext(n.creationTime))
+
+	longCtx, cancel := longTimeout(ctx)
+	defer cancel()
+	return n.bt.apply(longCtx, rowKey(parent), mut)
+}
+
+func (n *mtNode) setPermissions(ctx *context.T, perms access.Permissions) error {
+	jsonPerms, err := json.Marshal(perms)
+	if err != nil {
+		return err
+	}
+	mut := bigtable.NewMutation()
+	mut.Set(metadataFamily, permissionsColumn, bigtable.ServerTime, jsonPerms)
+	mut.Set(metadataFamily, stickyColumn, bigtable.ServerTime, []byte{1})
+	if err := n.mutate(ctx, mut, false); err != nil {
+		return err
+	}
+	return nil
+}
+
+func (n *mtNode) mutate(ctx *context.T, mut *bigtable.Mutation, delete bool) error {
+	if !delete {
+		v, err := strconv.ParseUint(n.version, 10, 64)
+		if err != nil {
+			return err
+		}
+		newVersion := strconv.FormatUint(v+1, 10)
+		mut.Set(metadataFamily, versionColumn, bigtable.ServerTime, []byte(newVersion))
+	}
+
+	// The mutation will succeed iff the row already exists with the
+	// expected version.
+	filter := bigtable.ChainFilters(
+		bigtable.FamilyFilter(metadataFamily),
+		bigtable.ColumnFilter(versionColumn),
+		bigtable.LatestNFilter(1),
+		bigtable.ValueFilter(n.version),
+	)
+	condMut := bigtable.NewCondMutation(filter, mut, nil)
+	var success bool
+	if err := n.bt.apply(ctx, rowKey(n.name), condMut, bigtable.GetCondMutationResult(&success)); err != nil {
+		return err
+	}
+	if !success {
+		return verror.New(errConcurrentAccess, ctx, n.name)
+	}
+	return nil
+}
+
+func createNodesFromFile(ctx *context.T, bt *BigTable, fileName string) error {
+	var nodes map[string]access.Permissions
+	data, err := ioutil.ReadFile(fileName)
+	if err != nil {
+		return err
+	}
+	if err := json.Unmarshal(data, &nodes); err != nil {
+		return err
+	}
+	// This loop adds backward compatibility with the older template format,
+	// e.g. "a/b/%%" { "Admin": { "In": [ "%%" ] } }
+	// With the new format, this is equivalent to:
+	// "a/b" { "%%/Admin": { "In": [ "%%" ] } }
+	for node, perms := range nodes {
+		if strings.HasSuffix(node, "/%%") {
+			delete(nodes, node)
+			node = strings.TrimSuffix(node, "/%%")
+			p := nodes[node]
+			if p == nil {
+				p = make(access.Permissions)
+			}
+			for tag := range perms {
+				p["%%/"+tag] = perms[tag]
+			}
+			nodes[node] = p
+		}
+	}
+
+	// Create the nodes in alphanumeric order so that children are
+	// created after their parents.
+	sortedNodes := []string{}
+	for node := range nodes {
+		sortedNodes = append(sortedNodes, node)
+	}
+	sort.Strings(sortedNodes)
+
+	ts := bt.now()
+	for _, node := range sortedNodes {
+		perms := nodes[node]
+		if node == "" {
+			if err := bt.createRow(ctx, "", perms, ts); err != nil {
+				return err
+			}
+			continue
+		}
+		parentName := ""
+		for _, e := range strings.Split(node, "/") {
+			n, err := getNode(ctx, bt, naming.Join(parentName, e))
+			if err != nil {
+				return err
+			}
+			if n == nil {
+				parent, err := getNode(ctx, bt, parentName)
+				if err != nil {
+					return err
+				}
+				if n, err = parent.createChild(ctx, e, parent.permissions); err != nil {
+					return err
+				}
+			}
+			if n.name == node {
+				// setPermissions also makes the node sticky.
+				if err := n.setPermissions(ctx, perms); err != nil {
+					return err
+				}
+			}
+			parentName = n.name
+		}
+	}
+	return nil
+}

diff --git a/services/mounttable/btmtd/internal/testdata/intermediate.perms b/services/mounttable/btmtd/internal/testdata/intermediate.perms
new file mode 100644
index 0000000..438d3f1
--- /dev/null
+++ b/services/mounttable/btmtd/internal/testdata/intermediate.perms

@@ -0,0 +1,10 @@
+{
+"": {
+	"Read":  { "In": ["..."] },
+	"Admin": { "In": ["root"] }
+},
+"x/y/z": {
+	"Read":  { "In": ["bob", "root"] },
+        "Mount": { "In": ["root"] }
+}
+}

diff --git a/services/mounttable/btmtd/internal/testdata/test.perms b/services/mounttable/btmtd/internal/testdata/test.perms
new file mode 100644
index 0000000..270628e
--- /dev/null
+++ b/services/mounttable/btmtd/internal/testdata/test.perms

@@ -0,0 +1,22 @@
+{
+"": {
+	"Read":  { "In": ["..."] },
+	"Admin": { "In": ["root"] },
+	"Create": { "In": ["root", "bob"] }
+},
+"stuff": {
+	"Read":  { "In": ["bob", "root"] },
+        "Mount": { "In": ["root"] }
+},
+"a": {
+	"Read":  { "In": ["alice", "root"] },
+        "Create": { "In": ["root"] }
+},
+"users": {
+	"Create": { "In": ["..."] },
+	"Resolve": { "In": ["..."] }
+},
+"users/%%": {
+	"Admin":  { "In": ["%%"] }
+}
+}

diff --git a/services/mounttable/btmtd/main.go b/services/mounttable/btmtd/main.go
new file mode 100644
index 0000000..2e5676a
--- /dev/null
+++ b/services/mounttable/btmtd/main.go

@@ -0,0 +1,137 @@
+// Copyright 2016 The Vanadium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// The following enables go generate to generate the doc.go file.
+//go:generate go run $JIRI_ROOT/release/go/src/v.io/x/lib/cmdline/testdata/gendoc.go . -help
+
+package main
+
+import (
+	"math/rand"
+	"time"
+
+	"v.io/v23"
+	"v.io/v23/context"
+	"v.io/v23/options"
+
+	"v.io/x/lib/cmdline"
+	"v.io/x/ref/lib/security/securityflag"
+	"v.io/x/ref/lib/signals"
+	"v.io/x/ref/lib/v23cmd"
+	_ "v.io/x/ref/runtime/factories/roaming"
+	"v.io/x/ref/services/mounttable/btmtd/internal"
+)
+
+var (
+	cmdRoot = &cmdline.Command{
+		Runner: v23cmd.RunnerFunc(runMT),
+		Name:   "btmtd",
+		Short:  "Runs the mounttable service",
+		Long:   "Runs the mounttable service.",
+		Children: []*cmdline.Command{
+			cmdSetup, cmdDestroy, cmdDump,
+		},
+	}
+	cmdSetup = &cmdline.Command{
+		Runner: v23cmd.RunnerFunc(runSetup),
+		Name:   "setup",
+		Short:  "Creates and sets up the table",
+		Long:   "Creates and sets up the table.",
+	}
+	cmdDestroy = &cmdline.Command{
+		Runner: v23cmd.RunnerFunc(runDestroy),
+		Name:   "destroy",
+		Short:  "Destroy the table",
+		Long:   "Destroy the table. All data will be lost.",
+	}
+	cmdDump = &cmdline.Command{
+		Runner: v23cmd.RunnerFunc(runDump),
+		Name:   "dump",
+		Short:  "Dump the table",
+		Long:   "Dump the table.",
+	}
+
+	keyFileFlag      string
+	projectFlag      string
+	zoneFlag         string
+	clusterFlag      string
+	tableFlag        string
+	inMemoryTestFlag bool
+
+	permissionsFileFlag string
+)
+
+func main() {
+	rand.Seed(time.Now().UnixNano())
+	cmdRoot.Flags.StringVar(&keyFileFlag, "key-file", "", "The file that contains the Google Cloud JSON credentials to use")
+	cmdRoot.Flags.StringVar(&projectFlag, "project", "", "The Google Cloud project of the Cloud Bigtable cluster")
+	cmdRoot.Flags.StringVar(&zoneFlag, "zone", "", "The Google Cloud zone of the Cloud Bigtable cluster")
+	cmdRoot.Flags.StringVar(&clusterFlag, "cluster", "", "The Cloud Bigtable cluster name")
+	cmdRoot.Flags.StringVar(&tableFlag, "table", "mounttable", "The name of the table to use")
+	cmdRoot.Flags.BoolVar(&inMemoryTestFlag, "in-memory-test", false, "If true, use an in-memory bigtable server (for testing only)")
+	cmdRoot.Flags.StringVar(&permissionsFileFlag, "permissions-file", "", "The file that contains the initial node permissions.")
+
+	cmdline.HideGlobalFlagsExcept()
+	cmdline.Main(cmdRoot)
+}
+
+func runSetup(ctx *context.T, env *cmdline.Env, args []string) error {
+	bt, err := internal.NewBigTable(keyFileFlag, projectFlag, zoneFlag, clusterFlag, tableFlag)
+	if err != nil {
+		return err
+	}
+	return bt.SetupTable(ctx, permissionsFileFlag)
+}
+
+func runDestroy(ctx *context.T, env *cmdline.Env, args []string) error {
+	bt, err := internal.NewBigTable(keyFileFlag, projectFlag, zoneFlag, clusterFlag, tableFlag)
+	if err != nil {
+		return err
+	}
+	return bt.DeleteTable(ctx)
+}
+
+func runDump(ctx *context.T, env *cmdline.Env, args []string) error {
+	bt, err := internal.NewBigTable(keyFileFlag, projectFlag, zoneFlag, clusterFlag, tableFlag)
+	if err != nil {
+		return err
+	}
+	return bt.DumpTable(ctx)
+}
+
+func runMT(ctx *context.T, env *cmdline.Env, args []string) error {
+	var (
+		bt  *internal.BigTable
+		err error
+	)
+	if inMemoryTestFlag {
+		var shutdown func()
+		if bt, shutdown, err = internal.NewTestBigTable(tableFlag); err != nil {
+			return err
+		}
+		defer shutdown()
+		if err := bt.SetupTable(ctx, permissionsFileFlag); err != nil {
+			return err
+		}
+	} else {
+		if bt, err = internal.NewBigTable(keyFileFlag, projectFlag, zoneFlag, clusterFlag, tableFlag); err != nil {
+			return err
+		}
+	}
+
+	globalPerms, err := securityflag.PermissionsFromFlag()
+	if err != nil {
+		return err
+	}
+	acl := globalPerms["Admin"]
+	disp := internal.NewDispatcher(bt, &acl)
+	ctx, server, err := v23.WithNewDispatchingServer(ctx, "", disp, options.ServesMountTable(true))
+	if err != nil {
+		return err
+	}
+	ctx.Infof("Listening on: %v", server.Status().Endpoints)
+
+	<-signals.ShutdownOnSignals(ctx)
+	return nil
+}
commit	e45e6291a5d7f32ce8150fe747a2e0e3c2276ffb	[log] [tgz]
author	Robin Thellend <rthellend@google.com>	Thu Jun 09 20:40:46 2016 -0700
committer	Robin Thellend <rthellend@google.com>	Thu Jun 09 20:40:46 2016 -0700
tree	2c9a0787c998055789d9cab285cd7e0cc0765848
parent	0c9beb763c1d15d7991b521c754930c81ce8b271 [diff]