// Copyright 2015 The Vanadium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
// Package fs_cablobstore implements a content addressable blob store
// on top of a file system. It assumes that either os.Link() or
// os.Rename() is available.
package fs_cablobstore
// Internals:
// Blobs are partitioned into two types of unit: "fragments" and "chunks".
// A fragment is stored in a single file on disc. A chunk is a unit of network
// transmission.
//
// The blobstore consists of a directory with "blob", "cas", "chunk", and
// "tmp" subdirectories.
// - "tmp" is used for temporary files that are moved into place via
// link()/unlink() or rename(), depending on what's available.
// - "cas" contains files whose names are content hashes of the files being
// named. A few slashes are thrown into the name near the front so that no
// single directory gets too large. These files are called "fragments".
// - "blob" contains files whose names are random numbers. These names are
// visible externally as "blob names". Again, a few slashes are thrown
// into the name near the front so that no single directory gets too large.
// Each of these files contains a series of lines of the form:
// d <size> <offset> <cas-fragment>
// followed optionally by a line of the form:
// f <md5-hash>
// Each "d" line indicates that the next <size> bytes of the blob appear at
// <offset> bytes into <cas-fragment>, which is in the "cas" subtree. The
// "f" line indicates that the blob is "finalized" and gives its complete
// md5 hash. No fragments may be appended to a finalized blob.
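// For example, a finalized blob stored in two fragments might be
// described by a file containing (sizes, hashes, and fragment names
// are illustrative):
// d 1024 0 cas/d4/1d/8c/d98f00b204e9800998ecf8427e
// d 2048 0 cas/9e/10/7d/9d372bb6826bd81d3542a419d6
// f 0123456789abcdef0123456789abcdef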
// - "chunk" contains a store (currently implemented with leveldb) that
// maps chunks of blobs to content hashes and vice versa.
import "bufio"
import "bytes"
import "crypto/md5"
import "fmt"
import "hash"
import "io"
import "io/ioutil"
import "math"
import "math/rand"
import "os"
import "path/filepath"
import "strconv"
import "strings"
import "sync"
import "time"
import "v.io/x/ref/services/syncbase/localblobstore"
import "v.io/x/ref/services/syncbase/localblobstore/chunker"
import "v.io/x/ref/services/syncbase/localblobstore/blobmap"
import "v.io/v23/context"
import "v.io/v23/verror"
const pkgPath = "v.io/x/ref/services/syncbase/localblobstore/fs_cablobstore"
var (
errNotADir = verror.Register(pkgPath+".errNotADir", verror.NoRetry, "{1:}{2:} Not a directory{:_}")
errAppendFailed = verror.Register(pkgPath+".errAppendFailed", verror.NoRetry, "{1:}{2:} fs_cablobstore.Append failed{:_}")
errMalformedField = verror.Register(pkgPath+".errMalformedField", verror.NoRetry, "{1:}{2:} Malformed field in blob specification{:_}")
errAlreadyClosed = verror.Register(pkgPath+".errAlreadyClosed", verror.NoRetry, "{1:}{2:} BlobWriter is already closed{:_}")
errBlobAlreadyFinalized = verror.Register(pkgPath+".errBlobAlreadyFinalized", verror.NoRetry, "{1:}{2:} Blob is already finalized{:_}")
errIllegalPositionForRead = verror.Register(pkgPath+".errIllegalPositionForRead", verror.NoRetry, "{1:}{2:} BlobReader: illegal position {3} on Blob of size {4}{:_}")
errBadSeekWhence = verror.Register(pkgPath+".errBadSeekWhence", verror.NoRetry, "{1:}{2:} BlobReader: Bad value for 'whence' in Seek{:_}")
errNegativeSeekPosition = verror.Register(pkgPath+".errNegativeSeekPosition", verror.NoRetry, "{1:}{2:} BlobReader: negative position for Seek: offset {3}, whence {4}{:_}")
errBadSizeOrOffset = verror.Register(pkgPath+".errBadSizeOrOffset", verror.NoRetry, "{1:}{2:} Bad size ({3}) or offset ({4}) in blob {5} (size {6}){:_}")
errMalformedBlobHash = verror.Register(pkgPath+".errMalformedBlobHash", verror.NoRetry, "{1:}{2:} Blob {3} has malformed hash{:_}")
errInvalidBlobName = verror.Register(pkgPath+".errInvalidBlobName", verror.NoRetry, "{1:}{2:} Invalid blob name {3}{:_}")
errCantDeleteBlob = verror.Register(pkgPath+".errCantDeleteBlob", verror.NoRetry, "{1:}{2:} Can't delete blob {3}{:_}")
errBlobDeleted = verror.Register(pkgPath+".errBlobDeleted", verror.NoRetry, "{1:}{2:} Blob is deleted{:_}")
errSizeTooBigForFragment = verror.Register(pkgPath+".errSizeTooBigForFragment", verror.NoRetry, "{1:}{2:} writing blob {3}, size too big for fragment{:4}")
errStreamCancelled = verror.Register(pkgPath+".errStreamCancelled", verror.NoRetry, "{1:}{2:} Advance() called on cancelled stream{:_}")
)
// For the moment, we disallow others from accessing the tree where blobs are
// stored. We could in the future relax this to 0711/0755, and 0644.
const dirPermissions = 0700
const filePermissions = 0600
// Subdirectories of the blobstore's tree
const (
blobDir = "blob" // Subdirectory where blobs are indexed by blob id.
casDir = "cas" // Subdirectory where fragments are indexed by content hash.
chunkDir = "chunk" // Subdirectory where chunks are indexed by content hash.
tmpDir = "tmp" // Subdirectory where temporary files are created.
)
// maxFragmentSize bounds the number of bytes stored in any single fragment
// file; writeToTempFragment() starts a new fragment when a temporary file
// reaches this size. (The 1MiB value here is an assumption.)
const maxFragmentSize = 1024 * 1024
// An FsCaBlobStore represents a simple, content-addressable store.
type FsCaBlobStore struct {
rootName string // The name of the root of the store.
bm *blobmap.BlobMap // Mapping from chunks to blob locations and vice versa.
// mu protects fields below, plus most fields in each blobDesc when used from a BlobWriter.
mu sync.Mutex
activeDesc []*blobDesc // The blob descriptors in use by active BlobReaders and BlobWriters.
toDelete []*map[string]bool // Sets of items that active GC threads are about to delete. (Pointers to maps, to allow pointer comparison.)
}
// hashToFileName() returns a file name, under the specified prefix, that
// encodes the binary hash. Requires len(hash)==16. An md5 hash is suitable.
func hashToFileName(prefix string, hash []byte) string {
return filepath.Join(prefix,
fmt.Sprintf("%02x", hash[0]),
fmt.Sprintf("%02x", hash[1]),
fmt.Sprintf("%02x", hash[2]),
fmt.Sprintf("%02x%02x%02x%02x%02x%02x%02x%02x%02x%02x%02x%02x%02x",
hash[3],
hash[4], hash[5], hash[6], hash[7],
hash[8], hash[9], hash[10], hash[11],
hash[12], hash[13], hash[14], hash[15]))
}
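// For example, on a Unix file system, the 16-byte md5 hash of the empty
// string, d41d8cd98f00b204e9800998ecf8427e, maps to
// hashToFileName("cas", hash) == "cas/d4/1d/8c/d98f00b204e9800998ecf8427e".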
// fileNameToHash() converts a file name in the format generated by
// hashToFileName(prefix, ...) to a vector of 16 bytes. If the string is
// malformed, the nil slice is returned.
func fileNameToHash(prefix string, s string) []byte {
idStr := strings.TrimPrefix(filepath.ToSlash(s), prefix+"/")
hash := make([]byte, 16, 16)
n, err := fmt.Sscanf(idStr, "%02x/%02x/%02x/%02x%02x%02x%02x%02x%02x%02x%02x%02x%02x%02x%02x%02x",
&hash[0], &hash[1], &hash[2], &hash[3],
&hash[4], &hash[5], &hash[6], &hash[7],
&hash[8], &hash[9], &hash[10], &hash[11],
&hash[12], &hash[13], &hash[14], &hash[15])
if n != 16 || err != nil {
hash = nil
}
return hash
}
// newBlobName() returns a new random name for a blob.
func newBlobName() string {
return filepath.Join(blobDir,
fmt.Sprintf("%02x", rand.Int31n(256)),
fmt.Sprintf("%02x", rand.Int31n(256)),
fmt.Sprintf("%02x", rand.Int31n(256)),
fmt.Sprintf("%02x%02x%02x%02x%02x%02x%02x%02x%02x%02x%02x%02x%02x",
rand.Int31n(256),
rand.Int31n(256), rand.Int31n(256), rand.Int31n(256), rand.Int31n(256),
rand.Int31n(256), rand.Int31n(256), rand.Int31n(256), rand.Int31n(256),
rand.Int31n(256), rand.Int31n(256), rand.Int31n(256), rand.Int31n(256)))
}
// hashToString() returns a string representation of the hash.
// Requires len(hash)==16. An md5 hash is suitable.
func hashToString(hash []byte) string {
return fmt.Sprintf("%02x%02x%02x%02x%02x%02x%02x%02x%02x%02x%02x%02x%02x%02x%02x%02x",
hash[0], hash[1], hash[2], hash[3],
hash[4], hash[5], hash[6], hash[7],
hash[8], hash[9], hash[10], hash[11],
hash[12], hash[13], hash[14], hash[15])
}
// stringToHash() converts a string in the format generated by hashToString()
// to a vector of 16 bytes. If the string is malformed, the nil slice is
// returned.
func stringToHash(s string) []byte {
hash := make([]byte, 16, 16)
n, err := fmt.Sscanf(s, "%02x%02x%02x%02x%02x%02x%02x%02x%02x%02x%02x%02x%02x%02x%02x%02x",
&hash[0], &hash[1], &hash[2], &hash[3],
&hash[4], &hash[5], &hash[6], &hash[7],
&hash[8], &hash[9], &hash[10], &hash[11],
&hash[12], &hash[13], &hash[14], &hash[15])
if n != 16 || err != nil {
hash = nil
}
return hash
}
// Create() returns a pointer to an FsCaBlobStore stored in the file system at
// "rootName". If the directory rootName does not exist, it is created.
func Create(ctx *context.T, stEngine, rootName string) (fscabs *FsCaBlobStore, err error) {
dir := []string{tmpDir, casDir, chunkDir, blobDir}
for i := 0; i != len(dir) && err == nil; i++ {
fullName := filepath.Join(rootName, dir[i])
os.MkdirAll(fullName, dirPermissions)
var fi os.FileInfo
fi, err = os.Stat(fullName)
if err == nil && !fi.IsDir() {
err = verror.New(errNotADir, ctx, fullName)
}
}
var bm *blobmap.BlobMap
if err == nil {
bm, err = blobmap.New(ctx, stEngine, filepath.Join(rootName, chunkDir))
}
if err == nil {
fscabs = new(FsCaBlobStore)
fscabs.rootName = rootName
fscabs.bm = bm
}
return fscabs, err
}
// Close() closes the FsCaBlobStore.
func (fscabs *FsCaBlobStore) Close() error {
return fscabs.bm.Close()
}
// Root() returns the name of the root directory where *fscabs is stored.
func (fscabs *FsCaBlobStore) Root() string {
return fscabs.rootName
}
// DeleteBlob() deletes the named blob from *fscabs.
func (fscabs *FsCaBlobStore) DeleteBlob(ctx *context.T, blobName string) (err error) {
// Disallow deletions of things outside the blob tree, or that may contain "..".
// For simplicity, the code currently disallows '.'.
blobID := fileNameToHash(blobDir, blobName)
if blobID == nil || strings.IndexByte(blobName, '.') != -1 {
err = verror.New(errInvalidBlobName, ctx, blobName)
} else {
err = os.Remove(filepath.Join(fscabs.rootName, blobName))
if err != nil {
err = verror.New(errCantDeleteBlob, ctx, blobName, err)
} else {
err = fscabs.bm.DeleteBlob(ctx, blobID)
}
}
return err
}
// -----------------------------------------------------------
// A file encapsulates both an os.File and a bufio.Writer on that file.
type file struct {
fh *os.File
writer *bufio.Writer
}
// newFile() returns a *file containing fh and a bufio.Writer on that file, if
// err is nil.
func newFile(fh *os.File, err error) (*file, error) {
var f *file
if err == nil {
f = new(file)
f.fh = fh
f.writer = bufio.NewWriter(f.fh)
}
return f, err
}
// newTempFile() returns a *file on a new temporary file created in the
// directory dir.
func newTempFile(ctx *context.T, dir string) (*file, error) {
return newFile(ioutil.TempFile(dir, "newfile"))
}
// close() flushes buffers (if err==nil initially) and closes the file,
// returning its name.
func (f *file) close(ctx *context.T, err error) (string, error) {
name := f.fh.Name()
// Flush the data out to disc and close the file.
if err == nil {
err = f.writer.Flush()
}
if err == nil {
err = f.fh.Sync()
}
err2 := f.fh.Close()
if err == nil {
err = err2
}
return name, err
}
// closeAndRename() calls f.close(), and if err==nil initially and no new
// errors are seen, renames the file to newName.
func (f *file) closeAndRename(ctx *context.T, newName string, err error) error {
var oldName string
oldName, err = f.close(ctx, err)
if err == nil { // if temp file written successfully...
// Link or rename the file into place, hoping at least one is
// supported on this file system.
os.MkdirAll(filepath.Dir(newName), dirPermissions)
err = os.Link(oldName, newName)
if err == nil {
os.Remove(oldName)
} else {
err = os.Rename(oldName, newName)
}
}
if err != nil {
os.Remove(oldName)
}
return err
}
// -----------------------------------------------------------
// A blobFragment represents a vector of bytes and its position within a blob.
type blobFragment struct {
pos int64 // position of this fragment within its containing blob.
size int64 // size of this fragment.
offset int64 // offset within fileName.
fileName string // name of file describing this fragment.
}
// A blobDesc is the in-memory representation of a blob.
type blobDesc struct {
activeDescIndex int // Index into fscabs.activeDesc if refCount>0; under fscabs.mu.
refCount int // Reference count; under fscabs.mu.
name string // Name of the blob.
// The following fields are modified under fscabs.mu and in BlobWriter
// owner's thread; they may be read by GC (when obtained from
// fscabs.activeDesc) and the chunk writer under fscabs.mu. In the
// BlobWriter owner's thread, reading does not require a lock, but
// writing does. In other contexts (BlobReader, or a desc that has
// just been allocated by getBlob()), no locking is needed.
fragment []blobFragment // All the fragments in this blob
size int64 // Total size of the blob.
finalized bool // Whether the blob has been finalized.
// A finalized blob has a valid hash field, and no new bytes may be added
// to it. A well-formed hash has 16 bytes.
hash []byte
openWriter bool // Whether this descriptor is being written by an open BlobWriter.
cv *sync.Cond // signalled when a BlobWriter writes or closes.
}
// isBeingDeleted() returns whether fragment fragName is about to be deleted
// by the garbage collector. Requires fscabs.mu held.
func (fscabs *FsCaBlobStore) isBeingDeleted(fragName string) (beingDeleted bool) {
for i := 0; i != len(fscabs.toDelete) && !beingDeleted; i++ {
_, beingDeleted = (*(fscabs.toDelete[i]))[fragName]
}
return beingDeleted
}
// descRef() increments the reference count of *desc and returns whether
// successful. It may fail if the fragments referenced by the descriptor are
// being deleted by the garbage collector.
func (fscabs *FsCaBlobStore) descRef(desc *blobDesc) bool {
beingDeleted := false
fscabs.mu.Lock()
if desc.refCount == 0 {
// On the first reference, check whether the fragments are
// being deleted, and if not, add *desc to the
// fscabs.activeDesc vector.
for i := 0; i != len(desc.fragment) && !beingDeleted; i++ {
beingDeleted = fscabs.isBeingDeleted(desc.fragment[i].fileName)
}
if !beingDeleted {
desc.activeDescIndex = len(fscabs.activeDesc)
fscabs.activeDesc = append(fscabs.activeDesc, desc)
}
}
if !beingDeleted {
desc.refCount++
}
fscabs.mu.Unlock()
return !beingDeleted
}
// descUnref() decrements the reference count of *desc if desc!=nil; if that
// removes the last reference, *desc is removed from the fscabs.activeDesc
// vector.
func (fscabs *FsCaBlobStore) descUnref(desc *blobDesc) {
if desc != nil {
fscabs.mu.Lock()
desc.refCount--
if desc.refCount < 0 {
panic("negative reference count")
} else if desc.refCount == 0 {
// Remove desc from fscabs.activeDesc by moving the
// last entry in fscabs.activeDesc to desc's slot.
n := len(fscabs.activeDesc)
lastDesc := fscabs.activeDesc[n-1]
lastDesc.activeDescIndex = desc.activeDescIndex
fscabs.activeDesc[desc.activeDescIndex] = lastDesc
fscabs.activeDesc = fscabs.activeDesc[0 : n-1]
desc.activeDescIndex = -1
}
fscabs.mu.Unlock()
}
}
// getBlob() returns the in-memory blob descriptor for the named blob.
func (fscabs *FsCaBlobStore) getBlob(ctx *context.T, blobName string) (desc *blobDesc, err error) {
slashBlobName := filepath.ToSlash(blobName)
if !strings.HasPrefix(slashBlobName, blobDir+"/") || strings.IndexByte(blobName, '.') != -1 {
err = verror.New(errInvalidBlobName, ctx, blobName)
} else {
absBlobName := filepath.Join(fscabs.rootName, blobName)
var fh *os.File
fh, err = os.Open(absBlobName)
if err == nil {
var line string
desc = new(blobDesc)
desc.activeDescIndex = -1
desc.name = blobName
desc.cv = sync.NewCond(&fscabs.mu)
scanner := bufio.NewScanner(fh)
for scanner.Scan() {
line = scanner.Text()
field := strings.Split(line, " ")
if len(field) == 4 && field[0] == "d" {
var fragSize int64
var fragOffset int64
fragSize, err = strconv.ParseInt(field[1], 0, 64)
if err == nil {
fragOffset, err = strconv.ParseInt(field[2], 0, 64)
}
if err == nil {
// No locking needed here because desc
// is newly allocated and not yet passed to descRef().
desc.fragment = append(desc.fragment,
blobFragment{
fileName: field[3],
pos: desc.size,
size: fragSize,
offset: fragOffset})
}
desc.size += fragSize
} else if len(field) == 2 && field[0] == "f" {
desc.hash = stringToHash(field[1])
desc.finalized = true
if desc.hash == nil {
err = verror.New(errMalformedBlobHash, ctx, blobName, field[1])
}
} else if len(field) > 0 && len(field[0]) == 1 && "a" <= field[0] && field[0] <= "z" {
// unrecognized line, reserved for extensions: ignore.
} else {
err = verror.New(errMalformedField, ctx, line)
}
}
if err == nil {
err = scanner.Err()
}
fh.Close()
}
}
// Ensure that we return either a properly referenced desc, or nil.
if err != nil {
desc = nil
} else if !fscabs.descRef(desc) {
err = verror.New(errBlobDeleted, ctx, blobName)
desc = nil
}
return desc, err
}
// -----------------------------------------------------------
// A BlobWriter allows a blob to be written. If a blob has not yet been
// finalized, it also allows that blob to be extended. A BlobWriter may be
// created with NewBlobWriter(), and should be closed with Close() or
// CloseWithoutFinalize().
type BlobWriter struct {
// The BlobWriter exists within a particular FsCaBlobStore and context.T
fscabs *FsCaBlobStore
ctx *context.T
desc *blobDesc // Description of the blob being written.
f *file // The file being written.
hasher hash.Hash // Running hash of blob.
// The following three fields represent the state of
// a temporary file that, when complete, will become a fragment.
// These fields are manipulated by commitBytes() and writeToTempFragment().
fragFile *file // The current (temporary) fragment file being appended to, or nil if none.
fragSize int64 // Bytes written to fragment.
fragHash hash.Hash // Running hash of fragment.
// Fields to allow the BlobMap to be written.
csBr *BlobReader // Reader over the blob that's currently being written.
cs *chunker.Stream // Stream of chunks derived from csBr
csErr chan error // writeBlobMap() sends its result here; Close/CloseWithoutFinalize receives it.
}
// NewBlobWriter() returns a pointer to a newly allocated BlobWriter on
// a newly created blob. If "name" is non-empty, it is used to name
// the blob, and it must be in the format of a name returned by this
// interface (probably by another instance on another device).
// Otherwise, a new name is created, which can be found using
// the Name() method. It is an error to attempt to overwrite a blob
// that already exists in this blob store. BlobWriters should not be
// used concurrently by multiple threads. The returned handle should
// be closed with either the Close() or CloseWithoutFinalize() method
// to avoid leaking file handles.
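// Expected use is (a sketch, with error handling elided; "data" is an
// illustrative []byte):
//   bw, err := fscabs.NewBlobWriter(ctx, "")
//   err = bw.AppendBytes(localblobstore.BlockOrFile{Block: data})
//   err = bw.Close()
//   blobName := bw.Name() // pass to NewBlobReader() to read the blob back.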
func (fscabs *FsCaBlobStore) NewBlobWriter(ctx *context.T, name string) (localblobstore.BlobWriter, error) {
var bw *BlobWriter
if name == "" {
name = newBlobName()
}
fileName := filepath.Join(fscabs.rootName, name)
os.MkdirAll(filepath.Dir(fileName), dirPermissions)
f, err := newFile(os.OpenFile(fileName, os.O_RDWR|os.O_CREATE|os.O_EXCL, filePermissions))
if err == nil {
bw = new(BlobWriter)
bw.fscabs = fscabs
bw.ctx = ctx
bw.desc = new(blobDesc)
bw.desc.activeDescIndex = -1
bw.desc.name = name
bw.desc.cv = sync.NewCond(&fscabs.mu)
bw.desc.openWriter = true
bw.f = f
bw.hasher = md5.New()
if !fscabs.descRef(bw.desc) {
// Can't happen; descriptor refers to no fragments.
panic(verror.New(errBlobDeleted, ctx, bw.desc.name))
}
// Write the chunks of this blob into the BlobMap, as they are
// written by this writer.
bw.forkWriteBlobMap()
}
return bw, err
}
// ResumeBlobWriter() returns a pointer to a newly allocated BlobWriter on an
// old, but unfinalized blob name.
func (fscabs *FsCaBlobStore) ResumeBlobWriter(ctx *context.T, blobName string) (localblobstore.BlobWriter, error) {
var err error
var bw *BlobWriter
var desc *blobDesc
desc, err = fscabs.getBlob(ctx, blobName)
if err == nil && desc.finalized {
err = verror.New(errBlobAlreadyFinalized, ctx, blobName)
} else if err == nil {
bw = new(BlobWriter)
bw.fscabs = fscabs
bw.ctx = ctx
bw.desc = desc
bw.desc.openWriter = true
fileName := filepath.Join(fscabs.rootName, bw.desc.name)
bw.f, err = newFile(os.OpenFile(fileName, os.O_WRONLY|os.O_APPEND, 0666))
bw.hasher = md5.New()
// Add the existing fragments to the running hash.
// The descRef's ref count is incremented here to compensate
// for the decrement it will receive in br.Close(), below.
if !fscabs.descRef(bw.desc) {
// Can't happen; descriptor's ref count was already
// non-zero.
panic(verror.New(errBlobDeleted, ctx, fileName))
}
br := fscabs.blobReaderFromDesc(ctx, bw.desc, dontWaitForWriter)
buf := make([]byte, 8192, 8192)
for err == nil {
var n int
n, err = br.Read(buf)
bw.hasher.Write(buf[0:n])
}
br.Close()
if err == io.EOF { // EOF is expected.
err = nil
}
if err == nil {
// Write the chunks of this blob into the BlobMap, as
// they are written by this writer.
bw.forkWriteBlobMap()
}
}
return bw, err
}
// commitBytes() commits bytes added by AppendBytes(), if any.
// If any bytes are committed, they are added to *bw's fragment list.
func (bw *BlobWriter) commitBytes() (err error) {
if bw.fragFile != nil {
hash := bw.fragHash.Sum(nil)
relFileName := hashToFileName(casDir, hash)
absFileName := filepath.Join(bw.fscabs.rootName, relFileName)
// Add the fragment's name to bw.desc's fragments so the garbage
// collector will not delete it when it is renamed. The temporary
// file will not be deleted before renaming because the garbage
// collector does not delete temporary files that have been
// written in the last several hours.
bw.fscabs.mu.Lock()
bw.desc.fragment = append(bw.desc.fragment, blobFragment{
pos: bw.desc.size,
size: bw.fragSize,
offset: 0,
fileName: relFileName})
bw.fscabs.mu.Unlock()
if _, statErr := os.Stat(absFileName); statErr != nil && os.IsNotExist(statErr) {
// Fragment file does not yet exist; rename the temporary file into place.
err = bw.fragFile.closeAndRename(bw.ctx, absFileName, err)
} else {
// Fragment is already present; delete temporary file.
var oldName string
oldName, err = bw.fragFile.close(bw.ctx, err)
os.Remove(oldName)
}
if err == nil {
_, err = fmt.Fprintf(bw.f.writer, "d %d %d %s\n", bw.fragSize, 0 /*offset*/, relFileName)
}
if err == nil {
err = bw.f.writer.Flush()
}
if err != nil {
err = verror.New(errAppendFailed, bw.ctx, bw.fscabs.rootName, err)
// Remove the entry added to fragment list above.
bw.fscabs.mu.Lock()
bw.desc.fragment = bw.desc.fragment[0 : len(bw.desc.fragment)-1]
bw.fscabs.mu.Unlock()
} else { // commit the change by updating the size; it's then visible to readers.
bw.fscabs.mu.Lock()
bw.desc.size += bw.fragSize
bw.desc.cv.Broadcast() // Tell blobmap BlobReader there's more to read.
bw.fscabs.mu.Unlock()
}
bw.fragFile = nil
bw.fragSize = 0
bw.fragHash = nil
}
return err
}
// writeToTempFragment() writes buf[] to a temporary file that is eventually to become a fragment of *bw.
// Bytes are committed as necessary when a temporary file becomes large.
func (bw *BlobWriter) writeToTempFragment(buf []byte) (err error) {
for len(buf) > 0 && err == nil {
if bw.fragSize >= maxFragmentSize {
err = bw.commitBytes()
}
if err == nil && bw.fragFile == nil {
var fragFile *file
fragFile, err = newTempFile(bw.ctx, filepath.Join(bw.fscabs.rootName, tmpDir))
if err == nil {
bw.fragFile = fragFile
bw.fragSize = 0
bw.fragHash = md5.New()
}
}
// Process a prefix of buf[] on this iteration that will not violate maxFragmentSize.
consume := buf
if int64(len(buf))+bw.fragSize > maxFragmentSize {
consume = buf[:maxFragmentSize-bw.fragSize]
}
if err == nil {
bw.fragSize += int64(len(consume))
bw.fragHash.Write(consume) // Cannot fail; see Hash interface.
bw.hasher.Write(consume) // Cannot fail; see Hash interface.
_, err = bw.fragFile.writer.Write(consume) // Writes all bytes unless returned err != nil.
}
buf = buf[len(consume):] // process rest of buf[] on next iteration
}
return err
}
// AppendBytes() tentatively appends bytes to the blob being written by *bw,
// where the bytes are composed of the byte vectors described by the elements
// of item[]. The implementation may choose when to commit these bytes to disc,
// except that they will be committed before the return of a subsequent call to
// Close() or CloseWithoutFinalize(). Uncommitted bytes may be lost during a crash,
// and will not be returned by a concurrent reader until they are committed.
func (bw *BlobWriter) AppendBytes(item ...localblobstore.BlockOrFile) (err error) {
if bw.f == nil {
panic("fs_cablobstore.BlobWriter programming error: AppendBytes() after Close()")
}
var buf []byte
for i := 0; i != len(item) && err == nil; i++ {
if len(item[i].FileName) != 0 {
if buf == nil {
buf = make([]byte, 8192, 8192)
}
var fileHandle *os.File
fileHandle, err = os.Open(filepath.Join(bw.fscabs.rootName, item[i].FileName))
if err == nil {
at := item[i].Offset
toRead := item[i].Size
var haveRead int64
for err == nil && (toRead == -1 || haveRead < toRead) {
var n int
n, err = fileHandle.ReadAt(buf, at)
// Per the io.ReaderAt contract, a partial final read may return
// n > 0 together with io.EOF; process those bytes before acting
// on the error.
if n > 0 {
if toRead != -1 && int64(n)+haveRead > toRead {
n = int(toRead - haveRead)
}
haveRead += int64(n)
at += int64(n)
if werr := bw.writeToTempFragment(buf[0:n]); werr != nil {
err = werr
}
}
}
if err == io.EOF {
if toRead == -1 || haveRead == toRead {
err = nil // The loop read all that was asked; EOF is a possible outcome.
} else { // The loop read less than was asked; request must have been too big.
err = verror.New(errSizeTooBigForFragment, bw.ctx, bw.desc.name, item[i].FileName)
}
}
fileHandle.Close()
}
} else {
err = bw.writeToTempFragment(item[i].Block)
}
}
return err
}
// forkWriteBlobMap() creates a new thread to run writeBlobMap(). It adds
// the chunks written to *bw to the blob store's BlobMap. The caller is
// expected to call joinWriteBlobMap() at some later point.
func (bw *BlobWriter) forkWriteBlobMap() {
// The descRef's ref count is incremented here to compensate
// for the decrement it will receive in br.Close() in joinWriteBlobMap.
if !bw.fscabs.descRef(bw.desc) {
// Can't happen; descriptor's ref count was already non-zero.
panic(verror.New(errBlobDeleted, bw.ctx, bw.desc.name))
}
bw.csBr = bw.fscabs.blobReaderFromDesc(bw.ctx, bw.desc, waitForWriter)
bw.cs = chunker.NewStream(bw.ctx, &chunker.DefaultParam, bw.csBr)
bw.csErr = make(chan error)
go bw.writeBlobMap()
}
// insertChunk() inserts chunk into the blob store's BlobMap, associating it
// with the specified byte offset in the blob blobID being written by *bw. The byte
// offset of the next chunk is returned.
func (bw *BlobWriter) insertChunk(blobID []byte, chunkHash []byte, offset int64, size int64) (int64, error) {
err := bw.fscabs.bm.AssociateChunkWithLocation(bw.ctx, chunkHash[:],
blobmap.Location{BlobID: blobID, Offset: offset, Size: size})
if err != nil {
bw.cs.Cancel()
}
return offset + size, err
}
// writeBlobMap() iterates over the chunks in stream bw.cs, and associates each
// one with the blob being written.
func (bw *BlobWriter) writeBlobMap() {
var err error
var offset int64
blobID := fileNameToHash(blobDir, bw.desc.name)
// Associate each chunk only after the next chunk has been seen (or
// the blob finalized), to avoid recording an artificially short chunk
// at the end of a partial transfer.
var chunkHash [md5.Size]byte
var chunkLen int64
if bw.cs.Advance() {
chunk := bw.cs.Value()
// Record the hash and size, since chunk's underlying buffer
// may be reused by the next call to Advance().
chunkHash = md5.Sum(chunk)
chunkLen = int64(len(chunk))
for bw.cs.Advance() {
offset, err = bw.insertChunk(blobID, chunkHash[:], offset, chunkLen)
chunk = bw.cs.Value()
chunkHash = md5.Sum(chunk)
chunkLen = int64(len(chunk))
}
}
if err == nil {
err = bw.cs.Err()
}
bw.fscabs.mu.Lock()
if err == nil && chunkLen != 0 && bw.desc.finalized {
offset, err = bw.insertChunk(blobID, chunkHash[:], offset, chunkLen)
}
bw.fscabs.mu.Unlock()
bw.csErr <- err // wake joinWriteBlobMap()
}
// joinWriteBlobMap waits for the completion of the thread forked by forkWriteBlobMap().
// It returns when the chunks in the blob have been written to the blob store's BlobMap.
func (bw *BlobWriter) joinWriteBlobMap(err error) error {
err2 := <-bw.csErr // read error from end of writeBlobMap()
if err == nil {
err = err2
}
bw.csBr.Close()
return err
}
// Close() finalizes *bw, and indicates that the client will perform no further
// append operations on *bw. Any internal open file handles are closed.
func (bw *BlobWriter) Close() (err error) {
if bw.f == nil {
err = verror.New(errAlreadyClosed, bw.ctx, bw.desc.name)
} else if bw.desc.finalized {
err = verror.New(errBlobAlreadyFinalized, bw.ctx, bw.desc.name)
} else {
err = bw.commitBytes()
if err == nil {
h := bw.hasher.Sum(nil)
_, err = fmt.Fprintf(bw.f.writer, "f %s\n", hashToString(h)) // finalize
}
_, err = bw.f.close(bw.ctx, err)
bw.f = nil
bw.fscabs.mu.Lock()
bw.desc.finalized = true
bw.desc.openWriter = false
bw.desc.cv.Broadcast() // Tell blobmap BlobReader that writing has ceased.
bw.fscabs.mu.Unlock()
err = bw.joinWriteBlobMap(err)
bw.fscabs.descUnref(bw.desc)
}
return err
}
// CloseWithoutFinalize() indicates that the client will perform no further
// append operations on *bw, but does not finalize the blob. Any internal open
// file handles are closed. Clients are expected to need this operation
// infrequently.
func (bw *BlobWriter) CloseWithoutFinalize() (err error) {
if bw.f == nil {
err = verror.New(errAlreadyClosed, bw.ctx, bw.desc.name)
} else {
err = bw.commitBytes()
bw.fscabs.mu.Lock()
bw.desc.openWriter = false
bw.desc.cv.Broadcast() // Tell blobmap BlobReader that writing has ceased.
bw.fscabs.mu.Unlock()
_, err = bw.f.close(bw.ctx, err)
bw.f = nil
err = bw.joinWriteBlobMap(err)
bw.fscabs.descUnref(bw.desc)
}
return err
}
// AppendBlob() adds a (substring of a) pre-existing blob to the blob being
// written by *bw. The fragments of the pre-existing blob are not physically
// copied; they are referenced by both blobs.
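// For example, the following (illustrative) call appends the first 1024
// bytes of the existing blob named by b1 to the blob being written:
//   err = bw.AppendBlob(b1, 1024, 0)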
func (bw *BlobWriter) AppendBlob(blobName string, size int64, offset int64) (err error) {
if bw.f == nil {
panic("fs_cablobstore.BlobWriter programming error: AppendBlob() after Close()")
}
err = bw.commitBytes()
var desc *blobDesc
var origSize int64
if err == nil {
desc, err = bw.fscabs.getBlob(bw.ctx, blobName)
origSize = bw.desc.size
}
if err == nil {
if size == -1 {
size = desc.size - offset
}
if offset < 0 || desc.size < offset+size {
err = verror.New(errBadSizeOrOffset, bw.ctx, size, offset, blobName, desc.size)
}
for i := 0; i != len(desc.fragment) && err == nil && size > 0; i++ {
if desc.fragment[i].size <= offset {
offset -= desc.fragment[i].size
} else {
consume := desc.fragment[i].size - offset
if size < consume {
consume = size
}
_, err = fmt.Fprintf(bw.f.writer, "d %d %d %s\n",
consume, offset+desc.fragment[i].offset, desc.fragment[i].fileName)
if err == nil {
// Add fragment so garbage collector can see it.
// The garbage collector cannot be
// about to delete the fragment, because
// getBlob() already checked for that
// above, and kept a reference.
bw.fscabs.mu.Lock()
bw.desc.fragment = append(bw.desc.fragment, blobFragment{
pos: bw.desc.size,
size: consume,
offset: offset + desc.fragment[i].offset,
fileName: desc.fragment[i].fileName})
bw.desc.size += consume
bw.desc.cv.Broadcast() // Tell blobmap BlobReader there's more to read.
bw.fscabs.mu.Unlock()
}
offset = 0
size -= consume
}
}
bw.fscabs.descUnref(desc)
// Add the new fragments to the running hash.
if !bw.fscabs.descRef(bw.desc) {
// Can't happen; descriptor's ref count was already
// non-zero.
panic(verror.New(errBlobDeleted, bw.ctx, blobName))
}
br := bw.fscabs.blobReaderFromDesc(bw.ctx, bw.desc, dontWaitForWriter)
if err == nil {
_, err = br.Seek(origSize, 0)
}
buf := make([]byte, 8192, 8192)
for err == nil {
var n int
n, err = br.Read(buf)
bw.hasher.Write(buf[0:n]) // Cannot fail; see Hash interface.
}
br.Close()
if err == io.EOF { // EOF is expected.
err = nil
}
if err == nil {
err = bw.f.writer.Flush()
}
}
return err
}
// IsFinalized() returns whether *bw has been finalized.
func (bw *BlobWriter) IsFinalized() bool {
return bw.desc.finalized
}
// Size() returns *bw's size.
func (bw *BlobWriter) Size() int64 {
return bw.desc.size + bw.fragSize // Count uncommitted bytes in the size for the writer; they aren't yet counted for readers.
}
// Name() returns *bw's name.
func (bw *BlobWriter) Name() string {
return bw.desc.name
}
// Hash() returns *bw's hash, reflecting the bytes written so far.
func (bw *BlobWriter) Hash() []byte {
return bw.hasher.Sum(nil)
}
// -----------------------------------------------------------
// A BlobReader allows a blob to be read using the standard ReadAt(), Read(),
// and Seek() calls. A BlobReader can be created with NewBlobReader(), and
// should be closed with the Close() method to avoid leaking file handles.
type BlobReader struct {
// The BlobReader exists within a particular FsCaBlobStore and context.T.
fscabs *FsCaBlobStore
ctx *context.T
desc *blobDesc // A description of the blob being read.
waitForWriter bool // whether this reader should wait for a concurrent BlobWriter
pos int64 // The next position we will read from (used by Read/Seek, not ReadAt).
// The fields below represent a cached open fragment desc.fragment[fragmentIndex].
fragmentIndex int // -1 or 0 <= fragmentIndex < len(desc.fragment).
fh *os.File // non-nil iff fragmentIndex != -1.
}
// Constants to make calls to blobReaderFromDesc() more readable.
const (
dontWaitForWriter = false
waitForWriter = true
)
// blobReaderFromDesc() returns a pointer to a newly allocated BlobReader given
// a pre-existing blobDesc. If waitForWriter is true, the reader will wait for
// any BlobWriter to finish writing the part of the blob the reader is trying
// to read.
func (fscabs *FsCaBlobStore) blobReaderFromDesc(ctx *context.T, desc *blobDesc, waitForWriter bool) *BlobReader {
br := new(BlobReader)
br.fscabs = fscabs
br.ctx = ctx
br.fragmentIndex = -1
br.desc = desc
br.waitForWriter = waitForWriter
return br
}
// NewBlobReader() returns a pointer to a newly allocated BlobReader on the
// specified blobName. BlobReaders should not be used concurrently by multiple
// threads. Returned handles should be closed with Close().
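// Expected use is (a sketch, with error handling elided):
//   br, err := fscabs.NewBlobReader(ctx, blobName)
//   buf := make([]byte, 8192)
//   n, err := br.Read(buf) // or br.ReadAt(buf, offset); repeat until io.EOF.
//   err = br.Close()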
func (fscabs *FsCaBlobStore) NewBlobReader(ctx *context.T, blobName string) (br localblobstore.BlobReader, err error) {
var desc *blobDesc
desc, err = fscabs.getBlob(ctx, blobName)
if err == nil {
br = fscabs.blobReaderFromDesc(ctx, desc, dontWaitForWriter)
}
return br, err
}
// closeInternal() closes any open file handles within *br.
func (br *BlobReader) closeInternal() {
if br.fh != nil {
br.fh.Close()
br.fh = nil
}
br.fragmentIndex = -1
}
// Close() indicates that the client will perform no further operations on *br.
// It closes any open file handles within a BlobReader.
func (br *BlobReader) Close() error {
br.closeInternal()
br.fscabs.descUnref(br.desc)
return nil
}
// findFragment() returns the index of the first element of fragment[] that may
// contain "offset", based on the "pos" fields of each element.
// Requires that fragment[] be sorted on the "pos" fields of the elements.
func findFragment(fragment []blobFragment, offset int64) int {
lo := 0
hi := len(fragment)
for lo < hi {
mid := (lo + hi) >> 1
if offset < fragment[mid].pos {
hi = mid
} else {
lo = mid + 1
}
}
if lo > 0 {
lo--
}
return lo
}
// waitUntilAvailable() waits until position pos within *br is available for
// reading, if this reader is waiting for writers. This may be because:
// - *br is on an already written blob.
// - *br is on a blob being written that has been closed, or whose writes have
// passed position pos.
// The value pos==math.MaxInt64 can be used to mean "until the writer is closed".
// Requires br.fscabs.mu held.
func (br *BlobReader) waitUntilAvailable(pos int64) {
for br.waitForWriter && br.desc.openWriter && br.desc.size < pos {
br.desc.cv.Wait()
}
}
// ReadAt() fills b[] with up to len(b) bytes of data starting at position "at"
// within the blob that *br indicates, and returns the number of bytes read.
func (br *BlobReader) ReadAt(b []byte, at int64) (n int, err error) {
br.fscabs.mu.Lock()
br.waitUntilAvailable(at + int64(len(b)))
i := findFragment(br.desc.fragment, at)
if i < len(br.desc.fragment) && at <= br.desc.size {
fragmenti := br.desc.fragment[i] // copy fragment data to allow releasing lock
br.fscabs.mu.Unlock()
if i != br.fragmentIndex {
br.closeInternal()
}
if br.fragmentIndex == -1 {
br.fh, err = os.Open(filepath.Join(br.fscabs.rootName, fragmenti.fileName))
if err == nil {
br.fragmentIndex = i
} else {
br.closeInternal()
}
}
var offset int64 = at - fragmenti.pos + fragmenti.offset
consume := fragmenti.size - (at - fragmenti.pos)
if int64(len(b)) < consume {
consume = int64(len(b))
}
if br.fh != nil {
n, err = br.fh.ReadAt(b[0:consume], offset)
} else if err == nil {
panic("failed to open blob fragment")
}
br.fscabs.mu.Lock()
// Return io.EOF if the Read reached the end of the last
// fragment, but not if it's merely the end of some interior
// fragment or the blob is still being extended.
if int64(n)+at >= br.desc.size && !(br.waitForWriter && br.desc.openWriter) {
if err == nil {
err = io.EOF
}
} else if err == io.EOF {
err = nil
}
} else if at == br.desc.size { // Reading at the end of the file, past the last fragment.
err = io.EOF
} else {
err = verror.New(errIllegalPositionForRead, br.ctx, br.pos, br.desc.size)
}
br.fscabs.mu.Unlock()
return n, err
}
// Read() fills b[] with up to len(b) bytes of data starting at the current
// seek position of *br within the blob that *br indicates, and then both
// returns the number of bytes read and advances *br's seek position by that
// amount.
func (br *BlobReader) Read(b []byte) (n int, err error) {
n, err = br.ReadAt(b, br.pos)
if err == nil {
br.pos += int64(n)
}
return n, err
}
// Seek() sets the seek position of *br to offset if whence==0,
// offset+current_seek_position if whence==1, and offset+end_of_blob if
// whence==2, and then returns the current seek position.
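// For example, br.Seek(-10, 2) positions the reader ten bytes before the end
// of the blob and returns br.Size()-10.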
func (br *BlobReader) Seek(offset int64, whence int) (result int64, err error) {
br.fscabs.mu.Lock()
if whence == 0 {
result = offset
} else if whence == 1 {
result = offset + br.pos
} else if whence == 2 {
br.waitUntilAvailable(math.MaxInt64)
result = offset + br.desc.size
} else {
err = verror.New(errBadSeekWhence, br.ctx, whence)
result = br.pos
}
if result < 0 {
err = verror.New(errNegativeSeekPosition, br.ctx, offset, whence)
result = br.pos
} else if result > br.desc.size {
err = verror.New(errIllegalPositionForRead, br.ctx, result, br.desc.size)
result = br.pos
} else if err == nil {
br.pos = result
}
br.fscabs.mu.Unlock()
return result, err
}
// IsFinalized() returns whether *br has been finalized.
func (br *BlobReader) IsFinalized() bool {
br.fscabs.mu.Lock()
br.waitUntilAvailable(math.MaxInt64)
finalized := br.desc.finalized
br.fscabs.mu.Unlock()
return finalized
}
// Size() returns *br's size.
func (br *BlobReader) Size() int64 {
br.fscabs.mu.Lock()
br.waitUntilAvailable(math.MaxInt64)
size := br.desc.size
br.fscabs.mu.Unlock()
return size
}
// Name() returns *br's name.
func (br *BlobReader) Name() string {
return br.desc.name
}
// Hash() returns *br's hash. It may be nil if the blob is not finalized.
func (br *BlobReader) Hash() []byte {
br.fscabs.mu.Lock()
br.waitUntilAvailable(math.MaxInt64)
hash := br.desc.hash
br.fscabs.mu.Unlock()
return hash
}
// -----------------------------------------------------------
// A dirListing is a list of names in a directory, plus a position, which
// indexes the last item in nameList that has been processed.
type dirListing struct {
pos int // Current position in nameList; may be -1 at the start of iteration.
nameList []string // List of directory entries.
}
// An FsCasIter represents an iterator that allows the client to enumerate all
// the blobs or fragments in a FsCaBlobStore.
type FsCasIter struct {
fscabs *FsCaBlobStore // The parent FsCaBlobStore.
err error // If non-nil, the error that terminated iteration.
stack []dirListing // The stack of dirListings leading to the current entry.
ctx *context.T // context passed to ListBlobIds() or ListCAIds()
mu sync.Mutex // Protects cancelled.
cancelled bool // Whether Cancel() has been called.
}
// ListBlobIds() returns an iterator that can be used to enumerate the blobs in
// an FsCaBlobStore. Expected use is:
// fscabsi := fscabs.ListBlobIds(ctx)
// for fscabsi.Advance() {
// // Process fscabsi.Value() here.
// }
// if fscabsi.Err() != nil {
// // The loop terminated early due to an error.
// }
func (fscabs *FsCaBlobStore) ListBlobIds(ctx *context.T) localblobstore.Stream {
stack := make([]dirListing, 1)
stack[0] = dirListing{pos: -1, nameList: []string{blobDir}}
return &FsCasIter{fscabs: fscabs, stack: stack, ctx: ctx}
}
// ListCAIds() returns an iterator that can be used to enumerate the
// content-addressable fragments in an FsCaBlobStore.
// Expected use is:
// fscabsi := fscabs.ListCAIds(ctx)
// for fscabsi.Advance() {
// // Process fscabsi.Value() here.
// }
// if fscabsi.Err() != nil {
// // The loop terminated early due to an error.
// }
func (fscabs *FsCaBlobStore) ListCAIds(ctx *context.T) localblobstore.Stream {
stack := make([]dirListing, 1)
stack[0] = dirListing{pos: -1, nameList: []string{casDir}}
return &FsCasIter{fscabs: fscabs, stack: stack, ctx: ctx}
}
// isCancelled() returns whether Cancel() has been called.
func (fscabsi *FsCasIter) isCancelled() bool {
fscabsi.mu.Lock()
cancelled := fscabsi.cancelled
fscabsi.mu.Unlock()
return cancelled
}
// Advance() stages an item so that it may be retrieved via Value. Returns
// true iff there is an item to retrieve. Advance must be called before Value
// is called.
func (fscabsi *FsCasIter) Advance() (advanced bool) {
stack := fscabsi.stack
err := fscabsi.err
for err == nil && !advanced && len(stack) != 0 && !fscabsi.isCancelled() {
last := len(stack) - 1
stack[last].pos++
if stack[last].pos == len(stack[last].nameList) {
stack = stack[0:last]
fscabsi.stack = stack
} else {
fullName := filepath.Join(fscabsi.fscabs.rootName, fscabsi.Value())
var fi os.FileInfo
fi, err = os.Lstat(fullName)
if err != nil {
// error: nothing to do
} else if fi.IsDir() {
var dirHandle *os.File
dirHandle, err = os.Open(fullName)
if err == nil {
var nameList []string
nameList, err = dirHandle.Readdirnames(0)
dirHandle.Close()
stack = append(stack, dirListing{pos: -1, nameList: nameList})
fscabsi.stack = stack
last = len(stack) - 1
}
} else {
advanced = true
}
}
}
fscabsi.err = err
if fscabsi.isCancelled() {
if fscabsi.err == nil {
fscabsi.err = verror.New(errStreamCancelled, fscabsi.ctx)
}
advanced = false
}
return advanced
}
// Value() returns the item that was staged by Advance. May panic if Advance
// returned false or was not called. Never blocks.
func (fscabsi *FsCasIter) Value() (name string) {
stack := fscabsi.stack
if fscabsi.err == nil && len(stack) != 0 && stack[0].pos >= 0 {
name = stack[0].nameList[stack[0].pos]
for i := 1; i != len(stack); i++ {
name = filepath.Join(name, stack[i].nameList[stack[i].pos])
}
}
return name
}
// Err() returns any error encountered by Advance. Never blocks.
func (fscabsi *FsCasIter) Err() error {
return fscabsi.err
}
// Cancel() indicates that the iteration stream should terminate early.
// Never blocks. May be called concurrently with other methods on fscabsi.
func (fscabsi *FsCasIter) Cancel() {
fscabsi.mu.Lock()
fscabsi.cancelled = true
fscabsi.mu.Unlock()
}
// -----------------------------------------------------------
// An errorChunkStream is a localblobstore.ChunkStream that yields an error.
type errorChunkStream struct {
err error
}
func (*errorChunkStream) Advance() bool { return false }
func (*errorChunkStream) Value([]byte) []byte { return nil }
func (ecs *errorChunkStream) Err() error { return ecs.err }
func (*errorChunkStream) Cancel() {}
// BlobChunkStream() returns a ChunkStream that can be used to read the ordered
// list of content hashes of chunks in blob blobName. It is expected that this
// list will be presented to RecipeFromChunks() on another device, to create a
// recipe for transmitting the blob efficiently to that other device.
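// Expected use is (a sketch, with error handling elided):
//   cs := fscabs.BlobChunkStream(ctx, blobName)
//   buf := make([]byte, md5.Size)
//   for cs.Advance() {
//     hash := cs.Value(buf) // content hash of the next chunk
//     // Send hash to the receiving device here.
//   }
//   if cs.Err() != nil {
//     // The stream terminated early due to an error.
//   }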
func (fscabs *FsCaBlobStore) BlobChunkStream(ctx *context.T, blobName string) (cs localblobstore.ChunkStream) {
blobID := fileNameToHash(blobDir, blobName)
if blobID == nil {
cs = &errorChunkStream{err: verror.New(errInvalidBlobName, ctx, blobName)}
} else {
cs = fscabs.bm.NewChunkStream(ctx, blobID)
}
return cs
}
// -----------------------------------------------------------
// LookupChunk returns the location of a chunk with the specified chunk hash
// within the store.
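// Expected use is (a sketch, with error handling elided):
//   loc, err := fscabs.LookupChunk(ctx, chunkHash)
//   // loc.BlobName, loc.Offset, and loc.Size locate the chunk locally.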
func (fscabs *FsCaBlobStore) LookupChunk(ctx *context.T, chunkHash []byte) (loc localblobstore.Location, err error) {
var chunkMapLoc blobmap.Location
chunkMapLoc, err = fscabs.bm.LookupChunk(ctx, chunkHash)
if err == nil {
loc.BlobName = hashToFileName(blobDir, chunkMapLoc.BlobID)
loc.Size = chunkMapLoc.Size
loc.Offset = chunkMapLoc.Offset
}
return loc, err
}
// -----------------------------------------------------------
// A RecipeStream implements localblobstore.RecipeStream. It allows the client
// to iterate over the recipe steps to recreate a blob identified by a stream
// of chunk hashes (from chunkStream), but using parts of blobs in the current
// blob store where possible.
type RecipeStream struct {
fscabs *FsCaBlobStore
ctx *context.T
chunkStream localblobstore.ChunkStream // the stream of chunks in the blob
pendingChunkBuf [16]byte // a buffer for pendingChunk
pendingChunk []byte // the last unprocessed chunk hash read from chunkStream, or nil if none
step localblobstore.RecipeStep // the recipe step to be returned by Value()
mu sync.Mutex // protects cancelled
cancelled bool // whether Cancel() has been called
}
// RecipeStreamFromChunkStream() returns a pointer to a RecipeStream that allows
// the client to iterate over each RecipeStep needed to create the blob formed
// by the chunks in chunkStream.
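// Expected use is (a sketch):
//   rs := fscabs.RecipeStreamFromChunkStream(ctx, chunkStream)
//   for rs.Advance() {
//     step := rs.Value()
//     if step.Chunk != nil {
//       // The chunk is not present locally; request it from the sender.
//     } else {
//       // step.Blob, step.Offset, and step.Size identify local data to reuse.
//     }
//   }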
func (fscabs *FsCaBlobStore) RecipeStreamFromChunkStream(ctx *context.T, chunkStream localblobstore.ChunkStream) localblobstore.RecipeStream {
rs := new(RecipeStream)
rs.fscabs = fscabs
rs.ctx = ctx
rs.chunkStream = chunkStream
return rs
}
// isCancelled() returns whether rs.Cancel() has been called.
func (rs *RecipeStream) isCancelled() bool {
rs.mu.Lock()
cancelled := rs.cancelled
rs.mu.Unlock()
return cancelled
}
// Advance() stages an item so that it may be retrieved via Value().
// Returns true iff there is an item to retrieve. Advance() must be
// called before Value() is called. The caller is expected to read
// until Advance() returns false, or to call Cancel().
func (rs *RecipeStream) Advance() (ok bool) {
if rs.pendingChunk == nil && rs.chunkStream.Advance() {
rs.pendingChunk = rs.chunkStream.Value(rs.pendingChunkBuf[:])
}
for !ok && rs.pendingChunk != nil && !rs.isCancelled() {
var err error
var loc0 blobmap.Location
loc0, err = rs.fscabs.bm.LookupChunk(rs.ctx, rs.pendingChunk)
if err == nil {
blobName := hashToFileName(blobDir, loc0.BlobID)
var blobDesc *blobDesc
if blobDesc, err = rs.fscabs.getBlob(rs.ctx, blobName); err != nil {
// The BlobMap contained a reference to a
// deleted blob. Delete the reference in the
// BlobMap; the next loop iteration will
// consider the chunk again.
rs.fscabs.bm.DeleteBlob(rs.ctx, loc0.BlobID)
} else {
rs.fscabs.descUnref(blobDesc)
// The chunk is in a known blob. Combine
// contiguous chunks into a single recipe
// entry.
rs.pendingChunk = nil // consumed
for rs.pendingChunk == nil && rs.chunkStream.Advance() {
rs.pendingChunk = rs.chunkStream.Value(rs.pendingChunkBuf[:])
var loc blobmap.Location
loc, err = rs.fscabs.bm.LookupChunk(rs.ctx, rs.pendingChunk)
if err == nil && bytes.Equal(loc0.BlobID, loc.BlobID) && loc.Offset == loc0.Offset+loc0.Size {
loc0.Size += loc.Size
rs.pendingChunk = nil // consumed
}
}
rs.step = localblobstore.RecipeStep{Blob: blobName, Offset: loc0.Offset, Size: loc0.Size}
ok = true
}
} else { // The chunk is not in the BlobMap; yield a single chunk hash.
rs.step = localblobstore.RecipeStep{Chunk: rs.pendingChunk}
rs.pendingChunk = nil // consumed
ok = true
}
}
return ok && !rs.isCancelled()
}
// Value() returns the item that was staged by Advance(). May panic if
// Advance() returned false or was not called. Never blocks.
func (rs *RecipeStream) Value() localblobstore.RecipeStep {
return rs.step
}
// Err() returns any error encountered by Advance. Never blocks.
func (rs *RecipeStream) Err() error {
// There are no errors to return here. The errors encountered in
// Advance() are expected and recoverable.
return nil
}
// Cancel() indicates that the client wishes to cease reading from the stream.
// It causes the next call to Advance() to return false. Never blocks.
// It may be called concurrently with other calls on the stream.
func (rs *RecipeStream) Cancel() {
rs.mu.Lock()
rs.cancelled = true
rs.mu.Unlock()
rs.chunkStream.Cancel()
}
// -----------------------------------------------------------
// gcTemp() attempts to delete files in dirName older than threshold.
// Errors are ignored.
func gcTemp(dirName string, threshold time.Time) {
fh, err := os.Open(dirName)
if err == nil {
fi, _ := fh.Readdir(0)
fh.Close()
for i := 0; i < len(fi); i++ {
if fi[i].ModTime().Before(threshold) {
os.Remove(filepath.Join(dirName, fi[i].Name()))
}
}
}
}
// GC() removes old temp files and content-addressed blocks that are no longer
// referenced by any blob. It may be called concurrently with other calls to
// GC(), and with uses of BlobReaders and BlobWriters.
func (fscabs *FsCaBlobStore) GC(ctx *context.T) (err error) {
// Remove old temporary files.
gcTemp(filepath.Join(fscabs.rootName, tmpDir), time.Now().Add(-10*time.Hour))
// Add a key to caSet for each content-addressed fragment in *fscabs.
caSet := make(map[string]bool)
caIter := fscabs.ListCAIds(ctx)
for caIter.Advance() {
caSet[caIter.Value()] = true
}
err = caIter.Err()
// cmBlobs maps the names of blobs found in the BlobMap to their IDs.
// (The IDs can be derived from the names; the map is really being used
// to record which blobs exist, and the value merely avoids repeated
// conversions.)
cmBlobs := make(map[string][]byte)
if err == nil {
// Record all the blobs known to the BlobMap.
bs := fscabs.bm.NewBlobStream(ctx)
for bs.Advance() {
blobID := bs.Value(nil)
cmBlobs[hashToFileName(blobDir, blobID)] = blobID
}
}
if err == nil {
// Remove from cmBlobs all extant blobs, and remove from
// caSet all their fragments.
blobIter := fscabs.ListBlobIds(ctx)
for blobIter.Advance() {
var blobDesc *blobDesc
if blobDesc, err = fscabs.getBlob(ctx, blobIter.Value()); err == nil {
delete(cmBlobs, blobDesc.name)
for i := range blobDesc.fragment {
delete(caSet, blobDesc.fragment[i].fileName)
}
fscabs.descUnref(blobDesc)
}
}
}
if err == nil {
// Remove all blobs still mentioned in cmBlobs from the BlobMap;
// these are the ones that no longer exist in the blobs directory.
for _, blobID := range cmBlobs {
err = fscabs.bm.DeleteBlob(ctx, blobID)
if err != nil {
break
}
}
}
if err == nil {
// Remove from caSet all fragments referenced by open BlobReaders and
// BlobWriters. Advertise to new readers and writers which blobs are
// about to be deleted.
fscabs.mu.Lock()
for _, desc := range fscabs.activeDesc {
for i := range desc.fragment {
delete(caSet, desc.fragment[i].fileName)
}
}
fscabs.toDelete = append(fscabs.toDelete, &caSet)
fscabs.mu.Unlock()
// Delete the things that still remain in caSet; they are no longer
// referenced.
for caName := range caSet {
os.Remove(filepath.Join(fscabs.rootName, caName))
}
// Stop advertising what's been deleted.
fscabs.mu.Lock()
n := len(fscabs.toDelete)
var i int
// We require that &caSet still be in the list.
for i = 0; fscabs.toDelete[i] != &caSet; i++ {
}
fscabs.toDelete[i] = fscabs.toDelete[n-1]
fscabs.toDelete = fscabs.toDelete[0 : n-1]
fscabs.mu.Unlock()
}
return err
}