blob: a3e89b589ffda8ebcab0d5ce51371564c453b359 [file] [log] [blame]
package impl
// The app invoker is responsible for managing the state of applications on the
// node manager. The node manager manages the applications it installs and runs
// using the following directory structure:
//
// TODO(caprita): Not all is yet implemented.
//
// <config.Root>/
// app-<hash 1>/ - the application dir is named using a hash of the application title
// installation-<id 1>/ - installations are labelled with ids
// acls/
// data - the ACL data for this
// installation. Controls access to
// Start, Uninstall, Update, UpdateTo
// and Revert.
// signature - the signature for the ACLs in data
// <status> - one of the values for installationState enum
// origin - object name for application envelope
// <version 1 timestamp>/ - timestamp of when the version was downloaded
// bin - application binary
// previous - symbolic link to previous version directory
// envelope - application envelope (JSON-encoded)
// <version 2 timestamp>
// ...
// current - symbolic link to the current version
// instances/
// instance-<id a>/ - instances are labelled with ids
// credentials/ - holds veyron credentials (unless running
// through security agent)
// root/ - workspace that the instance is run from
// logs/ - stderr/stdout and log files generated by instance
// info - metadata for the instance (such as app
// cycle manager name and process id)
// version - symbolic link to installation version for the instance
// acls/
// data - the ACLs for this instance. These
// ACLs control access to Refresh,
// Restart, Resume, Stop and
// Suspend.
// signature - the signature for these ACLs.
// <status> - one of the values for instanceState enum
// systemname - the system name used to execute this instance
// instance-<id b>
// ...
// installation-<id 2>
// ...
// app-<hash 2>
// ...
//
// The node manager uses the suid helper binary to invoke an application as a
// specified user. The path to the helper is specified as config.Helper.
// When node manager starts up, it goes through all instances and resumes the
// ones that are not suspended. If the application was still running, it
// suspends it first. If an application fails to resume, it stays suspended.
//
// When node manager shuts down, it suspends all running instances.
//
// Start starts an instance. Suspend kills the process but leaves the workspace
// untouched. Resume restarts the process. Stop kills the process and prevents
// future resumes (it also eventually gc's the workspace).
//
// If the process dies on its own, it stays dead and is assumed suspended.
// TODO(caprita): Later, we'll add auto-restart option.
//
// Concurrency model: installations can be created independently of one another;
// installations can be removed at any time (any running instances will be
// stopped). The first call to Uninstall will rename the installation dir as a
// first step; subsequent Uninstall's will fail. Instances can be created
// independently of one another, as long as the installation exists (if it gets
// Uninstall'ed during an instance Start, the Start may fail).
//
// The status file present in each instance is used to flag the state of the
// instance and prevent concurrent operations against the instance:
//
// - when an instance is created with Start, it is placed in state 'suspended'.
// To run the instance, Start transitions 'suspended' to 'starting' and then
// 'started' (upon success) or the instance is deleted (upon failure).
//
// - Suspend attempts to transition from 'started' to 'suspending' (if the
// instance was not in 'started' state, Suspend fails). From 'suspending', the
// instance transitions to 'suspended' upon success or back to 'started' upon
// failure.
//
// - Resume attempts to transition from 'suspended' to 'starting' (if the
// instance was not in 'suspended' state, Resume fails). From 'starting', the
// instance transitions to 'started' upon success or back to 'suspended' upon
// failure.
//
// - Stop attempts to transition from 'started' to 'stopping' and then to
// 'stopped' (upon success) or back to 'started' (upon failure); or from
// 'suspended' to 'stopped'. If the initial state is neither 'started' nor
// 'suspended', Stop fails.
//
// TODO(caprita): There is room for synergy between how node manager organizes
// its own workspace and that for the applications it runs. In particular,
// previous, origin, and envelope could be part of a single config. We'll
// refine that later.
import (
"crypto/md5"
"crypto/rand"
"encoding/base64"
"encoding/binary"
"encoding/hex"
"encoding/json"
"fmt"
"hash/crc64"
"io/ioutil"
"os"
"os/exec"
"os/user"
"path"
"path/filepath"
"reflect"
"strconv"
"strings"
"sync"
"time"
"veyron.io/veyron/veyron2/context"
"veyron.io/veyron/veyron2/ipc"
"veyron.io/veyron/veyron2/mgmt"
"veyron.io/veyron/veyron2/naming"
"veyron.io/veyron/veyron2/options"
"veyron.io/veyron/veyron2/rt"
"veyron.io/veyron/veyron2/security"
"veyron.io/veyron/veyron2/services/mgmt/appcycle"
"veyron.io/veyron/veyron2/services/mgmt/application"
"veyron.io/veyron/veyron2/verror"
"veyron.io/veyron/veyron2/vlog"
vexec "veyron.io/veyron/veyron/lib/exec"
"veyron.io/veyron/veyron/lib/flags/consts"
vsecurity "veyron.io/veyron/veyron/security"
"veyron.io/veyron/veyron/security/agent"
"veyron.io/veyron/veyron/security/agent/keymgr"
iconfig "veyron.io/veyron/veyron/services/mgmt/node/config"
)
// instanceInfo holds state about a running instance. It is persisted as JSON
// in the "info" file of the instance directory (see saveInstanceInfo and
// loadInstanceInfo).
type instanceInfo struct {
// AppCycleMgrName is the name the child reported back through the callback
// listener when it was started; stop() uses it to reach the app remotely.
AppCycleMgrName string
// Pid is the process id of the started child process.
Pid int
// NodeManagerPeerPattern is the randomly generated blessing pattern created
// in setupPrincipal and handed to the child via the parent blessing config
// key.
NodeManagerPeerPattern string
// SecurityAgentHandle is the handle returned by the security agent key
// manager for the instance's principal (only set when the agent is used).
SecurityAgentHandle []byte
}
// saveInstanceInfo serializes info as JSON and writes it to the "info" file
// inside dir with mode 0600. Any failure is logged and reported to the caller
// as errOperationFailed.
func saveInstanceInfo(dir string, info *instanceInfo) error {
	encoded, marshalErr := json.Marshal(info)
	if marshalErr != nil {
		vlog.Errorf("Marshal(%v) failed: %v", info, marshalErr)
		return errOperationFailed
	}
	target := filepath.Join(dir, "info")
	writeErr := ioutil.WriteFile(target, encoded, 0600)
	if writeErr == nil {
		return nil
	}
	vlog.Errorf("WriteFile(%v) failed: %v", target, writeErr)
	return errOperationFailed
}
// loadInstanceInfo reads the "info" file inside dir and decodes its JSON
// contents into an instanceInfo. Read or decode failures are logged and
// reported as errOperationFailed.
func loadInstanceInfo(dir string) (*instanceInfo, error) {
	source := filepath.Join(dir, "info")
	raw, err := ioutil.ReadFile(source)
	if err != nil {
		vlog.Errorf("ReadFile(%v) failed: %v", source, err)
		return nil, errOperationFailed
	}
	var decoded instanceInfo
	if err := json.Unmarshal(raw, &decoded); err != nil {
		vlog.Errorf("Unmarshal(%v) failed: %v", raw, err)
		return nil, errOperationFailed
	}
	return &decoded, nil
}
// securityAgentState bundles the state needed to talk to the security agent
// when instances obtain their principals from the agent rather than from a
// credentials directory on disk.
type securityAgentState struct {
// Security agent key manager client.
keyMgrAgent *keymgr.Agent
// Ensures only one security agent connection socket is created
// at any time, preventing fork/exec from potentially passing
// down sockets meant for other children (as per ribrdb@, Go's
// exec implementation does not prune the set of files passed
// down to only include those specified in cmd.ExtraFiles).
startLock sync.Mutex
}
// appInvoker holds the state of an application-related method invocation.
type appInvoker struct {
// callback is used by startCmd to listen for the value a freshly started
// child reports back to the node manager.
callback *callbackState
// config supplies, among other things, the root directory under which
// applications are installed (Root) and the path of the helper binary
// used to launch them (Helper).
config *iconfig.State
// suffix contains the name components of the current invocation name
// suffix. It is used to identify an application, installation, or
// instance.
suffix []string
// uat maps the caller's blessings to a system account name (see
// systemAccountForHelper).
uat BlessingSystemAssociationStore
// NOTE(review): presumably guards access to the on-disk ACL files; it is
// not used in the part of the file visible here -- confirm.
locks aclLocks
// Reference to the nodemanager top-level ACL list.
nodeACL security.ACL
// securityAgent holds state related to the security agent (nil if not
// using the agent).
securityAgent *securityAgentState
}
// saveEnvelope writes the JSON encoding of envelope to the "envelope" file
// inside dir with mode 0600. Failures are logged and reported as
// errOperationFailed.
func saveEnvelope(dir string, envelope *application.Envelope) error {
	encoded, marshalErr := json.Marshal(envelope)
	if marshalErr != nil {
		vlog.Errorf("Marshal(%v) failed: %v", envelope, marshalErr)
		return errOperationFailed
	}
	envelopePath := filepath.Join(dir, "envelope")
	writeErr := ioutil.WriteFile(envelopePath, encoded, 0600)
	if writeErr == nil {
		return nil
	}
	vlog.Errorf("WriteFile(%v) failed: %v", envelopePath, writeErr)
	return errOperationFailed
}
// loadEnvelope reads the "envelope" file inside dir and decodes its JSON
// contents into an application envelope. Read or decode failures are logged
// and reported as errOperationFailed.
func loadEnvelope(dir string) (*application.Envelope, error) {
	envelopePath := filepath.Join(dir, "envelope")
	raw, err := ioutil.ReadFile(envelopePath)
	if err != nil {
		vlog.Errorf("ReadFile(%v) failed: %v", envelopePath, err)
		return nil, errOperationFailed
	}
	decoded := new(application.Envelope)
	if err := json.Unmarshal(raw, decoded); err != nil {
		vlog.Errorf("Unmarshal(%v) failed: %v", raw, err)
		return nil, errOperationFailed
	}
	return decoded, nil
}
// saveOrigin records originVON (the object name the application envelope was
// fetched from) in the "origin" file inside dir with mode 0600. A write
// failure is logged and reported as errOperationFailed.
func saveOrigin(dir, originVON string) error {
	originPath := filepath.Join(dir, "origin")
	err := ioutil.WriteFile(originPath, []byte(originVON), 0600)
	if err == nil {
		return nil
	}
	vlog.Errorf("WriteFile(%v) failed: %v", originPath, err)
	return errOperationFailed
}
// loadOrigin returns the contents of the "origin" file inside dir as a
// string. A read failure is logged and reported as errOperationFailed.
func loadOrigin(dir string) (string, error) {
	originPath := filepath.Join(dir, "origin")
	contents, err := ioutil.ReadFile(originPath)
	if err != nil {
		vlog.Errorf("ReadFile(%v) failed: %v", originPath, err)
		return "", errOperationFailed
	}
	return string(contents), nil
}
// generateID returns a new id string derived from the current time: the
// RFC3339Nano timestamp is hashed with CRC-64 (ISO polynomial), the checksum
// is laid out as 8 little-endian bytes and then URL-safe base64 encoded with
// the trailing padding removed. Uniqueness rests entirely on the timestamp;
// the result is not cryptographically secure.
func generateID() string {
	now := fmt.Sprint(time.Now().Format(time.RFC3339Nano))
	checksum := crc64.Checksum([]byte(now), crc64.MakeTable(crc64.ISO))
	var raw [8]byte
	binary.LittleEndian.PutUint64(raw[:], checksum)
	encoded := base64.URLEncoding.EncodeToString(raw[:])
	return strings.TrimRight(encoded, "=")
}
// generateRandomString returns 16 bytes read from the cryptographic random
// source, rendered as a 32-character lowercase hex string. The error from the
// random source is returned unchanged.
func generateRandomString() (string, error) {
	var buf [16]byte
	if _, err := rand.Read(buf[:]); err != nil {
		return "", err
	}
	return hex.EncodeToString(buf[:]), nil
}
// TODO(caprita): Nothing prevents different applications from sharing the same
// title, and thereby being installed in the same app dir. Do we want to
// prevent that for the same user or across users?

// applicationDirName maps an application title to the name of the directory
// that holds its installations: "app-" followed by the MD5 digest of the
// title, URL-safe base64 encoded with the trailing padding removed.
func applicationDirName(title string) string {
	digest := md5.Sum([]byte(title))
	encoded := base64.URLEncoding.EncodeToString(digest[:])
	return "app-" + strings.TrimRight(encoded, "=")
}
// installationDirName returns the directory name used for the installation
// with the given id: the id prefixed with "installation-".
func installationDirName(installationID string) string {
	const prefix = "installation-"
	return prefix + installationID
}
// instanceDirName returns the directory name used for the instance with the
// given id: the id prefixed with "instance-".
func instanceDirName(instanceID string) string {
	const prefix = "instance-"
	return prefix + instanceID
}
// mkdir creates dir, along with any missing parents, with mode 0700. On
// failure the error is logged and returned unchanged.
func mkdir(dir string) error {
	const perm = os.FileMode(0700)
	err := os.MkdirAll(dir, perm)
	if err != nil {
		vlog.Errorf("MkdirAll(%v, %v) failed: %v", dir, perm, err)
	}
	return err
}
// fetchAppEnvelope fetches the envelope named by origin and rejects it with
// errInvalidOperation when its title is the node manager's: node manager apps
// must not be installed like a regular app.
func fetchAppEnvelope(ctx context.T, origin string) (*application.Envelope, error) {
	fetched, err := fetchEnvelope(ctx, origin)
	switch {
	case err != nil:
		return nil, err
	case fetched.Title == application.NodeManagerTitle:
		return nil, errInvalidOperation
	}
	return fetched, nil
}
// newVersion sets up the directory for a new application version under
// installationDir: it creates the version directory, downloads the binary as
// "bin", saves the envelope and, when oldVersionDir is non-empty, adds a
// "previous" symlink pointing at it. Finally the installation's "current"
// link is switched to the new version.
//
// The path of the version directory is returned even when a later step fails
// (only a failure to create the directory yields an empty path).
func newVersion(installationDir string, envelope *application.Envelope, oldVersionDir string) (string, error) {
	versionDir := filepath.Join(installationDir, generateVersionDirName())
	if mkdir(versionDir) != nil {
		return "", errOperationFailed
	}
	// TODO(caprita): Share binaries if already existing locally.
	err := downloadBinary(versionDir, "bin", envelope.Binary)
	if err == nil {
		err = saveEnvelope(versionDir, envelope)
	}
	if err != nil {
		return versionDir, err
	}
	if len(oldVersionDir) > 0 {
		link := filepath.Join(versionDir, "previous")
		if symErr := os.Symlink(oldVersionDir, link); symErr != nil {
			vlog.Errorf("Symlink(%v, %v) failed: %v", oldVersionDir, link, symErr)
			return versionDir, errOperationFailed
		}
	}
	// Switching the "current" link must come last, once the new version is
	// known to be viable (currently, that just means it installed
	// properly).
	currentLink := filepath.Join(installationDir, "current")
	return versionDir, updateLink(versionDir, currentLink)
}
// TODO(rjkroege): Refactor this code with the instance creation code.

// initializeInstallationACLs writes the initial ACL for the installation in
// dir. The ACL starts out as the given acl (the claimant's ACL) and is
// extended so that every name in blessings is granted all labels. The result
// is stored via writeACLs in <dir>/acls/data, with its signature in
// <dir>/acls/signature.
//
// acl.In is copied before it is extended: although acl is passed by value,
// its In map is shared with the caller, so writing into it directly would
// silently add the invoker's blessings to the caller's own ACL.
func initializeInstallationACLs(dir string, blessings []string, acl security.ACL) error {
	// acl.In is nil for an unclaimed node manager; ranging over a nil map
	// is a no-op, so the copy simply starts out empty in that case.
	in := make(map[security.BlessingPattern]security.LabelSet, len(acl.In)+len(blessings))
	for pattern, labels := range acl.In {
		in[pattern] = labels
	}
	for _, name := range blessings {
		// TODO(rjkroege): Use custom labels.
		in[security.BlessingPattern(name)] = security.AllLabels
	}
	acl.In = in
	aclDir := path.Join(dir, "acls")
	aclData := path.Join(aclDir, "data")
	aclSig := path.Join(aclDir, "signature")
	return writeACLs(aclData, aclSig, aclDir, acl)
}
// Install fetches the application envelope named by applicationVON and
// creates a new installation for it under the configured root: the first
// version is set up, the origin is recorded, the installation is marked
// active and its ACLs are initialized from the caller's blessings and the
// node manager ACL. It is only valid on an empty suffix.
//
// On success the returned name is the application title joined with the new
// installation id. If any step after the envelope fetch fails, the partially
// created installation directory is cleaned up.
func (i *appInvoker) Install(call ipc.ServerContext, applicationVON string) (string, error) {
	if len(i.suffix) != 0 {
		return "", errInvalidSuffix
	}
	ctx, cancel := rt.R().NewContext().WithTimeout(time.Minute)
	defer cancel()
	envelope, err := fetchAppEnvelope(ctx, applicationVON)
	if err != nil {
		return "", err
	}
	installationID := generateID()
	installationDir := filepath.Join(i.config.Root, applicationDirName(envelope.Title), installationDirName(installationID))
	// Until the installation is complete, any return path removes what
	// has been created so far.
	installed := false
	defer func() {
		if !installed {
			cleanupDir(installationDir, "")
		}
	}()
	_, err = newVersion(installationDir, envelope, "")
	if err == nil {
		err = saveOrigin(installationDir, applicationVON)
	}
	if err == nil {
		err = initializeInstallation(installationDir, active)
	}
	if err == nil {
		err = initializeInstallationACLs(installationDir, call.RemoteBlessings().ForContext(call), i.nodeACL)
	}
	if err != nil {
		return "", err
	}
	installed = true
	return naming.Join(envelope.Title, installationID), nil
}
// Refresh is not implemented yet: it does nothing and always returns nil.
func (*appInvoker) Refresh(ipc.ServerContext) error {
// TODO(jsimsa): Implement.
return nil
}
// Restart is not implemented yet: it does nothing and always returns nil.
func (*appInvoker) Restart(ipc.ServerContext) error {
// TODO(jsimsa): Implement.
return nil
}
// openWriteFile opens path for writing, creating it with mode 0600 if it does
// not exist. An existing file is neither truncated nor opened in append mode.
// A failure is logged and reported as errOperationFailed.
func openWriteFile(path string) (*os.File, error) {
	const (
		flags = os.O_WRONLY | os.O_CREATE
		perm  = os.FileMode(0600)
	)
	opened, err := os.OpenFile(path, flags, perm)
	if err == nil {
		return opened, nil
	}
	vlog.Errorf("OpenFile(%v) failed: %v", path, err)
	return nil, errOperationFailed
}
// installationDirCore resolves a two-component name (application title,
// installation id) to the installation directory under root. It returns
// errInvalidSuffix when components does not have exactly two elements,
// errNotExist when the directory is missing, and errOperationFailed when the
// directory cannot be examined for any other reason.
func installationDirCore(components []string, root string) (string, error) {
	if len(components) != 2 {
		return "", errInvalidSuffix
	}
	appTitle, installationID := components[0], components[1]
	dir := filepath.Join(root, applicationDirName(appTitle), installationDirName(installationID))
	_, err := os.Stat(dir)
	switch {
	case err == nil:
		return dir, nil
	case os.IsNotExist(err):
		return "", errNotExist
	default:
		vlog.Errorf("Stat(%v) failed: %v", dir, err)
		return "", errOperationFailed
	}
}
// setupPrincipal sets up the instance's principal, with the right blessings.
//
// When securityAgent is non-nil the principal is created through the security
// agent and the agent's handle for it is recorded in info.SecurityAgentHandle;
// otherwise a persistent principal is created in <instanceDir>/credentials.
//
// The principal then receives two sets of blessings:
//   - the blessings granted by the caller of Start, extended with the app
//     title read from the envelope in versionDir; these become the default
//     blessings and are presented to all peers;
//   - a "callback" blessing derived from the node manager's default
//     blessings, used when talking to the node manager and, under a randomly
//     generated pattern stored in info.NodeManagerPeerPattern, by the child's
//     app cycle manager.
//
// Failures are logged and reported as errOperationFailed, except for envelope
// loading errors (returned as is) and missing granted blessings
// (errInvalidBlessing).
func setupPrincipal(instanceDir, versionDir string, call ipc.ServerContext, securityAgent *securityAgentState, info *instanceInfo) error {
	var p security.Principal
	if securityAgent != nil {
		// TODO(caprita): Part of the cleanup upon destroying an
		// instance, we should tell the agent to drop the principal.
		handle, conn, err := securityAgent.keyMgrAgent.NewPrincipal(rt.R().NewContext(), false)
		if err != nil {
			// Neither handle nor conn is usable when the agent
			// failed to create the principal.
			vlog.Errorf("NewPrincipal() failed: %v", err)
			return errOperationFailed
		}
		defer conn.Close()
		client, err := rt.R().NewClient(options.VCSecurityNone)
		if err != nil {
			vlog.Errorf("NewClient() failed: %v", err)
			return errOperationFailed
		}
		defer client.Close()
		// TODO(caprita): release the socket created by NewAgentPrincipal.
		if p, err = agent.NewAgentPrincipal(client, int(conn.Fd()), rt.R().NewContext()); err != nil {
			vlog.Errorf("NewAgentPrincipal() failed: %v", err)
			return errOperationFailed
		}
		info.SecurityAgentHandle = handle
	} else {
		credentialsDir := filepath.Join(instanceDir, "credentials")
		// TODO(caprita): The app's system user id needs access to this dir.
		// Use the suidhelper to chown it.
		var err error
		if p, err = vsecurity.CreatePersistentPrincipal(credentialsDir, nil); err != nil {
			vlog.Errorf("CreatePersistentPrincipal(%v, nil) failed: %v", credentialsDir, err)
			return errOperationFailed
		}
	}
	// Read the app installation version's envelope to obtain the app title.
	//
	// NOTE: we could have gotten this from the suffix as well, but the
	// format of the object name suffix may change in the future: there's no
	// guarantee it will always include the title.
	envelope, err := loadEnvelope(versionDir)
	if err != nil {
		return err
	}
	nmPrincipal := call.LocalPrincipal()
	// Take the blessings conferred upon us by the Start-er, extend them
	// with the app title.
	grantedBlessings := call.Blessings()
	if grantedBlessings == nil {
		return errInvalidBlessing
	}
	// TODO(caprita): Revisit UnconstrainedUse.
	appBlessings, err := nmPrincipal.Bless(p.PublicKey(), grantedBlessings, envelope.Title, security.UnconstrainedUse())
	if err != nil {
		vlog.Errorf("Bless() failed: %v", err)
		return errOperationFailed
	}
	// The blessings we extended from the blessings that the Start-er
	// granted are the default blessings for the app.
	if err := p.BlessingStore().SetDefault(appBlessings); err != nil {
		vlog.Errorf("BlessingStore.SetDefault() failed: %v", err)
		return errOperationFailed
	}
	if _, err := p.BlessingStore().Set(appBlessings, security.AllPrincipals); err != nil {
		vlog.Errorf("BlessingStore.Set() failed: %v", err)
		return errOperationFailed
	}
	if err := p.AddToRoots(appBlessings); err != nil {
		vlog.Errorf("AddToRoots() failed: %v", err)
		return errOperationFailed
	}
	// In addition, we give the app separate blessings for the purpose of
	// communicating with the node manager.
	//
	// NOTE(caprita/ataly): Giving the app an unconstrained blessing from
	// the node manager's default presents the app with a blessing that's
	// potentially more powerful than what is strictly needed to allow
	// communication between node manager and app (which could be more
	// narrowly accomplished by using a custom-purpose self-signed blessing
	// that the node manager produces solely to talk to the app).
	//
	// TODO(caprita): Figure out if there is any feature value in providing
	// the app with a node manager-derived blessing (e.g., may the app need
	// to prove it's running on the node?).
	nmBlessings, err := nmPrincipal.Bless(p.PublicKey(), nmPrincipal.BlessingStore().Default(), "callback", security.UnconstrainedUse())
	if err != nil {
		// Without this check a failed Bless() would go unnoticed and
		// the unusable result would be written into the store below.
		vlog.Errorf("Bless() failed: %v", err)
		return errOperationFailed
	}
	// Put the names of the node manager's default blessings as patterns for
	// the child, so that the child uses the right blessing when talking
	// back to the node manager.
	names := nmPrincipal.BlessingStore().Default().ForContext(call)
	for _, n := range names {
		if _, err := p.BlessingStore().Set(nmBlessings, security.BlessingPattern(n)); err != nil {
			vlog.Errorf("BlessingStore.Set() failed: %v", err)
			return errOperationFailed
		}
	}
	// We also want to override the app cycle manager's server blessing in
	// the child (so that the node manager can send RPCs to it). We signal
	// to the child's app manager to use a randomly generated pattern to
	// extract the right blessing to use from its store for this purpose.
	randomPattern, err := generateRandomString()
	if err != nil {
		vlog.Errorf("generateRandomString() failed: %v", err)
		return errOperationFailed
	}
	if _, err := p.BlessingStore().Set(nmBlessings, security.BlessingPattern(randomPattern)); err != nil {
		vlog.Errorf("BlessingStore.Set() failed: %v", err)
		return errOperationFailed
	}
	info.NodeManagerPeerPattern = randomPattern
	if err := p.AddToRoots(nmBlessings); err != nil {
		vlog.Errorf("AddToRoots() failed: %v", err)
		return errOperationFailed
	}
	return nil
}
// installationDir resolves the invoker's suffix to the directory of the app
// installation it names, relative to the configured root. It fails when the
// suffix does not name an installation or when the named installation does
// not exist.
func (i *appInvoker) installationDir() (string, error) {
	root := i.config.Root
	return installationDirCore(i.suffix, root)
}
// initializeInstanceACLs writes the initial ACL for the instance in
// instanceDir. The ACL starts out as the given acl and is extended so that
// every name in blessings is granted all labels. The result is stored via
// writeACLs in <instanceDir>/acls/data, with its signature in
// <instanceDir>/acls/signature.
//
// The key and installationDir parameters are currently unused.
//
// acl.In is copied before it is extended: although acl is passed by value,
// its In map is shared with the caller, so writing into it directly would
// silently add the invoker's blessings to the caller's own ACL.
func initializeInstanceACLs(key, installationDir, instanceDir string, blessings []string, acl security.ACL) error {
	// acl.In is nil for an unclaimed node manager; ranging over a nil map
	// is a no-op, so the copy simply starts out empty in that case.
	in := make(map[security.BlessingPattern]security.LabelSet, len(acl.In)+len(blessings))
	for pattern, labels := range acl.In {
		in[pattern] = labels
	}
	for _, name := range blessings {
		// TODO(rjkroege): Use custom labels.
		in[security.BlessingPattern(name)] = security.AllLabels
	}
	acl.In = in
	aclDir := path.Join(instanceDir, "acls")
	aclData := path.Join(aclDir, "data")
	aclSig := path.Join(aclDir, "signature")
	return writeACLs(aclData, aclSig, aclDir, acl)
}
// newInstance sets up the directory for a new application instance of the
// installation named by the invoker's suffix, which must be in the active
// state. The instance gets a fresh id, a "version" symlink to the
// installation's current version, a principal, a saved info file, the
// 'suspended' state and its initial ACLs.
//
// It returns the instance directory and the instance id. Both may be
// non-empty even when an error is returned, so that the caller can clean up
// whatever was created.
func (i *appInvoker) newInstance(call ipc.ServerContext) (string, string, error) {
	installationDir, err := i.installationDir()
	if err != nil {
		return "", "", err
	}
	if !installationStateIs(installationDir, active) {
		return "", "", errInvalidOperation
	}
	instanceID := generateID()
	instanceDir := filepath.Join(installationDir, "instances", instanceDirName(instanceID))
	if mkdirErr := mkdir(instanceDir); mkdirErr != nil {
		return "", instanceID, errOperationFailed
	}
	// Pin the instance to whatever version is current right now.
	currLink := filepath.Join(installationDir, "current")
	versionDir, err := filepath.EvalSymlinks(currLink)
	if err != nil {
		vlog.Errorf("EvalSymlinks(%v) failed: %v", currLink, err)
		return instanceDir, instanceID, errOperationFailed
	}
	versionLink := filepath.Join(instanceDir, "version")
	if err = os.Symlink(versionDir, versionLink); err != nil {
		vlog.Errorf("Symlink(%v, %v) failed: %v", versionDir, versionLink, err)
		return instanceDir, instanceID, errOperationFailed
	}
	// The remaining steps run in order; the first failure stops the chain
	// and is returned as is.
	info := new(instanceInfo)
	err = setupPrincipal(instanceDir, versionDir, call, i.securityAgent, info)
	if err == nil {
		err = saveInstanceInfo(instanceDir, info)
	}
	if err == nil {
		err = initializeInstance(instanceDir, suspended)
	}
	if err == nil {
		err = initializeInstanceACLs(installationDir, installationDir, instanceDir, call.RemoteBlessings().ForContext(call), i.nodeACL)
	}
	return instanceDir, instanceID, err
}
// isSetuid reports whether the setuid bit is set in the mode of fileStat. It
// is a variable rather than a plain function so that tests can replace its
// implementation.
var isSetuid = func(fileStat os.FileInfo) bool {
	vlog.VI(2).Infof("running the original isSetuid")
	mode := fileStat.Mode()
	return mode&os.ModeSetuid != 0
}
// systemAccountForHelper returns the uname that the helper uses to invoke the
// application. If the helper exists and is setuid, the node manager
// requires that there is a uname associated with the Veyron
// identity that requested starting an application.
//
// The helper at helperPath must exist (otherwise errOperationFailed is
// returned). When it is not setuid, the app runs under the node manager's own
// user name regardless of any association.
//
// TODO(rjkroege): This function assumes a desktop installation target
// and is probably not a good fit in other contexts. Revisit the design
// as appropriate. This function also internalizes a decision as to when
// it is possible to start an application that needs to be made explicit.
func systemAccountForHelper(helperPath string, identityNames []string, uat BlessingSystemAssociationStore) (systemName string, err error) {
	helperStat, err := os.Stat(helperPath)
	if err != nil {
		vlog.Errorf("Stat(%v) failed: %v. helper is required.", helperPath, err)
		return "", errOperationFailed
	}
	setuid := isSetuid(helperStat)
	account, associated := uat.SystemAccountForBlessings(identityNames)
	if !setuid {
		// When the helper is not setuid, the helper can't change the
		// app's uid so just run the app as the node manager's uname
		// whether or not there is an association.
		vlog.VI(1).Infof("helper not setuid. Node manager will invoke app with its own userid")
		current, err := user.Current()
		if err != nil {
			vlog.Errorf("user.Current() failed: %v", err)
			return "", errOperationFailed
		}
		return current.Username, nil
	}
	if !associated {
		// The helper is owned by the node manager and installed as setuid root.
		// Therefore, the node manager must never run an app as itself to
		// prevent an app trivially granting itself root permissions.
		// There must be an associated uname for the account in this case.
		return "", verror.NoAccessf("use of setuid helper requires an associated uname.")
	}
	return account, nil
}
// genCmd builds (but does not start) the command that launches the instance
// in instanceDir through the helper at helperPath, running as systemName.
//
// The binary and envelope are taken from the version the instance's "version"
// link points at. The helper is told the user name, the workspace
// (<instanceDir>/root, which is also the command's working directory), the
// log directory (<instanceDir>/logs) and the binary to run, followed by "--"
// and the app's own arguments. The command's stdout and stderr are redirected
// to freshly named STDOUT-<ts> / STDERR-<ts> files in the log directory.
//
// Failures are logged by the step that failed; no command is returned on
// error.
func genCmd(instanceDir, helperPath, systemName string) (*exec.Cmd, error) {
	versionLink := filepath.Join(instanceDir, "version")
	versionDir, err := filepath.EvalSymlinks(versionLink)
	if err != nil {
		vlog.Errorf("EvalSymlinks(%v) failed: %v", versionLink, err)
		return nil, errOperationFailed
	}
	envelope, err := loadEnvelope(versionDir)
	if err != nil {
		return nil, err
	}
	binPath := filepath.Join(versionDir, "bin")
	if _, err := os.Stat(binPath); err != nil {
		vlog.Errorf("Stat(%v) failed: %v", binPath, err)
		return nil, errOperationFailed
	}
	cmd := exec.Command(helperPath)
	cmd.Args = append(cmd.Args, "--username", systemName)
	// TODO(caprita): Also pass in configuration info like NAMESPACE_ROOT to
	// the app (to point to the device mounttable).
	cmd.Env = envelope.Env
	rootDir := filepath.Join(instanceDir, "root")
	if err := mkdir(rootDir); err != nil {
		return nil, err
	}
	cmd.Dir = rootDir
	cmd.Args = append(cmd.Args, "--workspace", rootDir)
	logDir := filepath.Join(instanceDir, "logs")
	if err := mkdir(logDir); err != nil {
		return nil, err
	}
	cmd.Args = append(cmd.Args, "--logdir", logDir)
	// Both log files share one timestamp so they can be paired up later.
	timestamp := time.Now().UnixNano()
	stdoutLog := filepath.Join(logDir, fmt.Sprintf("STDOUT-%d", timestamp))
	stdoutFile, err := openWriteFile(stdoutLog)
	if err != nil {
		return nil, err
	}
	stderrLog := filepath.Join(logDir, fmt.Sprintf("STDERR-%d", timestamp))
	stderrFile, err := openWriteFile(stderrLog)
	if err != nil {
		// The command is abandoned, so nothing else would ever close
		// the stdout log we just opened.
		stdoutFile.Close()
		return nil, err
	}
	cmd.Stdout, cmd.Stderr = stdoutFile, stderrFile
	cmd.Args = append(cmd.Args, "--run", binPath)
	cmd.Args = append(cmd.Args, "--")
	// Set up args and env.
	// NOTE(review): "../logs" presumably resolves to logDir because the
	// working directory is rootDir -- confirm the helper preserves it.
	cmd.Args = append(cmd.Args, "--log_dir=../logs")
	cmd.Args = append(cmd.Args, envelope.Args...)
	return cmd, nil
}
// startCmd starts the given command as a child process for the instance in
// instanceDir and waits for it to come up. It loads the instance's saved
// info, registers a callback listener for the child's app cycle manager name,
// passes the parent name, protocol, address and parent blessing pattern to
// the child through the vexec config, and hands the child either a security
// agent connection (when the agent is in use) or the path to the instance's
// credentials directory through the environment.
//
// On success the child's app cycle manager name and pid are saved to the
// instance's info file. If starting fails, or the child does not report back
// in time, an error is returned and the deferred Clean() is invoked on the
// handle.
func (i *appInvoker) startCmd(instanceDir string, cmd *exec.Cmd) error {
info, err := loadInstanceInfo(instanceDir)
if err != nil {
return err
}
// Set up the child process callback.
callbackState := i.callback
listener := callbackState.listenFor(mgmt.AppCycleManagerConfigKey)
defer listener.cleanup()
cfg := vexec.NewConfig()
cfg.Set(mgmt.ParentNameConfigKey, listener.name())
cfg.Set(mgmt.ProtocolConfigKey, "tcp")
cfg.Set(mgmt.AddressConfigKey, "127.0.0.1:0")
cfg.Set(mgmt.ParentBlessingConfigKey, info.NodeManagerPeerPattern)
// Set up any agent-specific state.
// NOTE(caprita): This ought to belong in genCmd, but we do it here
// to avoid holding on to the lock for too long.
//
// TODO(caprita): We need to take care to grab/release the lock
// excluding concurrent start operations. See if we can make this more
// robust.
var agentCleaner func()
if sa := i.securityAgent; sa != nil {
sa.startLock.Lock()
file, err := sa.keyMgrAgent.NewConnection(info.SecurityAgentHandle)
if err != nil {
sa.startLock.Unlock()
vlog.Errorf("NewConnection(%v) failed: %v", info.SecurityAgentHandle, err)
return err
}
// agentCleaner closes our copy of the agent connection and releases
// startLock; it is called right after handle.Start() returns, whether
// or not the start succeeded.
agentCleaner = func() {
file.Close()
sa.startLock.Unlock()
}
// We need to account for the file descriptors corresponding to
// std{err|out|in} as well as the implementation-specific pipes
// that the vexec library adds to ExtraFiles during
// handle.Start. vexec.FileOffset properly offsets fd
// accordingly.
fd := len(cmd.ExtraFiles) + vexec.FileOffset
cmd.ExtraFiles = append(cmd.ExtraFiles, file)
cfg.Set(mgmt.SecurityAgentFDConfigKey, strconv.Itoa(fd))
} else {
cmd.Env = append(cmd.Env, consts.VeyronCredentials+"="+filepath.Join(instanceDir, "credentials"))
}
handle := vexec.NewParentHandle(cmd, vexec.ConfigOpt{cfg})
// Clean up the handle on every return path except success, where handle
// is reset to nil just before returning.
defer func() {
if handle != nil {
if err := handle.Clean(); err != nil {
vlog.Errorf("Clean() failed: %v", err)
}
}
}()
// Start the child process.
if err := handle.Start(); err != nil {
if agentCleaner != nil {
agentCleaner()
}
vlog.Errorf("Start() failed: %v", err)
return errOperationFailed
}
if agentCleaner != nil {
agentCleaner()
}
// Wait for the child process to start.
timeout := 10 * time.Second
if err := handle.WaitForReady(timeout); err != nil {
vlog.Errorf("WaitForReady(%v) failed: %v", timeout, err)
return errOperationFailed
}
// Wait for the child to report the name registered under the app cycle
// manager config key.
childName, err := listener.waitForValue(timeout)
if err != nil {
return errOperationFailed
}
info.AppCycleMgrName, info.Pid = childName, handle.Pid()
if err := saveInstanceInfo(instanceDir, info); err != nil {
return err
}
// TODO(caprita): Spin up a goroutine to reap child status upon exit and
// transition it to suspended state if it exits on its own.
handle = nil
return nil
}
// run launches the instance in instanceDir under systemName. The instance
// must be 'suspended'; it is moved to 'starting', the command is generated
// and started, and the instance ends up 'started' on success. If generating
// or starting the command fails, the instance is moved back to 'suspended'
// and the failure is returned.
func (i *appInvoker) run(instanceDir, systemName string) error {
	if err := transitionInstance(instanceDir, suspended, starting); err != nil {
		return err
	}
	launch := func() error {
		cmd, err := genCmd(instanceDir, i.config.Helper, systemName)
		if err != nil {
			return err
		}
		return i.startCmd(instanceDir, cmd)
	}
	if err := launch(); err != nil {
		// Best effort: the launch error is what gets reported.
		transitionInstance(instanceDir, starting, suspended)
		return err
	}
	return transitionInstance(instanceDir, starting, started)
}
// Start creates a new instance of the installation named by the invoker's
// suffix, determines and records the system account it should run under, and
// runs it. On success it returns a single-element slice holding the new
// instance id.
//
// If anything fails before the instance is run, the instance directory is
// cleaned up. A failure to run leaves the directory in place.
func (i *appInvoker) Start(call ipc.ServerContext) ([]string, error) {
	helper := i.config.Helper
	instanceDir, instanceID, err := i.newInstance(call)
	// abort removes whatever has been created for the instance and
	// reports cause to the caller.
	abort := func(cause error) ([]string, error) {
		cleanupDir(instanceDir, helper)
		return nil, cause
	}
	if err != nil {
		return abort(err)
	}
	systemName, err := systemAccountForHelper(helper, call.RemoteBlessings().ForContext(call), i.uat)
	if err != nil {
		return abort(err)
	}
	if err := saveSystemNameForInstance(instanceDir, systemName); err != nil {
		return abort(err)
	}
	if err := i.run(instanceDir, systemName); err != nil {
		// TODO(caprita): We should call cleanupDir here, but we don't
		// in order to not lose the logs for the instance (so we can
		// debug why run failed). Clean this up.
		// cleanupDir(instanceDir, helper)
		return nil, err
	}
	return []string{instanceID}, nil
}
// instanceDir maps a three-component suffix (application title, installation
// id, instance id) to the path of the corresponding instance directory under
// root. It returns errInvalidSuffix for any other number of components. The
// directory is not checked for existence.
func instanceDir(root string, suffix []string) (string, error) {
	if len(suffix) != 3 {
		return "", errInvalidSuffix
	}
	appTitle, installationID, instanceID := suffix[0], suffix[1], suffix[2]
	instances := filepath.Join(root, applicationDirName(appTitle), installationDirName(installationID), "instances")
	return filepath.Join(instances, instanceDirName(instanceID)), nil
}
// instanceDir returns the path to the directory containing the app instance
// referred to by the invoker's suffix, relative to the configured root.
// Returns an error if the suffix does not name an instance.
func (i *appInvoker) instanceDir() (string, error) {
	root := i.config.Root
	return instanceDir(root, i.suffix)
}
// Resume runs the instance named by the invoker's suffix again. The system
// account derived from the caller's blessings must match the one recorded
// when the instance was started; otherwise the call is rejected with a
// no-access error.
func (i *appInvoker) Resume(call ipc.ServerContext) error {
	dir, err := i.instanceDir()
	if err != nil {
		return err
	}
	callerName, err := systemAccountForHelper(i.config.Helper, call.RemoteBlessings().ForContext(call), i.uat)
	if err != nil {
		return err
	}
	recordedName, err := readSystemNameForInstance(dir)
	if err != nil {
		return err
	}
	if recordedName == callerName {
		return i.run(dir, callerName)
	}
	return verror.NoAccessf("Not allowed to resume an application under a different system name.")
}
// stopAppRemotely asks the app cycle manager named appVON to stop, using a
// one-minute timeout. Task updates streamed back by the app are logged. Any
// failure of the call, of the stream or of its completion is logged and
// reported as errOperationFailed.
func stopAppRemotely(appVON string) error {
	stub := appcycle.AppCycleClient(appVON)
	ctx, cancel := rt.R().NewContext().WithTimeout(time.Minute)
	defer cancel()
	stream, err := stub.Stop(ctx)
	if err != nil {
		vlog.Errorf("%v.Stop() failed: %v", appVON, err)
		return errOperationFailed
	}
	// Drain the update stream before looking at how it ended.
	updates := stream.RecvStream()
	for updates.Advance() {
		vlog.VI(2).Infof("%v.Stop() task update: %v", appVON, updates.Value())
	}
	if recvErr := updates.Err(); recvErr != nil {
		vlog.Errorf("Advance() failed: %v", recvErr)
		return errOperationFailed
	}
	finishErr := stream.Finish()
	if finishErr == nil {
		return nil
	}
	vlog.Errorf("Finish() failed: %v", finishErr)
	return errOperationFailed
}
// stop stops the instance in instanceDir by loading its saved info and asking
// the recorded app cycle manager to stop.
func stop(instanceDir string) error {
	saved, loadErr := loadInstanceInfo(instanceDir)
	if loadErr != nil {
		return loadErr
	}
	return stopAppRemotely(saved.AppCycleMgrName)
}
// Stop stops the instance named by the invoker's suffix. A suspended instance
// is moved straight to 'stopped'. Otherwise the instance must be 'started':
// it is moved to 'stopping', asked to stop, and then moved to 'stopped'; if
// stopping fails it goes back to 'started'.
//
// TODO(caprita): implement deadline for Stop.
func (i *appInvoker) Stop(_ ipc.ServerContext, deadline uint32) error {
	dir, err := i.instanceDir()
	if err != nil {
		return err
	}
	// Success and outright failure of the suspended->stopped transition
	// both end the call; any other error means the instance was not
	// suspended, so fall through to the started case.
	switch err := transitionInstance(dir, suspended, stopped); err {
	case nil, errOperationFailed:
		return err
	}
	if err := transitionInstance(dir, started, stopping); err != nil {
		return err
	}
	stopErr := stop(dir)
	if stopErr == nil {
		return transitionInstance(dir, stopping, stopped)
	}
	// Best effort: the stop error is what gets reported.
	transitionInstance(dir, stopping, started)
	return stopErr
}
// Suspend suspends the instance named by the invoker's suffix. The instance
// is moved from started to suspending, the app is asked to stop, and the
// instance ends up suspended on success or back in started on failure.
func (i *appInvoker) Suspend(ipc.ServerContext) error {
	dir, err := i.instanceDir()
	if err != nil {
		return err
	}
	if err = transitionInstance(dir, started, suspending); err != nil {
		return err
	}
	if stopErr := stop(dir); stopErr != nil {
		// Best-effort rollback; the stop error is what gets reported.
		transitionInstance(dir, suspending, started)
		return stopErr
	}
	return transitionInstance(dir, suspending, suspended)
}
// Uninstall transitions the installation named by the invoker's suffix from
// the active state to the uninstalled state.
func (i *appInvoker) Uninstall(ipc.ServerContext) error {
	dir, err := i.installationDir()
	if err == nil {
		err = transitionInstallation(dir, active, uninstalled)
	}
	return err
}
// Update fetches the application envelope from the installation's origin and,
// if it differs from the envelope of the current version, creates a new
// version from it.
//
// It returns errInvalidOperation if the installation is not active,
// errIncompatibleUpdate if the fetched envelope carries a different title,
// and errUpdateNoOp if the fetched envelope is deeply equal to the current
// one.
func (i *appInvoker) Update(ipc.ServerContext) error {
	instDir, err := i.installationDir()
	if err != nil {
		return err
	}
	if !installationStateIs(instDir, active) {
		return errInvalidOperation
	}
	origin, err := loadOrigin(instDir)
	if err != nil {
		return err
	}

	ctx, cancel := rt.R().NewContext().WithTimeout(time.Minute)
	defer cancel()
	fetched, err := fetchAppEnvelope(ctx, origin)
	if err != nil {
		return err
	}

	currentLink := filepath.Join(instDir, "current")
	baselineDir, err := filepath.EvalSymlinks(currentLink)
	if err != nil {
		vlog.Errorf("EvalSymlinks(%v) failed: %v", currentLink, err)
		return errOperationFailed
	}

	// NOTE(caprita): Two competing updates can race, both taking the old
	// version as their baseline. Both may then succeed, even when they
	// update the installation to the same new envelope; one of them ends
	// up as the new 'current', and both new versions point their
	// 'previous' link at the old version. This doesn't appear to be of
	// practical concern, so we avoid the complexity of synchronizing
	// updates.
	baseline, err := loadEnvelope(baselineDir)
	if err != nil {
		return err
	}
	switch {
	case baseline.Title != fetched.Title:
		return errIncompatibleUpdate
	case reflect.DeepEqual(baseline, fetched):
		return errUpdateNoOp
	}

	newDir, err := newVersion(instDir, fetched, baselineDir)
	if err == nil {
		return nil
	}
	// Clean up whatever newVersion left behind before reporting the error.
	cleanupDir(newDir, "")
	return err
}
// UpdateTo is not implemented yet: it ignores von and always returns nil.
func (*appInvoker) UpdateTo(_ ipc.ServerContext, von string) error {
// TODO(jsimsa): Implement.
return nil
}
// Revert points the installation's 'current' link at the version that the
// current version's 'previous' link refers to.
//
// It returns errInvalidOperation if the installation is not active and
// errUpdateNoOp if the current version has no 'previous' link. Filesystem
// failures are logged and reported as errOperationFailed.
func (i *appInvoker) Revert(ipc.ServerContext) error {
	instDir, err := i.installationDir()
	if err != nil {
		return err
	}
	if !installationStateIs(instDir, active) {
		return errInvalidOperation
	}

	// NOTE(caprita): An update and a revert can race when both start from
	// the same current version, which renders the update inconsequential.
	// This doesn't appear to be of practical concern, so we avoid the
	// complexity of synchronizing updates and revert operations.
	currentLink := filepath.Join(instDir, "current")
	currentDir, err := filepath.EvalSymlinks(currentLink)
	if err != nil {
		vlog.Errorf("EvalSymlinks(%v) failed: %v", currentLink, err)
		return errOperationFailed
	}

	prevLink := filepath.Join(currentDir, "previous")
	switch _, statErr := os.Lstat(prevLink); {
	case statErr == nil:
		// The link exists; resolve it below.
	case os.IsNotExist(statErr):
		// No 'previous' link -- must be the first version.
		return errUpdateNoOp
	default:
		vlog.Errorf("Lstat(%v) failed: %v", prevLink, statErr)
		return errOperationFailed
	}

	targetDir, err := filepath.EvalSymlinks(prevLink)
	if err != nil {
		vlog.Errorf("EvalSymlinks(%v) failed: %v", prevLink, err)
		return errOperationFailed
	}
	return updateLink(targetDir, currentLink)
}
// treeNode is a node in a tree keyed by name: each node maps a child name to
// the child node.
type treeNode struct {
	children map[string]*treeNode
}

// newTreeNode returns a node with an empty, initialized children map.
func newTreeNode() *treeNode {
	node := new(treeNode)
	node.children = map[string]*treeNode{}
	return node
}

// find walks down from n following names one element at a time and returns
// the node reached; with no names it returns n itself. When an element has no
// matching child, find either adds a fresh child and keeps going (create is
// true) or gives up and returns nil (create is false).
func (n *treeNode) find(names []string, create bool) *treeNode {
	cur := n
	for _, name := range names {
		child, found := cur.children[name]
		if !found {
			if !create {
				return nil
			}
			child = newTreeNode()
			cur.children[name] = child
		}
		cur = child
	}
	return cur
}
// scanEnvelopes globs for envelope files under
// <root>/<appDir>/installation-*/*/envelope and, for every envelope that
// loads, records the path {envelope title, installation id} in tree. appDir
// may itself be a glob pattern. Envelopes that fail to load are skipped.
func (i *appInvoker) scanEnvelopes(tree *treeNode, appDir string) {
	// Find all envelopes, extract installID.
	pattern := []string{i.config.Root, appDir, "installation-*", "*", "envelope"}
	matches, err := filepath.Glob(filepath.Join(pattern...))
	if err != nil {
		vlog.Errorf("unexpected error: %v", err)
		return
	}
	// Relative to the root, a match has one component fewer than the
	// pattern.
	wantElems := len(pattern) - 1
	sep := string(filepath.Separator)
	for _, envPath := range matches {
		envelope, err := loadEnvelope(filepath.Dir(envPath))
		if err != nil {
			continue
		}
		// A Rel failure yields an empty path, which the component count
		// check below rejects.
		rel, _ := filepath.Rel(i.config.Root, envPath)
		parts := strings.Split(rel, sep)
		if len(parts) != wantElems {
			vlog.Errorf("unexpected number of path components: %q (%q)", parts, envPath)
			continue
		}
		id := strings.TrimPrefix(parts[1], "installation-")
		tree.find([]string{envelope.Title, id}, true)
	}
}
// scanInstances records in tree every instance of the installation named by
// the first two suffix elements. It does nothing when the suffix is shorter
// than two elements or the installation directory cannot be resolved.
func (i *appInvoker) scanInstances(tree *treeNode) {
	if len(i.suffix) < 2 {
		return
	}
	installDir, err := installationDirCore(i.suffix[:2], i.config.Root)
	if err != nil {
		return
	}
	// Find all instances.
	pattern := filepath.Join(installDir, "instances", "instance-*", "info")
	infoFiles, err := filepath.Glob(pattern)
	if err != nil {
		vlog.Errorf("unexpected error: %v", err)
		return
	}
	appTitle := i.suffix[0]
	for _, infoFile := range infoFiles {
		i.scanInstance(tree, appTitle, filepath.Dir(infoFile))
	}
}
// scanInstance records the instance stored in instanceDir under
// {title, installation id, instance id} in tree. Every instance whose info
// loads gets a "logs" child; instances in the started state additionally get
// "pprof" and "stats". The ids are taken from the second and fourth components
// of instanceDir relative to the root.
func (i *appInvoker) scanInstance(tree *treeNode, title, instanceDir string) {
	if _, err := loadInstanceInfo(instanceDir); err != nil {
		return
	}
	// A Rel failure yields an empty path, which the component count check
	// below rejects.
	rel, _ := filepath.Rel(i.config.Root, instanceDir)
	parts := strings.Split(rel, string(filepath.Separator))
	if len(parts) < 4 {
		vlog.Errorf("unexpected number of path components: %q (%q)", parts, instanceDir)
		return
	}
	installID := strings.TrimPrefix(parts[1], "installation-")
	instanceID := strings.TrimPrefix(parts[3], "instance-")
	addObject := func(obj string) {
		tree.find([]string{title, installID, instanceID, obj}, true)
	}
	addObject("logs")
	if !instanceStateIs(instanceDir, started) {
		return
	}
	addObject("pprof")
	addObject("stats")
}
// VGlobChildren returns the names of the children of the object named by the
// invoker's suffix. Depending on the suffix length it scans envelopes
// (0 or 1 elements), the instances of one installation (2), or a single
// instance (3); longer suffixes yield errNotExist. If the scan did not
// produce a node for the suffix, errInvalidSuffix is returned. The names come
// from a map iteration, so their order is unspecified.
func (i *appInvoker) VGlobChildren() ([]string, error) {
	depth := len(i.suffix)
	if depth > 3 {
		return nil, errNotExist
	}
	tree := newTreeNode()
	switch depth {
	case 0:
		i.scanEnvelopes(tree, "app-*")
	case 1:
		i.scanEnvelopes(tree, applicationDirName(i.suffix[0]))
	case 2:
		i.scanInstances(tree)
	case 3:
		// On error the tree stays empty and the lookup below fails.
		if dir, err := i.instanceDir(); err == nil {
			i.scanInstance(tree, i.suffix[0], dir)
		}
	}
	node := tree.find(i.suffix, false)
	if node == nil {
		return nil, errInvalidSuffix
	}
	names := make([]string, 0, len(node.children))
	for name := range node.children {
		names = append(names, name)
	}
	return names, nil
}
// dirFromSuffix maps an object suffix to a directory under root: a
// two-element suffix resolves to an installation directory, and a longer one
// resolves to the instance directory named by its first three elements.
// Shorter suffixes yield errInvalidSuffix. Resolution failures are logged
// and returned.
//
// TODO(rjkroege): Refactor to eliminate redundancy with newAppSpecificAuthorizer.
func dirFromSuffix(suffix []string, root string) (string, error) {
	var (
		dir string
		err error
	)
	switch n := len(suffix); {
	case n == 2:
		dir, err = installationDirCore(suffix, root)
	case n > 2:
		dir, err = instanceDir(root, suffix[0:3])
	default:
		return "", errInvalidSuffix
	}
	if err != nil {
		vlog.Errorf("dirFromSuffix failed: %v", err)
		return "", err
	}
	return dir, nil
}
// SetACL stores acl for the installation or instance named by the invoker's
// suffix. The directory resolved from the suffix is passed for both of
// setAppACL's path arguments.
//
// TODO(rjkroege): Consider maintaining an in-memory ACL cache.
// TODO(rjkroege): Excise the idea of the key. Use the dir instead.
func (i *appInvoker) SetACL(_ ipc.ServerContext, acl security.ACL, etag string) error {
	aclDir, err := dirFromSuffix(i.suffix, i.config.Root)
	if err == nil {
		err = setAppACL(i.locks, aclDir, aclDir, acl, etag)
	}
	return err
}
// GetACL returns the ACL and etag for the installation or instance named by
// the invoker's suffix. The directory resolved from the suffix is passed for
// both of getAppACL's path arguments. If the suffix cannot be resolved, an
// empty ACL and etag are returned with the error.
func (i *appInvoker) GetACL(_ ipc.ServerContext) (acl security.ACL, etag string, err error) {
	aclDir, dirErr := dirFromSuffix(i.suffix, i.config.Root)
	if dirErr != nil {
		return security.ACL{}, "", dirErr
	}
	return getAppACL(i.locks, aclDir, aclDir)
}