Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -43,7 +43,7 @@ func (a *APIStore) PostAdminTeamsTeamIDSandboxesKill(c *gin.Context, teamID uuid
// Kill each sandbox
for _, sbx := range sandboxes {
wg.Go(func() error {
err := a.orchestrator.RemoveSandbox(ctx, sbx.TeamID, sbx.SandboxID, sandbox.StateActionKill)
err := a.orchestrator.RemoveSandbox(ctx, sbx.TeamID, sbx.SandboxID, sandbox.RemoveOpts{Action: sandbox.StateActionKill})
if err != nil {
logger.L().Error(ctx, "Failed to kill sandbox",
logger.WithSandboxID(sbx.SandboxID),
Expand Down
3 changes: 1 addition & 2 deletions packages/api/internal/handlers/sandbox_connect.go
Original file line number Diff line number Diff line change
Expand Up @@ -70,8 +70,7 @@ func (a *APIStore) PostSandboxesSandboxIDConnect(c *gin.Context, sandboxID api.S
}

// Sandbox not in store at all → fall through to snapshot resume.
var notFoundErr *sandbox.NotFoundError
if errors.As(apiErr.Err, &notFoundErr) {
if errors.Is(apiErr.Err, sandbox.ErrNotFound) {
break
}

Expand Down
2 changes: 1 addition & 1 deletion packages/api/internal/handlers/sandbox_kill.go
Original file line number Diff line number Diff line change
Expand Up @@ -67,7 +67,7 @@ func (a *APIStore) DeleteSandboxesSandboxID(

killedOrRemoved := false

err = a.orchestrator.RemoveSandbox(ctx, teamID, sandboxID, sandbox.StateActionKill)
err = a.orchestrator.RemoveSandbox(ctx, teamID, sandboxID, sandbox.RemoveOpts{Action: sandbox.StateActionKill})
switch {
case err == nil:
killedOrRemoved = true
Expand Down
2 changes: 1 addition & 1 deletion packages/api/internal/handlers/sandbox_pause.go
Original file line number Diff line number Diff line change
Expand Up @@ -39,7 +39,7 @@ func (a *APIStore) PostSandboxesSandboxIDPause(c *gin.Context, sandboxID api.San
traceID := span.SpanContext().TraceID().String()
c.Set("traceID", traceID)

err = a.orchestrator.RemoveSandbox(ctx, teamID, sandboxID, sandbox.StateActionPause)
err = a.orchestrator.RemoveSandbox(ctx, teamID, sandboxID, sandbox.RemoveOpts{Action: sandbox.StateActionPause})
var transErr *sandbox.InvalidStateTransitionError

switch {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -99,8 +99,7 @@ func (a *APIStore) PostSandboxesSandboxIDSnapshots(c *gin.Context, sandboxID api

result, err := a.orchestrator.CreateSnapshotTemplate(ctx, teamID, sandboxID, opts)
if err != nil {
var notFoundErr *sandbox.NotFoundError
if errors.As(err, &notFoundErr) {
if errors.Is(err, sandbox.ErrNotFound) {
logger.L().Debug(ctx, "Sandbox not found for snapshot", logger.WithSandboxID(sandboxID))
a.sendAPIStoreError(c, http.StatusNotFound, utils.SandboxNotFoundMsg(sandboxID))

Expand Down
25 changes: 14 additions & 11 deletions packages/api/internal/orchestrator/delete_instance.go
Original file line number Diff line number Diff line change
Expand Up @@ -17,16 +17,20 @@ import (
sbxlogger "github.com/e2b-dev/infra/packages/shared/pkg/logger/sandbox"
)

func (o *Orchestrator) RemoveSandbox(ctx context.Context, teamID uuid.UUID, sandboxID string, stateAction sandbox.StateAction) error {
func (o *Orchestrator) RemoveSandbox(ctx context.Context, teamID uuid.UUID, sandboxID string, opts sandbox.RemoveOpts) error {
ctx, span := tracer.Start(ctx, "remove-sandbox")
defer span.End()

sbx, alreadyDone, finish, err := o.sandboxStore.StartRemoving(ctx, teamID, sandboxID, stateAction)
sbx, alreadyDone, finish, err := o.sandboxStore.StartRemoving(ctx, teamID, sandboxID, opts)
if err != nil {
switch stateAction {
// For eviction, propagate all errors to the evictor.
if opts.Eviction {
return err
}

switch opts.Action {
case sandbox.StateActionKill:
var notFoundErr *sandbox.NotFoundError
if errors.As(err, &notFoundErr) {
if errors.Is(err, sandbox.ErrNotFound) {
logger.L().Info(ctx, "Sandbox not found, already removed", logger.WithSandboxID(sandboxID))

return ErrSandboxNotFound
Expand All @@ -43,8 +47,7 @@ func (o *Orchestrator) RemoveSandbox(ctx context.Context, teamID uuid.UUID, sand
return ErrSandboxOperationFailed
}
case sandbox.StateActionPause:
var notFoundErrPause *sandbox.NotFoundError
if errors.As(err, &notFoundErrPause) {
if errors.Is(err, sandbox.ErrNotFound) {
logger.L().Info(ctx, "Sandbox not found for pause", logger.WithSandboxID(sandboxID))

return ErrSandboxNotFound
Expand All @@ -65,7 +68,7 @@ func (o *Orchestrator) RemoveSandbox(ctx context.Context, teamID uuid.UUID, sand

return ErrSandboxOperationFailed
default:
logger.L().Error(ctx, "Invalid state action", logger.WithSandboxID(sandboxID), zap.String("state_action", stateAction.Name))
logger.L().Error(ctx, "Invalid state action", logger.WithSandboxID(sandboxID), zap.String("state_action", opts.Action.Name))

return ErrSandboxOperationFailed
}
Expand All @@ -80,10 +83,10 @@ func (o *Orchestrator) RemoveSandbox(ctx context.Context, teamID uuid.UUID, sand
return nil
}

defer func() { go o.countersRemove(context.WithoutCancel(ctx), teamID, stateAction) }()
defer func() { go o.analyticsRemove(context.WithoutCancel(ctx), sbx, stateAction) }()
defer func() { go o.countersRemove(context.WithoutCancel(ctx), teamID, opts.Action) }()
defer func() { go o.analyticsRemove(context.WithoutCancel(ctx), sbx, opts.Action) }()
defer o.sandboxStore.Remove(ctx, teamID, sandboxID)
err = o.removeSandboxFromNode(ctx, sbx, stateAction)
err = o.removeSandboxFromNode(ctx, sbx, opts.Action)
if err != nil {
logger.L().Error(ctx, "Error pausing sandbox", zap.Error(err), logger.WithSandboxID(sbx.SandboxID))

Expand Down
20 changes: 13 additions & 7 deletions packages/api/internal/orchestrator/evictor/evict.go
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@ package evictor

import (
"context"
"errors"
"time"

"github.com/google/uuid"
Expand All @@ -18,12 +19,12 @@ const (

type Evictor struct {
store *sandbox.Store
removeSandbox func(ctx context.Context, teamID uuid.UUID, sandboxID string, stateAction sandbox.StateAction) error
removeSandbox func(ctx context.Context, teamID uuid.UUID, sandboxID string, opts sandbox.RemoveOpts) error
}

func New(
store *sandbox.Store,
removeSandbox func(ctx context.Context, teamID uuid.UUID, sandboxID string, stateAction sandbox.StateAction) error,
removeSandbox func(ctx context.Context, teamID uuid.UUID, sandboxID string, opts sandbox.RemoveOpts) error,
) *Evictor {
return &Evictor{
store: store,
Expand Down Expand Up @@ -53,16 +54,21 @@ func (e *Evictor) Start(ctx context.Context) {

for _, item := range sbxs {
g.Go(func() error {
stateAction := sandbox.StateActionKill
action := sandbox.StateActionKill
if item.AutoPause {
stateAction = sandbox.StateActionPause
action = sandbox.StateActionPause
}

logger.L().Debug(ctx, "Evicting sandbox", logger.WithSandboxID(item.SandboxID), zap.String("state_action", stateAction.Name))
if err := e.removeSandbox(context.WithoutCancel(ctx), item.TeamID, item.SandboxID, stateAction); err != nil {
logger.L().Debug(ctx, "Evicting sandbox failed", zap.Error(err), logger.WithSandboxID(item.SandboxID))
if err := e.removeSandbox(context.WithoutCancel(ctx), item.TeamID, item.SandboxID, sandbox.RemoveOpts{Action: action, Eviction: true}); err != nil {
if !errors.Is(err, sandbox.ErrNotEvictable) && !errors.Is(err, sandbox.ErrNotFound) {
logger.L().Debug(ctx, "Evicting sandbox failed", zap.Error(err), logger.WithSandboxID(item.SandboxID))
}

return nil
}

logger.L().Debug(ctx, "Sandbox evicted", logger.WithSandboxID(item.SandboxID))

return nil
})
}
Expand Down
3 changes: 1 addition & 2 deletions packages/api/internal/orchestrator/keep_alive.go
Original file line number Diff line number Diff line change
Expand Up @@ -45,14 +45,13 @@ func (o *Orchestrator) KeepAliveFor(ctx context.Context, teamID uuid.UUID, sandb
return sbx, nil
}

var sbxNotFoundErr *sandbox.NotFoundError
var sbxNotRunningErr *sandbox.NotRunningError
sbx, err := o.sandboxStore.Update(ctx, teamID, sandboxID, updateFunc)
if err != nil {
switch {
case errors.As(err, &sbxNotRunningErr):
return nil, &api.APIError{Code: http.StatusConflict, ClientMsg: utils.SandboxChangingStateMsg(sandboxID, sbxNotRunningErr.State), Err: err}
case errors.As(err, &sbxNotFoundErr):
case errors.Is(err, sandbox.ErrNotFound):
return nil, &api.APIError{Code: http.StatusNotFound, ClientMsg: utils.SandboxNotFoundMsg(sandboxID), Err: err}
case errors.Is(err, errMaxInstanceLengthExceeded):
return nil, &api.APIError{Code: http.StatusBadRequest, ClientMsg: "Max instance length exceeded", Err: err}
Expand Down
4 changes: 2 additions & 2 deletions packages/api/internal/orchestrator/snapshot_template.go
Original file line number Diff line number Diff line change
Expand Up @@ -39,7 +39,7 @@ func (o *Orchestrator) CreateSnapshotTemplate(ctx context.Context, teamID uuid.U
ctx, span := tracer.Start(ctx, "create-snapshot-template")
defer span.End()

sbx, alreadyDone, finishSnapshotting, err := o.sandboxStore.StartRemoving(ctx, teamID, sandboxID, sandbox.StateActionSnapshot)
sbx, alreadyDone, finishSnapshotting, err := o.sandboxStore.StartRemoving(ctx, teamID, sandboxID, sandbox.RemoveOpts{Action: sandbox.StateActionSnapshot})
if err != nil {
return SnapshotTemplateResult{}, fmt.Errorf("failed to start snapshotting: %w", err)
}
Expand Down Expand Up @@ -97,7 +97,7 @@ func (o *Orchestrator) CreateSnapshotTemplate(ctx context.Context, teamID uuid.U
// so RemoveSandbox can proceed without deadlock.
finish(err)

if killErr := o.RemoveSandbox(ctx, teamID, sandboxID, sandbox.StateActionKill); killErr != nil {
if killErr := o.RemoveSandbox(ctx, teamID, sandboxID, sandbox.RemoveOpts{Action: sandbox.StateActionKill}); killErr != nil {
telemetry.ReportError(ctx, "error killing sandbox after failed checkpoint", killErr)
}

Expand Down
10 changes: 3 additions & 7 deletions packages/api/internal/sandbox/errors.go
Original file line number Diff line number Diff line change
Expand Up @@ -15,13 +15,7 @@ func (e *LimitExceededError) Error() string {
return fmt.Sprintf("team %s has exceeded the limit", e.TeamID.String())
}

type NotFoundError struct {
SandboxID string
}

func (e *NotFoundError) Error() string {
return fmt.Sprintf("sandbox %s not found", e.SandboxID)
}
// ErrNotFound is the sentinel error for a sandbox that is not present in the
// store, or that is present but belongs to a different team than the one
// requested. Stores wrap it with the sandbox ID using %w, so callers must match
// it with errors.Is rather than a direct comparison.
var ErrNotFound = errors.New("sandbox not found")

type InvalidStateTransitionError struct {
CurrentState State
Expand All @@ -42,3 +36,5 @@ func (e *NotRunningError) Error() string {
}

var ErrAlreadyExists = errors.New("sandbox already exists")

// ErrNotEvictable is returned for an eviction-initiated removal that must be
// skipped. The in-memory store reports it when the sandbox already has a
// transition in progress, or when the sandbox is not expired at the time the
// check is made under the sandbox lock (e.g. after a concurrent keep-alive).
var ErrNotEvictable = errors.New("sandbox is not expirable")
6 changes: 6 additions & 0 deletions packages/api/internal/sandbox/states.go
Original file line number Diff line number Diff line change
Expand Up @@ -42,6 +42,12 @@ var (
}
)

// RemoveOpts bundles the parameters that control sandbox removal.
type RemoveOpts struct {
// Action is the state action to apply (for example StateActionKill,
// StateActionPause or StateActionSnapshot).
Action StateAction
// Eviction marks the removal as initiated by the evictor rather than by
// an explicit request. When set, the in-memory store refuses the removal
// with ErrNotEvictable if a transition is already in progress or the
// sandbox is not expired, and Orchestrator.RemoveSandbox returns
// StartRemoving errors to the caller unchanged.
Eviction bool
}

var AllowedTransitions = map[State]map[State]bool{
StateRunning: {StatePausing: true, StateKilling: true, StateSnapshotting: true},
StatePausing: {StateKilling: true},
Expand Down
46 changes: 32 additions & 14 deletions packages/api/internal/sandbox/storage/memory/operations.go
Original file line number Diff line number Diff line change
Expand Up @@ -43,12 +43,12 @@ func (s *Storage) get(sandboxID string) (*memorySandbox, error) {
func (s *Storage) Get(_ context.Context, teamID uuid.UUID, sandboxID string) (sandbox.Sandbox, error) {
item, ok := s.items.Get(sandboxID)
if !ok {
return sandbox.Sandbox{}, &sandbox.NotFoundError{SandboxID: sandboxID}
return sandbox.Sandbox{}, fmt.Errorf("sandbox %q: %w", sandboxID, sandbox.ErrNotFound)
}

data := item.Data()
if data.TeamID != teamID {
return sandbox.Sandbox{}, &sandbox.NotFoundError{SandboxID: sandboxID}
return sandbox.Sandbox{}, fmt.Errorf("sandbox %q: %w", sandboxID, sandbox.ErrNotFound)
}

return data, nil
Expand Down Expand Up @@ -115,14 +115,14 @@ func (s *Storage) ExpiredItems(_ context.Context) ([]sandbox.Sandbox, error) {
func (s *Storage) Update(_ context.Context, teamID uuid.UUID, sandboxID string, updateFunc func(sandbox.Sandbox) (sandbox.Sandbox, error)) (sandbox.Sandbox, error) {
item, ok := s.items.Get(sandboxID)
if !ok {
return sandbox.Sandbox{}, &sandbox.NotFoundError{SandboxID: sandboxID}
return sandbox.Sandbox{}, fmt.Errorf("sandbox %q: %w", sandboxID, sandbox.ErrNotFound)
}

item.mu.Lock()
defer item.mu.Unlock()

if item._data.TeamID != teamID {
return sandbox.Sandbox{}, &sandbox.NotFoundError{SandboxID: sandboxID}
return sandbox.Sandbox{}, fmt.Errorf("sandbox %q: %w", sandboxID, sandbox.ErrNotFound)
}

sbx, err := updateFunc(item._data)
Expand All @@ -135,27 +135,45 @@ func (s *Storage) Update(_ context.Context, teamID uuid.UUID, sandboxID string,
return sbx, nil
}

func (s *Storage) StartRemoving(ctx context.Context, teamID uuid.UUID, sandboxID string, stateAction sandbox.StateAction) (sandbox.Sandbox, bool, func(context.Context, error), error) {
func (s *Storage) StartRemoving(ctx context.Context, teamID uuid.UUID, sandboxID string, opts sandbox.RemoveOpts) (sandbox.Sandbox, bool, func(context.Context, error), error) {
sbx, err := s.get(sandboxID)
if err != nil {
return sandbox.Sandbox{}, false, nil, &sandbox.NotFoundError{SandboxID: sandboxID}
return sandbox.Sandbox{}, false, nil, fmt.Errorf("sandbox %q: %w", sandboxID, sandbox.ErrNotFound)
}

data := sbx.Data()
if data.TeamID != teamID {
return sandbox.Sandbox{}, false, nil, &sandbox.NotFoundError{SandboxID: sandboxID}
return sandbox.Sandbox{}, false, nil, fmt.Errorf("sandbox %q: %w", sandboxID, sandbox.ErrNotFound)
}

alreadyDone, callback, err := startRemoving(ctx, sbx, stateAction)
alreadyDone, callback, err := startRemoving(ctx, sbx, opts)

return sbx.Data(), alreadyDone, callback, err
}

func startRemoving(ctx context.Context, sbx *memorySandbox, stateAction sandbox.StateAction) (alreadyDone bool, callback func(ctx context.Context, err error), err error) {
newState := stateAction.TargetState

func startRemoving(ctx context.Context, sbx *memorySandbox, opts sandbox.RemoveOpts) (alreadyDone bool, callback func(ctx context.Context, err error), err error) {
sbx.mu.Lock()
transition := sbx.transition

// Resolve eviction under the lock + re-check expiry
if opts.Eviction {
// If there's a transition already in place, don't evict.
if transition != nil {
sbx.mu.Unlock()

return false, nil, sandbox.ErrNotEvictable
}

// If sandbox isn't expired (e.g. race condition with KeepAliveFor), skip.
if !sbx._data.IsExpired(time.Now()) {
sbx.mu.Unlock()

return false, nil, sandbox.ErrNotEvictable
}
}

newState := opts.Action.TargetState

if transition != nil {
currentState := sbx._data.State
sbx.mu.Unlock()
Expand All @@ -175,7 +193,7 @@ func startRemoving(ctx context.Context, sbx *memorySandbox, stateAction sandbox.
case currentState == newState:
return true, func(context.Context, error) {}, nil
case sandbox.AllowedTransitions[currentState][newState]:
return startRemoving(ctx, sbx, stateAction)
return startRemoving(ctx, sbx, sandbox.RemoveOpts{Action: opts.Action})
default:
return false, nil, fmt.Errorf("unexpected state transition")
}
Expand All @@ -192,7 +210,7 @@ func startRemoving(ctx context.Context, sbx *memorySandbox, stateAction sandbox.
return false, nil, &sandbox.InvalidStateTransitionError{CurrentState: sbx._data.State, TargetState: newState}
}

if stateAction.Effect == sandbox.TransitionExpires {
if opts.Action.Effect == sandbox.TransitionExpires {
sbx.setExpired()
}

Expand All @@ -204,7 +222,7 @@ func startRemoving(ctx context.Context, sbx *memorySandbox, stateAction sandbox.
sbx.mu.Lock()
defer sbx.mu.Unlock()

if stateAction.Effect == sandbox.TransitionTransient {
if opts.Action.Effect == sandbox.TransitionTransient {
if err == nil && sbx._data.State == newState {
sbx._data.State = sandbox.StateRunning
}
Expand Down
Loading
Loading