Skip to content
Open
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
25 changes: 25 additions & 0 deletions packages/api/internal/orchestrator/delete_instance.go
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,11 @@ func (o *Orchestrator) RemoveSandbox(ctx context.Context, teamID uuid.UUID, sand

sbx, alreadyDone, finish, err := o.sandboxStore.StartRemoving(ctx, teamID, sandboxID, stateAction)
if err != nil {
if errors.Is(err, sandbox.ErrNotExpirable) {
// Propagate to evictor
return err
}

switch stateAction {
case sandbox.StateActionKill:
var notFoundErr *sandbox.NotFoundError
Expand Down Expand Up @@ -65,6 +70,14 @@ func (o *Orchestrator) RemoveSandbox(ctx context.Context, teamID uuid.UUID, sand

return ErrSandboxOperationFailed
default:
// StateActionEvict errors (other than ErrNotExpirable handled above)
var notFoundErr *sandbox.NotFoundError
if errors.As(err, &notFoundErr) {
logger.L().Debug(ctx, "Eviction skipped: sandbox already removed", logger.WithSandboxID(sandboxID))

return nil
}

logger.L().Error(ctx, "Invalid state action", logger.WithSandboxID(sandboxID), zap.String("state_action", stateAction.Name))

return ErrSandboxOperationFailed
Expand All @@ -74,6 +87,18 @@ func (o *Orchestrator) RemoveSandbox(ctx context.Context, teamID uuid.UUID, sand
finish(ctx, err)
}()

// Resolve the actual action from the sandbox state when evicting.
// StartRemoving already resolved Evict to Kill or Pause internally,
// so we can determine which one from the resulting sandbox state.
if stateAction == sandbox.StateActionEvict {
switch sbx.State {
case sandbox.StatePausing:
stateAction = sandbox.StateActionPause
default:
stateAction = sandbox.StateActionKill
}
}

if alreadyDone {
logger.L().Info(ctx, "Sandbox was already in the process of being removed", logger.WithSandboxID(sandboxID), zap.String("state", string(sbx.State)))

Expand Down
15 changes: 8 additions & 7 deletions packages/api/internal/orchestrator/evictor/evict.go
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@ package evictor

import (
"context"
"errors"
"time"

"github.com/google/uuid"
Expand Down Expand Up @@ -53,16 +54,16 @@ func (e *Evictor) Start(ctx context.Context) {

for _, item := range sbxs {
g.Go(func() error {
stateAction := sandbox.StateActionKill
if item.AutoPause {
stateAction = sandbox.StateActionPause
}
if err := e.removeSandbox(context.WithoutCancel(ctx), item.TeamID, item.SandboxID, sandbox.StateActionEvict); err != nil {
if !errors.Is(err, sandbox.ErrNotExpirable) {
logger.L().Debug(ctx, "Evicting sandbox failed", zap.Error(err), logger.WithSandboxID(item.SandboxID))
}

logger.L().Debug(ctx, "Evicting sandbox", logger.WithSandboxID(item.SandboxID), zap.String("state_action", stateAction.Name))
if err := e.removeSandbox(context.WithoutCancel(ctx), item.TeamID, item.SandboxID, stateAction); err != nil {
logger.L().Debug(ctx, "Evicting sandbox failed", zap.Error(err), logger.WithSandboxID(item.SandboxID))
return nil
}

logger.L().Debug(ctx, "Sandbox evicted", logger.WithSandboxID(item.SandboxID))

return nil
})
}
Expand Down
2 changes: 2 additions & 0 deletions packages/api/internal/sandbox/errors.go
Original file line number Diff line number Diff line change
Expand Up @@ -42,3 +42,5 @@ func (e *NotRunningError) Error() string {
}

// ErrAlreadyExists is a sentinel error reporting that a sandbox already exists.
// Compare against it with errors.Is.
var ErrAlreadyExists = errors.New("sandbox already exists")

// ErrNotExpirable is a sentinel error returned by StartRemoving when an
// eviction request (StateActionEvict) must be skipped: either a transition is
// already in progress for the sandbox, or the sandbox is not expired at the
// time the check runs. Callers are expected to detect it with errors.Is and
// treat it as "nothing to do" rather than as a failure.
var ErrNotExpirable = errors.New("sandbox is not expirable")
5 changes: 5 additions & 0 deletions packages/api/internal/sandbox/states.go
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,11 @@ var (
TargetState: StateSnapshotting,
Effect: TransitionTransient,
}
// StateActionEvict is a marker action used by the evictor.
// It is resolved to StateActionKill or StateActionPause inside StartRemoving,
// based on the sandbox's AutoPause setting, after re-checking that the sandbox
// is expired and has no transition in progress.
// Only Name is set here; TargetState is left at its zero value, so the target
// state must be read after the action has been resolved.
StateActionEvict = StateAction{
Name: "evict",
}
)

var AllowedTransitions = map[State]map[State]bool{
Expand Down
28 changes: 26 additions & 2 deletions packages/api/internal/sandbox/storage/memory/operations.go
Original file line number Diff line number Diff line change
Expand Up @@ -152,10 +152,34 @@ func (s *Storage) StartRemoving(ctx context.Context, teamID uuid.UUID, sandboxID
}

func startRemoving(ctx context.Context, sbx *memorySandbox, stateAction sandbox.StateAction) (alreadyDone bool, callback func(ctx context.Context, err error), err error) {
newState := stateAction.TargetState

sbx.mu.Lock()
transition := sbx.transition

// Resolve StateActionEvict under the lock: re-check expiry and pick Kill or Pause.
if stateAction == sandbox.StateActionEvict {
// If there's a transition already in place, don't evict.
if transition != nil {
sbx.mu.Unlock()

return false, nil, sandbox.ErrNotExpirable
}

// If sandbox isn't expired (e.g. race condition with KeepAliveFor), skip.
if !sbx._data.IsExpired(time.Now()) {
sbx.mu.Unlock()

return false, nil, sandbox.ErrNotExpirable
}

if sbx._data.AutoPause {
stateAction = sandbox.StateActionPause
} else {
stateAction = sandbox.StateActionKill
}
}

newState := stateAction.TargetState

if transition != nil {
currentState := sbx._data.State
sbx.mu.Unlock()
Expand Down
23 changes: 21 additions & 2 deletions packages/api/internal/sandbox/storage/redis/state_change.go
Original file line number Diff line number Diff line change
Expand Up @@ -32,8 +32,6 @@ import (
// The callback is critical: it deletes the transition key
// and sets the result value with short TTL to notify waiters of the outcome.
func (s *Storage) StartRemoving(ctx context.Context, teamID uuid.UUID, sandboxID string, stateAction sandbox.StateAction) (sandbox.Sandbox, bool, func(context.Context, error), error) {
newState := stateAction.TargetState

key := getSandboxKey(teamID.String(), sandboxID)
transitionKey := getTransitionKey(teamID.String(), sandboxID)

Expand Down Expand Up @@ -75,6 +73,27 @@ func (s *Storage) StartRemoving(ctx context.Context, teamID uuid.UUID, sandboxID
return sbx, false, nil, fmt.Errorf("failed to check transition key: %w", err)
}

// Resolve StateActionEvict under the distributed lock: re-check expiry and pick Kill or Pause.
if stateAction == sandbox.StateActionEvict {
// if there's a transition already in place, don't do anything
if transactionID != "" {
return sbx, false, nil, sandbox.ErrNotExpirable
}

// if sandbox isn't expired (e.g. race condition with SetTimeout), skip
if !sbx.IsExpired(time.Now()) {
return sbx, false, nil, sandbox.ErrNotExpirable
}

if sbx.AutoPause {
stateAction = sandbox.StateActionPause
} else {
stateAction = sandbox.StateActionKill
}
}

newState := stateAction.TargetState

if transactionID != "" {
releaseErr := releaseFunc()
if releaseErr != nil {
Expand Down
Loading