datagravity-ai · knechtionscoding · Apr 27, 2026 · Apr 27, 2026 · Apr 28, 2026 · Apr 28, 2026
diff --git a/.github/workflows/ci.yaml b/.github/workflows/ci.yaml
@@ -135,30 +135,35 @@ jobs:
               return
             }
 
-            const { data: runs } = await github.rest.actions.listWorkflowRunsForRepo({
-              owner: context.repo.owner,
-              repo: context.repo.repo,
-              event: process.env.WORKFLOW_EVENT,
-              head_sha: targetSha,
-              per_page: 100,
-            })
-
-            const latestRun = runs.workflow_runs
-              .filter((run) => run.name === process.env.WORKFLOW_NAME)
-              .sort((left, right) => {
-                const createdDiff = Date.parse(right.created_at) - Date.parse(left.created_at)
-                if (createdDiff !== 0) return createdDiff
-                if (left.id !== right.id) return right.id - left.id
-                return (right.run_attempt || 1) - (left.run_attempt || 1)
-              })[0]
-
-            if (!latestRun) {
-              core.info(`No ${process.env.WORKFLOW_NAME} workflow runs found for ${targetSha}`)
-              return
+            // The workflow-runs listing is advisory — used only to detect a
+            // strictly newer run that has superseded us. The listing API is
+            // eventually consistent and may not yet include the in-flight
+            // current run, so an empty or stale listing must not block the
+            // status write; otherwise pr-e2e gets stuck on pending (see #1029).
+            let supersededBy = null
+            try {
+              const { data: runs } = await github.rest.actions.listWorkflowRunsForRepo({
+                owner: context.repo.owner,
+                repo: context.repo.repo,
+                event: process.env.WORKFLOW_EVENT,
+                head_sha: targetSha,
+                per_page: 100,
+              })
+
+              supersededBy = runs.workflow_runs
+                .filter((run) => run.name === process.env.WORKFLOW_NAME)
+                .find((run) => {
+                  if (run.id === currentRunId) {
+                    return (run.run_attempt || 1) > currentRunAttempt
+                  }
+                  return run.id > currentRunId
+                })
+            } catch (err) {
+              core.warning(`Failed to list workflow runs for ${targetSha} (proceeding with status update): ${err.message}`)
             }
 
-            if (latestRun.id !== currentRunId || (latestRun.run_attempt || 1) !== currentRunAttempt) {
-              core.info(`Skipping stale status update from run ${currentRunId} attempt ${currentRunAttempt}`)
+            if (supersededBy) {
+              core.info(`Skipping stale status update from run ${currentRunId} attempt ${currentRunAttempt}; superseded by run ${supersededBy.id} attempt ${supersededBy.run_attempt || 1}`)
               return
             }
 

diff --git a/README.md b/README.md
@@ -13,41 +13,38 @@
 <p align="center">
   <a href="#quick-start">Quick Start</a> &middot;
   <a href="#kelos-skill">Kelos Skill</a> &middot;
-  <a href="#kelos-developing-kelos">Kelos Developing Kelos</a> &middot;
   <a href="#examples">Examples</a> &middot;
-  <a href="docs/integration.md">Integration</a> &middot;
+  <a href="#integration">Integration</a> &middot;
   <a href="docs/reference.md">Reference</a> &middot;
   <a href="examples/">YAML Manifests</a>
 </p>
 
-Kelos lets you **define your development workflow as Kubernetes resources** and run it continuously. Declare what triggers agents, what they do, and how they hand off — Kelos handles the rest.
+Kelos is a Kubernetes-native framework for AI coding agents. It does two things:
 
-Kelos develops Kelos through TaskSpawners running 24/7: triaging issues, planning implementations, fixing bugs, responding to PR feedback, reviewing code, squashing commits, updating agent images, testing DX, brainstorming improvements, and tuning their own prompts and configs. [See the full pipeline below.](#kelos-developing-kelos)
+1. **Defines the agent and the environment it runs in as one unit** — the prompt, model, instructions, plugins, MCP servers, git workspace, credentials, and Pod resources all live together as Kubernetes resources you can version-control.
+2. **Defines how agents integrate with your workflows** — trigger runs from GitHub issues, PRs, webhooks, Linear, Jira, schedules, or any HTTP source, and chain agents into pipelines.
 
 Supports **Claude Code**, **OpenAI Codex**, **Google Gemini**, **OpenCode**, **Cursor**, and [custom agent images](docs/agent-image-interface.md).
 
 ## How It Works
 
-Kelos orchestrates the flow from external events to autonomous execution:
-
 <img width="2310" height="1582" alt="kelos-resources" src="https://github.com/user-attachments/assets/a03c388e-cc28-4a25-972f-e0e506b4d583" />
 
 You define what needs to be done, and Kelos handles the "how" — from cloning the right repo and injecting credentials to running the agent and capturing its outputs (branch names, commit SHAs, PR URLs, and token usage).
 
 ### Core Primitives
 
-Kelos is built on four resources:
+Kelos is built on four resources, grouped by the two concerns above:
 
-1. **Tasks** — Ephemeral units of work that wrap an AI agent run.
-2. **Workspaces** — Persistent or ephemeral environments (git repos) where agents operate.
-3. **AgentConfigs** — Reusable bundles of agent instructions (`AGENTS.md`, `CLAUDE.md`), plugins (skills and agents), and MCP servers.
-4. **TaskSpawners** — Orchestration engines that react to external triggers (GitHub, Cron) to automatically manage agent lifecycles.
+**Defining the agent and its environment**
 
-## Kelos Developing Kelos
+- **Tasks** — A single agent run: prompt, model, credentials, and Pod-level overrides.
+- **Workspaces** — The git repository (URL, ref, auth) the agent operates in.
+- **AgentConfigs** — Reusable bundles of agent instructions (`AGENTS.md`, `CLAUDE.md`), plugins (skills and agents), and MCP servers.
 
-Kelos develops itself. TaskSpawners run 24/7, each handling a different part of the development lifecycle — fully autonomous.
+**Integrating with workflows**
 
-See the [`self-development/` README](self-development/README.md) for the full pipeline: manifests, triggers, models, and setup instructions.
+- **TaskSpawners** — React to external triggers (GitHub Issues, GitHub Pull Requests, GitHub Webhooks, Linear Webhooks, Jira, Cron schedules, and Generic Webhooks) and create Tasks automatically.
 
 ## Why Kelos?
 
@@ -454,7 +451,7 @@ See the [full AgentConfig spec](docs/reference.md#agentconfig) for plugins, skil
 
 Kelos integrates with external systems in two ways:
 
-**TaskSpawner** — Kelos natively watches external sources and automatically creates Tasks. Supports GitHub Issues, GitHub Pull Requests, GitHub Webhooks, Jira, and Cron schedules. No glue code needed.
+**TaskSpawner** — Kelos natively watches external sources and automatically creates Tasks. Supports GitHub Issues, GitHub Pull Requests, GitHub Webhooks, Linear Webhooks, Jira, Cron schedules, and Generic Webhooks (for arbitrary HTTP POST sources like Sentry, Notion, or Slack). No glue code needed.
 
 ```yaml
 spec:
@@ -474,12 +471,18 @@ See the [Integration guide](docs/integration.md) for examples of both approaches
 
 ## Orchestration Patterns
 
-- **Autonomous Self-Development** — Build a feedback loop where agents pick up issues, write code, self-review, and fix CI flakes until the task is complete. See the [self-development pipeline](#kelos-developing-kelos).
+- **Autonomous Self-Development** — Build a feedback loop where agents pick up issues, write code, self-review, and fix CI flakes until the task is complete. Kelos itself is developed this way — see [Case Study: Kelos Developing Kelos](#case-study-kelos-developing-kelos) below.
 - **Event-Driven Bug Fixing** — Automatically spawn agents to investigate and fix bugs as soon as they are labeled in GitHub. See [Auto-fix GitHub issues](#auto-fix-github-issues-with-taskspawner).
 - **Fleet-Wide Refactoring** — Orchestrate a "fan-out" where dozens of agents apply the same refactoring pattern across a fleet of microservices in parallel.
 - **Hands-Free CI/CD** — Embed agents as first-class steps in your deployment pipelines to generate documentation or perform automated migrations.
 - **AI Worker Pools** — Maintain a pool of specialized agents (e.g., "The Security Fixer") that developers can trigger via simple Kubernetes resources.
 
+## Case Study: Kelos Developing Kelos
+
+Kelos develops Kelos. TaskSpawners run 24/7, each handling a different part of the development lifecycle — triaging issues, planning implementations, fixing bugs, responding to PR feedback, reviewing code, squashing commits, updating agent images, testing DX, brainstorming improvements, and tuning their own prompts and configs.
+
+See the [`self-development/` README](self-development/README.md) for the full pipeline: manifests, triggers, models, and setup instructions.
+
 ## Reference
 
 | Resource | Key Fields | Full Spec |

diff --git a/api/v1alpha1/task_types.go b/api/v1alpha1/task_types.go
@@ -86,9 +86,38 @@ type PodOverrides struct {
 	// Workload Identity, or Azure Workload Identity.
 	// +optional
 	ServiceAccountName string `json:"serviceAccountName,omitempty"`
+
+	// Volumes is a list of additional volumes to attach to the agent pod.
+	// User-supplied volume names must not collide with Kelos-reserved
+	// names ("workspace", "kelos-plugin").
+	// +optional
+	Volumes []corev1.Volume `json:"volumes,omitempty"`
+
+	// VolumeMounts is a list of additional volume mounts to add to the
+	// agent container. Names must reference either a user-supplied volume
+	// from Volumes or a Kelos-managed volume ("workspace", "kelos-plugin").
+	// Init containers are not exposed via this field.
+	// +optional
+	VolumeMounts []corev1.VolumeMount `json:"volumeMounts,omitempty"`
+
+	// PodSecurityContext is applied to the agent pod. Fields set here
+	// override Kelos defaults; fields left unset retain Kelos defaults
+	// (in particular, FSGroup is retained when a workspace is mounted so
+	// the agent user keeps read/write access to the workspace volume).
+	// +optional
+	PodSecurityContext *corev1.PodSecurityContext `json:"podSecurityContext,omitempty"`
+
+	// ContainerSecurityContext is applied to the agent container. Use
+	// this to declare allowPrivilegeEscalation=false, capabilities.drop=[ALL],
+	// readOnlyRootFilesystem=true, etc., so the spawned pod is admitted to a
+	// namespace enforcing the Pod Security Standards "restricted" profile.
+	// +optional
+	ContainerSecurityContext *corev1.SecurityContext `json:"containerSecurityContext,omitempty"`
 }
 
 // TaskSpec defines the desired state of Task.
+//
+// +kubebuilder:validation:XValidation:rule="!(has(self.agentConfigRef) && has(self.agentConfigRefs))",message="agentConfigRef and agentConfigRefs are mutually exclusive"
 type TaskSpec struct {
 	// Type specifies the agent type (e.g., claude-code).
 	// +kubebuilder:validation:Required
@@ -122,6 +151,14 @@ type TaskSpec struct {
 	// +optional
 	AgentConfigRef *AgentConfigReference `json:"agentConfigRef,omitempty"`
 
+	// AgentConfigRefs references an ordered list of AgentConfig resources.
+	// Configs are merged in order: agentsMD is concatenated, plugins/skills
+	// are appended, mcpServers are appended with later entries winning on
+	// name collision. Mutually exclusive with AgentConfigRef.
+	// +optional
+	// +kubebuilder:validation:MinItems=1
+	AgentConfigRefs []AgentConfigReference `json:"agentConfigRefs,omitempty"`
+
 	// DependsOn lists Task names that must succeed before this Task starts.
 	// +optional
 	DependsOn []string `json:"dependsOn,omitempty"`

diff --git a/api/v1alpha1/taskspawner_types.go b/api/v1alpha1/taskspawner_types.go
@@ -533,6 +533,8 @@ type TaskTemplateMetadata struct {
 }
 
 // TaskTemplate defines the template for spawned Tasks.
+//
+// +kubebuilder:validation:XValidation:rule="!(has(self.agentConfigRef) && has(self.agentConfigRefs))",message="agentConfigRef and agentConfigRefs are mutually exclusive"
 type TaskTemplate struct {
 	// Type specifies the agent type (e.g., claude-code).
 	// +kubebuilder:validation:Required
@@ -566,6 +568,15 @@ type TaskTemplate struct {
 	// +optional
 	AgentConfigRef *AgentConfigReference `json:"agentConfigRef,omitempty"`
 
+	// AgentConfigRefs references an ordered list of AgentConfig resources.
+	// Configs are merged in order: agentsMD is concatenated, plugins/skills
+	// are appended, mcpServers are appended with later entries winning on
+	// name collision. Mutually exclusive with AgentConfigRef.
+	// When set, spawned Tasks inherit this agent config reference list.
+	// +optional
+	// +kubebuilder:validation:MinItems=1
+	AgentConfigRefs []AgentConfigReference `json:"agentConfigRefs,omitempty"`
+
 	// DependsOn lists Task names that spawned Tasks depend on.
 	// +optional
 	DependsOn []string `json:"dependsOn,omitempty"`

diff --git a/api/v1alpha1/zz_generated.deepcopy.go b/api/v1alpha1/zz_generated.deepcopy.go
diff --git a/claude-code/Dockerfile b/claude-code/Dockerfile
@@ -27,7 +27,7 @@ RUN ARCH=$(dpkg --print-architecture) \
 
 ENV PATH="/usr/local/go/bin:${PATH}"
 
-ARG CLAUDE_CODE_VERSION=2.1.119
+ARG CLAUDE_CODE_VERSION=2.1.121
 RUN npm install -g @anthropic-ai/claude-code@${CLAUDE_CODE_VERSION}
 
 COPY claude-code/kelos_entrypoint.sh /kelos_entrypoint.sh

diff --git a/cmd/kelos-spawner/main_test.go b/cmd/kelos-spawner/main_test.go
@@ -637,6 +637,47 @@ func TestRunCycleWithSource_AgentConfigRefForwarded(t *testing.T) {
 	}
 }
 
+// TestRunCycleWithSource_AgentConfigRefsForwarded checks that the ordered
+// AgentConfigRefs on the task template reach the spawned Task unchanged.
+func TestRunCycleWithSource_AgentConfigRefsForwarded(t *testing.T) {
+	ctx := context.Background()
+	spawner := newTaskSpawner("spawner", "default", nil)
+	spawner.Spec.TaskTemplate.AgentConfigRefs = []kelosv1alpha1.AgentConfigReference{
+		{Name: "base-config"},
+		{Name: "role-config"},
+	}
+	cl, key := setupTest(t, spawner)
+
+	workItems := &fakeSource{items: []source.WorkItem{{ID: "1", Title: "Item 1"}}}
+	if err := runCycleWithSource(ctx, cl, key, workItems); err != nil {
+		t.Fatalf("Unexpected error: %v", err)
+	}
+
+	var created kelosv1alpha1.TaskList
+	if err := cl.List(ctx, &created, client.InNamespace("default")); err != nil {
+		t.Fatalf("Listing tasks: %v", err)
+	}
+	if got := len(created.Items); got != 1 {
+		t.Fatalf("Expected 1 task, got %d", got)
+	}
+
+	spec := created.Items[0].Spec
+	if spec.AgentConfigRef != nil {
+		t.Error("Expected AgentConfigRef to be nil when AgentConfigRefs is used")
+	}
+	// The list is ordered, so each position is checked individually.
+	refs := spec.AgentConfigRefs
+	if got := len(refs); got != 2 {
+		t.Fatalf("Expected 2 AgentConfigRefs, got %d", got)
+	}
+	if got := refs[0].Name; got != "base-config" {
+		t.Errorf("Expected AgentConfigRefs[0].Name %q, got %q", "base-config", got)
+	}
+	if got := refs[1].Name; got != "role-config" {
+		t.Errorf("Expected AgentConfigRefs[1].Name %q, got %q", "role-config", got)
+	}
+}
+
 func TestRunCycleWithSource_PodOverridesForwarded(t *testing.T) {
 	ts := newTaskSpawner("spawner", "default", nil)
 	ts.Spec.TaskTemplate.PodOverrides = &kelosv1alpha1.PodOverrides{