Skip to content
Closed

Main #115

Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
31 commits
Select commit Hold shift + click to select a range
f980086
Show effective poll interval in kelos get taskspawner
gjkim42 Apr 27, 2026
5e170f1
Expand ~ in @file paths for resolveContent
gjkim42 Apr 27, 2026
fa227bd
Merge pull request #1023 from kelos-dev/kelos-task-986
gjkim42 Apr 28, 2026
9235d0f
Update claude-code image to 2.1.121
gjkim42 Apr 28, 2026
1020243
Update opencode image to 1.14.28
gjkim42 Apr 28, 2026
03a6a87
Add configurable service type for webhook Services
gjkim42 Apr 28, 2026
e4fc46b
Merge pull request #1027 from kelos-dev/kelos-task-1025
gjkim42 Apr 28, 2026
947612e
Merge pull request #1022 from kelos-dev/kelos-task-1018
gjkim42 Apr 28, 2026
d1c0a10
Unset CLAUDE_CODE_EFFORT_LEVEL in self-development workflows
gjkim42 Apr 28, 2026
b7823d2
Merge pull request #1034 from kelos-dev/unset-max-effort
gjkim42 Apr 28, 2026
a5b424e
Align kelos-api-reviewer AgentConfig and prompt with kelos-reviewer
gjkim42 Apr 28, 2026
ac4ee32
Merge pull request #1030 from kelos-dev/update-claude-code-image-2.1.121
gjkim42 Apr 28, 2026
56bf5ab
Merge pull request #1031 from kelos-dev/update-opencode-image-1.14.28
gjkim42 Apr 28, 2026
b00db22
Merge pull request #1036 from kelos-dev/kelos-task-1032
gjkim42 Apr 28, 2026
7a7f619
Always update pr-e2e status when test-e2e completes
gjkim42 Apr 28, 2026
6445dab
Merge pull request #1038 from kelos-dev/kelos-task-1029
gjkim42 Apr 28, 2026
7f5bf8d
Document GenericWebhook TaskSpawner source
gjkim42 Apr 28, 2026
92104a0
docs: Correct GenericWebhook security claims and link follow-up
gjkim42 Apr 28, 2026
1860165
Expand PodOverrides with volumes, volumeMounts, and securityContext
JustinElst Apr 28, 2026
3a4f047
feat(api): add agentConfigRefs for composable multi-layer AgentConfig
knechtionscoding Apr 29, 2026
71d57d9
Merge pull request #1035 from kelos-dev/kelos-task-1033
gjkim42 Apr 29, 2026
f260417
Merge pull request #1014 from datagravity-ai/feat/list-of-agent-configs
gjkim42 Apr 29, 2026
6010dd1
Fix template render errors when optional GitHub keys are absent
gjkim42 Apr 29, 2026
b7954c0
docs: Refocus README on agent+environment definition and workflow int…
gjkim42 Apr 29, 2026
5393ed8
Merge pull request #1053 from kelos-dev/fix-template-missingkey-branch
gjkim42 Apr 29, 2026
b38892a
Merge pull request #1052 from kelos-dev/update-readme
gjkim42 Apr 29, 2026
21b6262
Remove actor/kelos label requirement from kelos-workers
gjkim42 Apr 29, 2026
f8e4ab1
Add PR author to generated release notes
gjkim42 Apr 29, 2026
1425fcf
Merge pull request #1054 from kelos-dev/loosen-worker-filter
gjkim42 Apr 29, 2026
c979962
Merge pull request #1055 from kelos-dev/add-author-to-release-note
gjkim42 Apr 29, 2026
a36f1cd
Merge pull request #1041 from JustinElst/feat/pod-overrides-expansion
gjkim42 Apr 29, 2026
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
49 changes: 27 additions & 22 deletions .github/workflows/ci.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -135,30 +135,35 @@ jobs:
return
}

const { data: runs } = await github.rest.actions.listWorkflowRunsForRepo({
owner: context.repo.owner,
repo: context.repo.repo,
event: process.env.WORKFLOW_EVENT,
head_sha: targetSha,
per_page: 100,
})

const latestRun = runs.workflow_runs
.filter((run) => run.name === process.env.WORKFLOW_NAME)
.sort((left, right) => {
const createdDiff = Date.parse(right.created_at) - Date.parse(left.created_at)
if (createdDiff !== 0) return createdDiff
if (left.id !== right.id) return right.id - left.id
return (right.run_attempt || 1) - (left.run_attempt || 1)
})[0]

if (!latestRun) {
core.info(`No ${process.env.WORKFLOW_NAME} workflow runs found for ${targetSha}`)
return
// The workflow-runs listing is advisory — used only to detect a
// strictly newer run that has superseded us. The listing API is
// eventually consistent and may not yet include the in-flight
// current run, so an empty or stale listing must not block the
// status write or pr-e2e gets stuck on pending (see #1029).
let supersededBy = null
try {
const { data: runs } = await github.rest.actions.listWorkflowRunsForRepo({
owner: context.repo.owner,
repo: context.repo.repo,
event: process.env.WORKFLOW_EVENT,
head_sha: targetSha,
per_page: 100,
})

supersededBy = runs.workflow_runs
.filter((run) => run.name === process.env.WORKFLOW_NAME)
.find((run) => {
if (run.id === currentRunId) {
return (run.run_attempt || 1) > currentRunAttempt
}
return run.id > currentRunId
})
} catch (err) {
core.warning(`Failed to list workflow runs for ${targetSha} (proceeding with status update): ${err.message}`)
}

if (latestRun.id !== currentRunId || (latestRun.run_attempt || 1) !== currentRunAttempt) {
core.info(`Skipping stale status update from run ${currentRunId} attempt ${currentRunAttempt}`)
if (supersededBy) {
core.info(`Skipping stale status update from run ${currentRunId} attempt ${currentRunAttempt}; superseded by run ${supersededBy.id} attempt ${supersededBy.run_attempt || 1}`)
return
}

Expand Down
35 changes: 19 additions & 16 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -13,41 +13,38 @@
<p align="center">
<a href="#quick-start">Quick Start</a> &middot;
<a href="#kelos-skill">Kelos Skill</a> &middot;
<a href="#kelos-developing-kelos">Kelos Developing Kelos</a> &middot;
<a href="#examples">Examples</a> &middot;
<a href="docs/integration.md">Integration</a> &middot;
<a href="#integration">Integration</a> &middot;
<a href="docs/reference.md">Reference</a> &middot;
<a href="examples/">YAML Manifests</a>
</p>

Kelos lets you **define your development workflow as Kubernetes resources** and run it continuously. Declare what triggers agents, what they do, and how they hand off — Kelos handles the rest.
Kelos is a Kubernetes-native framework for AI coding agents. It does two things:

Kelos develops Kelos through TaskSpawners running 24/7: triaging issues, planning implementations, fixing bugs, responding to PR feedback, reviewing code, squashing commits, updating agent images, testing DX, brainstorming improvements, and tuning their own prompts and configs. [See the full pipeline below.](#kelos-developing-kelos)
1. **Defines the agent and the environment it runs in as one unit** — the prompt, model, instructions, plugins, MCP servers, git workspace, credentials, and Pod resources all live together as Kubernetes resources you can version-control.
2. **Defines how agents integrate with your workflows** — trigger runs from GitHub issues, PRs, webhooks, Linear, Jira, schedules, or any HTTP source, and chain agents into pipelines.

Supports **Claude Code**, **OpenAI Codex**, **Google Gemini**, **OpenCode**, **Cursor**, and [custom agent images](docs/agent-image-interface.md).

## How It Works

Kelos orchestrates the flow from external events to autonomous execution:

<img width="2310" height="1582" alt="kelos-resources" src="https://github.com/user-attachments/assets/a03c388e-cc28-4a25-972f-e0e506b4d583" />

You define what needs to be done, and Kelos handles the "how" — from cloning the right repo and injecting credentials to running the agent and capturing its outputs (branch names, commit SHAs, PR URLs, and token usage).

### Core Primitives

Kelos is built on four resources:
Kelos is built on four resources, grouped by the two concerns above:

1. **Tasks** — Ephemeral units of work that wrap an AI agent run.
2. **Workspaces** — Persistent or ephemeral environments (git repos) where agents operate.
3. **AgentConfigs** — Reusable bundles of agent instructions (`AGENTS.md`, `CLAUDE.md`), plugins (skills and agents), and MCP servers.
4. **TaskSpawners** — Orchestration engines that react to external triggers (GitHub, Cron) to automatically manage agent lifecycles.
**Defining the agent and its environment**

## Kelos Developing Kelos
- **Tasks** — A single agent run: prompt, model, credentials, and Pod-level overrides.
- **Workspaces** — The git repository (URL, ref, auth) the agent operates in.
- **AgentConfigs** — Reusable bundles of agent instructions (`AGENTS.md`, `CLAUDE.md`), plugins (skills and agents), and MCP servers.

Kelos develops itself. TaskSpawners run 24/7, each handling a different part of the development lifecycle — fully autonomous.
**Integrating with workflows**

See the [`self-development/` README](self-development/README.md) for the full pipeline: manifests, triggers, models, and setup instructions.
- **TaskSpawners** — React to external triggers (GitHub Issues/PRs, webhooks, Linear, Jira, Cron, Generic Webhooks) and create Tasks automatically.

## Why Kelos?

Expand Down Expand Up @@ -454,7 +451,7 @@ See the [full AgentConfig spec](docs/reference.md#agentconfig) for plugins, skil

Kelos integrates with external systems in two ways:

**TaskSpawner** — Kelos natively watches external sources and automatically creates Tasks. Supports GitHub Issues, GitHub Pull Requests, GitHub Webhooks, Jira, and Cron schedules. No glue code needed.
**TaskSpawner** — Kelos natively watches external sources and automatically creates Tasks. Supports GitHub Issues, GitHub Pull Requests, GitHub Webhooks, Linear Webhooks, Jira, Cron schedules, and Generic Webhooks (for arbitrary HTTP POST sources like Sentry, Notion, or Slack). No glue code needed.

```yaml
spec:
Expand All @@ -474,12 +471,18 @@ See the [Integration guide](docs/integration.md) for examples of both approaches

## Orchestration Patterns

- **Autonomous Self-Development** — Build a feedback loop where agents pick up issues, write code, self-review, and fix CI flakes until the task is complete. See the [self-development pipeline](#kelos-developing-kelos).
- **Autonomous Self-Development** — Build a feedback loop where agents pick up issues, write code, self-review, and fix CI flakes until the task is complete. Kelos itself is developed this way — see [Case Study: Kelos Developing Kelos](#case-study-kelos-developing-kelos) below.
- **Event-Driven Bug Fixing** — Automatically spawn agents to investigate and fix bugs as soon as they are labeled in GitHub. See [Auto-fix GitHub issues](#auto-fix-github-issues-with-taskspawner).
- **Fleet-Wide Refactoring** — Orchestrate a "fan-out" where dozens of agents apply the same refactoring pattern across a fleet of microservices in parallel.
- **Hands-Free CI/CD** — Embed agents as first-class steps in your deployment pipelines to generate documentation or perform automated migrations.
- **AI Worker Pools** — Maintain a pool of specialized agents (e.g., "The Security Fixer") that developers can trigger via simple Kubernetes resources.

## Case Study: Kelos Developing Kelos

Kelos develops Kelos. TaskSpawners run 24/7, each handling a different part of the development lifecycle — triaging issues, planning implementations, fixing bugs, responding to PR feedback, reviewing code, squashing commits, updating agent images, testing DX, brainstorming improvements, and tuning their own prompts and configs.

See the [`self-development/` README](self-development/README.md) for the full pipeline: manifests, triggers, models, and setup instructions.

## Reference

| Resource | Key Fields | Full Spec |
Expand Down
37 changes: 37 additions & 0 deletions api/v1alpha1/task_types.go
Original file line number Diff line number Diff line change
Expand Up @@ -86,9 +86,38 @@ type PodOverrides struct {
// Workload Identity, or Azure Workload Identity.
// +optional
ServiceAccountName string `json:"serviceAccountName,omitempty"`

// Volumes is a list of additional volumes to attach to the agent pod.
// User-supplied volume names must not collide with Kelos-reserved
// names ("workspace", "kelos-plugin").
// +optional
Volumes []corev1.Volume `json:"volumes,omitempty"`

// VolumeMounts is a list of additional volume mounts to add to the
// agent container. Names must reference either a user-supplied volume
// from Volumes or a Kelos-managed volume ("workspace", "kelos-plugin").
// Init containers are not exposed via this field.
// +optional
VolumeMounts []corev1.VolumeMount `json:"volumeMounts,omitempty"`

// PodSecurityContext is applied to the agent pod. Fields set here
// override Kelos defaults; fields left unset retain Kelos defaults
// (in particular, FSGroup is retained when a workspace is mounted so
// the agent user keeps read/write access to the workspace volume).
// +optional
PodSecurityContext *corev1.PodSecurityContext `json:"podSecurityContext,omitempty"`

// ContainerSecurityContext is applied to the agent container. Use
// this to declare allowPrivilegeEscalation=false, capabilities.drop=[ALL],
// readOnlyRootFilesystem=true, etc., so the spawned pod can land in a
// PSS restricted namespace.
// +optional
ContainerSecurityContext *corev1.SecurityContext `json:"containerSecurityContext,omitempty"`
}

// TaskSpec defines the desired state of Task.
//
// +kubebuilder:validation:XValidation:rule="!(has(self.agentConfigRef) && has(self.agentConfigRefs))",message="agentConfigRef and agentConfigRefs are mutually exclusive"
type TaskSpec struct {
// Type specifies the agent type (e.g., claude-code).
// +kubebuilder:validation:Required
Expand Down Expand Up @@ -122,6 +151,14 @@ type TaskSpec struct {
// +optional
AgentConfigRef *AgentConfigReference `json:"agentConfigRef,omitempty"`

// AgentConfigRefs references an ordered list of AgentConfig resources.
// Configs are merged in order: agentsMD is concatenated, plugins/skills
// are appended, mcpServers are appended with later entries winning on
// name collision. Mutually exclusive with AgentConfigRef.
// +optional
// +kubebuilder:validation:MinItems=1
AgentConfigRefs []AgentConfigReference `json:"agentConfigRefs,omitempty"`

// DependsOn lists Task names that must succeed before this Task starts.
// +optional
DependsOn []string `json:"dependsOn,omitempty"`
Expand Down
11 changes: 11 additions & 0 deletions api/v1alpha1/taskspawner_types.go
Original file line number Diff line number Diff line change
Expand Up @@ -533,6 +533,8 @@ type TaskTemplateMetadata struct {
}

// TaskTemplate defines the template for spawned Tasks.
//
// +kubebuilder:validation:XValidation:rule="!(has(self.agentConfigRef) && has(self.agentConfigRefs))",message="agentConfigRef and agentConfigRefs are mutually exclusive"
type TaskTemplate struct {
// Type specifies the agent type (e.g., claude-code).
// +kubebuilder:validation:Required
Expand Down Expand Up @@ -566,6 +568,15 @@ type TaskTemplate struct {
// +optional
AgentConfigRef *AgentConfigReference `json:"agentConfigRef,omitempty"`

// AgentConfigRefs references an ordered list of AgentConfig resources.
// Configs are merged in order: agentsMD is concatenated, plugins/skills
// are appended, mcpServers are appended with later entries winning on
// name collision. Mutually exclusive with AgentConfigRef.
// When set, spawned Tasks inherit this agent config reference list.
// +optional
// +kubebuilder:validation:MinItems=1
AgentConfigRefs []AgentConfigReference `json:"agentConfigRefs,omitempty"`

// DependsOn lists Task names that spawned Tasks depend on.
// +optional
DependsOn []string `json:"dependsOn,omitempty"`
Expand Down
34 changes: 34 additions & 0 deletions api/v1alpha1/zz_generated.deepcopy.go

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 1 addition & 1 deletion claude-code/Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,7 @@ RUN ARCH=$(dpkg --print-architecture) \

ENV PATH="/usr/local/go/bin:${PATH}"

ARG CLAUDE_CODE_VERSION=2.1.119
ARG CLAUDE_CODE_VERSION=2.1.121
RUN npm install -g @anthropic-ai/claude-code@${CLAUDE_CODE_VERSION}

COPY claude-code/kelos_entrypoint.sh /kelos_entrypoint.sh
Expand Down
41 changes: 41 additions & 0 deletions cmd/kelos-spawner/main_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -637,6 +637,47 @@ func TestRunCycleWithSource_AgentConfigRefForwarded(t *testing.T) {
}
}

// TestRunCycleWithSource_AgentConfigRefsForwarded runs one spawner cycle
// against a TaskSpawner whose template sets AgentConfigRefs and checks that
// the single Task found in the "default" namespace carries the same ordered
// list and leaves the singular AgentConfigRef unset.
func TestRunCycleWithSource_AgentConfigRefsForwarded(t *testing.T) {
	ctx := context.Background()

	// Template under test: two layered agent configs, base first.
	spawner := newTaskSpawner("spawner", "default", nil)
	spawner.Spec.TaskTemplate.AgentConfigRefs = []kelosv1alpha1.AgentConfigReference{
		{Name: "base-config"},
		{Name: "role-config"},
	}
	cl, key := setupTest(t, spawner)

	// A single work item, so exactly one Task is expected after the cycle.
	workItems := []source.WorkItem{{ID: "1", Title: "Item 1"}}
	fake := &fakeSource{items: workItems}

	err := runCycleWithSource(ctx, cl, key, fake)
	if err != nil {
		t.Fatalf("Unexpected error: %v", err)
	}

	var spawned kelosv1alpha1.TaskList
	if err = cl.List(ctx, &spawned, client.InNamespace("default")); err != nil {
		t.Fatalf("Listing tasks: %v", err)
	}
	if got := len(spawned.Items); got != 1 {
		t.Fatalf("Expected 1 task, got %d", got)
	}

	spec := spawned.Items[0].Spec
	if spec.AgentConfigRef != nil {
		t.Error("Expected AgentConfigRef to be nil when AgentConfigRefs is used")
	}

	// Fail fast on a length mismatch so the index checks below cannot panic.
	refs := spec.AgentConfigRefs
	if got := len(refs); got != 2 {
		t.Fatalf("Expected 2 AgentConfigRefs, got %d", got)
	}
	if first := refs[0].Name; first != "base-config" {
		t.Errorf("Expected AgentConfigRefs[0].Name %q, got %q", "base-config", first)
	}
	if second := refs[1].Name; second != "role-config" {
		t.Errorf("Expected AgentConfigRefs[1].Name %q, got %q", "role-config", second)
	}
}

func TestRunCycleWithSource_PodOverridesForwarded(t *testing.T) {
ts := newTaskSpawner("spawner", "default", nil)
ts.Spec.TaskTemplate.PodOverrides = &kelosv1alpha1.PodOverrides{
Expand Down
Loading
Loading