Skip to content
Open
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
31 changes: 29 additions & 2 deletions examples/experimental/swe-agent-v2/prepare_harbor_tasks.py
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,25 @@
logger = logging.getLogger(__name__)


# ── SWE-bench auto-detection ────────────────────────────────────────────
def _is_swebench_instance(metadata: dict) -> bool:
"""Return True if metadata looks like a SWE-bench instance."""
return all(metadata.get(k) for k in ("repo", "version", "base_commit", "test_patch"))


def _swebench_docker_image(instance_id: str) -> str:
"""Derive the pre-built SWE-bench Docker image from instance_id.

Image naming convention (xingyaoww registry):
instance_id: getmoto__moto-7365
image: xingyaoww/sweb.eval.x86_64.getmoto_s_moto-7365:latest

The ``__`` in the instance_id maps to ``_s_`` in the image name.
"""
slug = instance_id.replace("__", "_s_")
return f"xingyaoww/sweb.eval.x86_64.{slug}:latest"
Comment on lines +50 to +60
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

medium

For better maintainability, consider extracting the hardcoded parts of the Docker image name (the registry `xingyaoww`, the prefix `sweb.eval.x86_64`, and the tag `latest`) into module-level constants. This makes the code easier to update if the naming convention changes in the future, and it aligns with the repository rule to avoid hardcoding configuration values.

References
  1. Avoid hardcoding model dimensions or configuration values; derive them from configuration or input tensor shapes instead.



def _get_instruction(metadata: dict) -> str:
for key in ("problem_statement", "instruction", "prompt"):
val = metadata.get(key, "")
Expand Down Expand Up @@ -83,13 +102,21 @@ def _create_task_dir(
env_dir = task_dir / "environment"
env_dir.mkdir(exist_ok=True)

docker_image = metadata.get("docker_image", "ubuntu:24.04")
# Auto-detect SWE-bench instances and derive the correct Docker image
is_swebench = not metadata.get("docker_image") and _is_swebench_instance(metadata)
if is_swebench:
docker_image = _swebench_docker_image(instance_id)
logger.debug(f"SWE-bench auto-detected: {instance_id} -> {docker_image}")
extra_lines = "WORKDIR /testbed\nRUN mkdir -p /logs\n"
else:
docker_image = metadata.get("docker_image", "ubuntu:24.04")
extra_lines = ""
setup_cmds = metadata.get("setup_commands", "")
if isinstance(setup_cmds, list):
setup_cmds = " && ".join(setup_cmds)
setup_block = f"RUN {setup_cmds}\n" if setup_cmds else ""

(env_dir / "Dockerfile").write_text(f"FROM {docker_image}\n{setup_block}")
(env_dir / "Dockerfile").write_text(f"FROM {docker_image}\n{extra_lines}{setup_block}")

if docker_network:
compose_yaml = textwrap.dedent(
Expand Down
Loading