From 69fe10ce71ca5e70f5ecd4ce850934e22c9e0cd4 Mon Sep 17 00:00:00 2001 From: ngc92 <7938269+ngc92@users.noreply.github.com> Date: Sun, 15 Jun 2025 14:29:02 +0200 Subject: [PATCH 01/12] add milestone definition to task.yml --- examples/matmul_py/task.yml | 7 +++++++ src/libkernelbot/task.py | 20 +++++++++++++++++++- 2 files changed, 26 insertions(+), 1 deletion(-) diff --git a/examples/matmul_py/task.yml b/examples/matmul_py/task.yml index 6bb74d69..950a8688 100644 --- a/examples/matmul_py/task.yml +++ b/examples/matmul_py/task.yml @@ -7,6 +7,13 @@ files: - {"name": "reference.py", "source": "reference.py"} - {"name": "eval.py", "source": "../eval.py"} +milestones: + - { + "name": "pytorch reference", + "source": "submission.py", + "description": "PyTorch reference implementation as a performance baseline for matmul" + } + lang: "py" description: | diff --git a/src/libkernelbot/task.py b/src/libkernelbot/task.py index 9f3aa43c..2cb38433 100644 --- a/src/libkernelbot/task.py +++ b/src/libkernelbot/task.py @@ -24,6 +24,13 @@ class PythonTaskData: main: str +@dataclasses.dataclass +class MilestoneData: + name: str + code: str + description: str = "" + + TestCaseType = Dict[str, Union[int, str]] @@ -110,6 +117,7 @@ class LeaderboardDefinition: task: LeaderboardTask description: str = "" templates: dict[str, str] = dataclasses.field(default_factory=dict) + milestones: list[MilestoneData] = dataclasses.field(default_factory=list) def make_task_definition(yaml_file: str | Path) -> LeaderboardDefinition: @@ -137,6 +145,12 @@ def make_task_definition(yaml_file: str | Path) -> LeaderboardDefinition: else: file_dict[name] = (root / source).read_text() + milestones = [] + for milestone in raw.get("milestones", []): + milestone["code"] = (root / milestone["source"]).read_text() + del milestone["source"] + milestones.append(MilestoneData(**milestone)) + raw["files"] = file_dict # load template files @@ -146,10 +160,14 @@ def make_task_definition(yaml_file: str | Path) -> LeaderboardDefinition: templates[lang] = (root / source).read_text() del raw["templates"] + del raw["milestones"] description = raw["description"] del raw["description"] task = LeaderboardTask.from_dict(raw) - return LeaderboardDefinition(task=task, templates=templates, description=description) + + return LeaderboardDefinition( + task=task, templates=templates, milestones=milestones, description=description + ) def build_task_config( From 1ab94e5eb5e7cfb52d8ef4fafde33a3fd8ee5f5c Mon Sep 17 00:00:00 2001 From: ngc92 <7938269+ngc92@users.noreply.github.com> Date: Sun, 15 Jun 2025 15:11:44 +0200 Subject: [PATCH 02/12] define milestones in db --- src/libkernelbot/db_types.py | 10 ++- src/libkernelbot/leaderboard_db.py | 57 ++++++++++++++++- .../20250725_01_hwite-add-milestone-table.py | 61 +++++++++++++++++++ 3 files changed, 126 insertions(+), 2 deletions(-) create mode 100644 src/migrations/20250725_01_hwite-add-milestone-table.py diff --git a/src/libkernelbot/db_types.py b/src/libkernelbot/db_types.py index e8715fa2..e31d7a84 100644 --- a/src/libkernelbot/db_types.py +++ b/src/libkernelbot/db_types.py @@ -56,4 +56,12 @@ class SubmissionItem(TypedDict): runs: List[RunItem] -__all__ = [LeaderboardItem, LeaderboardRankedEntry, RunItem, SubmissionItem] +class MilestoneItem(TypedDict): + id: int + name: str + code: str + description: str + created_at: datetime.datetime + + +__all__ = [LeaderboardItem, LeaderboardRankedEntry, RunItem, SubmissionItem, MilestoneItem] diff --git a/src/libkernelbot/leaderboard_db.py b/src/libkernelbot/leaderboard_db.py index c322ab68..ba5ef3bb 100644 --- a/src/libkernelbot/leaderboard_db.py +++ b/src/libkernelbot/leaderboard_db.py @@ -5,7 +5,13 @@ import psycopg2 -from libkernelbot.db_types import LeaderboardItem, LeaderboardRankedEntry, RunItem, SubmissionItem +from libkernelbot.db_types import ( + LeaderboardItem, + LeaderboardRankedEntry, + MilestoneItem, + RunItem, + SubmissionItem, +) from libkernelbot.run_eval import CompileResult, RunResult, SystemInfo from libkernelbot.task import LeaderboardDefinition, LeaderboardTask from libkernelbot.utils import ( @@ -235,6 +241,55 @@ def delete_leaderboard(self, leaderboard_name: str, force: bool = False): logger.exception("Could not delete leaderboard %s.", leaderboard_name, exc_info=e) raise KernelBotError(f"Could not delete leaderboard `{leaderboard_name}`.") from e + def create_milestone( + self, + leaderboard_id: int, + name: str, + code: str, + description: str = None, + ) -> int: + """Create a new milestone for a leaderboard""" + try: + self.cursor.execute( + """ + INSERT INTO leaderboard.milestones ( + leaderboard_id, name, code, description + ) + VALUES (%s, %s, %s, %s) + RETURNING id + """, + (leaderboard_id, name, code, description), + ) + milestone_id = self.cursor.fetchone()[0] + self.connection.commit() + return milestone_id + except psycopg2.Error as e: + self.connection.rollback() + logger.exception("Error creating milestone", exc_info=e) + raise KernelBotError("Error creating milestone") from e + + def get_leaderboard_milestones(self, leaderboard_id: int) -> "list[MilestoneItem]": + """Get all milestones for a leaderboard""" + self.cursor.execute( + """ + SELECT id, name, code, description, created_at + FROM leaderboard.milestones + WHERE leaderboard_id = %s + ORDER BY created_at + """, + (leaderboard_id,), + ) + return [ + { + "id": row[0], + "name": row[1], + "code": row[2], + "description": row[3], + "created_at": row[4], + } + for row in self.cursor.fetchall() + ] + def create_submission( self, leaderboard: str, diff --git a/src/migrations/20250725_01_hwite-add-milestone-table.py b/src/migrations/20250725_01_hwite-add-milestone-table.py new file mode 100644 index 00000000..7756f257 --- /dev/null +++ b/src/migrations/20250725_01_hwite-add-milestone-table.py @@ -0,0 +1,61 @@ +""" +Add milestone table for better milestone tracking +""" + +from yoyo import step + +__depends__ = {"20250617_01_c5mrF-task-split"} # Update to latest migration + +steps = [ + step( + """ + CREATE TABLE IF NOT EXISTS leaderboard.milestones ( + id SERIAL PRIMARY KEY, + leaderboard_id INTEGER NOT NULL REFERENCES leaderboard.leaderboard(id) ON DELETE CASCADE, + name TEXT NOT NULL, + code TEXT NOT NULL, + description TEXT, + created_at TIMESTAMP WITH TIME ZONE NOT NULL DEFAULT NOW(), + UNIQUE(leaderboard_id, name) + ) + """, + "DROP TABLE leaderboard.milestones", + ), + step("CREATE INDEX ON leaderboard.milestones (leaderboard_id)"), + # add alternative ID column that references milestones; + # we don't really care about being careful preserving milestone + # runs, so we can simply DELETE CASCADE + step( + """ + ALTER TABLE leaderboard.runs + ADD COLUMN milestone_id INTEGER REFERENCES leaderboard.milestones(id) ON DELETE CASCADE; + """, + "ALTER TABLE leaderboard.runs DROP COLUMN milestone_id;", + ), + # as we now have two possible ids, exactly one of them can be NULL + step( + "ALTER TABLE leaderboard.runs ALTER COLUMN submission_id DROP NOT NULL;", + """ + DELETE FROM leaderboard.runs WHERE submission_id IS NULL; + ALTER TABLE leaderboard.runs ALTER COLUMN submission_id SET NOT NULL; + """, + ), + step( + """ + ALTER TABLE leaderboard.runs + ADD CONSTRAINT runs_single_parent CHECK ( + (submission_id IS NOT NULL AND milestone_id IS NULL) OR + (submission_id IS NULL AND milestone_id IS NOT NULL) + ); + """, + "ALTER TABLE leaderboard.runs DROP CONSTRAINT runs_single_parent;", + ), + # ensure we have fast indexing for regular submissions + step( + """ + CREATE INDEX IF NOT EXISTS runs_submission_id_idx ON leaderboard.runs(submission_id) + WHERE submission_id IS NOT NULL; + """, + "DROP INDEX IF EXISTS leaderboard.runs_submission_id_idx", + ), +] From c0fab74511423ebe9ba3222462be3ba9d9f443ff Mon Sep 17 00:00:00 2001 From: ngc92 <7938269+ngc92@users.noreply.github.com> Date: Sat, 26 Jul 2025 17:29:01 +0200 Subject: [PATCH 03/12] milestone runs in db --- src/libkernelbot/leaderboard_db.py | 121 ++++++++++++++++++----------- 1 file changed, 75 insertions(+), 46 deletions(-) diff --git a/src/libkernelbot/leaderboard_db.py b/src/libkernelbot/leaderboard_db.py index ba5ef3bb..31dd502c 100644 --- a/src/libkernelbot/leaderboard_db.py +++ b/src/libkernelbot/leaderboard_db.py @@ -290,6 +290,58 @@ def get_leaderboard_milestones(self, leaderboard_id: int) -> "list[MilestoneItem for row in self.cursor.fetchall() ] + def delete_milestone_runs(self, leaderboard_name: str): + self.cursor.execute( + """ + DELETE FROM leaderboard.runs + WHERE milestone_id IN ( + SELECT leaderboard.milestones.id + FROM leaderboard.milestones + WHERE leaderboard_id = %s + ); + """, + (leaderboard_name,), + ) + self.connection.commit() + + def get_runs_generic( + self, *, milestone_id: Optional[int] = None, submission_id: Optional[int] = None + ) -> List["RunItem"]: + if milestone_id is not None: + key = "milestone_id" + value = milestone_id + if submission_id is not None: + logger.error("milestone_id and submission_id specified simultaneously") + raise KernelBotError("`milestone_id` and `submission_id` specified simultaneously") + else: + key = "submission_id" + value = submission_id + query = f""" + SELECT start_time, end_time, mode, secret, runner, score, + passed, compilation, meta, result, system_info + FROM leaderboard.runs + WHERE {key} = %s + """ + self.cursor.execute(query, (value,)) + runs = self.cursor.fetchall() + + return [ + RunItem( + start_time=r[0], + end_time=r[1], + mode=r[2], + secret=r[3], + runner=r[4], + score=r[5], + passed=r[6], + compilation=r[7], + meta=r[8], + result=r[9], + system=r[10], + ) + for r in runs + ] + def create_submission( self, leaderboard: str, @@ -394,7 +446,9 @@ def mark_submission_done( def create_submission_run( self, - submission: int, + *, + submission: Optional[int] = None, + milestone: Optional[int] = None, start: datetime.datetime, end: datetime.datetime, mode: str, @@ -410,22 +464,23 @@ def create_submission_run( compilation = json.dumps(dataclasses.asdict(compilation)) # check validity - self.cursor.execute( - """ - SELECT done FROM leaderboard.submission WHERE id = %s - """, - (submission,), - ) - if self.cursor.fetchone()[0]: - logger.error( - "Submission '%s' is already marked as done when trying to add %s run.", - submission, - mode, - ) - raise KernelBotError( - "Internal error: Attempted to add run, " - "but submission was already marked as done." + if submission is not None: + self.cursor.execute( + """ + SELECT done FROM leaderboard.submission WHERE id = %s + """, + (submission,), ) + if self.cursor.fetchone()[0]: + logger.error( + "Submission '%s' is already marked as done when trying to add %s run.", + submission, + mode, + ) + raise KernelBotError( + "Internal error: Attempted to add run, " + "but submission was already marked as done." + ) meta = { k: result.__dict__[k] @@ -433,13 +488,14 @@ def create_submission_run( } self.cursor.execute( """ - INSERT INTO leaderboard.runs (submission_id, start_time, end_time, mode, + INSERT INTO leaderboard.runs (submission_id, milestone_id, start_time, end_time, mode, secret, runner, score, passed, compilation, meta, result, system_info ) - VALUES (%s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s) + VALUES (%s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s) """, ( submission, + milestone, start, end, mode, @@ -837,33 +893,6 @@ def get_submission_by_id(self, submission_id: int) -> Optional["SubmissionItem"] if submission is None: return None - # OK, now get the runs - query = """ - SELECT start_time, end_time, mode, secret, runner, score, - passed, compilation, meta, result, system_info - FROM leaderboard.runs - WHERE submission_id = %s - """ - self.cursor.execute(query, (submission_id,)) - runs = self.cursor.fetchall() - - runs = [ - RunItem( - start_time=r[0], - end_time=r[1], - mode=r[2], - secret=r[3], - runner=r[4], - score=r[5], - passed=r[6], - compilation=r[7], - meta=r[8], - result=r[9], - system=r[10], - ) - for r in runs - ] - return SubmissionItem( submission_id=submission_id, leaderboard_id=submission[0], @@ -873,7 +902,7 @@ def get_submission_by_id(self, submission_id: int) -> Optional["SubmissionItem"] submission_time=submission[4], done=submission[5], code=bytes(submission[6]).decode("utf-8"), - runs=runs, + runs=self.get_runs_generic(submission_id=submission_id), ) def get_leaderboard_submission_count( From feab9a7e1ca6095df4c6f39ca634f4a85d534181 Mon Sep 17 00:00:00 2001 From: ngc92 <7938269+ngc92@users.noreply.github.com> Date: Sun, 15 Jun 2025 16:36:16 +0200 Subject: [PATCH 04/12] admin interface for milestones --- src/kernelbot/cogs/admin_cog.py | 154 +++++++++++++++++++++++++++-- src/libkernelbot/leaderboard_db.py | 4 +- 2 files changed, 147 insertions(+), 11 deletions(-) diff --git a/src/kernelbot/cogs/admin_cog.py b/src/kernelbot/cogs/admin_cog.py index c33ad847..a43e5dd1 100644 --- a/src/kernelbot/cogs/admin_cog.py +++ b/src/kernelbot/cogs/admin_cog.py @@ -1,3 +1,4 @@ +import asyncio import json import subprocess import tempfile @@ -19,8 +20,9 @@ ) from kernelbot.env import env from kernelbot.ui.misc import ConfirmationView, DeleteConfirmationModal, GPUSelectionView -from libkernelbot.consts import GitHubGPU, ModalGPU +from libkernelbot.consts import GitHubGPU, ModalGPU, SubmissionMode, get_gpu_by_name from libkernelbot.leaderboard_db import LeaderboardDoesNotExist, LeaderboardItem, SubmissionItem +from libkernelbot.submission import compute_score from libkernelbot.task import LeaderboardDefinition, make_task_definition from libkernelbot.utils import ( KernelBotError, @@ -122,6 +124,10 @@ def __init__(self, bot: "ClusterBot"): name="set-forum-ids", description="Sets forum IDs" )(self.set_forum_ids) + self.trigger_milestones = bot.admin_group.command( + name="trigger-milestones", description="Trigger running of milestones" + )(self.trigger_milestones) + self._scheduled_cleanup_temp_users.start() # -------------------------------------------------------------------------- @@ -162,6 +168,7 @@ async def leaderboard_create_local( interaction: discord.Interaction, directory: str, gpu: Optional[app_commands.Choice[str]], + milestones: Optional[bool] = False, ): is_admin = await self.admin_check(interaction) if not is_admin: @@ -180,20 +187,19 @@ async def leaderboard_create_local( leaderboard_name = directory.name + "-dev" # create-local overwrites existing leaderboard + forum_channel = self.bot.get_channel(self.bot.leaderboard_forum_id) + forum_thread = None + with self.bot.leaderboard_db as db: try: old_lb = db.get_leaderboard(leaderboard_name) + forum_id = old_lb["forum_id"] + forum_thread = await self.bot.fetch_channel(forum_id) except LeaderboardDoesNotExist: pass db.delete_leaderboard(leaderboard_name, force=True) - # get existing forum thread or create new one - forum_channel = self.bot.get_channel(self.bot.leaderboard_forum_id) - forum_thread = None - if old_lb: - forum_id = old_lb["forum_id"] - forum_thread = await self.bot.fetch_channel(forum_id) - + # create new forum thread if none exists if forum_thread is None: forum_thread = await forum_channel.create_thread( name=leaderboard_name, @@ -216,6 +222,11 @@ async def leaderboard_create_local( interaction, f"Leaderboard '{leaderboard_name}' created.", ) + else: + raise KernelBotError(f"Error creating leaderboard '{leaderboard_name}'") + + if milestones: + await self._submit_milestones(interaction, leaderboard_name) def _parse_deadline(self, deadline: str): # Try parsing with time first @@ -354,7 +365,7 @@ async def create_leaderboard_in_db( with self.bot.leaderboard_db as db: try: - db.create_leaderboard( + lb_id = db.create_leaderboard( name=leaderboard_name, deadline=date_value, definition=definition, @@ -362,6 +373,15 @@ async def create_leaderboard_in_db( creator_id=interaction.user.id, forum_id=forum_id, ) + + # create entry in milestones table. + for milestone in definition.milestones: + db.create_milestone( + lb_id, + milestone.name, + milestone.code, + description=milestone.description, + ) except KernelBotError as e: await send_discord_message( interaction, @@ -371,6 +391,122 @@ async def create_leaderboard_in_db( return False return True + async def _submit_milestones( + self, interaction: discord.Interaction, leaderboard_name: str, gpus: Optional[list] = None + ): + backend = self.bot.backend + + with self.bot.leaderboard_db as db: + leaderboard_item = db.get_leaderboard(leaderboard_name) + milestones = db.get_leaderboard_milestones(leaderboard_item["id"]) + + task: "LeaderboardTask" = leaderboard_item["task"] + + # ok, submit all that are missing + submit_tasks = [] + from kernelbot.discord_reporter import MultiProgressReporterDiscord + + reporters = MultiProgressReporterDiscord(interaction) + await reporters.show(f"Milestone runs for {leaderboard_name}") + + async def submit_milestone(milestone, gpu, reporter): + result = await backend.submit_leaderboard( + -1, + milestone["code"], + "milestone.py", + gpu, + reporter, + task, + SubmissionMode.LEADERBOARD, + None, + ) + + # we do not allow milestone runs to fail + if not result.success: + logger.error(f"Milestone run failed: {result}") + raise KernelBotError(f"Milestone run failed: {result.error}") + + for key, value in result.runs.items(): + if not value.run.success or not value.run.passed: + logger.error(f"Milestone run {key} failed: {value}") + raise KernelBotError(f"Milestone run {key} failed.") + + with backend.db as db: + for key, value in result.runs.items(): + db.create_submission_run( + milestone=milestone["id"], + start=value.start, + end=value.end, + mode=key, + runner=gpu.name, + score=compute_score(result, task, -1), + secret=False, + compilation=value.compilation, + result=value.run, + system=result.system, + ) + + if gpus is None: + gpus = leaderboard_item["gpu_types"] + + for milestone in milestones: + with backend.db as db: + existing_runs = db.get_runs_generic(milestone_id=milestone["id"]) + # create tasks + for gpu in gpus: + if gpu in [r["runner"] for r in existing_runs]: + await send_discord_message( + interaction, + f"Skipping {gpu}; milestone run already exists.", + ephemeral=True, + ) + continue + submit_tasks.append( + submit_milestone( + milestone, + get_gpu_by_name(gpu), + reporters.add_run(f"Milestone {milestone['name']} on {gpu}"), + ) + ) + + await send_discord_message( + interaction, + f"Submitted {len(submit_tasks)} milestone runs for {len(milestones)} milestones.", + ephemeral=True, + ) + + # Execute all milestone submissions + await asyncio.gather(*submit_tasks) + + @app_commands.describe( + leaderboard_name="Name of Leaderboard", + gpu="Select GPU. Leave empty to run for all GPUs.", + rerun="Force re-running existing milestones.", + ) + @app_commands.autocomplete(leaderboard_name=leaderboard_name_autocomplete) + @with_error_handling + async def trigger_milestones( + self, + interaction: discord.Interaction, + leaderboard_name: str, + gpu: Optional[str], + rerun: Optional[bool] = False, + ): + if not await self.admin_check(interaction): + await send_discord_message( + interaction, "You do not have permission to trigger milestones.", ephemeral=True + ) + return + + if rerun: + if gpu is not None: + raise KernelBotError("Cannot specify `rerun` and `gpu` at the same time") + with self.bot.backend.db as db: + db.delete_milestone_runs(db.get_leaderboard_id(leaderboard_name)) + + await interaction.response.defer(ephemeral=True) + await self._submit_milestones(interaction, leaderboard_name, gpus=gpu) + @discord.app_commands.describe(leaderboard_name="Name of the leaderboard") @discord.app_commands.autocomplete(leaderboard_name=leaderboard_name_autocomplete) @with_error_handling diff --git a/src/libkernelbot/leaderboard_db.py b/src/libkernelbot/leaderboard_db.py index 31dd502c..f94fc8b7 100644 --- a/src/libkernelbot/leaderboard_db.py +++ b/src/libkernelbot/leaderboard_db.py @@ -290,7 +290,7 @@ def get_leaderboard_milestones(self, leaderboard_id: int) -> "list[MilestoneItem for row in self.cursor.fetchall() ] - def delete_milestone_runs(self, leaderboard_name: str): + def delete_milestone_runs(self, leaderboard_id: int): self.cursor.execute( """ DELETE FROM leaderboard.runs @@ -300,7 +300,7 @@ def delete_milestone_runs(self, leaderboard_name: str): WHERE leaderboard_id = %s ); """, - (leaderboard_name,), + (leaderboard_id,), ) self.connection.commit() From 6d0032e6106282606eee9787043cb7ef8322975c Mon Sep 17 00:00:00 2001 From: ngc92 <7938269+ngc92@users.noreply.github.com> Date: Sat, 26 Jul 2025 18:28:20 +0200 Subject: [PATCH 05/12] user-facing milestone commands --- src/kernelbot/cogs/leaderboard_cog.py | 140 +++++++++++++++++++++++++- src/libkernelbot/utils.py | 7 ++ 2 files changed, 146 insertions(+), 1 deletion(-) diff --git a/src/kernelbot/cogs/leaderboard_cog.py b/src/kernelbot/cogs/leaderboard_cog.py index 6cb8bb40..4a909474 100644 --- a/src/kernelbot/cogs/leaderboard_cog.py +++ b/src/kernelbot/cogs/leaderboard_cog.py @@ -1,3 +1,4 @@ +import math from datetime import datetime, timedelta from io import StringIO from typing import TYPE_CHECKING, List, Optional @@ -22,8 +23,9 @@ RunItem, SubmissionItem, ) +from libkernelbot.report import make_benchmark_log from libkernelbot.submission import SubmissionRequest, prepare_submission -from libkernelbot.utils import format_time, setup_logging +from libkernelbot.utils import format_time, run_item_to_run_result, setup_logging if TYPE_CHECKING: from kernelbot.main import ClusterBot @@ -308,6 +310,13 @@ def __init__(self, bot: "ClusterBot"): name="template", description="Get a starter template file for a task" )(self.get_task_template) + self.get_task_milestones = bot.leaderboard_group.command( + name="milestones", description="Show milestone performances" + )(self.get_task_milestones) + self.show_milestone_result = bot.leaderboard_group.command( + name="milestone-result", description="Show detailed results of a milestone run" + )(self.show_milestone_result) + self.get_submission_by_id = bot.leaderboard_group.command( name="get-submission", description="Retrieve one of your past submissions" )(self.get_submission_by_id) @@ -588,6 +597,135 @@ async def get_task_template( ) return + @app_commands.describe( + leaderboard_name="Name of Leaderboard", + gpu="Select GPU. Leave empty for all GPUs.", + ) + @app_commands.autocomplete(leaderboard_name=leaderboard_name_autocomplete) + @with_error_handling + async def get_task_milestones( + self, + interaction: discord.Interaction, + leaderboard_name: str, + gpu: Optional[str] = None, + ): + await interaction.response.defer(ephemeral=True) + + message = f"# Milestones for `{leaderboard_name}`\n" + + try: + with self.bot.leaderboard_db as db: + lb = db.get_leaderboard(leaderboard_name) + milestones = db.get_leaderboard_milestones(leaderboard_id=lb["id"]) + + if len(milestones) == 0: + await send_discord_message( + interaction, + f"Leaderboard `{leaderboard_name}` does not provide any milestones", + ephemeral=True, + ) + return + + for milestone in milestones: + message += f"## {milestone['name']}\n" + message += milestone["description"] + "\n" + with self.bot.leaderboard_db as db: + runs = db.get_runs_generic(milestone_id=milestone["id"]) + + runs = [r for r in runs if r["mode"] == SubmissionMode.LEADERBOARD.value] + + if len(runs) == 0: + message += "⚠️ No runs available. Maybe they haven't been triggered yet?\n" + + if gpu is not None: + runs = [r for r in runs if r["runner"] == gpu] + if len(runs) == 0: + message += f"⚠️ No runs available for GPU {gpu}\n" + + max_len = 0 + min_val = float("inf") + for run in runs: + max_len = max(max_len, len(run["runner"])) + min_val = min(min_val, run["score"]) + + digits = max(0, 1 - math.floor(math.log10(min_val))) + + message += "```\n" + for run in runs: + message += f" {run['runner']:<{max_len}}: {run['score']:.{digits}f}\n" + message += "```\n\n" + + await send_discord_message( + interaction, + message, + ephemeral=True, + ) + + except Exception as E: + logger.exception("Error fetching milestones for %s", leaderboard_name, exc_info=E) + await send_discord_message( + interaction, + f"Could not fetch milestones for leaderboard `{leaderboard_name}`", + ephemeral=True, + ) + return + + @app_commands.describe( + leaderboard_name="Name of Leaderboard", + milestone_name="Name of Milestone", + gpu="Select GPU. Leave empty for all GPUs.", + ) + @app_commands.autocomplete(leaderboard_name=leaderboard_name_autocomplete) + @with_error_handling + async def show_milestone_result( + self, + interaction: discord.Interaction, + leaderboard_name: str, + milestone_name: str, + gpu: Optional[str] = None, + ): + await interaction.response.defer(ephemeral=True) + with self.bot.leaderboard_db as db: + lb = db.get_leaderboard(leaderboard_name) + milestones = db.get_leaderboard_milestones(leaderboard_id=lb["id"]) + + selected = None + for milestone in milestones: + if milestone["name"].lower() == milestone_name.lower(): + selected = milestone + break + + if selected is None: + await send_discord_message( + interaction, + f"Could not find milestone `{milestone_name}` for leaderboard `{leaderboard_name}`", + ephemeral=True, + ) + return + + with self.bot.leaderboard_db as db: + runs = db.get_runs_generic(milestone_id=selected["id"]) + + runs = [r for r in runs if r["mode"] == SubmissionMode.LEADERBOARD.value] + + if len(runs) == 0: + await send_discord_message( + interaction, + f"⚠️ No runs available for milestone `{milestone_name}`." + "Maybe they haven't been triggered yet?", + ephemeral=True, + ) + return + + for run in runs: + log = make_benchmark_log(run_item_to_run_result(run)) + message = f"{milestone_name} on {run['runner']}\n```{log}```\n" + await send_discord_message( + interaction, + message, + ephemeral=True, + ) + @discord.app_commands.describe(leaderboard_name="Name of the leaderboard") @app_commands.autocomplete(leaderboard_name=leaderboard_name_autocomplete) @with_error_handling diff --git a/src/libkernelbot/utils.py b/src/libkernelbot/utils.py index 3702664d..718bcd24 100644 --- a/src/libkernelbot/utils.py +++ b/src/libkernelbot/utils.py @@ -2,6 +2,9 @@ import subprocess from typing import Any, Optional +from libkernelbot.db_types import RunItem +from libkernelbot.run_eval import RunResult + def setup_logging(name: Optional[str] = None): """Configure and setup logging for the application""" @@ -144,3 +147,7 @@ def limit_length(text: str, maxlen: int): return text[: maxlen - 6] + " [...]" else: return text + + +def run_item_to_run_result(item: RunItem) -> RunResult: + return RunResult(**item["meta"], result=item["result"], passed=item["passed"]) From 275b853b553e28846a71f0215b33236dd407b847 Mon Sep 17 00:00:00 2001 From: ngc92 <7938269+ngc92@users.noreply.github.com> Date: Sat, 26 Jul 2025 19:45:26 +0200 Subject: [PATCH 06/12] add triton-based milestone from @PaliC for some reason, these are not compatible with T4; which gives us a nice test-case/motivation for allowing the exclusion of certain GPUs from milestones --- examples/matmul_py/task.yml | 12 +- examples/matmul_py/triton_ref.py | 127 ++++++++++++++++++ src/kernelbot/cogs/admin_cog.py | 12 +- src/libkernelbot/db_types.py | 1 + src/libkernelbot/leaderboard_db.py | 14 +- src/libkernelbot/task.py | 1 + ...50726_01_j9Q3S-milestone-exclude-column.py | 14 ++ 7 files changed, 173 insertions(+), 8 deletions(-) create mode 100644 examples/matmul_py/triton_ref.py create mode 100644 src/migrations/20250726_01_j9Q3S-milestone-exclude-column.py diff --git a/examples/matmul_py/task.yml b/examples/matmul_py/task.yml index 950a8688..12045e5d 100644 --- a/examples/matmul_py/task.yml +++ b/examples/matmul_py/task.yml @@ -9,9 +9,15 @@ files: milestones: - { - "name": "pytorch reference", - "source": "submission.py", - "description": "PyTorch reference implementation as a performance baseline for matmul" + name: "pytorch", + source: "submission.py", + description: "PyTorch reference implementation as a performance baseline for matmul" + } + - { + name: "triton", + source: "triton_ref.py", + description: "Triton reference implementation as a performance baseline for matmul", + exclude_gpus: ['T4'] } lang: "py" diff --git a/examples/matmul_py/triton_ref.py b/examples/matmul_py/triton_ref.py new file mode 100644 index 00000000..95ce5cba --- /dev/null +++ b/examples/matmul_py/triton_ref.py @@ -0,0 +1,127 @@ +#!POPCORN leaderboard matmul_py +import triton +import triton.language as tl +import torch +from task import input_t, output_t + + +@triton.jit +def matmul_kernel( + # Pointers to matrices + a_ptr, b_ptr, c_ptr, + # Matrix dimensions + M, N, K, + # The stride variables represent how much to increase the ptr by when moving by 1 + # element in a particular dimension. E.g. `stride_am` is how much to increase `a_ptr` + # by to get the element one row down (A has M rows). + stride_am, stride_ak, + stride_bk, stride_bn, + stride_cm, stride_cn, + # Meta-parameters + BLOCK_SIZE_M: tl.constexpr, BLOCK_SIZE_N: tl.constexpr, BLOCK_SIZE_K: tl.constexpr, + GROUP_SIZE_M: tl.constexpr, +): + """Kernel for computing the matmul C = A x B. + A has shape (M, K), B has shape (K, N) and C has shape (M, N) + """ + # ----------------------------------------------------------- + # Map program ids `pid` to the block of C it should compute. + # This is done in a grouped ordering to promote L2 cache hit rates. + # See above `L2 Cache Optimizations` section for details. + pid = tl.program_id(axis=0) + num_pid_m = tl.cdiv(M, BLOCK_SIZE_M) + num_pid_n = tl.cdiv(N, BLOCK_SIZE_N) + num_pid_in_group = GROUP_SIZE_M * num_pid_n + group_id = pid // num_pid_in_group + first_pid_m = group_id * GROUP_SIZE_M + group_size_m = min(num_pid_m - first_pid_m, GROUP_SIZE_M) + pid_m = first_pid_m + (pid % group_size_m) + pid_n = (pid % num_pid_in_group) // group_size_m + + # ---------------------------------------------------------- + # Create pointers for the first blocks of A and B. + # We will advance this pointer as we move in the K direction + # and accumulate + # `a_ptrs` is a block of [BLOCK_SIZE_M, BLOCK_SIZE_K] pointers + # `b_ptrs` is a block of [BLOCK_SIZE_K, BLOCK_SIZE_N] pointers + # See above `Pointer Arithmetic` section for details + offs_am = (pid_m * BLOCK_SIZE_M + tl.arange(0, BLOCK_SIZE_M)) % M + offs_bn = (pid_n * BLOCK_SIZE_N + tl.arange(0, BLOCK_SIZE_N)) % N + offs_k = tl.arange(0, BLOCK_SIZE_K) + a_ptrs = a_ptr + (offs_am[:, None] * stride_am + offs_k[None, :] * stride_ak) + b_ptrs = b_ptr + (offs_k[:, None] * stride_bk + offs_bn[None, :] * stride_bn) + + # ----------------------------------------------------------- + # Iterate to compute a block of the C matrix. + # We accumulate into a `[BLOCK_SIZE_M, BLOCK_SIZE_N]` block + # of fp32 values for higher precision. + # `accumulator` will be converted back to fp16 after the loop. + accumulator = tl.zeros((BLOCK_SIZE_M, BLOCK_SIZE_N), dtype=tl.float32) + for k in range(0, tl.cdiv(K, BLOCK_SIZE_K)): + # Load the next block of A and B, generate a mask by checking the K dimension. + # If it is out of bounds, set it to 0. + a = tl.load(a_ptrs, mask=offs_k[None, :] < K - k * BLOCK_SIZE_K, other=0.0) + b = tl.load(b_ptrs, mask=offs_k[:, None] < K - k * BLOCK_SIZE_K, other=0.0) + # We accumulate along the K dimension. + accumulator += tl.dot(a, b) + # Advance the ptrs to the next K block. + a_ptrs += BLOCK_SIZE_K * stride_ak + b_ptrs += BLOCK_SIZE_K * stride_bk + # You can fuse arbitrary activation functions here + # while the accumulator is still in FP32! + c = accumulator.to(tl.float16) + + # ----------------------------------------------------------- + # Write back the block of the output matrix C with masks. + offs_cm = pid_m * BLOCK_SIZE_M + tl.arange(0, BLOCK_SIZE_M) + offs_cn = pid_n * BLOCK_SIZE_N + tl.arange(0, BLOCK_SIZE_N) + c_ptrs = c_ptr + stride_cm * offs_cm[:, None] + stride_cn * offs_cn[None, :] + c_mask = (offs_cm[:, None] < M) & (offs_cn[None, :] < N) + tl.store(c_ptrs, c, mask=c_mask) + + +def triton_matmul(a, b): + # Check constraints. + assert a.shape[1] == b.shape[0], "Incompatible dimensions" + assert a.is_contiguous(), "Matrix A must be contiguous" + assert b.is_contiguous(), "Matrix B must be contiguous" + M, K = a.shape + K, N = b.shape + # Allocate output. + c = torch.empty((M, N), device=a.device, dtype=a.dtype) + # 1D launch kernel where each block gets its own program. + grid = lambda META: (triton.cdiv(M, META['BLOCK_SIZE_M']) * triton.cdiv(N, META['BLOCK_SIZE_N']), ) + matmul_kernel[grid]( + a, b, c, + M, N, K, + a.stride(0), a.stride(1), + b.stride(0), b.stride(1), + c.stride(0), c.stride(1), + BLOCK_SIZE_M=128, BLOCK_SIZE_N=128, BLOCK_SIZE_K=32, + GROUP_SIZE_M=8, + ) + return c + + +def custom_kernel(data: input_t) -> output_t: + a, b = data + # Convert to torch tensors if they aren't already + if not isinstance(a, torch.Tensor): + a = torch.tensor(a, dtype=torch.float16).cuda() + if not isinstance(b, torch.Tensor): + b = torch.tensor(b, dtype=torch.float16).cuda() + + # Ensure tensors are on GPU and contiguous + if not a.is_cuda: + a = a.cuda() + if not b.is_cuda: + b = b.cuda() + + a = a.contiguous() + b = b.contiguous() + + # Use our custom Triton matmul + result = triton_matmul(a, b) + + # Convert back to the expected output format + return result diff --git a/src/kernelbot/cogs/admin_cog.py b/src/kernelbot/cogs/admin_cog.py index a43e5dd1..3506b008 100644 --- a/src/kernelbot/cogs/admin_cog.py +++ b/src/kernelbot/cogs/admin_cog.py @@ -381,6 +381,7 @@ async def create_leaderboard_in_db( milestone.name, milestone.code, description=milestone.description, + exclude_gpus=milestone.exclude_gpus, ) except KernelBotError as e: await send_discord_message( @@ -457,10 +458,19 @@ async def submit_milestone(milestone, gpu, reporter): if gpu in [r["runner"] for r in existing_runs]: await send_discord_message( interaction, - f"Skipping {gpu}; milestone run already exists.", + f"Skipping {gpu} for {milestone['name']}; milestone run already exists.", ephemeral=True, ) continue + + if gpu in milestone["exclude_gpus"]: + await send_discord_message( + interaction, + f"Skipping {gpu} for {milestone['name']}; is excluded.", + ephemeral=True, + ) + continue + submit_tasks.append( submit_milestone( milestone, diff --git a/src/libkernelbot/db_types.py b/src/libkernelbot/db_types.py index e31d7a84..32bc699e 100644 --- a/src/libkernelbot/db_types.py +++ b/src/libkernelbot/db_types.py @@ -62,6 +62,7 @@ class MilestoneItem(TypedDict): code: str description: str created_at: datetime.datetime + exclude_gpus: list[str] __all__ = [LeaderboardItem, LeaderboardRankedEntry, RunItem, SubmissionItem, MilestoneItem] diff --git a/src/libkernelbot/leaderboard_db.py b/src/libkernelbot/leaderboard_db.py index f94fc8b7..22acbdab 100644 --- a/src/libkernelbot/leaderboard_db.py +++ b/src/libkernelbot/leaderboard_db.py @@ -247,18 +247,23 @@ def create_milestone( name: str, code: str, description: str = None, + exclude_gpus: list[str] = None, ) -> int: """Create a new milestone for a leaderboard""" + if exclude_gpus is None: + exclude = "" + else: + exclude = str.join(";", exclude_gpus) try: self.cursor.execute( """ INSERT INTO leaderboard.milestones ( - leaderboard_id, name, code, description + leaderboard_id, name, code, description, exclude_gpus ) - VALUES (%s, %s, %s, %s) + VALUES (%s, %s, %s, %s, %s) RETURNING id """, - (leaderboard_id, name, code, description), + (leaderboard_id, name, code, description, exclude), ) milestone_id = self.cursor.fetchone()[0] self.connection.commit() @@ -272,7 +277,7 @@ def get_leaderboard_milestones(self, leaderboard_id: int) -> "list[MilestoneItem """Get all milestones for a leaderboard""" self.cursor.execute( """ - SELECT id, name, code, description, created_at + SELECT id, name, code, description, created_at, exclude_gpus FROM leaderboard.milestones WHERE leaderboard_id = %s ORDER BY created_at @@ -286,6 +291,7 @@ def get_leaderboard_milestones(self, leaderboard_id: int) -> "list[MilestoneItem "code": row[2], "description": row[3], "created_at": row[4], + "exclude_gpus": str.split(row[5], ";"), } for row in self.cursor.fetchall() ] diff --git a/src/libkernelbot/task.py b/src/libkernelbot/task.py index 2cb38433..efd305bd 100644 --- a/src/libkernelbot/task.py +++ b/src/libkernelbot/task.py @@ -29,6 +29,7 @@ class MilestoneData: name: str code: str description: str = "" + exclude_gpus: list[str] = dataclasses.field(default_factory=list) TestCaseType = Dict[str, Union[int, str]] diff --git a/src/migrations/20250726_01_j9Q3S-milestone-exclude-column.py b/src/migrations/20250726_01_j9Q3S-milestone-exclude-column.py new file mode 100644 index 00000000..714ccaa9 --- /dev/null +++ b/src/migrations/20250726_01_j9Q3S-milestone-exclude-column.py @@ -0,0 +1,14 @@ +""" +Adds an exclude column to indicate that a milestone is not compatible with certain GPUs +""" + +from yoyo import step + +__depends__ = {"20250725_01_hwite-add-milestone-table"} + +steps = [ + step( + "ALTER TABLE leaderboard.milestones ADD COLUMN exclude_gpus TEXT NOT NULL DEFAULT '';", + "ALTER TABLE leaderboard.milestones DROP COLUMN exclude_gpus;", + ) +] From 1a12c3a9ce64b635f63451f01a5ec88fce9600f9 Mon Sep 17 00:00:00 2001 From: ngc92 <7938269+ngc92@users.noreply.github.com> Date: Sat, 26 Jul 2025 20:09:21 +0200 Subject: [PATCH 07/12] move milestone display to backend --- src/kernelbot/cogs/leaderboard_cog.py | 85 +------------------------ src/libkernelbot/backend.py | 91 ++++++++++++++++++++++++++- 2 files changed, 93 insertions(+), 83 deletions(-) diff --git a/src/kernelbot/cogs/leaderboard_cog.py b/src/kernelbot/cogs/leaderboard_cog.py index 4a909474..cba30cb5 100644 --- a/src/kernelbot/cogs/leaderboard_cog.py +++ b/src/kernelbot/cogs/leaderboard_cog.py @@ -1,4 +1,3 @@ -import math from datetime import datetime, timedelta from io import StringIO from typing import TYPE_CHECKING, List, Optional @@ -23,9 +22,8 @@ RunItem, SubmissionItem, ) -from libkernelbot.report import make_benchmark_log from libkernelbot.submission import SubmissionRequest, prepare_submission -from libkernelbot.utils import format_time, run_item_to_run_result, setup_logging +from libkernelbot.utils import format_time, setup_logging if TYPE_CHECKING: from kernelbot.main import ClusterBot @@ -611,53 +609,10 @@ async def get_task_milestones( ): await interaction.response.defer(ephemeral=True) - message = f"# Milestones for `{leaderboard_name}`\n" - try: - with self.bot.leaderboard_db as db: - lb = db.get_leaderboard(leaderboard_name) - milestones = db.get_leaderboard_milestones(leaderboard_id=lb["id"]) - - if len(milestones) == 0: - await send_discord_message( - interaction, - f"Leaderboard `{leaderboard_name}` does not provide any milestones", - ephemeral=True, - ) - return - - for milestone in milestones: - message += f"## {milestone['name']}\n" - message += milestone["description"] + "\n" - with self.bot.leaderboard_db as db: - runs = db.get_runs_generic(milestone_id=milestone["id"]) - - runs = [r for r in runs if r["mode"] == SubmissionMode.LEADERBOARD.value] - - if len(runs) == 0: - message += "⚠️ No runs available. Maybe they haven't been triggered yet?\n" - - if gpu is not None: - runs = [r for r in runs if r["runner"] == gpu] - if len(runs) == 0: - message += f"⚠️ No runs available for GPU {gpu}\n" - - max_len = 0 - min_val = float("inf") - for run in runs: - max_len = max(max_len, len(run["runner"])) - min_val = min(min_val, run["score"]) - - digits = max(0, 1 - math.floor(math.log10(min_val))) - - message += "```\n" - for run in runs: - message += f" {run['runner']:<{max_len}}: {run['score']:.{digits}f}\n" - message += "```\n\n" - await send_discord_message( interaction, - message, + self.bot.backend.get_milestone_overview(leaderboard_name, gpu), ephemeral=True, ) @@ -685,41 +640,7 @@ async def show_milestone_result( gpu: Optional[str] = None, ): await interaction.response.defer(ephemeral=True) - with self.bot.leaderboard_db as db: - lb = db.get_leaderboard(leaderboard_name) - milestones = db.get_leaderboard_milestones(leaderboard_id=lb["id"]) - - selected = None - for milestone in milestones: - if milestone["name"].lower() == milestone_name.lower(): - selected = milestone - break - - if selected is None: - await send_discord_message( - interaction, - f"Could not find milestone `{milestone_name}` for leaderboard `{leaderboard_name}`", - ephemeral=True, - ) - return - - with self.bot.leaderboard_db as db: - runs = db.get_runs_generic(milestone_id=selected["id"]) - - runs = [r for r in runs if r["mode"] == SubmissionMode.LEADERBOARD.value] - - if len(runs) == 0: - await send_discord_message( - interaction, - f"⚠️ No runs available for milestone `{milestone_name}`." - "Maybe they haven't been triggered yet?", - ephemeral=True, - ) - return - - for run in runs: - log = make_benchmark_log(run_item_to_run_result(run)) - message = f"{milestone_name} on {run['runner']}\n```{log}```\n" + for message in self.bot.backend.get_milestone_result(leaderboard_name, milestone_name, gpu): await send_discord_message( interaction, message, diff --git a/src/libkernelbot/backend.py b/src/libkernelbot/backend.py index 3874b142..ee832fc0 100644 --- a/src/libkernelbot/backend.py +++ b/src/libkernelbot/backend.py @@ -1,5 +1,6 @@ import asyncio import copy +import math from datetime import datetime from types import SimpleNamespace from typing import Optional @@ -11,12 +12,13 @@ MultiProgressReporter, RunProgressReporter, generate_report, + make_benchmark_log, make_short_report, ) from libkernelbot.run_eval import FullResult from libkernelbot.submission import ProcessedSubmissionRequest, compute_score from libkernelbot.task import LeaderboardTask, build_task_config -from libkernelbot.utils import setup_logging +from libkernelbot.utils import KernelBotError, run_item_to_run_result, setup_logging logger = setup_logging(__name__) @@ -224,3 +226,90 @@ async def handle_submission( def _get_arch(self, gpu_type: GPU): return GPU_TO_SM[gpu_type.name] + + def get_milestone_overview(self, leaderboard_name: str, gpu: Optional[str] = None) -> str: + """ + Generates a message that gives an overview over milestone performance. + """ + message = f"# Milestones for `{leaderboard_name}`\n" + + with self.bot.leaderboard_db as db: + lb = db.get_leaderboard(leaderboard_name) + milestones = db.get_leaderboard_milestones(leaderboard_id=lb["id"]) + + if len(milestones) == 0: + return f"Leaderboard `{leaderboard_name}` does not provide any milestones" + + for milestone in milestones: + message += f"## {milestone['name']}\n" + message += milestone["description"] + "\n" + with self.bot.leaderboard_db as db: + runs = db.get_runs_generic(milestone_id=milestone["id"]) + + runs = [r for r in runs if r["mode"] == SubmissionMode.LEADERBOARD.value] + + if len(runs) == 0: + message += "⚠️ No runs available. Maybe they haven't been triggered yet?\n" + + if gpu is not None: + runs = [r for r in runs if r["runner"] == gpu] + if len(runs) == 0: + message += f"⚠️ No runs available for GPU {gpu}\n" + + max_len = 0 + min_val = float("inf") + for run in runs: + max_len = max(max_len, len(run["runner"])) + min_val = min(min_val, run["score"]) + + digits = max(0, 1 - math.floor(math.log10(min_val))) + + message += "```\n" + for run in runs: + message += f" {run['runner']:<{max_len}}: {run['score']:.{digits}f}\n" + message += "```\n\n" + + return message + + async def get_milestone_result( + self, + leaderboard_name: str, + milestone_name: str, + gpu: Optional[str] = None, + ) -> list[str]: + with self.db as db: + lb = db.get_leaderboard(leaderboard_name) + milestones = db.get_leaderboard_milestones(leaderboard_id=lb["id"]) + + selected = None + for milestone in milestones: + if milestone["name"].lower() == milestone_name.lower(): + selected = milestone + break + + if selected is None: + raise KernelBotError( + f"Could not find milestone `{milestone_name}` for leaderboard `{leaderboard_name}`" + ) + + with self.db as db: + runs = db.get_runs_generic(milestone_id=selected["id"]) + + runs = [r for r in runs if r["mode"] == SubmissionMode.LEADERBOARD.value] + + if len(runs) == 0: + return [ + f"⚠️ No runs available for milestone `{milestone_name}`. Maybe they haven't been triggered yet?" + ] + if gpu is not None: + runs = [r for r in runs if r["runner"] == gpu] + + if len(runs) == 0: + return [f"⚠️ No runs available for GPU {gpu}"] + + messages = [] + for run in runs: + log = make_benchmark_log(run_item_to_run_result(run)) + messages.append(f"{milestone_name} on {run['runner']}\n```{log}```\n") + + return messages From 49472f2ed0bc251f433acb76d1755f3862c7c308 Mon Sep 17 00:00:00 2001 From: ngc92 <7938269+ngc92@users.noreply.github.com> Date: Sat, 26 Jul 2025 20:12:23 +0200 Subject: [PATCH 08/12] only store leaderboard runs in DB --- src/kernelbot/cogs/admin_cog.py | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/src/kernelbot/cogs/admin_cog.py b/src/kernelbot/cogs/admin_cog.py index 3506b008..adb6b69e 100644 --- a/src/kernelbot/cogs/admin_cog.py +++ b/src/kernelbot/cogs/admin_cog.py @@ -434,6 +434,13 @@ async def submit_milestone(milestone, gpu, reporter): with backend.db as db: for key, value in result.runs.items(): + # Only store LB runs in the database; + # we still want to run test/benchmark to validate + # that the code actually passes, but for all other + # purposes we only need the leaderboard run + if key != SubmissionMode.LEADERBOARD.value: + continue + db.create_submission_run( milestone=milestone["id"], start=value.start, From 603e858ecb26105743b75e01e8eec3a10e130f62 Mon Sep 17 00:00:00 2001 From: ngc92 <7938269+ngc92@users.noreply.github.com> Date: Sat, 26 Jul 2025 22:57:14 +0200 Subject: [PATCH 09/12] lint/small refactor --- src/kernelbot/cogs/admin_cog.py | 54 ++----------------- src/libkernelbot/backend.py | 47 +++++++++++++++- src/libkernelbot/leaderboard_db.py | 4 +- .../20250725_01_hwite-add-milestone-table.py | 3 +- 4 files changed, 55 insertions(+), 53 deletions(-) diff --git a/src/kernelbot/cogs/admin_cog.py b/src/kernelbot/cogs/admin_cog.py index adb6b69e..66650d07 100644 --- a/src/kernelbot/cogs/admin_cog.py +++ b/src/kernelbot/cogs/admin_cog.py @@ -20,10 +20,9 @@ ) from kernelbot.env import env from kernelbot.ui.misc import ConfirmationView, DeleteConfirmationModal, GPUSelectionView -from libkernelbot.consts import GitHubGPU, ModalGPU, SubmissionMode, get_gpu_by_name +from libkernelbot.consts import GitHubGPU, ModalGPU, get_gpu_by_name from libkernelbot.leaderboard_db import LeaderboardDoesNotExist, LeaderboardItem, SubmissionItem -from libkernelbot.submission import compute_score -from libkernelbot.task import LeaderboardDefinition, make_task_definition +from libkernelbot.task import LeaderboardDefinition, LeaderboardTask, make_task_definition from libkernelbot.utils import ( KernelBotError, setup_logging, @@ -401,7 +400,7 @@ async def _submit_milestones( leaderboard_item = db.get_leaderboard(leaderboard_name) milestones = db.get_leaderboard_milestones(leaderboard_item["id"]) - task: "LeaderboardTask" = leaderboard_item["task"] + task: LeaderboardTask = leaderboard_item["task"] # ok, submit all that are missing submit_tasks = [] @@ -410,50 +409,6 @@ async def _submit_milestones( reporters = MultiProgressReporterDiscord(interaction) await reporters.show(f"Milestone runs for {leaderboard_name}") - async def submit_milestone(milestone, gpu, reporter): - result = await backend.submit_leaderboard( - -1, - milestone["code"], - "milestone.py", - gpu, - reporter, - task, - SubmissionMode.LEADERBOARD, - None, - ) - - # we do not allow milestone runs to fail - if not result.success: - logger.error(f"Milestone run failed: {result}") - raise KernelBotError(f"Milestone run failed: {result.error}") - - for key, value in result.runs.items(): - if not value.run.success or not value.run.passed: - logger.error(f"Milestone run {key} failed: {value}") - raise KernelBotError(f"Milestone run {key} failed.") - - with backend.db as db: - for key, value in result.runs.items(): - # Only store LB runs in the database; - # we still want to run test/benchmark to validate - # that the code actually passes, but for all other - # purposes we only need the leaderboard run - if key != SubmissionMode.LEADERBOARD.value: - continue - - db.create_submission_run( - milestone=milestone["id"], - start=value.start, - end=value.end, - mode=key, - runner=gpu.name, - score=compute_score(result, task, -1), - secret=False, - compilation=value.compilation, - result=value.run, - system=result.system, - ) - if gpus is None: gpus = leaderboard_item["gpu_types"] @@ -479,8 +434,9 @@ async def submit_milestone(milestone, gpu, reporter): continue submit_tasks.append( - submit_milestone( + backend.submit_milestone_run( milestone, + task, get_gpu_by_name(gpu), reporters.add_run(f"Milestone {milestone['name']} on {gpu}"), ) diff --git a/src/libkernelbot/backend.py b/src/libkernelbot/backend.py index ee832fc0..e0b26e86 100644 --- a/src/libkernelbot/backend.py +++ b/src/libkernelbot/backend.py @@ -299,7 +299,8 @@ async def get_milestone_result( if len(runs) == 0: return [ - f"⚠️ No runs available for milestone `{milestone_name}`. Maybe they haven't been triggered yet?" + f"⚠️ No runs available for milestone `{milestone_name}`. " + f"Maybe they haven't been triggered yet?" ] if gpu is not None: runs = [r for r in runs if r["runner"] == gpu] @@ -313,3 +314,47 @@ async def get_milestone_result( messages.append(f"{milestone_name} on {run['runner']}\n```{log}```\n") return messages + + async def submit_milestone_run(self, milestone, task, gpu, reporter): + result = await self.submit_leaderboard( + -1, + milestone["code"], + "milestone.py", + gpu, + reporter, + task, + SubmissionMode.LEADERBOARD, + None, + ) + + # we do not allow milestone runs to fail + if not result.success: + logger.error(f"Milestone run failed: {result}") + raise KernelBotError(f"Milestone run failed: {result.error}") + + for key, value in result.runs.items(): + if not value.run.success or not value.run.passed: + logger.error(f"Milestone run {key} failed: {value}") + raise KernelBotError(f"Milestone run {key} failed.") + + with self.db as db: + for key, value in result.runs.items(): + # Only store LB runs in the database; + # we still want to run test/benchmark to validate + # that the code actually passes, but for all other + # purposes we only need the leaderboard run + if key != SubmissionMode.LEADERBOARD.value: + continue + + db.create_submission_run( + milestone=milestone["id"], + start=value.start, + end=value.end, + mode=key, + runner=gpu.name, + score=compute_score(result, task, -1), + secret=False, + compilation=value.compilation, + result=value.run, + system=result.system, + ) diff --git a/src/libkernelbot/leaderboard_db.py b/src/libkernelbot/leaderboard_db.py index 22acbdab..5be15436 100644 --- a/src/libkernelbot/leaderboard_db.py +++ b/src/libkernelbot/leaderboard_db.py @@ -494,8 +494,8 @@ def create_submission_run( } self.cursor.execute( """ - INSERT INTO leaderboard.runs (submission_id, milestone_id, start_time, end_time, mode, - secret, runner, score, passed, compilation, meta, result, system_info + INSERT INTO leaderboard.runs (submission_id, milestone_id, start_time, end_time, + mode, secret, runner, score, passed, compilation, meta, result, system_info ) VALUES (%s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s) """, diff --git a/src/migrations/20250725_01_hwite-add-milestone-table.py b/src/migrations/20250725_01_hwite-add-milestone-table.py index 7756f257..25e0eebc 100644 --- a/src/migrations/20250725_01_hwite-add-milestone-table.py +++ b/src/migrations/20250725_01_hwite-add-milestone-table.py @@ -11,7 +11,8 @@ """ CREATE TABLE IF NOT EXISTS leaderboard.milestones ( id SERIAL PRIMARY KEY, - leaderboard_id INTEGER NOT NULL REFERENCES leaderboard.leaderboard(id) ON DELETE CASCADE, + leaderboard_id INTEGER NOT NULL REFERENCES leaderboard.leaderboard(id) + ON DELETE CASCADE, name TEXT NOT NULL, code TEXT NOT NULL, description TEXT, From a291c36ed22217a53cf5bf8c0604e5485e6e0984 Mon Sep 17 00:00:00 2001 From: ngc92 <7938269+ngc92@users.noreply.github.com> Date: Sun, 27 Jul 2025 01:43:15 +0200 Subject: [PATCH 10/12] update milestones when leaderboard gets updated --- src/kernelbot/cogs/admin_cog.py | 17 ++++++++++++++++- src/libkernelbot/leaderboard_db.py | 10 ++++++++++ 2 files changed, 26 insertions(+), 1 deletion(-) diff --git a/src/kernelbot/cogs/admin_cog.py b/src/kernelbot/cogs/admin_cog.py index 66650d07..8c2173d6 100644 --- a/src/kernelbot/cogs/admin_cog.py +++ b/src/kernelbot/cogs/admin_cog.py @@ -807,7 +807,7 @@ async def _create_update_plan( # noqa: C901 return update_list, create_list - async def update_competition( + async def update_competition( # noqa: C901 self, interaction: discord.Interaction, spec_file: Path, force: bool = False ): try: @@ -857,6 +857,21 @@ async def update_competition( entry["name"], self._parse_deadline(entry["deadline"]), task ) new_lb: LeaderboardItem = db.get_leaderboard(entry["name"]) + # delete old milestones + db.delete_milestones(new_lb["id"]) + # and (re)-create new ones + for milestone in task.milestones: + db.create_milestone( + new_lb["id"], + milestone.name, + milestone.code, + description=milestone.description, + exclude_gpus=milestone.exclude_gpus, + ) + + # and finally trigger re-run + if task.milestones: + await self._submit_milestones(interaction, new_lb["name"]) forum_id = new_lb["forum_id"] try: diff --git a/src/libkernelbot/leaderboard_db.py b/src/libkernelbot/leaderboard_db.py index 5be15436..6918be1e 100644 --- a/src/libkernelbot/leaderboard_db.py +++ b/src/libkernelbot/leaderboard_db.py @@ -310,6 +310,16 @@ def delete_milestone_runs(self, leaderboard_id: int): ) self.connection.commit() + def delete_milestones(self, leaderboard_id: int): + self.cursor.execute( + """ + DELETE FROM leaderboard.milestones + WHERE leaderboard_id = %s; + """, + (leaderboard_id,), + ) + self.connection.commit() + def get_runs_generic( self, *, milestone_id: Optional[int] = None, submission_id: Optional[int] = None ) -> List["RunItem"]: From b6d9e0f66082929890b6592dd3ebac68a563acb5 Mon Sep 17 00:00:00 2001 From: ngc92 <7938269+ngc92@users.noreply.github.com> Date: Mon, 28 Jul 2025 01:01:28 +0200 Subject: [PATCH 11/12] update unit tests to cover milestone data --- unit-tests/test_leaderboard_db.py | 132 ++++++++++++++++++++++++++++-- unit-tests/test_task.py | 24 ++++-- 2 files changed, 142 insertions(+), 14 deletions(-) diff --git a/unit-tests/test_leaderboard_db.py b/unit-tests/test_leaderboard_db.py index 939f69b9..700e800d 100644 --- a/unit-tests/test_leaderboard_db.py +++ b/unit-tests/test_leaderboard_db.py @@ -1,8 +1,11 @@ import copy import dataclasses import datetime +import decimal import subprocess import time +from pathlib import Path +from unittest.mock import ANY import pytest from test_report import sample_compile_result, sample_run_result, sample_system_info @@ -16,8 +19,14 @@ @pytest.fixture(scope="module") def docker_compose(): + tgt_path = Path.cwd() + if tgt_path.name == "unit-tests": + tgt_path = tgt_path.parent + """Start a test database and run migrations""" - subprocess.check_call(["docker", "compose", "-f", "docker-compose.test.yml", "up", "-d"]) + subprocess.check_call( + ["docker", "compose", "-f", "docker-compose.test.yml", "up", "-d"], cwd=tgt_path + ) try: # Wait for migrations to finish @@ -26,6 +35,7 @@ def docker_compose(): ["docker", "compose", "-f", "docker-compose.test.yml", "ps", "-q", "migrate-test"], capture_output=True, text=True, + cwd=tgt_path, ) if not result.stdout.strip(): # Container no longer exists @@ -37,6 +47,7 @@ def docker_compose(): ["docker", "compose", "-f", "docker-compose.test.yml", "logs", "migrate-test"], capture_output=True, text=True, + cwd=tgt_path, ) if "error" in logs.stdout.lower(): @@ -52,7 +63,9 @@ def docker_compose(): ssl_mode="disable", ) finally: - subprocess.run(["docker", "compose", "-f", "docker-compose.test.yml", "down", "-v"]) + subprocess.run( + ["docker", "compose", "-f", "docker-compose.test.yml", "down", "-v"], cwd=tgt_path + ) def _nuke_contents(db): @@ -114,7 +127,7 @@ def _create_submission_run( ): """Creates a submission run with suitable default values""" db.create_submission_run( - submission, + submission=submission, start=start or datetime.datetime.now(tz=datetime.timezone.utc), end=end or (datetime.datetime.now(tz=datetime.timezone.utc) + datetime.timedelta(seconds=10)), @@ -268,9 +281,9 @@ def test_leaderboard_submission_basic(database, submit_leaderboard): with database as db: end_time = submit_time + datetime.timedelta(seconds=10) db.create_submission_run( - sub_id, - submit_time, - end_time, + submission=sub_id, + start=submit_time, + end=end_time, mode="test", secret=False, runner="A100", @@ -282,9 +295,9 @@ def test_leaderboard_submission_basic(database, submit_leaderboard): # run ends after the contest deadline; this is valid end_time_2 = submit_time + datetime.timedelta(days=1, hours=1) db.create_submission_run( - sub_id, - submit_time, - end_time_2, + submission=sub_id, + start=submit_time, + end=end_time_2, mode="leaderboard", secret=True, runner="H100", @@ -577,6 +590,107 @@ def test_leaderboard_update(database, task_directory): } +def test_leaderboard_milestones(database, submit_leaderboard): + with database as db: + lb_id = db.get_leaderboard_id("submit-leaderboard") + milestones = db.get_leaderboard_milestones(lb_id) + assert milestones == [] + + # at this point, created_at is filled in at the DB level, + # so we cannot set a fixed value for it in the tests below + db.create_milestone(lb_id, "Milestone", "sample code", "Test milestone") + db.create_milestone( + lb_id, "Milestone2", "other code", "Second milestone", exclude_gpus=["T4"] + ) + milestones = db.get_leaderboard_milestones(lb_id) + assert milestones == [ + { + "code": "sample code", + "created_at": ANY, + "description": "Test milestone", + "exclude_gpus": [""], + "id": 1, + "name": "Milestone", + }, + { + "code": "other code", + "created_at": ANY, + "description": "Second milestone", + "exclude_gpus": ["T4"], + "id": 2, + "name": "Milestone2", + }, + ] + + db.delete_milestones(lb_id) + milestones = db.get_leaderboard_milestones(lb_id) + assert milestones == [] + + +def test_leaderboard_milestone_runs(database, submit_leaderboard): + with database as db: + lb_id = db.get_leaderboard_id("submit-leaderboard") + ms_id = db.create_milestone(lb_id, "Milestone", "sample code", "Test milestone") + + start = datetime.datetime.now(tz=datetime.timezone.utc) + end = start + datetime.timedelta(seconds=10) + db.create_submission_run( + milestone=ms_id, + start=start, + end=end, + mode="leaderboard", + secret=False, + runner="A100", + score=5, + compilation=None, + result=sample_run_result(), + system=sample_system_info(), + ) + + runs = db.get_runs_generic(milestone_id=ms_id) + assert runs == [ + { + "compilation": None, + "start_time": start, + "end_time": end, + "meta": { + "command": "./test", + "duration": 1.5, + "exit_code": 0, + "stderr": "", + "stdout": "All tests passed", + "success": True, + }, + "mode": "leaderboard", + "passed": True, + "result": { + "test-count": "3", + "test.0.message": "Addition works correctly", + "test.0.spec": "Test addition", + "test.0.status": "pass", + "test.1.spec": "Test multiplication", + "test.1.status": "pass", + "test.2.error": "Division by zero", + "test.2.spec": "Test division", + "test.2.status": "fail", + }, + "runner": "A100", + "score": decimal.Decimal("5"), + "secret": False, + "system": { + "cpu": "Intel i9-12900K", + "gpu": "NVIDIA RTX 4090", + "platform": "Linux-5.15.0", + "torch": "2.0.1+cu118", + }, + } + ] + + db.delete_milestone_runs(lb_id) + runs = db.get_runs_generic(milestone_id=ms_id) + assert runs == [] + + def test_generate_stats(database, submit_leaderboard): with database as db: start = datetime.datetime.now(tz=datetime.timezone.utc) diff --git a/unit-tests/test_task.py b/unit-tests/test_task.py index f3abc138..98c1e4f3 100644 --- a/unit-tests/test_task.py +++ b/unit-tests/test_task.py @@ -10,6 +10,7 @@ Language, LeaderboardDefinition, LeaderboardTask, + MilestoneData, PythonTaskData, RankCriterion, build_task_config, @@ -57,7 +58,6 @@ def test_from_dict_python_task(): def test_from_dict_cuda_task(): - """Test creating LeaderboardTask from dict with CUDA config""" """Test creating LeaderboardTask from dict with CUDA config""" data = { "lang": "cu", @@ -93,7 +93,7 @@ def test_type_mismatch(): ) -def test_to_dict(leaderboard_task): +def test_to_dict(leaderboard_task: LeaderboardTask): """Test converting LeaderboardTask to dict""" result = leaderboard_task.to_dict() @@ -114,7 +114,7 @@ def test_to_dict(leaderboard_task): ] -def test_serialization_roundtrip(leaderboard_task): +def test_serialization_roundtrip(leaderboard_task: LeaderboardTask): """Test to_str and from_str work together""" json_str = leaderboard_task.to_str() reconstructed = LeaderboardTask.from_str(json_str) @@ -122,7 +122,7 @@ def test_serialization_roundtrip(leaderboard_task): assert reconstructed == leaderboard_task -def test_build_task_config_python(leaderboard_task): +def test_build_task_config_python(leaderboard_task: LeaderboardTask): """Test build_task_config with Python task and submission content.""" submission_content = "print('Hello World')" arch = "sm_80" @@ -235,6 +235,11 @@ def test_build_task_config_cuda(): templates: Python: "template.py" CUDA: "template.cu" +milestones: + - name: "Milestone" + source: "milestone.py" + description: "This milestone is a test milestone" + exclude_gpus: ["A100"] """ @@ -245,13 +250,14 @@ def task_directory(tmp_path): Path.write_text(tmp_path / "kernel.py", "def kernel(): pass") Path.write_text(tmp_path / "template.py", "# Python template") Path.write_text(tmp_path / "template.cu", "// CUDA template") + Path.write_text(tmp_path / "milestone.py", "def milestone(): pass") # Create task.yml Path.write_text(tmp_path / "task.yml", TASK_YAML) return tmp_path -def test_make_task_definition(task_directory): +def test_make_task_definition(task_directory: Path): """Test make_task_definition with a complete YAML structure""" # Test the function @@ -261,6 +267,14 @@ def test_make_task_definition(task_directory): assert isinstance(result, LeaderboardDefinition) assert result.description == "Test task description" assert result.templates == {"Python": "# Python template", "CUDA": "// CUDA template"} + assert result.milestones == [ + MilestoneData( + name="Milestone", + code="def milestone(): pass", + description="This milestone is a test milestone", + exclude_gpus=["A100"], + ) + ] # Verify the task task = result.task From f339298e299489680f3dc7fe90c6293f86437e78 Mon Sep 17 00:00:00 2001 From: ngc92 <7938269+ngc92@users.noreply.github.com> Date: Sat, 9 Aug 2025 11:37:17 +0200 Subject: [PATCH 12/12] test --- .github/workflows/{unit_tests.yml => testing.yml} | 3 +++ 1 file changed, 3 insertions(+) rename .github/workflows/{unit_tests.yml => testing.yml} (96%) diff --git a/.github/workflows/unit_tests.yml b/.github/workflows/testing.yml similarity index 96% rename from .github/workflows/unit_tests.yml rename to .github/workflows/testing.yml index cf4a7356..94c45083 100644 --- a/.github/workflows/unit_tests.yml +++ b/.github/workflows/testing.yml @@ -6,6 +6,9 @@ on: - main - dev pull_request: + branches: + - main + - dev jobs: unit-tests: