Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions benchmark/benchmark.py
Original file line number Diff line number Diff line change
Expand Up @@ -112,6 +112,9 @@ def _default_results_path(
if draft_quant:
slug = re.sub(r"[^a-z0-9]+", "-", draft_quant.lower()).strip("-")
name = f"{name}-dq-{slug}"
# Timestamp every run so repeated benches never overwrite history.
ts = time.strftime("%Y%m%dT%H%M%SZ", time.gmtime())
name = f"{name}-{ts}"
folder = _slugify_chip(chip) if chip else "unknown-chip"
return Path("benchmark/results") / folder / f"{name}.json"

Expand Down
53 changes: 53 additions & 0 deletions dflash_mlx/archs/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,53 @@
# Copyright 2026 bstnxbt
# MIT License — see LICENSE file
# Based on DFlash (arXiv:2602.06036)

"""
DFlash architecture modular system.

This module provides a pluggable architecture system supporting multiple
model architectures (Qwen3, Llama/Gemma, etc.) with custom attention,
MLP, normalization, and RoPE implementations.

Re-exports the base building blocks plus the concrete per-architecture
implementations so callers can do ``from dflash_mlx.archs import ...``.
"""

# Base import must come first: the registry helpers (register_architecture
# et al.) live here and the architecture modules below depend on them.
from dflash_mlx.archs.base import (
    DFlashAttention,
    DFlashArgs,
    DFlashCache,
    DFlashDecoderLayer,
    DFlashMLP,
    DFlashModel,
    DFlashNorm,
    DFlashRope,
    create_dflash_model,
    extract_context_feature,
    get_architecture_for_model_type,
    list_supported_architectures,
    register_architecture,
)

# Architecture modules are imported for their side effect of registering
# themselves; keep qwen3 before llama to preserve registration order.
# Names within each line are alphabetized to match the base import block.
from dflash_mlx.archs.qwen3 import Qwen3DFlashAttention, Qwen3DFlashMLP, Qwen3DFlashModel
from dflash_mlx.archs.llama import LlamaDFlashAttention, LlamaDFlashMLP, LlamaDFlashModel

__all__ = [
    # Base classes
    "DFlashArgs",
    "DFlashModel",
    "DFlashAttention",
    "DFlashMLP",
    "DFlashNorm",
    "DFlashRope",
    "DFlashCache",
    "DFlashDecoderLayer",
    # Factory functions
    "create_dflash_model",
    "extract_context_feature",
    "get_architecture_for_model_type",
    "list_supported_architectures",
    "register_architecture",
    # Architecture implementations
    "Qwen3DFlashModel",
    "Qwen3DFlashAttention",
    "Qwen3DFlashMLP",
    "LlamaDFlashModel",
    "LlamaDFlashAttention",
    "LlamaDFlashMLP",
]
Loading