Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions benchmark/benchmark.py
Original file line number Diff line number Diff line change
Expand Up @@ -112,6 +112,9 @@ def _default_results_path(
if draft_quant:
slug = re.sub(r"[^a-z0-9]+", "-", draft_quant.lower()).strip("-")
name = f"{name}-dq-{slug}"
# Timestamp every run so repeated benches never overwrite history.
ts = time.strftime("%Y%m%dT%H%M%SZ", time.gmtime())
name = f"{name}-{ts}"
folder = _slugify_chip(chip) if chip else "unknown-chip"
return Path("benchmark/results") / folder / f"{name}.json"

Expand Down
53 changes: 53 additions & 0 deletions dflash_mlx/archs/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,53 @@
# Copyright 2026 bstnxbt
# MIT License — see LICENSE file
# Based on DFlash (arXiv:2602.06036)

"""
DFlash architecture modular system.

This module provides a pluggable architecture system supporting multiple
model architectures (Qwen3, Llama/Gemma, etc.) with custom attention,
MLP, normalization, and RoPE implementations.

Re-exports the base building blocks plus the concrete per-architecture
implementations so callers can do ``from dflash_mlx.archs import ...``.
"""

# Base import must come first: the registry helpers (register_architecture
# et al.) live here and the architecture modules below depend on them.
from dflash_mlx.archs.base import (
    DFlashAttention,
    DFlashArgs,
    DFlashCache,
    DFlashDecoderLayer,
    DFlashMLP,
    DFlashModel,
    DFlashNorm,
    DFlashRope,
    create_dflash_model,
    extract_context_feature,
    get_architecture_for_model_type,
    list_supported_architectures,
    register_architecture,
)

# Architecture modules are imported for their side effect of registering
# themselves; keep qwen3 before llama to preserve registration order.
# Names within each line are alphabetized to match the base import block.
from dflash_mlx.archs.qwen3 import Qwen3DFlashAttention, Qwen3DFlashMLP, Qwen3DFlashModel
from dflash_mlx.archs.llama import LlamaDFlashAttention, LlamaDFlashMLP, LlamaDFlashModel

__all__ = [
    # Base classes
    "DFlashArgs",
    "DFlashModel",
    "DFlashAttention",
    "DFlashMLP",
    "DFlashNorm",
    "DFlashRope",
    "DFlashCache",
    "DFlashDecoderLayer",
    # Factory functions
    "create_dflash_model",
    "extract_context_feature",
    "get_architecture_for_model_type",
    "list_supported_architectures",
    "register_architecture",
    # Architecture implementations
    "Qwen3DFlashModel",
    "Qwen3DFlashAttention",
    "Qwen3DFlashMLP",
    "LlamaDFlashModel",
    "LlamaDFlashAttention",
    "LlamaDFlashMLP",
]
Loading