Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion .devops/nix/package.nix
Original file line number Diff line number Diff line change
Expand Up @@ -103,6 +103,7 @@ let
vulkan-headers
vulkan-loader
shaderc
spirv-headers
];
in

Expand Down Expand Up @@ -146,7 +147,6 @@ effectiveStdenv.mkDerivation (finalAttrs: {
ninja
pkg-config
git
spirv-headers
]
++ optionals useCuda [
cudaPackages.cuda_nvcc
Expand Down
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -110,6 +110,7 @@ uv.lock

# Nix

flake.lock
/result

# Test binaries
Expand Down
31 changes: 31 additions & 0 deletions convert_hf_to_gguf.py
Original file line number Diff line number Diff line change
Expand Up @@ -1570,6 +1570,9 @@ def get_vocab_base_pre(self, tokenizer) -> str:
if chkhsh == "862f827721df956049dff5ca81a57f29e575280bc622e290d3bf4e35eca29015":
# ref: https://huggingface.co/codefuse-ai/F2LLM-v2-4B
res = "f2llmv2"
if chkhsh == "62f6fb0a6fd5098caeabb19b07a5c1099cafc8b9c40eab6ea89ece4ec02fbc57":
# ref: https://huggingface.co/sarvamai/sarvam-30b
res = "sarvam-moe"

if res is None:
logger.warning("\n")
Expand Down Expand Up @@ -11591,6 +11594,34 @@ def prepare_tensors(self):
raise ValueError(f"Unprocessed experts: {experts}")


@ModelBase.register("SarvamMoEForCausalLM", "modeling_sarvam_moe.SarvamMoEForCausalLM")
class SarvamMoEModel(BailingMoeV2Model):
model_arch = gguf.MODEL_ARCH.BAILINGMOE2
# Sarvam-MoE shares the BailingMoeV2 architecture; only differences:
# - full rotary (no partial_rotary_factor)
# - expert bias is zero-mean normalized at load time

def set_gguf_parameters(self):
super().set_gguf_parameters()
hparams = self.hparams
if (rope_dim := hparams.get("head_dim")) is None:
rope_dim = hparams["hidden_size"] // hparams["num_attention_heads"]
# Override the partial-rotary value written by BailingMoeV2 with the full rotary dim
self.gguf_writer.add_rope_dimension_count(rope_dim)

@classmethod
def filter_tensors(cls, item: tuple[str, Callable[[], Tensor]]) -> tuple[str, Callable[[], Tensor]] | None:
name, gen = item
if name.endswith(".expert_bias"):
# Sarvam normalizes expert bias to zero mean
inner = gen

def gen():
t = inner()
return t - t.mean()
return super().filter_tensors((name, gen))


@ModelBase.register("GroveMoeForCausalLM", "modeling_grove_moe.GroveMoeForCausalLM")
class GroveMoeModel(TextModel):
model_arch = gguf.MODEL_ARCH.GROVEMOE
Expand Down
1 change: 1 addition & 0 deletions convert_hf_to_gguf_update.py
Original file line number Diff line number Diff line change
Expand Up @@ -155,6 +155,7 @@ class TOKENIZER_TYPE(IntEnum):
{"name": "joyai-llm", "tokt": TOKENIZER_TYPE.BPE, "repo": "https://huggingface.co/jdopensource/JoyAI-LLM-Flash", },
{"name": "kanana2", "tokt": TOKENIZER_TYPE.BPE, "repo": "https://huggingface.co/kakaocorp/kanana-2-30b-a3b-instruct-2601", },
{"name": "f2llmv2", "tokt": TOKENIZER_TYPE.BPE, "repo": "https://huggingface.co/codefuse-ai/F2LLM-v2-4B", },
{"name": "sarvam-moe", "tokt": TOKENIZER_TYPE.BPE, "repo": "https://huggingface.co/sarvamai/sarvam-30b", },
]

# some models are known to be broken upstream, so we will skip them as exceptions
Expand Down
58 changes: 0 additions & 58 deletions flake.lock

This file was deleted.

13 changes: 13 additions & 0 deletions src/llama-vocab.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -503,6 +503,14 @@ struct llm_tokenizer_bpe : llm_tokenizer {
};
byte_encode = false; // uses raw UTF-8, not GPT-2 byte encoding
break;
case LLAMA_VOCAB_PRE_TYPE_SARVAM_MOE:
// Sarvam uses SPM-style BPE (same shape as Gemma4): spaces replaced with U+2581
// by the normalizer, BPE merges over the whole text on raw UTF-8.
regex_exprs = {
"[^\\n]+|[\\n]+",
};
byte_encode = false;
break;
default:
// default regex for BPE tokenization pre-processing
regex_exprs = {
Expand Down Expand Up @@ -2005,6 +2013,11 @@ void llama_vocab::impl::load(llama_model_loader & ml, const LLM_KV & kv) {
tokenizer_pre == "gemma4") {
pre_type = LLAMA_VOCAB_PRE_TYPE_GEMMA4;
escape_whitespaces = true;
} else if (
tokenizer_pre == "sarvam-moe") {
pre_type = LLAMA_VOCAB_PRE_TYPE_SARVAM_MOE;
escape_whitespaces = true;
clean_spaces = false;
} else if (
tokenizer_pre == "jina-v1-en" ||
tokenizer_pre == "jina-v2-code" ||
Expand Down
1 change: 1 addition & 0 deletions src/llama-vocab.h
Original file line number Diff line number Diff line change
Expand Up @@ -59,6 +59,7 @@ enum llama_vocab_pre_type {
LLAMA_VOCAB_PRE_TYPE_JOYAI_LLM = 48,
LLAMA_VOCAB_PRE_TYPE_JAIS2 = 49,
LLAMA_VOCAB_PRE_TYPE_GEMMA4 = 50,
LLAMA_VOCAB_PRE_TYPE_SARVAM_MOE = 51,
};

struct LLM_KV;
Expand Down
Loading