
Commit 16dc0f4

llama-model : add dots.llm1 architecture support (#14044)
This commit adds support for the "dots.llm1" architecture (shortened to dots1 or DOTS1 in the code). The only models following this architecture as of this commit are "dots.llm1.inst" and "dots.llm1.base":

* https://huggingface.co/rednote-hilab/dots.llm1.inst
* https://huggingface.co/rednote-hilab/dots.llm1.base

The model architecture is a combination of Qwen and Deepseek parts, as seen here: https://github.com/huggingface/transformers/blob/ffe12627b4e84489d2ab91dd0ec00614855edc79/src/transformers/models/dots1/modular_dots1.py

---

Parts in this commit:

* Various "_DOTS1" constants added around the codebase where a new architecture is expected.
* DotsModel in convert_hf_to_gguf.py, registered for Dots1ForCausalLM, to convert the model to .gguf files. It was written by following the Qwen and DeepseekV2 converters (mostly the Deepseek one was relevant).
* Graph code and architecture code in llama-model.cpp, likewise written by following the qwen3 and deepseek codepaths, with some trial and error until coherent text came out.
* Detection of the dots chat template so that llama.cpp can pick it up.

As of writing (10 June 2025) I have not had the opportunity to do more thorough testing than "prompt it and check whether it responds with gibberish".
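For context, a minimal sketch of how such a conversion is typically invoked. The model path and output name here are hypothetical, and while the positional model directory and the --outfile/--outtype flags match the converter's usual CLI, check --help on your checkout:

# Hedged sketch, not part of this commit: convert a local snapshot of
# dots.llm1.inst to a 16-bit .gguf using the converter this commit extends.
import subprocess
import sys

subprocess.run([
    sys.executable, "convert_hf_to_gguf.py",
    "path/to/dots.llm1.inst",       # hypothetical local HF snapshot directory
    "--outfile", "dots1-f16.gguf",  # hypothetical output name
    "--outtype", "f16",
], check=True)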

File tree

8 files changed: +402 −1 lines changed

convert_hf_to_gguf.py

Lines changed: 102 additions & 0 deletions
@@ -5262,6 +5262,108 @@ def prepare_tensors(self):
         raise ValueError(f"Unprocessed experts: {experts}")


+@ModelBase.register("Dots1ForCausalLM")
+class DotsModel(TextModel):
+    model_arch = gguf.MODEL_ARCH.DOTS1
+
+    _experts: list[dict[str, Tensor]] | None = None
+
+    def set_vocab(self):
+        self._set_vocab_gpt2()
+
+    def set_gguf_parameters(self):
+        super().set_gguf_parameters()
+        hparams = self.hparams
+
+        # If 1) another rednote/dots-family model with similar arch is released,
+        # and 2) you want to use this DotsModel code to convert it to .gguf,
+        # and 3) it complains about "scoring_func" for the gating function,
+        #
+        # then adjust this code and figure out what the gating should be, if
+        # not just SIGMOID.
+        #
+        # As of writing of this, for dots.llm1.inst/dots.llm1.base, the
+        # 'scoring_func' is set to "noaux_tc" (same as Deepseek-V3-0324).
+        if hparams["scoring_func"] == "noaux_tc":
+            self.gguf_writer.add_expert_gating_func(gguf.ExpertGatingFuncType.SIGMOID)
+        else:
+            raise ValueError(f"Unsupported scoring_func value: {hparams['scoring_func']}")
+
+        self.gguf_writer.add_expert_feed_forward_length(hparams["moe_intermediate_size"])
+        self.gguf_writer.add_expert_count(hparams["n_routed_experts"])
+        self.gguf_writer.add_expert_shared_count(hparams["n_shared_experts"])
+        self.gguf_writer.add_expert_weights_scale(hparams["routed_scaling_factor"])
+        self.gguf_writer.add_expert_weights_norm(hparams["norm_topk_prob"])
+        self.gguf_writer.add_leading_dense_block_count(hparams["first_k_dense_replace"])
+
+        # As of writing this comment (9 June 2025), YaRN context extension is
+        # unproven for dots.llm1.inst and dots.llm1.base models to go beyond
+        # 32k context length, according to:
+        # https://huggingface.co/rednote-hilab/dots.llm1.inst/discussions/3
+        #
+        # Try it at your own peril ;)
+        rope_scaling = self.hparams.get("rope_scaling") or {}
+        if rope_scaling.get("rope_type", rope_scaling.get("type")) == "yarn" and "factor" in rope_scaling:
+            self.gguf_writer.add_rope_scaling_type(gguf.RopeScalingType.YARN)
+            self.gguf_writer.add_rope_scaling_factor(rope_scaling["factor"])
+            self.gguf_writer.add_rope_scaling_orig_ctx_len(rope_scaling["original_max_position_embeddings"])
+
+    def modify_tensors(self, data_torch: Tensor, name: str, bid: int | None) -> Iterable[tuple[str, Tensor]]:
+        # rename e_score_correction_bias tensors
+        if name.endswith("e_score_correction_bias"):
+            name = name.replace("e_score_correction_bias", "e_score_correction.bias")
+
+        # skip Multi-Token Prediction (MTP) layers
+        block_count = self.hparams["num_hidden_layers"]
+        match = re.match(r"model.layers.(\d+)", name)
+        if match and int(match.group(1)) >= block_count:
+            return []
+
+        if name.find("mlp.experts") != -1:
+            n_experts = self.hparams["n_routed_experts"]
+            assert bid is not None
+
+            if self._experts is None:
+                self._experts = [{} for _ in range(self.block_count)]
+
+            self._experts[bid][name] = data_torch
+
+            if len(self._experts[bid]) >= n_experts * 3:
+                tensors: list[tuple[str, Tensor]] = []
+
+                # merge the experts into a single 3d tensor
+                for w_name in ["down_proj", "gate_proj", "up_proj"]:
+                    datas: list[Tensor] = []
+
+                    for xid in range(n_experts):
+                        ename = f"model.layers.{bid}.mlp.experts.{xid}.{w_name}.weight"
+                        datas.append(self._experts[bid][ename])
+                        del self._experts[bid][ename]
+
+                    data_torch = torch.stack(datas, dim=0)
+
+                    merged_name = f"model.layers.{bid}.mlp.experts.{w_name}.weight"
+
+                    new_name = self.map_tensor_name(merged_name)
+
+                    tensors.append((new_name, data_torch))
+
+                return tensors
+            else:
+                return []
+
+        return [(self.map_tensor_name(name), data_torch)]
+
+    def prepare_tensors(self):
+        super().prepare_tensors()
+
+        if self._experts is not None:
+            # flatten `list[dict[str, Tensor]]` into `list[str]`
+            experts = [k for d in self._experts for k in d.keys()]
+            if len(experts) > 0:
+                raise ValueError(f"Unprocessed experts: {experts}")
+
+
 @ModelBase.register("PLMForCausalLM")
 class PLMModel(TextModel):
     model_arch = gguf.MODEL_ARCH.PLM
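To see the expert-merging step of modify_tensors in isolation, here is a self-contained sketch with dummy shapes (4 experts and 8x16 down_proj weights are made-up numbers): per projection, the converter collects one 2D weight matrix per expert and stacks them into a single 3D tensor, exactly the torch.stack call above.

# Hedged sketch of the merge performed per (layer, projection) pair.
import torch

n_experts = 4  # made-up; the real model uses many more routed experts
experts = {
    f"model.layers.0.mlp.experts.{xid}.down_proj.weight": torch.randn(8, 16)
    for xid in range(n_experts)
}

datas = [experts[f"model.layers.0.mlp.experts.{xid}.down_proj.weight"]
         for xid in range(n_experts)]
merged = torch.stack(datas, dim=0)

print(merged.shape)  # torch.Size([4, 8, 16]): one 3D tensor for all experts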

gguf-py/gguf/constants.py

Lines changed: 31 additions & 0 deletions
@@ -343,6 +343,7 @@ class MODEL_ARCH(IntEnum):
     WAVTOKENIZER_DEC = auto()
     PLM              = auto()
     BAILINGMOE       = auto()
+    DOTS1            = auto()


 class VISION_PROJECTOR_TYPE(IntEnum):
@@ -623,6 +624,7 @@ class MODEL_TENSOR(IntEnum):
     MODEL_ARCH.WAVTOKENIZER_DEC: "wavtokenizer-dec",
     MODEL_ARCH.PLM:              "plm",
     MODEL_ARCH.BAILINGMOE:       "bailingmoe",
+    MODEL_ARCH.DOTS1:            "dots1"
 }

 VISION_PROJECTOR_TYPE_NAMES: dict[VISION_PROJECTOR_TYPE, str] = {
@@ -2044,6 +2046,31 @@ class MODEL_TENSOR(IntEnum):
         MODEL_TENSOR.FFN_DOWN_SHEXP,
         MODEL_TENSOR.FFN_UP_SHEXP,
     ],
+    MODEL_ARCH.DOTS1: [
+        MODEL_TENSOR.TOKEN_EMBD,
+        MODEL_TENSOR.OUTPUT_NORM,
+        MODEL_TENSOR.OUTPUT,
+        MODEL_TENSOR.ATTN_NORM,
+        MODEL_TENSOR.ATTN_Q,
+        MODEL_TENSOR.ATTN_Q_NORM,
+        MODEL_TENSOR.ATTN_K,
+        MODEL_TENSOR.ATTN_K_NORM,
+        MODEL_TENSOR.ATTN_V,
+        MODEL_TENSOR.ATTN_OUT,
+        MODEL_TENSOR.ATTN_ROT_EMBD,
+        MODEL_TENSOR.FFN_EXP_PROBS_B,
+        MODEL_TENSOR.FFN_NORM,
+        MODEL_TENSOR.FFN_GATE,
+        MODEL_TENSOR.FFN_GATE_EXP,
+        MODEL_TENSOR.FFN_GATE_INP,
+        MODEL_TENSOR.FFN_GATE_SHEXP,
+        MODEL_TENSOR.FFN_DOWN,
+        MODEL_TENSOR.FFN_DOWN_EXP,
+        MODEL_TENSOR.FFN_DOWN_SHEXP,
+        MODEL_TENSOR.FFN_UP,
+        MODEL_TENSOR.FFN_UP_EXP,
+        MODEL_TENSOR.FFN_UP_SHEXP,
+    ],
     # TODO
 }

@@ -2099,6 +2126,10 @@ class MODEL_TENSOR(IntEnum):
     MODEL_ARCH.BAILINGMOE: [
         MODEL_TENSOR.ROPE_FREQS,
     ],
+    MODEL_ARCH.DOTS1: [
+        MODEL_TENSOR.ROPE_FREQS,
+        MODEL_TENSOR.ATTN_ROT_EMBD,
+    ],
 }

 #
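A short sketch (not from this commit) of how these gguf-py tables are consumed; it assumes the gguf package re-exports the constants module, as it does at the time of writing:

# Hedged sketch: the new enum value, its architecture string, and its
# declared tensor list are all reachable through the gguf package.
import gguf

print(gguf.MODEL_ARCH_NAMES[gguf.MODEL_ARCH.DOTS1])  # "dots1"
print(gguf.MODEL_TENSOR.FFN_EXP_PROBS_B in
      gguf.MODEL_TENSORS[gguf.MODEL_ARCH.DOTS1])     # True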

gguf-py/gguf/tensor_mapping.py

Lines changed: 1 addition & 1 deletion
@@ -305,7 +305,7 @@ class TensorNameMap:
         ),

         MODEL_TENSOR.FFN_EXP_PROBS_B: (
-            "model.layers.{bid}.mlp.gate.e_score_correction", # deepseek-v3
+            "model.layers.{bid}.mlp.gate.e_score_correction", # deepseek-v3 dots1
         ),

         # Feed-forward up
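The effect of that one-line change, sketched with gguf-py's name-mapping helper (the expected output is an assumption; verify against your gguf-py revision):

# Hedged sketch: after modify_tensors renames e_score_correction_bias to
# e_score_correction.bias, the dots1 mapping resolves it the same way as
# deepseek-v3, down to the exp_probs_b gguf name.
import gguf

tmap = gguf.get_tensor_name_map(gguf.MODEL_ARCH.DOTS1, 1)
name = tmap.get_name("model.layers.0.mlp.gate.e_score_correction.bias",
                     try_suffixes=(".weight", ".bias"))
print(name)  # expected: "blk.0.exp_probs_b.bias"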

src/llama-arch.cpp

Lines changed: 29 additions & 0 deletions
@@ -72,6 +72,7 @@ static const std::map<llm_arch, const char *> LLM_ARCH_NAMES = {
     { LLM_ARCH_WAVTOKENIZER_DEC, "wavtokenizer-dec" },
     { LLM_ARCH_PLM,              "plm"              },
     { LLM_ARCH_BAILINGMOE,       "bailingmoe"       },
+    { LLM_ARCH_DOTS1,            "dots1"            },
     { LLM_ARCH_UNKNOWN,          "(unknown)"        },
 };

@@ -1555,6 +1556,34 @@ static const std::map<llm_arch, std::map<llm_tensor, const char *>> LLM_TENSOR_N
             { LLM_TENSOR_FFN_UP_SHEXP,   "blk.%d.ffn_up_shexp" },
         },
     },
+    {
+        LLM_ARCH_DOTS1,
+        {
+            { LLM_TENSOR_TOKEN_EMBD,         "token_embd" },
+            { LLM_TENSOR_OUTPUT_NORM,        "output_norm" },
+            { LLM_TENSOR_OUTPUT,             "output" },
+            { LLM_TENSOR_ATTN_NORM,          "blk.%d.attn_norm" },
+            { LLM_TENSOR_ATTN_Q,             "blk.%d.attn_q" },
+            { LLM_TENSOR_ATTN_Q_NORM,        "blk.%d.attn_q_norm" },
+            { LLM_TENSOR_ATTN_K,             "blk.%d.attn_k" },
+            { LLM_TENSOR_ATTN_K_NORM,        "blk.%d.attn_k_norm" },
+            { LLM_TENSOR_ATTN_V,             "blk.%d.attn_v" },
+            { LLM_TENSOR_ATTN_OUT,           "blk.%d.attn_output" },
+            { LLM_TENSOR_FFN_NORM,           "blk.%d.ffn_norm" },
+            { LLM_TENSOR_FFN_GATE,           "blk.%d.ffn_gate" },
+            { LLM_TENSOR_FFN_UP,             "blk.%d.ffn_up" },
+            { LLM_TENSOR_FFN_DOWN,           "blk.%d.ffn_down" },
+            { LLM_TENSOR_FFN_GATE_INP,       "blk.%d.ffn_gate_inp" },
+            { LLM_TENSOR_FFN_GATE_EXPS,      "blk.%d.ffn_gate_exps" },
+            { LLM_TENSOR_FFN_DOWN_EXPS,      "blk.%d.ffn_down_exps" },
+            { LLM_TENSOR_FFN_UP_EXPS,        "blk.%d.ffn_up_exps" },
+            { LLM_TENSOR_FFN_GATE_INP_SHEXP, "blk.%d.ffn_gate_inp_shexp" },
+            { LLM_TENSOR_FFN_GATE_SHEXP,     "blk.%d.ffn_gate_shexp" },
+            { LLM_TENSOR_FFN_DOWN_SHEXP,     "blk.%d.ffn_down_shexp" },
+            { LLM_TENSOR_FFN_UP_SHEXP,       "blk.%d.ffn_up_shexp" },
+            { LLM_TENSOR_FFN_EXP_PROBS_B,    "blk.%d.exp_probs_b" },
+        }
+    },
     {
         LLM_ARCH_UNKNOWN,
         {

src/llama-arch.h

Lines changed: 1 addition & 0 deletions
@@ -76,6 +76,7 @@ enum llm_arch {
     LLM_ARCH_WAVTOKENIZER_DEC,
     LLM_ARCH_PLM,
     LLM_ARCH_BAILINGMOE,
+    LLM_ARCH_DOTS1,
     LLM_ARCH_UNKNOWN,
 };

src/llama-chat.cpp

Lines changed: 20 additions & 0 deletions
@@ -183,6 +183,11 @@ llm_chat_template llm_chat_detect_template(const std::string & tmpl) {
         return LLM_CHAT_TEMPLATE_BAILING;
     } else if (tmpl_contains("<|header_start|>") && tmpl_contains("<|header_end|>")) {
         return LLM_CHAT_TEMPLATE_LLAMA4;
+    } else if (tmpl_contains("<|userprompt|>") &&
+               tmpl_contains("<|endofuserprompt|>") &&
+               tmpl_contains("<|response|>") &&
+               tmpl_contains("<|endofresponse|>")) {
+        return LLM_CHAT_TEMPLATE_DOTS1;
     }
     return LLM_CHAT_TEMPLATE_UNKNOWN;
 }
@@ -643,6 +648,21 @@ int32_t llm_chat_apply_template(
         if (add_ass) {
             ss << "Assistant:";
         }
+    } else if (tmpl == LLM_CHAT_TEMPLATE_DOTS1) {
+        // dots.llm1.inst (DOTS1)
+        for (auto message : chat) {
+            std::string role(message->role);
+            if (role == "system") {
+                ss << "<|system|>" << message->content << "<|endofsystem|>";
+            } else if (role == "user") {
+                ss << "<|userprompt|>" << message->content << "<|endofuserprompt|>";
+            } else if (role == "assistant") {
+                ss << "<|response|>" << message->content << "<|endofresponse|>";
+            }
+        }
+        if (add_ass) {
+            ss << "<|response|>";
+        }
     } else {
         // template not supported
         return -1;
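To make the new template concrete, here is a Python mirror of the C++ branch above (an illustration, not the llama.cpp API), showing the exact string the DOTS1 case of llm_chat_apply_template builds:

# Hedged Python mirror of the C++ DOTS1 template logic above.
def dots1_format(chat: list[dict[str, str]], add_ass: bool = True) -> str:
    wrap = {
        "system":    ("<|system|>",     "<|endofsystem|>"),
        "user":      ("<|userprompt|>", "<|endofuserprompt|>"),
        "assistant": ("<|response|>",   "<|endofresponse|>"),
    }
    out = []
    for msg in chat:
        start, end = wrap[msg["role"]]
        out.append(f"{start}{msg['content']}{end}")
    if add_ass:
        out.append("<|response|>")  # open the assistant turn for generation
    return "".join(out)

print(dots1_format([
    {"role": "system", "content": "You are a helpful assistant."},
    {"role": "user",   "content": "Hello!"},
]))
# -> <|system|>You are a helpful assistant.<|endofsystem|><|userprompt|>Hello!<|endofuserprompt|><|response|>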

src/llama-chat.h

Lines changed: 1 addition & 0 deletions
@@ -43,6 +43,7 @@ enum llm_chat_template {
     LLM_CHAT_TEMPLATE_BAILING,
     LLM_CHAT_TEMPLATE_LLAMA4,
     LLM_CHAT_TEMPLATE_SMOLVLM,
+    LLM_CHAT_TEMPLATE_DOTS1,
     LLM_CHAT_TEMPLATE_UNKNOWN,
 };
