DeepSeek-V3/inference/configs/config_236B.json
stack-heap-overflow 4c2fdb8f55 Release DeepSeek-V3
2024-12-26 19:01:57 +08:00

20 lines
455 B
JSON

{
"vocab_size": 102400,
"dim": 5120,
"inter_dim": 12288,
"moe_inter_dim": 1536,
"n_layers": 60,
"n_dense_layers": 1,
"n_heads": 128,
"n_routed_experts": 160,
"n_shared_experts": 2,
"n_activated_experts": 6,
"n_expert_groups": 8,
"n_limited_groups": 3,
"route_scale": 16.0,
"q_lora_rank": 1536,
"kv_lora_rank": 512,
"qk_nope_head_dim": 128,
"qk_rope_head_dim": 64,
"v_head_dim": 128
}