mirror of
https://github.com/deepseek-ai/DeepSeek-V3.git
synced 2025-06-20 08:33:47 -04:00
22 lines
466 B
JSON
22 lines
466 B
JSON
{
    "vocab_size": 102400,
    "dim": 2048,
    "inter_dim": 5472,
    "moe_inter_dim": 704,
    "n_layers": 14,
    "n_dense_layers": 1,
    "n_heads": 16,
    "n_routed_experts": 32,
    "n_shared_experts": 2,
    "n_activated_experts": 4,
    "route_scale": 1.0,
    "q_lora_rank": 0,
    "kv_lora_rank": 256,
    "qk_nope_head_dim": 128,
    "qk_rope_head_dim": 64,
    "v_head_dim": 128,
    "mscale": 0.707,
    "max_batch_size": 1,
    "max_seq_len": 4096
}