DeepSeek-V3/inference/configs/config_macbook.json

22 lines
466 B
JSON

{
"vocab_size": 102400,
"dim": 2048,
"inter_dim": 5472,
"moe_inter_dim": 704,
"n_layers": 14,
"n_dense_layers": 1,
"n_heads": 16,
"n_routed_experts": 32,
"n_shared_experts": 2,
"n_activated_experts": 4,
"route_scale": 1.0,
"q_lora_rank": 0,
"kv_lora_rank": 256,
"qk_nope_head_dim": 128,
"qk_rope_head_dim": 64,
"v_head_dim": 128,
"mscale": 0.707,
"max_batch_size": 1,
"max_seq_len": 4096
}