mirror of https://github.com/deepseek-ai/DeepSeek-V3.git (synced 2025-04-20 02:28:57 -04:00)
BREAKING CHANGE: Restructured model.py into dedicated modules under inference/models/

Key Changes:
- Split monolithic model.py into focused, single-responsibility modules:
  - config.py: Model configuration and hyperparameters
  - attention.py: Multi-head Latent Attention (MLA) implementation
  - moe.py: Mixture of Experts components (Gate, Expert, MoE)
  - linear.py: Linear layer variants with parallel processing support
  - __init__.py: Clean public API exports

Benefits:
- Improved code organization and maintainability
- Better separation of concerns
- Enhanced testability of individual components
- Clearer dependency management
- Simplified future modifications and extensions

Migration:
- Update imports to use the new module structure (see the import sketch below)
- No functional changes to existing implementations
- Backwards compatible with current model weights
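The commit only names the new module files, not the exact symbols that __init__.py re-exports, so the following before/after import sketch is an assumption about how the migration might look rather than the actual public API.

# Hypothetical import migration sketch (module paths per the commit message above;
# the specific class names and any package-level re-exports are assumptions).

# Before: everything lived in the monolithic model.py
# from model import ModelArgs, MLA, MoE

# After: import from the dedicated modules...
from inference.models.config import ModelArgs      # assumed path
# from inference.models.attention import MLA       # assumed path
# from inference.models.moe import Gate, Expert, MoE  # assumed path

# ...or from the package itself, if __init__.py re-exports these names (assumption)
# from inference.models import ModelArgs, MLA, MoE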
36 lines · 946 B · Python
from dataclasses import dataclass
from typing import Literal


@dataclass
class ModelArgs:
    """Model configuration and hyperparameters for DeepSeek-V3 inference."""
    max_batch_size: int = 8
    max_seq_len: int = 4096 * 4
    dtype: Literal["bf16", "fp8"] = "bf16"
    vocab_size: int = 102400
    dim: int = 2048
    inter_dim: int = 10944
    moe_inter_dim: int = 1408
    n_layers: int = 27
    n_dense_layers: int = 1
    n_heads: int = 16
    # moe (Mixture of Experts routing)
    n_routed_experts: int = 64
    n_shared_experts: int = 2
    n_activated_experts: int = 6
    n_expert_groups: int = 1
    n_limited_groups: int = 1
    score_func: Literal["softmax", "sigmoid"] = "softmax"
    route_scale: float = 1.
    # mla (Multi-head Latent Attention projection dimensions)
    q_lora_rank: int = 0
    kv_lora_rank: int = 512
    qk_nope_head_dim: int = 128
    qk_rope_head_dim: int = 64
    v_head_dim: int = 128
    # yarn (rotary embedding extension beyond the original training length)
    original_seq_len: int = 4096
    rope_theta: float = 10000.0
    rope_factor: float = 40
    beta_fast: int = 32
    beta_slow: int = 1
    mscale: float = 1.
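Since ModelArgs is a plain dataclass with a default for every field, it can be constructed directly with keyword overrides or hydrated from a JSON file whose keys match the field names. A minimal usage sketch follows; the import path is taken from the restructuring described above, and the file name "config_671B.json" is illustrative rather than something shown on this page.

import json

from inference.models.config import ModelArgs  # path per the commit message (assumed)

# Override a few defaults directly...
args = ModelArgs(max_batch_size=4, dtype="fp8")

# ...or hydrate the dataclass from a JSON config file. Any key that is not a
# declared field raises a TypeError, which catches typos in the config early.
with open("config_671B.json") as f:  # illustrative file name
    args = ModelArgs(**json.load(f))

print(args.dim, args.n_layers, args.n_routed_experts)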