mirror of
https://github.com/deepseek-ai/DeepSeek-VL2.git
synced 2025-02-23 06:09:04 -05:00
Update configuration_deepseek.py
Fix typo: change 'gready' to 'greedy'
This commit is contained in:
parent
a8341f36dd
commit
d975d87778
@ -34,7 +34,7 @@ class DeepseekV2Config(PretrainedConfig):
|
|||||||
Number of routed experts, None means dense model.
|
Number of routed experts, None means dense model.
|
||||||
routed_scaling_factor (`float`, *optional*, defaults to 1.0):
|
routed_scaling_factor (`float`, *optional*, defaults to 1.0):
|
||||||
Scaling factor for routed experts.
|
Scaling factor for routed experts.
|
||||||
topk_method (`str`, *optional*, defaults to `gready`):
|
topk_method (`str`, *optional*, defaults to `greedy`):
|
||||||
Topk method used in routed gate.
|
Topk method used in routed gate.
|
||||||
n_group (`int`, *optional*, defaults to None):
|
n_group (`int`, *optional*, defaults to None):
|
||||||
Number of groups for routed experts.
|
Number of groups for routed experts.
|
||||||
@ -132,7 +132,7 @@ class DeepseekV2Config(PretrainedConfig):
|
|||||||
qk_rope_head_dim = 64,
|
qk_rope_head_dim = 64,
|
||||||
v_head_dim = 128,
|
v_head_dim = 128,
|
||||||
qk_nope_head_dim = 128,
|
qk_nope_head_dim = 128,
|
||||||
topk_method = 'gready',
|
topk_method = 'greedy',
|
||||||
n_group = None,
|
n_group = None,
|
||||||
topk_group = None,
|
topk_group = None,
|
||||||
num_experts_per_tok = None,
|
num_experts_per_tok = None,
|
||||||
|
Loading…
Reference in New Issue
Block a user