From d975d87778d61e63e6a6b49f48c8724bae9899d0 Mon Sep 17 00:00:00 2001 From: digger yu Date: Fri, 27 Dec 2024 16:08:07 +0800 Subject: [PATCH] Update configuration_deepseek.py MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Fix typo: change 'gready' to 'greedy' --- deepseek_vl2/models/configuration_deepseek.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/deepseek_vl2/models/configuration_deepseek.py b/deepseek_vl2/models/configuration_deepseek.py index a8622c2..af82ba1 100644 --- a/deepseek_vl2/models/configuration_deepseek.py +++ b/deepseek_vl2/models/configuration_deepseek.py @@ -34,7 +34,7 @@ class DeepseekV2Config(PretrainedConfig): Number of routed experts, None means dense model. routed_scaling_factor (`float`, *optional*, defaults to 1.0): Scaling factor or routed experts. - topk_method (`str`, *optional*, defaults to `gready`): + topk_method (`str`, *optional*, defaults to `greedy`): Topk method used in routed gate. n_group (`int`, *optional*, defaults to None): Number of groups for routed experts. @@ -132,7 +132,7 @@ class DeepseekV2Config(PretrainedConfig): qk_rope_head_dim = 64, v_head_dim = 128, qk_nope_head_dim = 128, - topk_method = 'gready', + topk_method = 'greedy', n_group = None, topk_group = None, num_experts_per_tok = None,