From e15f67af1ce54a8545b0a98cc43ba8e9faba647f Mon Sep 17 00:00:00 2001 From: Konano Date: Sat, 8 Feb 2025 18:28:40 +0800 Subject: [PATCH 1/4] chore: update README.md to improve layout and image attributes --- README.md | 64 ++++++++++++++++++++++++++++++++++--------------------- 1 file changed, 40 insertions(+), 24 deletions(-) diff --git a/README.md b/README.md index 318a40c..632b628 100644 --- a/README.md +++ b/README.md @@ -7,36 +7,52 @@
- - Homepage - - - Chat - - - Hugging Face - + Homepage + Chat + Hugging Face
- - Discord - - - Wechat - - - Twitter Follow - + Discord + Wechat + Twitter Follow
- - Code License - - - Model License - + Code License + Model License
From 0866cab5f9b7e26c8ad57077c3fcd16b50d855e1 Mon Sep 17 00:00:00 2001 From: Konano Date: Fri, 14 Feb 2025 12:02:10 +0800 Subject: [PATCH 2/4] chore: update README.md to improve layout and image attributes --- README.md | 73 ++++++++++++++++--------------------------------------- 1 file changed, 21 insertions(+), 52 deletions(-) diff --git a/README.md b/README.md index 632b628..6746781 100644 --- a/README.md +++ b/README.md @@ -6,59 +6,28 @@ DeepSeek-V3
-
- Homepage - Chat - Hugging Face -
- -
- Discord - Wechat - Twitter Follow -
- -
- Code License - Model License -
- - -

+

+ Homepage + Chat + Hugging Face +
+ Discord + Wechat + Twitter Follow +
+ Code License + Model License +
Paper Link👁️ -

+
## Table of Contents From f07bccc49e02a2ea9b80214a65d181958a8d554e Mon Sep 17 00:00:00 2001 From: Konano Date: Fri, 14 Feb 2025 12:12:16 +0800 Subject: [PATCH 3/4] fix: resolve center alignment issue in preview --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index 6746781..9ba2346 100644 --- a/README.md +++ b/README.md @@ -6,7 +6,7 @@ DeepSeek-V3
-
+
Homepage Chat Date: Fri, 14 Feb 2025 20:26:45 +0800 Subject: [PATCH 4/4] fix scores mask --- inference/model.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/inference/model.py b/inference/model.py index 40bbf4d..8f1ab81 100644 --- a/inference/model.py +++ b/inference/model.py @@ -585,8 +585,8 @@ class Gate(nn.Module): else: group_scores = scores.topk(2, dim=-1)[0].sum(dim=-1) indices = group_scores.topk(self.topk_groups, dim=-1)[1] - mask = torch.zeros_like(scores[..., 0]).scatter_(1, indices, True) - scores = (scores * mask.unsqueeze(-1)).flatten(1) + mask = scores.new_ones(x.size(0), self.n_groups, dtype=bool).scatter_(1, indices, False) + scores = scores.masked_fill_(mask.unsqueeze(-1), float("-inf")).flatten(1) indices = torch.topk(scores, self.topk, dim=-1)[1] weights = original_scores.gather(1, indices) if self.score_func == "sigmoid":