Add commented options for output attentions and hidden states in DeepSeekMathConfig

2025-06-21 00:43:45 -04:00 · 2025-06-19 17:46:50 +05:30 · 2025-06-19 17:46:50 +05:30 · 2b3c49c9cb
commit 2b3c49c9cb
parent 735546a4f9
1 changed files with 2 additions and 0 deletions
--- a/train/model.py
+++ b/train/model.py
@@ -21,6 +21,8 @@ class DeepSeekMathConfig:
    use_cache: bool = True
    rope_scaling: Optional[dict] = None
    tie_word_embeddings: bool = False
    # output_attentions: bool = True
    # output_hidden_states: int = 12
 class RMSNorm(nn.Module):
    def __init__(self, hidden_size, eps=1e-6):