Add commented options for output attentions and hidden states in DeepSeekMathConfig

This commit is contained in:
jayeshthk 2025-06-19 17:46:50 +05:30
parent 735546a4f9
commit 2b3c49c9cb

View File

@@ -21,6 +21,8 @@ class DeepSeekMathConfig:
use_cache: bool = True use_cache: bool = True
rope_scaling: Optional[dict] = None rope_scaling: Optional[dict] = None
tie_word_embeddings: bool = False tie_word_embeddings: bool = False
# output_attentions:bool=True
# output_hidden_states:int=12
class RMSNorm(nn.Module): class RMSNorm(nn.Module):
def __init__(self, hidden_size, eps=1e-6): def __init__(self, hidden_size, eps=1e-6):