diff --git a/inference/model.py b/inference/model.py index 2ec1b20..3b3c3bb 100644 --- a/inference/model.py +++ b/inference/model.py @@ -50,6 +50,7 @@ class ModelArgs: beta_fast (int): Fast beta correction factor. beta_slow (int): Slow beta correction factor. mscale (float): Scaling factor for extended attention. + gate_bias_dim (Optional[int]): Dimension threshold that determines if gate bias is used. Defaults to 7168. """ max_batch_size: int = 8 max_seq_len: int = 4096 * 4