mirror of
https://github.com/deepseek-ai/DeepSeek-V3.git
synced 2025-07-05 07:51:38 -04:00
48 lines
2.0 KiB
Python
48 lines
2.0 KiB
Python
# Adapted from AutoFP8 (deprecated project)
|
|
import re
|
|
from typing import List, Optional, Tuple
|
|
|
|
|
|
class BaseQuantizeConfig:
    """Configuration for model quantization.

    Args:
        quant_method: Type/precision of quantization method to use.
            At the moment, this is just "fp8" which specifically means
            the fp8_e4m3 format in pytorch.
        activation_scheme: Choice of either "dynamic" or "static" quantization
            of activations. If "static", then calibration samples are required
            during quantization to produce accurate per-tensor scales for
            activations of Linear modules.
        ignore_patterns: List of patterns used to ignore layers. If a string
            starts with "re:", the prefix is stripped and everything
            afterwards is compiled as a Python regex intended for
            re.search()-style matching against each Linear layer name.
            Strings without the prefix are compiled as regexes unchanged.
            All patterns are compiled with ``re.VERBOSE``, so unescaped
            whitespace is ignored and ``#`` starts a comment.
            When omitted (``None``), ``["re:.*lm_head"]`` is used to ignore
            the embedding Linear layer usually at the end of decoder LLMs.
        kv_cache_quant_targets: Tuple of Linear module names to target for
            calibration of the output scales for KV cache quantization.
            Usually, these should be `("k_proj", "v_proj")`.

    Attributes:
        quant_method: The validated ``quant_method`` argument.
        activation_scheme: The validated ``activation_scheme`` argument.
        re_ignore_patterns: The raw pattern strings as supplied (a private
            copy, including any "re:" prefix).
        ignore_patterns: Compiled ``re.Pattern`` objects, one per entry of
            ``re_ignore_patterns`` and in the same order.
        kv_cache_quant_targets: Stored unchanged from the argument.
        ignored_layers: Starts as an empty list; presumably populated by the
            quantization code elsewhere (not visible in this file).

    Raises:
        ValueError: If ``quant_method`` is not "fp8" or ``activation_scheme``
            is neither "static" nor "dynamic".
        re.error: If one of ``ignore_patterns`` is not a valid regex.
    """

    # Marker that flags an ignore pattern as an explicit regular expression.
    _REGEX_PREFIX = "re:"
    # Used when the caller does not pass ``ignore_patterns``. Kept immutable
    # so that no instance can ever modify the shared default.
    _DEFAULT_IGNORE_PATTERNS = ("re:.*lm_head",)

    def __init__(
        self,
        quant_method: str = "fp8",
        activation_scheme: str = "static",
        ignore_patterns: Optional[List[str]] = None,
        kv_cache_quant_targets: Optional[Tuple[str, ...]] = None,
    ):
        if quant_method != "fp8":
            raise ValueError("Only FP8 quantization is supported.")
        if activation_scheme not in ["static", "dynamic"]:
            raise ValueError(
                "Invalid activation_scheme. Choose either 'static' or 'dynamic'."
            )

        if ignore_patterns is None:
            ignore_patterns = self._DEFAULT_IGNORE_PATTERNS

        self.quant_method = quant_method
        self.activation_scheme = activation_scheme
        # Copy so that later mutation of the caller's list (or of this
        # attribute) cannot leak into other configs.
        self.re_ignore_patterns = list(ignore_patterns)
        self.ignore_patterns = [
            self._compile_ignore_pattern(raw_pat)
            for raw_pat in self.re_ignore_patterns
        ]
        self.kv_cache_quant_targets = kv_cache_quant_targets
        self.ignored_layers = []

    @classmethod
    def _compile_ignore_pattern(cls, pattern: str) -> "re.Pattern":
        """Compile one ignore pattern, dropping a leading "re:" marker."""
        if pattern.startswith(cls._REGEX_PREFIX):
            # The marker is not part of the regex; leaving it in would demand
            # a literal "re:" inside the layer name and never match.
            pattern = pattern[len(cls._REGEX_PREFIX):]
        return re.compile(pattern, re.VERBOSE)
|