fix(fp8_cast): Add robust memory management and error handling

This commit is contained in:
ajwise9 2025-02-04 16:36:07 +00:00
parent b5d872ead0
commit dca08f2cfd

View File

@ -88,10 +88,14 @@ def main(fp8_path, bf16_path):
save_file(new_state_dict, new_safetensor_file)
# Memory management: keep only the 2 most recently used files
if len(loaded_files) > 2:
oldest_file = next(iter(loaded_files))
del loaded_files[oldest_file]
torch.cuda.empty_cache()
try:
if len(loaded_files) > 2:
oldest_file = next(iter(loaded_files))
del loaded_files[oldest_file]
torch.cuda.empty_cache()
except RuntimeError as e:
print(f"Memory error: {e}")
# TODO: implement a concrete fallback here (e.g. evict all cached files and retry the load) or exit gracefully instead of continuing after the print
# Update model index
new_model_index_file = os.path.join(bf16_path, "model.safetensors.index.json")