fix(fp8_cast): Add robust memory management and error handling

2025-07-14 13:19:07 -04:00 · 2025-02-04 16:36:07 +00:00 · 2025-02-04 16:36:07 +00:00 · dca08f2cfd
commit dca08f2cfd
parent b5d872ead0
1 changed file with 8 additions and 4 deletions
--- a/inference/fp8_cast_bf16.py
+++ b/inference/fp8_cast_bf16.py
@@ -88,10 +88,14 @@ def main(fp8_path, bf16_path):
        save_file(new_state_dict, new_safetensor_file)
        # Memory management: keep only the 2 most recently used files
-        if len(loaded_files) > 2:
+        try:
-            oldest_file = next(iter(loaded_files))
+            if len(loaded_files) > 2:
-            del loaded_files[oldest_file]
+                oldest_file = next(iter(loaded_files))
-            torch.cuda.empty_cache()
+                del loaded_files[oldest_file]
+                torch.cuda.empty_cache()
+        except RuntimeError as e:
+            print(f"Memory error: {e}")
+            # Implement fallback strategy or graceful exit
    # Update model index
    new_model_index_file = os.path.join(bf16_path, "model.safetensors.index.json")