From 65d8f5f1e99cba73e0f6a72bf0c871ac7873a023 Mon Sep 17 00:00:00 2001 From: Yang Wang Date: Thu, 26 Dec 2024 23:18:39 +0800 Subject: [PATCH] Add CUDA cache clearing in memory management Added torch.cuda.empty_cache() to free up unused memory on the GPU. --- inference/fp8_cast_bf16.py | 1 + 1 file changed, 1 insertion(+) diff --git a/inference/fp8_cast_bf16.py b/inference/fp8_cast_bf16.py index d6130ac..1b9735a 100644 --- a/inference/fp8_cast_bf16.py +++ b/inference/fp8_cast_bf16.py @@ -60,6 +60,7 @@ def main(fp8_path, bf16_path): if len(loaded_files) > 2: oldest_file = next(iter(loaded_files)) del loaded_files[oldest_file] + torch.cuda.empty_cache() # Update model index new_model_index_file = os.path.join(bf16_path, "model.safetensors.index.json")