Add CUDA cache clearing in memory management

Added torch.cuda.empty_cache() to free up unused memory on the GPU.
This commit is contained in:
Yang Wang 2024-12-26 23:18:39 +08:00 committed by GitHub
parent e6e66fd23f
commit 65d8f5f1e9
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194

View File

@ -60,6 +60,7 @@ def main(fp8_path, bf16_path):
if len(loaded_files) > 2:
    oldest_file = next(iter(loaded_files))
    del loaded_files[oldest_file]
torch.cuda.empty_cache()
# Update model index
new_model_index_file = os.path.join(bf16_path, "model.safetensors.index.json")