diff --git a/comfy/ldm/modules/attention.py b/comfy/ldm/modules/attention.py
index 8f953d33..34484b28 100644
--- a/comfy/ldm/modules/attention.py
+++ b/comfy/ldm/modules/attention.py
@@ -323,7 +323,7 @@ class CrossAttentionDoggettx(nn.Module):
                 break
             except model_management.OOM_EXCEPTION as e:
                 if first_op_done == False:
-                    model_management.soft_empty_cache()
+                    model_management.soft_empty_cache(True)
                     if cleared_cache == False:
                         cleared_cache = True
                         print("out of memory error, emptying cache and trying again")
diff --git a/comfy/ldm/modules/diffusionmodules/model.py b/comfy/ldm/modules/diffusionmodules/model.py
index 43154848..5f38640c 100644
--- a/comfy/ldm/modules/diffusionmodules/model.py
+++ b/comfy/ldm/modules/diffusionmodules/model.py
@@ -186,6 +186,7 @@ def slice_attention(q, k, v):
                 del s2
             break
         except model_management.OOM_EXCEPTION as e:
+            model_management.soft_empty_cache(True)
             steps *= 2
             if steps > 128:
                 raise e
diff --git a/comfy/model_management.py b/comfy/model_management.py
index bdbbbd84..b663e8f5 100644
--- a/comfy/model_management.py
+++ b/comfy/model_management.py
@@ -639,14 +639,14 @@ def should_use_fp16(device=None, model_params=0, prioritize_performance=True):
         return True
 
-def soft_empty_cache():
+def soft_empty_cache(force=False):
     global cpu_state
     if cpu_state == CPUState.MPS:
         torch.mps.empty_cache()
     elif is_intel_xpu():
         torch.xpu.empty_cache()
     elif torch.cuda.is_available():
-        if is_nvidia(): #This seems to make things worse on ROCm so I only do it for cuda
+        if force or is_nvidia(): #This seems to make things worse on ROCm so I only do it for cuda
             torch.cuda.empty_cache()
             torch.cuda.ipc_collect()
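
For context, a minimal standalone sketch of the behavior this diff introduces: soft_empty_cache() keeps the conservative default of only flushing the CUDA cache on NVIDIA (the existing comment notes it seemed to make things worse on ROCm), while the out-of-memory handlers touched above pass force=True to flush unconditionally before retrying. This is not ComfyUI's code; the is_nvidia() stand-in and the torch.version.hip check are assumptions for illustration.

import torch

def is_nvidia() -> bool:
    # Assumption for this sketch: treat a non-HIP CUDA build as NVIDIA.
    return torch.cuda.is_available() and torch.version.hip is None

def soft_empty_cache(force: bool = False) -> None:
    if not torch.cuda.is_available():
        return
    # Default path: only flush on NVIDIA, where empty_cache() is known to help.
    # force=True (as used by the OOM handlers in this diff) flushes on ROCm too.
    if force or is_nvidia():
        torch.cuda.empty_cache()
        torch.cuda.ipc_collect()

# Example: an OOM retry loop requests an unconditional flush before retrying.
# soft_empty_cache(True)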