diff --git a/comfy/ldm/modules/attention.py b/comfy/ldm/modules/attention.py
index 8f953d33..34484b28 100644
--- a/comfy/ldm/modules/attention.py
+++ b/comfy/ldm/modules/attention.py
@@ -323,7 +323,7 @@ class CrossAttentionDoggettx(nn.Module):
                 break
             except model_management.OOM_EXCEPTION as e:
                 if first_op_done == False:
-                    model_management.soft_empty_cache()
+                    model_management.soft_empty_cache(True)
                     if cleared_cache == False:
                         cleared_cache = True
                         print("out of memory error, emptying cache and trying again")
diff --git a/comfy/ldm/modules/diffusionmodules/model.py b/comfy/ldm/modules/diffusionmodules/model.py
index 43154848..5f38640c 100644
--- a/comfy/ldm/modules/diffusionmodules/model.py
+++ b/comfy/ldm/modules/diffusionmodules/model.py
@@ -186,6 +186,7 @@ def slice_attention(q, k, v):
                 del s2
             break
         except model_management.OOM_EXCEPTION as e:
+            model_management.soft_empty_cache(True)
             steps *= 2
             if steps > 128:
                 raise e
diff --git a/comfy/model_management.py b/comfy/model_management.py
index bdbbbd84..b663e8f5 100644
--- a/comfy/model_management.py
+++ b/comfy/model_management.py
@@ -639,14 +639,14 @@ def should_use_fp16(device=None, model_params=0, prioritize_performance=True):
         return True
 
-def soft_empty_cache():
+def soft_empty_cache(force=False):
     global cpu_state
     if cpu_state == CPUState.MPS:
         torch.mps.empty_cache()
     elif is_intel_xpu():
         torch.xpu.empty_cache()
     elif torch.cuda.is_available():
-        if is_nvidia(): #This seems to make things worse on ROCm so I only do it for cuda
+        if force or is_nvidia(): #This seems to make things worse on ROCm so I only do it for cuda
             torch.cuda.empty_cache()
             torch.cuda.ipc_collect()
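
For context, a minimal standalone sketch of the behavior this diff introduces: soft_empty_cache() keeps the conservative default of only flushing the CUDA cache on NVIDIA (the existing comment notes it seemed to make things worse on ROCm), while the out-of-memory handlers touched above pass force=True to flush unconditionally before retrying. This is not ComfyUI's code; the is_nvidia() stand-in and the torch.version.hip check are assumptions for illustration.

import torch

def is_nvidia() -> bool:
    # Assumption for this sketch: treat a non-HIP CUDA build as NVIDIA.
    return torch.cuda.is_available() and torch.version.hip is None

def soft_empty_cache(force: bool = False) -> None:
    if not torch.cuda.is_available():
        return
    # Default path: only flush on NVIDIA, where empty_cache() is known to help.
    # force=True (as used by the OOM handlers in this diff) flushes on ROCm too.
    if force or is_nvidia():
        torch.cuda.empty_cache()
        torch.cuda.ipc_collect()

# Example: an OOM retry loop requests an unconditional flush before retrying.
# soft_empty_cache(True)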