diff --git a/ldm_patched/contrib/external_sag.py b/ldm_patched/contrib/external_sag.py
index 59d1890..3505b44 100644
--- a/ldm_patched/contrib/external_sag.py
+++ b/ldm_patched/contrib/external_sag.py
@@ -29,9 +29,7 @@ def attention_basic_with_sim(q, k, v, heads, mask=None):
 
     # force cast to fp32 to avoid overflowing
     if _ATTN_PRECISION =="fp32":
-        with torch.autocast(enabled=False, device_type = 'cuda'):
-            q, k = q.float(), k.float()
-            sim = einsum('b i d, b j d -> b i j', q, k) * scale
+        sim = einsum('b i d, b j d -> b i j', q.float(), k.float()) * scale
     else:
         sim = einsum('b i d, b j d -> b i j', q, k) * scale
 
@@ -113,7 +111,6 @@ class SelfAttentionGuidance:
         m = model.clone()
 
         attn_scores = None
-        mid_block_shape = None
 
         # TODO: make this work properly with chunked batches
         #       currently, we can only save the attn from one UNet call
@@ -136,7 +133,6 @@ class SelfAttentionGuidance:
 
         def post_cfg_function(args):
             nonlocal attn_scores
-            nonlocal mid_block_shape
             uncond_attn = attn_scores
 
             sag_scale = scale
diff --git a/ldm_patched/ldm/modules/attention.py b/ldm_patched/ldm/modules/attention.py
index f4579ba..49e502e 100644
--- a/ldm_patched/ldm/modules/attention.py
+++ b/ldm_patched/ldm/modules/attention.py
@@ -104,9 +104,7 @@ def attention_basic(q, k, v, heads, mask=None):
 
     # force cast to fp32 to avoid overflowing
     if _ATTN_PRECISION =="fp32":
-        with torch.autocast(enabled=False, device_type = 'cuda'):
-            q, k = q.float(), k.float()
-            sim = einsum('b i d, b j d -> b i j', q, k) * scale
+        sim = einsum('b i d, b j d -> b i j', q.float(), k.float()) * scale
     else:
         sim = einsum('b i d, b j d -> b i j', q, k) * scale