From 179bcb2c4e6e6b9574c5a38e28e3c9813ed95bd7 Mon Sep 17 00:00:00 2001
From: lllyasviel
Date: Thu, 14 Dec 2023 19:15:16 -0800
Subject: [PATCH] Fix inpaint device problem in `--always-gpu` mode. (#1420)

2.1.841
---
 fooocus_version.py              |  2 +-
 ldm_patched/modules/samplers.py | 46 ++++++++++++++++-----------------
 modules/core.py                 |  2 +-
 modules/patch.py                |  7 ++---
 update_log.md                   |  8 ++++++
 5 files changed, 37 insertions(+), 28 deletions(-)

diff --git a/fooocus_version.py b/fooocus_version.py
index 1118b48..5ab4910 100644
--- a/fooocus_version.py
+++ b/fooocus_version.py
@@ -1 +1 @@
-version = '2.1.840'
+version = '2.1.841'
diff --git a/ldm_patched/modules/samplers.py b/ldm_patched/modules/samplers.py
index 9996e74..4e13d72 100644
--- a/ldm_patched/modules/samplers.py
+++ b/ldm_patched/modules/samplers.py
@@ -2,6 +2,7 @@ from ldm_patched.k_diffusion import sampling as k_diffusion_sampling
 from ldm_patched.unipc import uni_pc
 import torch
 import enum
+import collections
 from ldm_patched.modules import model_management
 import math
 from ldm_patched.modules import model_base
@@ -61,9 +62,7 @@ def get_area_and_mult(conds, x_in, timestep_in):
     for c in model_conds:
         conditioning[c] = model_conds[c].process_cond(batch_size=x_in.shape[0], device=x_in.device, area=area)
 
-    control = None
-    if 'control' in conds:
-        control = conds['control']
+    control = conds.get('control', None)
 
     patches = None
     if 'gligen' in conds:
@@ -78,7 +77,8 @@ def get_area_and_mult(conds, x_in, timestep_in):
 
         patches['middle_patch'] = [gligen_patch]
 
-    return (input_x, mult, conditioning, area, control, patches)
+    cond_obj = collections.namedtuple('cond_obj', ['input_x', 'mult', 'conditioning', 'area', 'control', 'patches'])
+    return cond_obj(input_x, mult, conditioning, area, control, patches)
 
 def cond_equal_size(c1, c2):
     if c1 is c2:
@@ -91,24 +91,24 @@ def cond_equal_size(c1, c2):
     return True
 
 def can_concat_cond(c1, c2):
-    if c1[0].shape != c2[0].shape:
+    if c1.input_x.shape != c2.input_x.shape:
         return False
 
-    #control
-    if (c1[4] is None) != (c2[4] is None):
-        return False
-    if c1[4] is not None:
-        if c1[4] is not c2[4]:
+    def objects_concatable(obj1, obj2):
+        if (obj1 is None) != (obj2 is None):
             return False
+        if obj1 is not None:
+            if obj1 is not obj2:
+                return False
+        return True
 
-    #patches
-    if (c1[5] is None) != (c2[5] is None):
+    if not objects_concatable(c1.control, c2.control):
         return False
-    if (c1[5] is not None):
-        if c1[5] is not c2[5]:
-            return False
 
-    return cond_equal_size(c1[2], c2[2])
+    if not objects_concatable(c1.patches, c2.patches):
+        return False
+
+    return cond_equal_size(c1.conditioning, c2.conditioning)
 
 def cond_cat(c_list):
     c_crossattn = []
@@ -184,13 +184,13 @@ def calc_cond_uncond_batch(model, cond, uncond, x_in, timestep, model_options):
         for x in to_batch:
             o = to_run.pop(x)
             p = o[0]
-            input_x += [p[0]]
-            mult += [p[1]]
-            c += [p[2]]
-            area += [p[3]]
-            cond_or_uncond += [o[1]]
-            control = p[4]
-            patches = p[5]
+            input_x.append(p.input_x)
+            mult.append(p.mult)
+            c.append(p.conditioning)
+            area.append(p.area)
+            cond_or_uncond.append(o[1])
+            control = p.control
+            patches = p.patches
 
         batch_chunks = len(cond_or_uncond)
         input_x = torch.cat(input_x)
diff --git a/modules/core.py b/modules/core.py
index 86c56b5..989b8e3 100644
--- a/modules/core.py
+++ b/modules/core.py
@@ -191,7 +191,7 @@ def encode_vae_inpaint(vae, pixels, mask):
 
     latent_mask = mask[:, None, :, :]
     latent_mask = torch.nn.functional.interpolate(latent_mask, size=(H * 8, W * 8), mode="bilinear").round()
-    latent_mask = torch.nn.functional.max_pool2d(latent_mask, (8, 8)).round()
+    latent_mask = torch.nn.functional.max_pool2d(latent_mask, (8, 8)).round().to(latent)
 
     return latent, latent_mask
 
diff --git a/modules/patch.py b/modules/patch.py
index da678d0..0a04baf 100644
--- a/modules/patch.py
+++ b/modules/patch.py
@@ -269,9 +269,10 @@ def sdxl_encode_adm_patched(self, **kwargs):
         height = float(height) * positive_adm_scale
 
     def embedder(number_list):
-        h = [self.embedder(torch.Tensor([number])) for number in number_list]
-        y = torch.flatten(torch.cat(h)).unsqueeze(dim=0).repeat(clip_pooled.shape[0], 1)
-        return y
+        h = torch.tensor(number_list, dtype=torch.float32)
+        h = self.embedder(h)
+        h = torch.flatten(h).unsqueeze(dim=0).repeat(clip_pooled.shape[0], 1)
+        return h
 
     width, height = round_to_64(width), round_to_64(height)
     target_width, target_height = round_to_64(target_width), round_to_64(target_height)
diff --git a/update_log.md b/update_log.md
index f889070..8269855 100644
--- a/update_log.md
+++ b/update_log.md
@@ -1,3 +1,11 @@
+# 2.1.841
+
+* Backend maintenance.
+* Fix some potential freezes after model mismatch.
+* Fix crash when cfg=1 with the anime preset.
+* Added some guidelines for troubleshooting the "CUDA kernel errors asynchronously" problem.
+* Fix inpaint device problem in `--always-gpu` mode.
+
 # 2.1.839
 
 * Maintained some computation codes in backend for efficiency.
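
Note on the modules/core.py change above: under `--always-gpu` the VAE latent and the freshly built inpaint mask can end up on different devices, so later operations that mix the two fail with a device mismatch. Appending `.to(latent)` copies the pooled mask onto the latent's device and dtype in one call. Below is a minimal standalone sketch of that idea; shapes and variable names are illustrative, not Fooocus code.

    import torch
    import torch.nn.functional as F

    device = "cuda" if torch.cuda.is_available() else "cpu"
    latent = torch.zeros(1, 4, 64, 64, device=device)  # stand-in for vae.encode(pixels)
    mask = torch.rand(1, 1, 512, 512)                   # built on the CPU as float32

    # Tensor.to(other) matches both the device and dtype of `other` in a single call,
    # so every later op that combines latent and latent_mask stays on one device.
    latent_mask = F.max_pool2d(mask, (8, 8)).round().to(latent)

    assert latent_mask.device == latent.device
    assert latent_mask.dtype == latent.dtype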