* maintain clip vision device
* update links in troubleshoot
lllyasviel 2023-12-16 10:34:47 -08:00 committed by GitHub
parent 776e080b3f
commit 3a727fd240
10 changed files with 113 additions and 77 deletions

View File

@@ -167,13 +167,6 @@ def preprocess(img, ip_adapter_path):
     ldm_patched.modules.model_management.load_model_gpu(clip_vision.patcher)
     pixel_values = clip_preprocess(numpy_to_pytorch(img).to(clip_vision.load_device))
 
-    if clip_vision.dtype != torch.float32:
-        precision_scope = torch.autocast
-    else:
-        precision_scope = lambda a, b: contextlib.nullcontext(a)
-
-    with precision_scope(ldm_patched.modules.model_management.get_autocast_device(clip_vision.load_device), torch.float32):
-        outputs = clip_vision.model(pixel_values=pixel_values, intermediate_output=-2)
+    outputs = clip_vision.model(pixel_values=pixel_values, intermediate_output=-2)
 
     ip_adapter = entry['ip_adapter']
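The deleted block is per-call precision plumbing: when the CLIP vision weights were fp16 the forward pass ran under torch.autocast, otherwise under a do-nothing nullcontext. A minimal sketch of that old pattern next to the plain call that replaces it (encode_with_autocast and encode_direct are illustrative names, not repo functions); the plain call becomes safe once the model's own layers handle dtype casting, which the clip_vision.py change further down sets up:

import contextlib
import torch

# Old pattern (removed here): choose a precision context at call time.
def encode_with_autocast(model, pixel_values, device, dtype):
    if dtype != torch.float32:
        precision_scope = torch.autocast
    else:
        # nullcontext ignores the dtype argument and runs the body unchanged
        precision_scope = lambda a, b: contextlib.nullcontext(a)
    with precision_scope(device, torch.float32):
        return model(pixel_values)

# New pattern: layers built from manual-cast ops convert their own weights
# to the activation dtype, so the wrapper collapses to a bare forward call.
def encode_direct(model, pixel_values):
    return model(pixel_values)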

View File

@@ -1 +1 @@
-version = '2.1.843'
+version = '2.1.844'

View File

@@ -1870,6 +1870,7 @@ def init_custom_nodes():
         "nodes_images.py",
         "nodes_video_model.py",
         "nodes_sag.py",
+        "nodes_perpneg.py",
     ]
     for node_file in extras_files:

View File

@@ -5,9 +5,7 @@ import torch
 def reshape_latent_to(target_shape, latent):
     if latent.shape[1:] != target_shape[1:]:
-        latent.movedim(1, -1)
         latent = ldm_patched.modules.utils.common_upscale(latent, target_shape[3], target_shape[2], "bilinear", "center")
-        latent.movedim(-1, 1)
     return ldm_patched.modules.utils.repeat_to_batch_size(latent, target_shape[0])
@@ -104,9 +102,32 @@ class LatentInterpolate:
         samples_out["samples"] = st * (m1 * ratio + m2 * (1.0 - ratio))
         return (samples_out,)
 
+class LatentBatch:
+    @classmethod
+    def INPUT_TYPES(s):
+        return {"required": { "samples1": ("LATENT",), "samples2": ("LATENT",)}}
+
+    RETURN_TYPES = ("LATENT",)
+    FUNCTION = "batch"
+
+    CATEGORY = "latent/batch"
+
+    def batch(self, samples1, samples2):
+        samples_out = samples1.copy()
+        s1 = samples1["samples"]
+        s2 = samples2["samples"]
+
+        if s1.shape[1:] != s2.shape[1:]:
+            s2 = ldm_patched.modules.utils.common_upscale(s2, s1.shape[3], s1.shape[2], "bilinear", "center")
+        s = torch.cat((s1, s2), dim=0)
+        samples_out["samples"] = s
+        samples_out["batch_index"] = samples1.get("batch_index", [x for x in range(0, s1.shape[0])]) + samples2.get("batch_index", [x for x in range(0, s2.shape[0])])
+        return (samples_out,)
+
 NODE_CLASS_MAPPINGS = {
     "LatentAdd": LatentAdd,
     "LatentSubtract": LatentSubtract,
     "LatentMultiply": LatentMultiply,
     "LatentInterpolate": LatentInterpolate,
+    "LatentBatch": LatentBatch,
 }
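The two movedim calls deleted from reshape_latent_to were silent no-ops: Tensor.movedim is out-of-place and returns a new tensor, so calling it without assigning the result changes nothing, and common_upscale was already receiving the NCHW layout it expects. A small sketch of the pitfall:

import torch

latent = torch.zeros(1, 4, 64, 64)  # NCHW latent batch

latent.movedim(1, -1)            # no-op: the returned tensor is discarded
print(latent.shape)              # torch.Size([1, 4, 64, 64])

latent = latent.movedim(1, -1)   # effective: the result is assigned
print(latent.shape)              # torch.Size([1, 64, 64, 4])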

View File

@@ -19,41 +19,19 @@ class LCM(ldm_patched.modules.model_sampling.EPS):
         return c_out * x0 + c_skip * model_input
 
-class ModelSamplingDiscreteDistilled(torch.nn.Module):
+class ModelSamplingDiscreteDistilled(ldm_patched.modules.model_sampling.ModelSamplingDiscrete):
     original_timesteps = 50
 
-    def __init__(self):
-        super().__init__()
-        self.sigma_data = 1.0
-        timesteps = 1000
-        beta_start = 0.00085
-        beta_end = 0.012
-        betas = torch.linspace(beta_start**0.5, beta_end**0.5, timesteps, dtype=torch.float32) ** 2
-        alphas = 1.0 - betas
-        alphas_cumprod = torch.cumprod(alphas, dim=0)
-        self.skip_steps = timesteps // self.original_timesteps
+    def __init__(self, model_config=None):
+        super().__init__(model_config)
+        self.skip_steps = self.num_timesteps // self.original_timesteps
 
-        alphas_cumprod_valid = torch.zeros((self.original_timesteps), dtype=torch.float32)
+        sigmas_valid = torch.zeros((self.original_timesteps), dtype=torch.float32)
         for x in range(self.original_timesteps):
-            alphas_cumprod_valid[self.original_timesteps - 1 - x] = alphas_cumprod[timesteps - 1 - x * self.skip_steps]
+            sigmas_valid[self.original_timesteps - 1 - x] = self.sigmas[self.num_timesteps - 1 - x * self.skip_steps]
 
-        sigmas = ((1 - alphas_cumprod_valid) / alphas_cumprod_valid) ** 0.5
-        self.set_sigmas(sigmas)
-
-    def set_sigmas(self, sigmas):
-        self.register_buffer('sigmas', sigmas)
-        self.register_buffer('log_sigmas', sigmas.log())
-
-    @property
-    def sigma_min(self):
-        return self.sigmas[0]
-
-    @property
-    def sigma_max(self):
-        return self.sigmas[-1]
+        self.set_sigmas(sigmas_valid)
 
     def timestep(self, sigma):
         log_sigma = sigma.log()
@@ -68,14 +46,6 @@ class ModelSamplingDiscreteDistilled(torch.nn.Module):
         log_sigma = (1 - w) * self.log_sigmas[low_idx] + w * self.log_sigmas[high_idx]
         return log_sigma.exp().to(timestep.device)
 
-    def percent_to_sigma(self, percent):
-        if percent <= 0.0:
-            return 999999999.9
-        if percent >= 1.0:
-            return 0.0
-        percent = 1.0 - percent
-        return self.sigma(torch.tensor(percent * 999.0)).item()
-
 def rescale_zero_terminal_snr_sigmas(sigmas):
     alphas_cumprod = 1 / ((sigmas * sigmas) + 1)
@@ -124,7 +94,7 @@ class ModelSamplingDiscrete:
         class ModelSamplingAdvanced(sampling_base, sampling_type):
             pass
 
-        model_sampling = ModelSamplingAdvanced()
+        model_sampling = ModelSamplingAdvanced(model.model.model_config)
         if zsnr:
             model_sampling.set_sigmas(rescale_zero_terminal_snr_sigmas(model_sampling.sigmas))
@@ -156,7 +126,7 @@ class ModelSamplingContinuousEDM:
         class ModelSamplingAdvanced(ldm_patched.modules.model_sampling.ModelSamplingContinuousEDM, sampling_type):
            pass
 
-        model_sampling = ModelSamplingAdvanced()
+        model_sampling = ModelSamplingAdvanced(model.model.model_config)
         model_sampling.set_sigma_range(sigma_min, sigma_max)
 
         m.add_object_patch("model_sampling", model_sampling)
         return (m, )
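After this rewrite ModelSamplingDiscreteDistilled inherits the full sigma schedule from ModelSamplingDiscrete and keeps only the subsampling logic: starting at the final timestep it takes every skip_steps-th sigma, walking backwards, until original_timesteps entries are filled. A standalone numeric sketch of that selection (the linspace schedule is a stand-in for the real beta-derived sigmas):

import torch

num_timesteps = 1000
original_timesteps = 50
skip_steps = num_timesteps // original_timesteps  # 20

# Stand-in for the parent class's full sigma schedule.
sigmas = torch.linspace(0.01, 150.0, num_timesteps)

sigmas_valid = torch.zeros(original_timesteps)
for x in range(original_timesteps):
    # Walk backwards from the last timestep in strides of skip_steps.
    sigmas_valid[original_timesteps - 1 - x] = sigmas[num_timesteps - 1 - x * skip_steps]

print(sigmas_valid.shape)               # torch.Size([50])
print(sigmas_valid[-1] == sigmas[-1])   # tensor(True): the final sigma is kept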

View File

@@ -0,0 +1,57 @@
+# https://github.com/comfyanonymous/ComfyUI/blob/master/nodes.py
+
+import torch
+import ldm_patched.modules.model_management
+import ldm_patched.modules.sample
+import ldm_patched.modules.samplers
+import ldm_patched.modules.utils
+
+
+class PerpNeg:
+    @classmethod
+    def INPUT_TYPES(s):
+        return {"required": {"model": ("MODEL", ),
+                             "empty_conditioning": ("CONDITIONING", ),
+                             "neg_scale": ("FLOAT", {"default": 1.0, "min": 0.0, "max": 100.0}),
+                             }}
+    RETURN_TYPES = ("MODEL",)
+    FUNCTION = "patch"
+
+    CATEGORY = "_for_testing"
+
+    def patch(self, model, empty_conditioning, neg_scale):
+        m = model.clone()
+        nocond = ldm_patched.modules.sample.convert_cond(empty_conditioning)
+
+        def cfg_function(args):
+            model = args["model"]
+            noise_pred_pos = args["cond_denoised"]
+            noise_pred_neg = args["uncond_denoised"]
+            cond_scale = args["cond_scale"]
+            x = args["input"]
+            sigma = args["sigma"]
+            model_options = args["model_options"]
+            nocond_processed = ldm_patched.modules.samplers.encode_model_conds(model.extra_conds, nocond, x, x.device, "negative")
+
+            (noise_pred_nocond, _) = ldm_patched.modules.samplers.calc_cond_uncond_batch(model, nocond_processed, None, x, sigma, model_options)
+
+            pos = noise_pred_pos - noise_pred_nocond
+            neg = noise_pred_neg - noise_pred_nocond
+            perp = ((torch.mul(pos, neg).sum())/(torch.norm(neg)**2)) * neg
+            perp_neg = perp * neg_scale
+            cfg_result = noise_pred_nocond + cond_scale*(pos - perp_neg)
+            cfg_result = x - cfg_result
+            return cfg_result
+
+        m.set_model_sampler_cfg_function(cfg_function)
+
+        return (m, )
+
+
+NODE_CLASS_MAPPINGS = {
+    "PerpNeg": PerpNeg,
+}
+
+NODE_DISPLAY_NAME_MAPPINGS = {
+    "PerpNeg": "Perp-Neg",
+}
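The heart of cfg_function is a vector projection. With pos and neg as the prompt and negative-prompt guidance directions (both relative to the unconditional prediction), the node computes proj_neg(pos) = (pos·neg / ||neg||²)·neg and subtracts a scaled copy, so at neg_scale = 1 the surviving guidance is exactly the component of pos perpendicular to neg. A tiny self-contained sketch (perp_neg_direction is an illustrative helper, not a repo function):

import torch

def perp_neg_direction(pos, neg, neg_scale=1.0):
    # Projection of pos onto neg: (<pos, neg> / ||neg||^2) * neg
    perp = (torch.mul(pos, neg).sum() / (torch.norm(neg) ** 2)) * neg
    # Removing (a scaled copy of) that projection keeps only the part of
    # the positive guidance that the negative prompt cannot cancel.
    return pos - neg_scale * perp

# 2D check: with pos = (1, 1) and neg = (1, 0), the projection is (1, 0)
# and the perpendicular remainder is (0, 1).
print(perp_neg_direction(torch.tensor([1.0, 1.0]), torch.tensor([1.0, 0.0])))
# tensor([0., 1.])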

View File

@@ -60,7 +60,7 @@ def create_blur_map(x0, attn, sigma=3.0, threshold=1.0):
     attn = attn.reshape(b, -1, hw1, hw2)
     # Global Average Pool
     mask = attn.mean(1, keepdim=False).sum(1, keepdim=False) > threshold
-    ratio = round(math.sqrt(lh * lw / hw1))
+    ratio = math.ceil(math.sqrt(lh * lw / hw1))
     mid_shape = [math.ceil(lh / ratio), math.ceil(lw / ratio)]
     # Reshape
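The fix matters because mask holds exactly hw1 values per image and is reshaped to mid_shape right after: round can pick a ratio that is too small, inflating mid_shape past hw1 elements, while math.ceil errs toward the coarser grid, matching the downsampling stages, which also round spatial sizes up. A numeric illustration with hypothetical dimensions:

import math

# Hypothetical sizes: a 65x65 latent whose deepest attention layer works on
# a 9x9 = 81-position grid (each conv stage rounds spatial sizes up).
lh, lw, hw1 = 65, 65, 81

for name, fn in (("round", round), ("ceil", math.ceil)):
    ratio = fn(math.sqrt(lh * lw / hw1))
    mid_shape = [math.ceil(lh / ratio), math.ceil(lw / ratio)]
    ok = mid_shape[0] * mid_shape[1] == hw1
    print(name, ratio, mid_shape, "reshape ok" if ok else "reshape fails")

# round -> ratio 7, mid_shape [10, 10]: 100 slots for 81 values, reshape fails
# ceil  -> ratio 8, mid_shape [9, 9]:   matches hw1, reshape succeeds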

View File

@@ -19,8 +19,10 @@ class Output:
 def clip_preprocess(image, size=224):
     mean = torch.tensor([ 0.48145466,0.4578275,0.40821073], device=image.device, dtype=image.dtype)
     std = torch.tensor([0.26862954,0.26130258,0.27577711], device=image.device, dtype=image.dtype)
-    scale = (size / min(image.shape[1], image.shape[2]))
-    image = torch.nn.functional.interpolate(image.movedim(-1, 1), size=(round(scale * image.shape[1]), round(scale * image.shape[2])), mode="bicubic", antialias=True)
+    image = image.movedim(-1, 1)
+    if not (image.shape[2] == size and image.shape[3] == size):
+        scale = (size / min(image.shape[2], image.shape[3]))
+        image = torch.nn.functional.interpolate(image, size=(round(scale * image.shape[2]), round(scale * image.shape[3])), mode="bicubic", antialias=True)
     h = (image.shape[2] - size)//2
     w = (image.shape[3] - size)//2
     image = image[:,:,h:h+size,w:w+size]
@@ -34,11 +36,9 @@ class ClipVisionModel():
         self.load_device = ldm_patched.modules.model_management.text_encoder_device()
         offload_device = ldm_patched.modules.model_management.text_encoder_offload_device()
-        self.dtype = torch.float32
-        if ldm_patched.modules.model_management.should_use_fp16(self.load_device, prioritize_performance=False):
-            self.dtype = torch.float16
-        self.model = ldm_patched.modules.clip_model.CLIPVisionModelProjection(config, self.dtype, offload_device, ldm_patched.modules.ops.disable_weight_init)
+        self.dtype = ldm_patched.modules.model_management.text_encoder_dtype(self.load_device)
+        self.model = ldm_patched.modules.clip_model.CLIPVisionModelProjection(config, self.dtype, offload_device, ldm_patched.modules.ops.manual_cast)
         self.model.eval()
 
         self.patcher = ldm_patched.modules.model_patcher.ModelPatcher(self.model, load_device=self.load_device, offload_device=offload_device)
 
     def load_sd(self, sd):
@@ -46,14 +46,7 @@ class ClipVisionModel():
     def encode_image(self, image):
         ldm_patched.modules.model_management.load_model_gpu(self.patcher)
-        pixel_values = clip_preprocess(image.to(self.load_device))
-
-        if self.dtype != torch.float32:
-            precision_scope = torch.autocast
-        else:
-            precision_scope = lambda a, b: contextlib.nullcontext(a)
-
-        with precision_scope(ldm_patched.modules.model_management.get_autocast_device(self.load_device), torch.float32):
-            out = self.model(pixel_values=pixel_values, intermediate_output=-2)
+        pixel_values = clip_preprocess(image.to(self.load_device)).float()
+        out = self.model(pixel_values=pixel_values, intermediate_output=-2)
 
         outputs = Output()
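This is the "maintain clip vision device" half of the commit: rather than choosing fp16 locally and wrapping every forward in autocast, the model takes its dtype from text_encoder_dtype and is built from ldm_patched.modules.ops.manual_cast ops. A minimal sketch of the manual-cast idea, under the assumption (consistent with how the call sites above use it) that such ops cast their weights to the input's device and dtype at forward time; ManualCastLinear is an illustrative class, not the repo's implementation:

import torch

class ManualCastLinear(torch.nn.Linear):
    # Weights may be stored in fp16 on an offload device, but every forward
    # pass casts them to match the input, so no autocast context is needed.
    def forward(self, x):
        weight = self.weight.to(device=x.device, dtype=x.dtype)
        bias = self.bias.to(device=x.device, dtype=x.dtype) if self.bias is not None else None
        return torch.nn.functional.linear(x, weight, bias)

layer = ManualCastLinear(4, 4).half()  # fp16 parameters
x = torch.randn(1, 4)                  # fp32 input, as after .float() above
print(layer(x).dtype)                  # torch.float32: follows the input dtype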

View File

@@ -251,7 +251,8 @@ def sampling_function(model, x, timestep, uncond, cond, cond_scale, model_option
         cond_pred, uncond_pred = calc_cond_uncond_batch(model, cond, uncond_, x, timestep, model_options)
 
         if "sampler_cfg_function" in model_options:
-            args = {"cond": x - cond_pred, "uncond": x - uncond_pred, "cond_scale": cond_scale, "timestep": timestep, "input": x, "sigma": timestep}
+            args = {"cond": x - cond_pred, "uncond": x - uncond_pred, "cond_scale": cond_scale, "timestep": timestep, "input": x, "sigma": timestep,
+                    "cond_denoised": cond_pred, "uncond_denoised": uncond_pred, "model": model, "model_options": model_options}
             cfg_result = x - model_options["sampler_cfg_function"](args)
         else:
             cfg_result = uncond_pred + (cond_pred - uncond_pred) * cond_scale
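With the four extra keys, a sampler_cfg_function can read the denoised predictions and the model directly instead of reconstructing them from x - cond, which is exactly what the new PerpNeg node above relies on. A minimal sketch of a consumer (plain_cfg is a hypothetical example that merely reproduces standard CFG); note the callback must return noise, x - denoised, since sampling_function computes cfg_result = x - callback(args):

def plain_cfg(args):
    cond_d = args["cond_denoised"]      # denoised prediction with the prompt
    uncond_d = args["uncond_denoised"]  # denoised prediction without it
    x = args["input"]
    scale = args["cond_scale"]
    denoised = uncond_d + (cond_d - uncond_d) * scale
    return x - denoised                 # caller recovers `denoised` as x - result

# Attached through the same hook PerpNeg uses:
#   m = model.clone()
#   m.set_model_sampler_cfg_function(plain_cfg)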

View File

@@ -143,19 +143,19 @@ Besides, the current support for MAC is very experimental, and we encourage user
 
 ### I am using Nvidia with 8GB VRAM, I get CUDA Out Of Memory
 
-It is a BUG. Please let us know as soon as possible. Please make an issue. See also [minimal requirements](readme.md#minimal-requirement).
+It is a BUG. Please let us know as soon as possible. Please make an issue. See also [minimal requirements](https://github.com/lllyasviel/Fooocus/tree/main?tab=readme-ov-file#minimal-requirement).
 
 ### I am using Nvidia with 6GB VRAM, I get CUDA Out Of Memory
 
-It is very likely a BUG. Please let us know as soon as possible. Please make an issue. See also [minimal requirements](readme.md#minimal-requirement).
+It is very likely a BUG. Please let us know as soon as possible. Please make an issue. See also [minimal requirements](https://github.com/lllyasviel/Fooocus/tree/main?tab=readme-ov-file#minimal-requirement).
 
 ### I am using Nvidia with 4GB VRAM with Float16 support, like RTX 3050, I get CUDA Out Of Memory
 
-It is a BUG. Please let us know as soon as possible. Please make an issue. See also [minimal requirements](readme.md#minimal-requirement).
+It is a BUG. Please let us know as soon as possible. Please make an issue. See also [minimal requirements](https://github.com/lllyasviel/Fooocus/tree/main?tab=readme-ov-file#minimal-requirement).
 
 ### I am using Nvidia with 4GB VRAM without Float16 support, like GTX 960, I get CUDA Out Of Memory
 
-Supporting GPU with 4GB VRAM without fp16 is extremely difficult, and you may not be able to use SDXL. However, you may still make an issue and let us know. You may try SD1.5 in Automatic1111 or other software for your device. See also [minimal requirements](readme.md#minimal-requirement).
+Supporting GPU with 4GB VRAM without fp16 is extremely difficult, and you may not be able to use SDXL. However, you may still make an issue and let us know. You may try SD1.5 in Automatic1111 or other software for your device. See also [minimal requirements](https://github.com/lllyasviel/Fooocus/tree/main?tab=readme-ov-file#minimal-requirement).
 
 ### I am using AMD GPU on Windows, I get CUDA Out Of Memory
@@ -163,11 +163,11 @@ Current AMD support is very experimental for Windows. If you see this, then perh
 
 However, if you're able to run SDXL on this same device on any other software, please let us know immediately, and we will support it as soon as possible. If no other software can enable your device to run SDXL on Windows, then we also do not have much to help.
 
-Besides, the AMD support on Linux is slightly better because it will use ROCM. You may also try it if you are willing to change OS to linux. See also [minimal requirements](readme.md#minimal-requirement).
+Besides, the AMD support on Linux is slightly better because it will use ROCM. You may also try it if you are willing to change OS to linux. See also [minimal requirements](https://github.com/lllyasviel/Fooocus/tree/main?tab=readme-ov-file#minimal-requirement).
 
 ### I am using AMD GPU on Linux, I get CUDA Out Of Memory
 
-Current AMD support for Linux is better than that for Windows, but still, very experimental. However, if you're able to run SDXL on this same device on any other software, please let us know immediately, and we will support it as soon as possible. If no other software can enable your device to run SDXL on Windows, then we also do not have much to help. See also [minimal requirements](readme.md#minimal-requirement).
+Current AMD support for Linux is better than that for Windows, but still, very experimental. However, if you're able to run SDXL on this same device on any other software, please let us know immediately, and we will support it as soon as possible. If no other software can enable your device to run SDXL on Windows, then we also do not have much to help. See also [minimal requirements](https://github.com/lllyasviel/Fooocus/tree/main?tab=readme-ov-file#minimal-requirement).
 
 ### I tried flags like --lowvram or --gpu-only or --bf16 or so on, and things are not getting any better?