diff --git a/fooocus_version.py b/fooocus_version.py index 867b97d..5243371 100644 --- a/fooocus_version.py +++ b/fooocus_version.py @@ -1 +1 @@ -version = '2.0.52' +version = '2.0.53' diff --git a/modules/patch.py b/modules/patch.py index 66b0255..63a913a 100644 --- a/modules/patch.py +++ b/modules/patch.py @@ -6,6 +6,7 @@ import comfy.k_diffusion.external import comfy.model_management import modules.anisotropic as anisotropic import comfy.ldm.modules.attention +import comfy.sd1_clip from comfy.k_diffusion import utils @@ -104,6 +105,39 @@ def text_encoder_device_patched(): return comfy.model_management.get_torch_device() +def encode_token_weights_patched_with_a1111_method(self, token_weight_pairs): + to_encode = list(self.empty_tokens) + for x in token_weight_pairs: + tokens = list(map(lambda a: a[0], x)) + to_encode.append(tokens) + + out, pooled = self.encode(to_encode) + + z_empty = out[0:1] + if pooled.shape[0] > 1: + first_pooled = pooled[1:2] + else: + first_pooled = pooled[0:1] + + output = [] + for k in range(1, out.shape[0]): + z = out[k:k + 1] + original_mean = z.mean() + + for i in range(len(z)): + for j in range(len(z[i])): + weight = token_weight_pairs[k - 1][j][1] + z[i][j] = (z[i][j] - z_empty[0][j]) * weight + z_empty[0][j] + + new_mean = z.mean() + z = z * (original_mean / new_mean) + output.append(z) + + if len(output) == 0: + return z_empty.cpu(), first_pooled.cpu() + return torch.cat(output, dim=-2).cpu(), first_pooled.cpu() + + def patch_all(): comfy.ldm.modules.attention.print = lambda x: None @@ -113,3 +147,6 @@ def patch_all(): comfy.k_diffusion.external.DiscreteEpsDDPMDenoiser.forward = patched_discrete_eps_ddpm_denoiser_forward comfy.model_base.SDXL.encode_adm = sdxl_encode_adm_patched # comfy.model_base.SDXLRefiner.encode_adm = sdxl_refiner_encode_adm_patched + + comfy.sd1_clip.ClipTokenWeightEncoder.encode_token_weights = encode_token_weights_patched_with_a1111_method + return diff --git a/readme.md b/readme.md index 729af90..16c7530 
100644 --- a/readme.md +++ b/readme.md @@ -150,6 +150,8 @@ Below things are already inside the software, and **users do not need to do anyt 9. Separated prompts for two different text encoders seem unnecessary. Separated prompts for base model and refiner may work but the effects are random, and we refrain from implement this. 10. DPM family seems well-suited for XL, since XL sometimes generates overly smooth texture but DPM family sometimes generate overly dense detail in texture. Their joint effect looks neutral and appealing to human perception. 11. A carefully designed system for balancing multiple styles as well as prompt expansion. +12. Using automatic1111's method to normalize prompt emphasizing. This significantly improves results when users directly copy prompts from civitai. +13. The joint swap system of refiner now also supports img2img and upscale in a seamless way. ## Advanced Features