use a1111 emphasizing (#399)

lllyasviel 2023-09-16 15:01:34 -07:00 committed by GitHub
parent d819ffae56
commit 7b73152fe3
3 changed files with 40 additions and 1 deletion


@@ -1 +1 @@
version = '2.0.52'
version = '2.0.53'


@@ -6,6 +6,7 @@ import comfy.k_diffusion.external
import comfy.model_management
import modules.anisotropic as anisotropic
import comfy.ldm.modules.attention
import comfy.sd1_clip
from comfy.k_diffusion import utils
@@ -104,6 +105,39 @@ def text_encoder_device_patched():
    return comfy.model_management.get_torch_device()

def encode_token_weights_patched_with_a1111_method(self, token_weight_pairs):
    # Encode the empty prompt in the same batch as the weighted prompts, so
    # out[0:1] can serve as the reference ("empty") embedding below.
    to_encode = list(self.empty_tokens)
    for x in token_weight_pairs:
        tokens = list(map(lambda a: a[0], x))
        to_encode.append(tokens)

    out, pooled = self.encode(to_encode)
    z_empty = out[0:1]
    if pooled.shape[0] > 1:
        first_pooled = pooled[1:2]
    else:
        first_pooled = pooled[0:1]

    output = []
    for k in range(1, out.shape[0]):
        z = out[k:k + 1]
        original_mean = z.mean()

        # Emphasis: move each token embedding away from the empty embedding
        # in proportion to its weight.
        for i in range(len(z)):
            for j in range(len(z[i])):
                weight = token_weight_pairs[k - 1][j][1]
                z[i][j] = (z[i][j] - z_empty[0][j]) * weight + z_empty[0][j]

        # A1111-style normalization: rescale so the mean of the weighted
        # embedding matches the mean before weighting.
        new_mean = z.mean()
        z = z * (original_mean / new_mean)
        output.append(z)

    if len(output) == 0:
        return z_empty.cpu(), first_pooled.cpu()
    return torch.cat(output, dim=-2).cpu(), first_pooled.cpu()

def patch_all():
    comfy.ldm.modules.attention.print = lambda x: None
@@ -113,3 +147,6 @@ def patch_all():
    comfy.k_diffusion.external.DiscreteEpsDDPMDenoiser.forward = patched_discrete_eps_ddpm_denoiser_forward
    comfy.model_base.SDXL.encode_adm = sdxl_encode_adm_patched
    # comfy.model_base.SDXLRefiner.encode_adm = sdxl_refiner_encode_adm_patched
    comfy.sd1_clip.ClipTokenWeightEncoder.encode_token_weights = encode_token_weights_patched_with_a1111_method
    return
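
To make the effect of the new emphasis handling concrete, here is a standalone sketch (not part of the commit; the tensor shapes, values, and weights are made up) that applies the same weight-then-renormalize steps to a toy embedding and checks that the overall mean is preserved:

import torch

# Toy stand-ins: one "prompt" of 4 tokens with dimension 8, an empty-prompt
# reference embedding of the same shape, and per-token emphasis weights.
z = torch.randn(1, 4, 8)
z_empty = torch.randn(1, 4, 8)
weights = torch.tensor([1.0, 1.2, 0.8, 1.0]).reshape(1, 4, 1)

original_mean = z.mean()

# Emphasis: move each token embedding away from the empty embedding by its weight.
z_weighted = (z - z_empty) * weights + z_empty

# A1111-style normalization: rescale so the overall mean matches the unweighted one.
z_weighted = z_weighted * (original_mean / z_weighted.mean())

print(original_mean.item(), z_weighted.mean().item())  # the two means match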


@@ -150,6 +150,8 @@ Below things are already inside the software, and **users do not need to do anything about these**.
9. Separated prompts for two different text encoders seem unnecessary. Separated prompts for the base model and refiner may work, but the effects are random, so we refrain from implementing this.
10. The DPM family seems well-suited for XL: XL sometimes generates overly smooth textures, while the DPM family sometimes generates overly dense texture detail, and their joint effect looks neutral and appealing to human perception.
11. A carefully designed system for balancing multiple styles as well as prompt expansion.
12. Using automatic1111's method to normalize prompt emphasizing. This significantly improves results when users directly copy prompts from civitai (see the sketch after this list).
13. The joint swap system for the refiner now also supports img2img and upscale in a seamless way.
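
For context on item 12: prompts copied from civitai usually carry A1111-style emphasis such as `(masterpiece:1.2)`, and each emphasized chunk contributes a per-token weight to the encoder. The parser below is a deliberately simplified, hypothetical sketch (it handles only the explicit `(text:weight)` form, not nesting, escaping, or `[...]` de-emphasis) meant only to illustrate the kind of (text, weight) pairs involved:

import re

def parse_simple_emphasis(prompt):
    # Split a prompt into (text, weight) pairs; '(text:1.2)' gets weight 1.2,
    # everything else gets weight 1.0. A toy illustration only.
    pairs = []
    for part in re.split(r'(\(.+?:[0-9.]+\))', prompt):
        m = re.fullmatch(r'\((.+?):([0-9.]+)\)', part)
        if m:
            pairs.append((m.group(1), float(m.group(2))))
        elif part.strip():
            pairs.append((part.strip(), 1.0))
    return pairs

print(parse_simple_emphasis('a castle, (masterpiece:1.2), (blurry:0.6)'))
# [('a castle,', 1.0), ('masterpiece', 1.2), (',', 1.0), ('blurry', 0.6)]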
## Advanced Features