i (#559)
This commit is contained in:
parent b42e96a52d
commit 6faaac333b
fooocus_extras/ip_adapter.py (new file, 304 lines)
@@ -0,0 +1,304 @@
import torch
import comfy.clip_vision
import safetensors.torch as sf
import comfy.model_management as model_management
import contextlib

from fooocus_extras.resampler import Resampler
from comfy.model_patcher import ModelPatcher


if model_management.xformers_enabled():
    import xformers
    import xformers.ops


SD_V12_CHANNELS = [320] * 4 + [640] * 4 + [1280] * 4 + [1280] * 6 + [640] * 6 + [320] * 6 + [1280] * 2
SD_XL_CHANNELS = [640] * 8 + [1280] * 40 + [1280] * 60 + [640] * 12 + [1280] * 20


def sdp(q, k, v, extra_options):
    if model_management.xformers_enabled():
        b, _, _ = q.shape
        q, k, v = map(
            lambda t: t.unsqueeze(3)
            .reshape(b, t.shape[1], extra_options["n_heads"], extra_options["dim_head"])
            .permute(0, 2, 1, 3)
            .reshape(b * extra_options["n_heads"], t.shape[1], extra_options["dim_head"])
            .contiguous(),
            (q, k, v),
        )
        out = xformers.ops.memory_efficient_attention(q, k, v, attn_bias=None, op=None)
        out = (
            out.unsqueeze(0)
            .reshape(b, extra_options["n_heads"], out.shape[1], extra_options["dim_head"])
            .permute(0, 2, 1, 3)
            .reshape(b, out.shape[1], extra_options["n_heads"] * extra_options["dim_head"])
        )
    else:
        b, _, _ = q.shape
        q, k, v = map(
            lambda t: t.view(b, -1, extra_options["n_heads"], extra_options["dim_head"]).transpose(1, 2),
            (q, k, v),
        )
        out = torch.nn.functional.scaled_dot_product_attention(q, k, v, attn_mask=None, dropout_p=0.0, is_causal=False)
        out = out.transpose(1, 2).reshape(b, -1, extra_options["n_heads"] * extra_options["dim_head"])
    return out


class ImageProjModel(torch.nn.Module):
    def __init__(self, cross_attention_dim=1024, clip_embeddings_dim=1024, clip_extra_context_tokens=4):
        super().__init__()

        self.cross_attention_dim = cross_attention_dim
        self.clip_extra_context_tokens = clip_extra_context_tokens
        self.proj = torch.nn.Linear(clip_embeddings_dim, self.clip_extra_context_tokens * cross_attention_dim)
        self.norm = torch.nn.LayerNorm(cross_attention_dim)

    def forward(self, image_embeds):
        embeds = image_embeds
        clip_extra_context_tokens = self.proj(embeds).reshape(-1, self.clip_extra_context_tokens,
                                                              self.cross_attention_dim)
        clip_extra_context_tokens = self.norm(clip_extra_context_tokens)
        return clip_extra_context_tokens


class To_KV(torch.nn.Module):
    def __init__(self, cross_attention_dim):
        super().__init__()

        channels = SD_XL_CHANNELS if cross_attention_dim == 2048 else SD_V12_CHANNELS
        self.to_kvs = torch.nn.ModuleList(
            [torch.nn.Linear(cross_attention_dim, channel, bias=False) for channel in channels])

    def load_state_dict_ordered(self, sd):
        state_dict = []
        for i in range(4096):
            for k in ['k', 'v']:
                key = f'{i}.to_{k}_ip.weight'
                if key in sd:
                    state_dict.append(sd[key])
        for i, v in enumerate(state_dict):
            self.to_kvs[i].weight = torch.nn.Parameter(v, requires_grad=False)


class IPAdapterModel(torch.nn.Module):
    def __init__(self, state_dict, plus, cross_attention_dim=768, clip_embeddings_dim=1024, clip_extra_context_tokens=4,
                 sdxl_plus=False):
        super().__init__()
        self.plus = plus
        if self.plus:
            self.image_proj_model = Resampler(
                dim=1280 if sdxl_plus else cross_attention_dim,
                depth=4,
                dim_head=64,
                heads=20 if sdxl_plus else 12,
                num_queries=clip_extra_context_tokens,
                embedding_dim=clip_embeddings_dim,
                output_dim=cross_attention_dim,
                ff_mult=4
            )
        else:
            self.image_proj_model = ImageProjModel(
                cross_attention_dim=cross_attention_dim,
                clip_embeddings_dim=clip_embeddings_dim,
                clip_extra_context_tokens=clip_extra_context_tokens
            )

        self.image_proj_model.load_state_dict(state_dict["image_proj"])
        self.ip_layers = To_KV(cross_attention_dim)
        self.ip_layers.load_state_dict_ordered(state_dict["ip_adapter"])


clip_vision: comfy.clip_vision.ClipVisionModel = None
ip_negative: torch.Tensor = None
image_proj_model: ModelPatcher = None
ip_layers: ModelPatcher = None
ip_adapter: IPAdapterModel = None


def load_ip_adapter(clip_vision_path, ip_negative_path, ip_adapter_path):
    global clip_vision, image_proj_model, ip_layers, ip_negative, ip_adapter

    if clip_vision_path is None:
        return
    if ip_negative_path is None:
        return
    if ip_adapter_path is None:
        return
    if clip_vision is not None and image_proj_model is not None and ip_layers is not None and ip_negative is not None:
        return

    ip_negative = sf.load_file(ip_negative_path)['data']
    clip_vision = comfy.clip_vision.load(clip_vision_path)

    load_device = model_management.get_torch_device()
    offload_device = torch.device('cpu')

    use_fp16 = model_management.should_use_fp16(device=load_device)
    ip_state_dict = torch.load(ip_adapter_path, map_location="cpu")
    plus = "latents" in ip_state_dict["image_proj"]
    cross_attention_dim = ip_state_dict["ip_adapter"]["1.to_k_ip.weight"].shape[1]
    sdxl = cross_attention_dim == 2048
    sdxl_plus = sdxl and plus

    if plus:
        clip_extra_context_tokens = ip_state_dict["image_proj"]["latents"].shape[1]
        clip_embeddings_dim = ip_state_dict["image_proj"]["latents"].shape[2]
    else:
        clip_extra_context_tokens = ip_state_dict["image_proj"]["proj.weight"].shape[0] // cross_attention_dim
        clip_embeddings_dim = None

    ip_adapter = IPAdapterModel(
        ip_state_dict,
        plus=plus,
        cross_attention_dim=cross_attention_dim,
        clip_embeddings_dim=clip_embeddings_dim,
        clip_extra_context_tokens=clip_extra_context_tokens,
        sdxl_plus=sdxl_plus
    )
    ip_adapter.sdxl = sdxl
    ip_adapter.load_device = load_device
    ip_adapter.offload_device = offload_device
    ip_adapter.dtype = torch.float16 if use_fp16 else torch.float32
    ip_adapter.to(offload_device, dtype=ip_adapter.dtype)

    image_proj_model = ModelPatcher(model=ip_adapter.image_proj_model, load_device=load_device,
                                    offload_device=offload_device)
    ip_layers = ModelPatcher(model=ip_adapter.ip_layers, load_device=load_device,
                             offload_device=offload_device)

    return


@torch.no_grad()
@torch.inference_mode()
def preprocess(img):
    inputs = clip_vision.processor(images=img, return_tensors="pt")
    comfy.model_management.load_models_gpu([clip_vision.patcher, image_proj_model])
    pixel_values = inputs['pixel_values'].to(clip_vision.load_device)

    if clip_vision.dtype != torch.float32:
        precision_scope = torch.autocast
    else:
        precision_scope = lambda a, b: contextlib.nullcontext(a)

    with precision_scope(comfy.model_management.get_autocast_device(clip_vision.load_device), torch.float32):
        outputs = clip_vision.model(pixel_values=pixel_values, output_hidden_states=True)

    if ip_adapter.plus:
        cond = outputs.hidden_states[-2].to(ip_adapter.dtype)
    else:
        cond = outputs.image_embeds.to(ip_adapter.dtype)

    outputs = image_proj_model.model(cond)

    return outputs


@torch.no_grad()
@torch.inference_mode()
def patch_model(model, ip_tasks):
    new_model = model.clone()

    tasks = []
    for cn_img, cn_stop, cn_weight in ip_tasks:
        tasks.append((cn_img, cn_stop, cn_weight, {}))

    def make_attn_patcher(ip_index):
        ip_model_k = ip_layers.model.to_kvs[ip_index * 2]
        ip_model_v = ip_layers.model.to_kvs[ip_index * 2 + 1]

        def patcher(n, context_attn2, value_attn2, extra_options):
            org_dtype = n.dtype
            current_step = float(model.model.diffusion_model.current_step.detach().cpu().numpy()[0])
            cond_or_uncond = extra_options['cond_or_uncond']

            with torch.autocast("cuda", dtype=ip_adapter.dtype):
                q = n
                k = [context_attn2]
                v = [value_attn2]
                b, _, _ = q.shape
                batch_prompt = b // len(cond_or_uncond)

                for cn_img, cn_stop, cn_weight, cache in tasks:
                    if current_step < cn_stop:
                        if ip_index in cache:
                            ip_k, ip_v = cache[ip_index]
                        else:
                            ip_model_k.to(device=ip_adapter.load_device, dtype=ip_adapter.dtype)
                            ip_model_v.to(device=ip_adapter.load_device, dtype=ip_adapter.dtype)
                            cond = cn_img.to(device=ip_adapter.load_device, dtype=ip_adapter.dtype).repeat(batch_prompt, 1, 1)
                            uncond = ip_negative.to(device=ip_adapter.load_device, dtype=ip_adapter.dtype).repeat(batch_prompt, 1, 1)
                            uncond_cond = torch.cat([(cond, uncond)[i] for i in cond_or_uncond], dim=0)
                            ip_k = ip_model_k(uncond_cond)
                            ip_v = ip_model_v(uncond_cond)

                            # Midjourney's attention formulation of image prompt (non-official reimplementation)
                            # Written by Lvmin Zhang at Stanford University, 2023 Dec
                            # For non-commercial use only - if you use this in a commercial project,
                            # it probably has some intellectual property issues.
                            # Contact lvminzhang@acm.org if you are not sure.

                            # Below is the sensitive part with potential intellectual property issues.

                            ip_v_mean = torch.mean(ip_v, dim=1, keepdim=True)
                            ip_v_offset = ip_v - ip_v_mean

                            B, F, C = ip_k.shape
                            channel_penalty = float(C) / 1280.0
                            weight = cn_weight * channel_penalty

                            ip_k = ip_k * weight
                            ip_v = ip_v_offset + ip_v_mean * weight

                            # The sensitive part ends here.

                            cache[ip_index] = ip_k, ip_v
                            ip_model_k.to(device=ip_adapter.offload_device, dtype=ip_adapter.dtype)
                            ip_model_v.to(device=ip_adapter.offload_device, dtype=ip_adapter.dtype)

                        k.append(ip_k)
                        v.append(ip_v)

                k = torch.cat(k, dim=1)
                v = torch.cat(v, dim=1)
                out = sdp(q, k, v, extra_options)

            return out.to(dtype=org_dtype)
        return patcher

    def set_model_patch_replace(model, number, key):
        to = model.model_options["transformer_options"]
        if "patches_replace" not in to:
            to["patches_replace"] = {}
        if "attn2" not in to["patches_replace"]:
            to["patches_replace"]["attn2"] = {}
        if key not in to["patches_replace"]["attn2"]:
            to["patches_replace"]["attn2"][key] = make_attn_patcher(number)

    number = 0
    if not ip_adapter.sdxl:
        for id in [1, 2, 4, 5, 7, 8]:  # id of input_blocks that have cross attention
            set_model_patch_replace(new_model, number, ("input", id))
            number += 1
        for id in [3, 4, 5, 6, 7, 8, 9, 10, 11]:  # id of output_blocks that have cross attention
            set_model_patch_replace(new_model, number, ("output", id))
            number += 1
        set_model_patch_replace(new_model, number, ("middle", 0))
    else:
        for id in [4, 5, 7, 8]:  # id of input_blocks that have cross attention
            block_indices = range(2) if id in [4, 5] else range(10)  # transformer_depth
            for index in block_indices:
                set_model_patch_replace(new_model, number, ("input", id, index))
                number += 1
        for id in range(6):  # id of output_blocks that have cross attention
            block_indices = range(2) if id in [3, 4, 5] else range(10)  # transformer_depth
            for index in block_indices:
                set_model_patch_replace(new_model, number, ("output", id, index))
                number += 1
        for index in range(10):
            set_model_patch_replace(new_model, number, ("middle", 0, index))
            number += 1

    return new_model
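The reweighting inside `patcher` is the heart of the image-prompt formulation: keys are scaled directly, while for values only the per-token mean is scaled and the offsets around that mean are kept intact, so lowering the weight fades the prompt's overall influence without collapsing its internal structure. A minimal sketch of that step on stand-in tensors (the shapes and the 1280.0 normalizer mirror the code above; the tensors themselves are hypothetical):

import torch

# Stand-in projected image-prompt keys/values: (batch, tokens, channels)
ip_k = torch.randn(2, 4, 640)
ip_v = torch.randn(2, 4, 640)
cn_weight = 0.6  # user-chosen image prompt weight

ip_v_mean = torch.mean(ip_v, dim=1, keepdim=True)
ip_v_offset = ip_v - ip_v_mean

channel_penalty = float(ip_k.shape[2]) / 1280.0  # wider layers contribute proportionally more
weight = cn_weight * channel_penalty

ip_k = ip_k * weight                     # keys are scaled directly
ip_v = ip_v_offset + ip_v_mean * weight  # values: scale only the mean component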
fooocus_extras/preprocessors.py (new file, 42 lines)
@@ -0,0 +1,42 @@
import cv2
import numpy as np


def canny_k(x, k=0.5):
    H, W, C = x.shape
    Hs, Ws = int(H * k), int(W * k)
    small = cv2.resize(x, (Ws, Hs), interpolation=cv2.INTER_AREA)
    return cv2.Canny(small, 100, 200).astype(np.float32) / 255.0


def canny_pyramid(x):
    # For some reason, SAI's Control-LoRA Canny seems to be trained on canny maps with non-standard resolutions.
    # We therefore blend a pyramid over all resolutions so that no structure is missed at any specific resolution.

    ks = [0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1.0]
    cs = [canny_k(x, k) for k in ks]
    cur = None

    for c in cs:
        if cur is None:
            cur = c
        else:
            H, W = c.shape
            cur = cv2.resize(cur, (W, H), interpolation=cv2.INTER_LINEAR)
            cur = cur * 0.75 + c * 0.25

    cur *= 400.0

    return cur.clip(0, 255).astype(np.uint8)


def cpds(x):
    # cv2.decolor is not a naive "decolor"; it implements Cewu Lu's contrast-preserving decolorization.
    # See http://www.cse.cuhk.edu.hk/leojia/projects/color2gray/index.html
    # See https://docs.opencv.org/3.0-beta/modules/photo/doc/decolor.html

    y = np.ascontiguousarray(x[:, :, ::-1].copy())
    y = cv2.decolor(y)[0]
    return y
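Both preprocessors consume and produce plain NumPy images, so they can be exercised standalone. A usage sketch, assuming a hypothetical local file `input.png` (the functions expect an HWC uint8 RGB array, as produced by `HWC3` in the worker):

import cv2

img = cv2.cvtColor(cv2.imread('input.png'), cv2.COLOR_BGR2RGB)  # HWC, uint8, RGB

edges = canny_pyramid(img)  # uint8 edge map blended across the 9 scales above
gray = cpds(img)            # contrast-preserving decolorization via cv2.decolor

cv2.imwrite('edges.png', edges)
cv2.imwrite('gray.png', gray)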
fooocus_extras/resampler.py (new file, 121 lines)
@@ -0,0 +1,121 @@
# modified from https://github.com/mlfoundations/open_flamingo/blob/main/open_flamingo/src/helpers.py
import math

import torch
import torch.nn as nn


# FFN
def FeedForward(dim, mult=4):
    inner_dim = int(dim * mult)
    return nn.Sequential(
        nn.LayerNorm(dim),
        nn.Linear(dim, inner_dim, bias=False),
        nn.GELU(),
        nn.Linear(inner_dim, dim, bias=False),
    )


def reshape_tensor(x, heads):
    bs, length, width = x.shape
    # (bs, length, width) --> (bs, length, n_heads, dim_per_head)
    x = x.view(bs, length, heads, -1)
    # (bs, length, n_heads, dim_per_head) --> (bs, n_heads, length, dim_per_head)
    x = x.transpose(1, 2)
    # note: heads stay in their own dimension here; the reshape keeps (bs, n_heads, length, dim_per_head)
    x = x.reshape(bs, heads, length, -1)
    return x


class PerceiverAttention(nn.Module):
    def __init__(self, *, dim, dim_head=64, heads=8):
        super().__init__()
        self.scale = dim_head**-0.5
        self.dim_head = dim_head
        self.heads = heads
        inner_dim = dim_head * heads

        self.norm1 = nn.LayerNorm(dim)
        self.norm2 = nn.LayerNorm(dim)

        self.to_q = nn.Linear(dim, inner_dim, bias=False)
        self.to_kv = nn.Linear(dim, inner_dim * 2, bias=False)
        self.to_out = nn.Linear(inner_dim, dim, bias=False)

    def forward(self, x, latents):
        """
        Args:
            x (torch.Tensor): image features
                shape (b, n1, D)
            latents (torch.Tensor): latent features
                shape (b, n2, D)
        """
        x = self.norm1(x)
        latents = self.norm2(latents)

        b, l, _ = latents.shape

        q = self.to_q(latents)
        kv_input = torch.cat((x, latents), dim=-2)
        k, v = self.to_kv(kv_input).chunk(2, dim=-1)

        q = reshape_tensor(q, self.heads)
        k = reshape_tensor(k, self.heads)
        v = reshape_tensor(v, self.heads)

        # attention
        scale = 1 / math.sqrt(math.sqrt(self.dim_head))
        weight = (q * scale) @ (k * scale).transpose(-2, -1)  # More stable with f16 than dividing afterwards
        weight = torch.softmax(weight.float(), dim=-1).type(weight.dtype)
        out = weight @ v

        out = out.permute(0, 2, 1, 3).reshape(b, l, -1)

        return self.to_out(out)


class Resampler(nn.Module):
    def __init__(
            self,
            dim=1024,
            depth=8,
            dim_head=64,
            heads=16,
            num_queries=8,
            embedding_dim=768,
            output_dim=1024,
            ff_mult=4,
    ):
        super().__init__()

        self.latents = nn.Parameter(torch.randn(1, num_queries, dim) / dim**0.5)

        self.proj_in = nn.Linear(embedding_dim, dim)

        self.proj_out = nn.Linear(dim, output_dim)
        self.norm_out = nn.LayerNorm(output_dim)

        self.layers = nn.ModuleList([])
        for _ in range(depth):
            self.layers.append(
                nn.ModuleList(
                    [
                        PerceiverAttention(dim=dim, dim_head=dim_head, heads=heads),
                        FeedForward(dim=dim, mult=ff_mult),
                    ]
                )
            )

    def forward(self, x):
        latents = self.latents.repeat(x.size(0), 1, 1)

        x = self.proj_in(x)

        for attn, ff in self.layers:
            latents = attn(x, latents) + latents
            latents = ff(latents) + latents

        latents = self.proj_out(latents)
        return self.norm_out(latents)
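A quick shape check of the Resampler as the sdxl_plus branch of `IPAdapterModel` configures it. The 1280-wide embedding and the 257 image tokens are assumed example values (typical of a large CLIP vision tower with a class token), and 16 queries stand in for the token count that is actually read from the checkpoint's `latents`:

import torch

resampler = Resampler(dim=1280, depth=4, dim_head=64, heads=20,
                      num_queries=16, embedding_dim=1280, output_dim=2048, ff_mult=4)

image_features = torch.randn(1, 257, 1280)  # (batch, image tokens, embedding_dim)
tokens = resampler(image_features)
print(tokens.shape)  # torch.Size([1, 16, 2048]) -- 16 context tokens at SDXL cross-attention width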
fooocus_version.py
@@ -1 +1 @@
-version = '2.0.93'
+version = '2.1.0'
launch.py
@@ -22,7 +22,7 @@ def prepare_environment():
     xformers_package = os.environ.get('XFORMERS_PACKAGE', 'xformers==0.0.20')

     comfy_repo = os.environ.get('COMFY_REPO', "https://github.com/comfyanonymous/ComfyUI")
-    comfy_commit_hash = os.environ.get('COMFY_COMMIT_HASH', "9bfec2bdbf0b0d778087a9b32f79e57e2d15b913")
+    comfy_commit_hash = os.environ.get('COMFY_COMMIT_HASH', "1c5d6663faf1a33e00ec67240167b174a9cac655")

     print(f"Python {sys.version}")
     print(f"Fooocus version: {fooocus_version.version}")
modules/advanced_parameters.py (new file, 21 lines)
@@ -0,0 +1,21 @@
adm_scaler_positive, adm_scaler_negative, adm_scaler_end, adaptive_cfg, sampler_name, \
    scheduler_name, overwrite_step, overwrite_switch, overwrite_width, overwrite_height, \
    overwrite_vary_strength, overwrite_upscale_strength, \
    mixing_image_prompt_and_vary_upscale, mixing_image_prompt_and_inpaint, \
    debugging_cn_preprocessor, disable_soft_cn = [None] * 16


def set_all_advanced_parameters(*args):
    global adm_scaler_positive, adm_scaler_negative, adm_scaler_end, adaptive_cfg, sampler_name, \
        scheduler_name, overwrite_step, overwrite_switch, overwrite_width, overwrite_height, \
        overwrite_vary_strength, overwrite_upscale_strength, \
        mixing_image_prompt_and_vary_upscale, mixing_image_prompt_and_inpaint, \
        debugging_cn_preprocessor, disable_soft_cn

    adm_scaler_positive, adm_scaler_negative, adm_scaler_end, adaptive_cfg, sampler_name, \
        scheduler_name, overwrite_step, overwrite_switch, overwrite_width, overwrite_height, \
        overwrite_vary_strength, overwrite_upscale_strength, \
        mixing_image_prompt_and_vary_upscale, mixing_image_prompt_and_inpaint, \
        debugging_cn_preprocessor, disable_soft_cn = args

    return
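The module is deliberately a flat bag of globals: the UI writes them once per task via `set_all_advanced_parameters`, the worker reads them by name, and positional order is the only contract. A hedged usage sketch (all values are hypothetical placeholders; the `> 0` checks in the worker mean `-1` disables an overwrite):

import modules.advanced_parameters as advanced_parameters

advanced_parameters.set_all_advanced_parameters(
    1.5, 0.8, 0.3,       # adm_scaler_positive, adm_scaler_negative, adm_scaler_end
    7.0,                 # adaptive_cfg
    'dpmpp_2m_sde_gpu',  # sampler_name (placeholder)
    'karras',            # scheduler_name (placeholder)
    -1, -1, -1, -1,      # overwrite_step, overwrite_switch, overwrite_width, overwrite_height
    -1, -1,              # overwrite_vary_strength, overwrite_upscale_strength
    False, False,        # mixing_image_prompt_and_vary_upscale, mixing_image_prompt_and_inpaint
    False, False)        # debugging_cn_preprocessor, disable_soft_cn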
modules/async_worker.py
@@ -21,7 +21,10 @@ def worker():
     import modules.path
     import modules.patch
     import comfy.model_management
+    import fooocus_extras.preprocessors as preprocessors
     import modules.inpaint_worker as inpaint_worker
+    import modules.advanced_parameters as advanced_parameters
+    import fooocus_extras.ip_adapter as ip_adapter

     from modules.sdxl_styles import apply_style, aspect_ratios, fooocus_expansion
     from modules.private_logger import log
@@ -44,24 +47,42 @@ def worker():
     @torch.no_grad()
     @torch.inference_mode()
-    def handler(task):
+    def handler(args):
         execution_start_time = time.perf_counter()

-        prompt, negative_prompt, style_selections, performance_selection, \
-            aspect_ratios_selection, image_number, image_seed, sharpness, adm_scaler_positive, adm_scaler_negative, adm_scaler_end, guidance_scale, adaptive_cfg, sampler_name, scheduler_name, \
-            overwrite_step, overwrite_switch, overwrite_width, overwrite_height, overwrite_vary_strength, overwrite_upscale_strength, \
-            base_model_name, refiner_model_name, \
-            l1, w1, l2, w2, l3, w3, l4, w4, l5, w5, \
-            input_image_checkbox, current_tab, \
-            uov_method, uov_input_image, outpaint_selections, inpaint_input_image = task
+        args.reverse()
+
+        prompt = args.pop()
+        negative_prompt = args.pop()
+        style_selections = args.pop()
+        performance_selection = args.pop()
+        aspect_ratios_selection = args.pop()
+        image_number = args.pop()
+        image_seed = args.pop()
+        sharpness = args.pop()
+        guidance_scale = args.pop()
+        base_model_name = args.pop()
+        refiner_model_name = args.pop()
+        loras = [(args.pop(), args.pop()) for _ in range(5)]
+        input_image_checkbox = args.pop()
+        current_tab = args.pop()
+        uov_method = args.pop()
+        uov_input_image = args.pop()
+        outpaint_selections = args.pop()
+        inpaint_input_image = args.pop()
+
+        cn_tasks = {flags.cn_ip: [], flags.cn_canny: [], flags.cn_cpds: []}
+        for _ in range(4):
+            cn_img = args.pop()
+            cn_stop = args.pop()
+            cn_weight = args.pop()
+            cn_type = args.pop()
+            if cn_img is not None:
+                cn_tasks[cn_type].append([cn_img, cn_stop, cn_weight])

         outpaint_selections = [o.lower() for o in outpaint_selections]

-        loras = [(l1, w1), (l2, w2), (l3, w3), (l4, w4), (l5, w5)]
-        loras_user_raw_input = copy.deepcopy(loras)
+        loras_raw = copy.deepcopy(loras)
         raw_style_selections = copy.deepcopy(style_selections)

         uov_method = uov_method.lower()

         if fooocus_expansion in style_selections:
@@ -72,15 +93,15 @@ def worker():

         use_style = len(style_selections) > 0

-        modules.patch.adaptive_cfg = adaptive_cfg
+        modules.patch.adaptive_cfg = advanced_parameters.adaptive_cfg
         print(f'[Parameters] Adaptive CFG = {modules.patch.adaptive_cfg}')

         modules.patch.sharpness = sharpness
         print(f'[Parameters] Sharpness = {modules.patch.sharpness}')

-        modules.patch.positive_adm_scale = adm_scaler_positive
-        modules.patch.negative_adm_scale = adm_scaler_negative
-        modules.patch.adm_scaler_end = adm_scaler_end
+        modules.patch.positive_adm_scale = advanced_parameters.adm_scaler_positive
+        modules.patch.negative_adm_scale = advanced_parameters.adm_scaler_negative
+        modules.patch.adm_scaler_end = advanced_parameters.adm_scaler_end
         print(f'[Parameters] ADM Scale = {modules.patch.positive_adm_scale} : {modules.patch.negative_adm_scale} : {modules.patch.adm_scaler_end}')

         cfg_scale = float(guidance_scale)
@@ -90,197 +111,18 @@ def worker():
         denoising_strength = 1.0
         tiled = False
         inpaint_worker.current_task = None

-        if performance_selection == 'Speed':
-            steps = 30
-            switch = 20
-        else:
-            steps = 60
-            switch = 40
-
-        if overwrite_step > 0:
-            steps = overwrite_step
-
-        if overwrite_switch > 0:
-            switch = overwrite_switch
-
-        pipeline.clear_all_caches()  # save memory
-
         width, height = aspect_ratios[aspect_ratios_selection]

-        if overwrite_width > 0:
-            width = overwrite_width
-
-        if overwrite_height > 0:
-            height = overwrite_height
-
-        if input_image_checkbox:
-            progressbar(0, 'Image processing ...')
-            if current_tab == 'uov' and uov_method != flags.disabled and uov_input_image is not None:
-                uov_input_image = HWC3(uov_input_image)
-                if 'vary' in uov_method:
-                    if not image_is_generated_in_current_ui(uov_input_image, ui_width=width, ui_height=height):
-                        uov_input_image = resize_image(uov_input_image, width=width, height=height)
-                        print(f'Resolution corrected - users are uploading their own images.')
-                    else:
-                        print(f'Processing images generated by Fooocus.')
-                    if 'subtle' in uov_method:
-                        denoising_strength = 0.5
-                    if 'strong' in uov_method:
-                        denoising_strength = 0.85
-                    if overwrite_vary_strength > 0:
-                        denoising_strength = overwrite_vary_strength
-                    initial_pixels = core.numpy_to_pytorch(uov_input_image)
-                    progressbar(0, 'VAE encoding ...')
-                    initial_latent = core.encode_vae(vae=pipeline.xl_base_patched.vae, pixels=initial_pixels)
-                    B, C, H, W = initial_latent['samples'].shape
-                    width = W * 8
-                    height = H * 8
-                    print(f'Final resolution is {str((height, width))}.')
-                elif 'upscale' in uov_method:
-                    H, W, C = uov_input_image.shape
-                    progressbar(0, f'Upscaling image from {str((H, W))} ...')
-
-                    uov_input_image = core.numpy_to_pytorch(uov_input_image)
-                    uov_input_image = perform_upscale(uov_input_image)
-                    uov_input_image = core.pytorch_to_numpy(uov_input_image)[0]
-                    print(f'Image upscaled.')
-
-                    if '1.5x' in uov_method:
-                        f = 1.5
-                    elif '2x' in uov_method:
-                        f = 2.0
-                    else:
-                        f = 1.0
-
-                    width_f = int(width * f)
-                    height_f = int(height * f)
-
-                    if image_is_generated_in_current_ui(uov_input_image, ui_width=width_f, ui_height=height_f):
-                        uov_input_image = resize_image(uov_input_image, width=int(W * f), height=int(H * f))
-                        print(f'Processing images generated by Fooocus.')
-                    else:
-                        uov_input_image = resize_image(uov_input_image, width=width_f, height=height_f)
-                        print(f'Resolution corrected - users are uploading their own images.')
-
-                    H, W, C = uov_input_image.shape
-                    image_is_super_large = H * W > 2800 * 2800
-
-                    if 'fast' in uov_method:
-                        direct_return = True
-                    elif image_is_super_large:
-                        print('Image is too large. Directly returning the SR image. '
-                              'Usually, directly returning the SR image at 4K resolution '
-                              'yields better results than SDXL diffusion.')
-                        direct_return = True
-                    else:
-                        direct_return = False
-
-                    if direct_return:
-                        d = [('Upscale (Fast)', '2x')]
-                        log(uov_input_image, d, single_line_number=1)
-                        outputs.append(['results', [uov_input_image]])
-                        return
-
-                    tiled = True
-                    denoising_strength = 1.0 - 0.618
-                    steps = int(steps * 0.618)
-                    switch = int(steps * 0.67)
-
-                    if overwrite_upscale_strength > 0:
-                        denoising_strength = overwrite_upscale_strength
-                    if overwrite_step > 0:
-                        steps = overwrite_step
-                    if overwrite_switch > 0:
-                        switch = overwrite_switch
-
-                    initial_pixels = core.numpy_to_pytorch(uov_input_image)
-                    progressbar(0, 'VAE encoding ...')
-
-                    initial_latent = core.encode_vae(vae=pipeline.xl_base_patched.vae, pixels=initial_pixels, tiled=True)
-                    B, C, H, W = initial_latent['samples'].shape
-                    width = W * 8
-                    height = H * 8
-                    print(f'Final resolution is {str((height, width))}.')
-            if current_tab == 'inpaint' and isinstance(inpaint_input_image, dict):
-                inpaint_image = inpaint_input_image['image']
-                inpaint_mask = inpaint_input_image['mask'][:, :, 0]
-                if isinstance(inpaint_image, np.ndarray) and isinstance(inpaint_mask, np.ndarray) \
-                        and (np.any(inpaint_mask > 127) or len(outpaint_selections) > 0):
-                    if len(outpaint_selections) > 0:
-                        H, W, C = inpaint_image.shape
-                        if 'top' in outpaint_selections:
-                            inpaint_image = np.pad(inpaint_image, [[int(H * 0.3), 0], [0, 0], [0, 0]], mode='edge')
-                            inpaint_mask = np.pad(inpaint_mask, [[int(H * 0.3), 0], [0, 0]], mode='constant', constant_values=255)
-                        if 'bottom' in outpaint_selections:
-                            inpaint_image = np.pad(inpaint_image, [[0, int(H * 0.3)], [0, 0], [0, 0]], mode='edge')
-                            inpaint_mask = np.pad(inpaint_mask, [[0, int(H * 0.3)], [0, 0]], mode='constant', constant_values=255)
-
-                        H, W, C = inpaint_image.shape
-                        if 'left' in outpaint_selections:
-                            inpaint_image = np.pad(inpaint_image, [[0, 0], [int(H * 0.3), 0], [0, 0]], mode='edge')
-                            inpaint_mask = np.pad(inpaint_mask, [[0, 0], [int(H * 0.3), 0]], mode='constant', constant_values=255)
-                        if 'right' in outpaint_selections:
-                            inpaint_image = np.pad(inpaint_image, [[0, 0], [0, int(H * 0.3)], [0, 0]], mode='edge')
-                            inpaint_mask = np.pad(inpaint_mask, [[0, 0], [0, int(H * 0.3)]], mode='constant', constant_values=255)
-
-                        inpaint_image = np.ascontiguousarray(inpaint_image.copy())
-                        inpaint_mask = np.ascontiguousarray(inpaint_mask.copy())
-
-                    inpaint_worker.current_task = inpaint_worker.InpaintWorker(image=inpaint_image, mask=inpaint_mask,
-                                                                               is_outpaint=len(outpaint_selections) > 0)
-
-                    # print(f'Inpaint task: {str((height, width))}')
-                    # outputs.append(['results', inpaint_worker.current_task.visualize_mask_processing()])
-                    # return
-
-                    progressbar(0, 'Downloading inpainter ...')
-                    inpaint_head_model_path, inpaint_patch_model_path = modules.path.downloading_inpaint_models()
-                    loras += [(inpaint_patch_model_path, 1.0)]
-
-                    inpaint_pixels = core.numpy_to_pytorch(inpaint_worker.current_task.image_ready)
-                    progressbar(0, 'VAE encoding ...')
-                    initial_latent = core.encode_vae(vae=pipeline.xl_base_patched.vae, pixels=inpaint_pixels)
-                    inpaint_latent = initial_latent['samples']
-                    B, C, H, W = inpaint_latent.shape
-                    inpaint_mask = core.numpy_to_pytorch(inpaint_worker.current_task.mask_ready[None])
-                    inpaint_mask = torch.nn.functional.avg_pool2d(inpaint_mask, (8, 8))
-                    inpaint_mask = torch.nn.functional.interpolate(inpaint_mask, (H, W), mode='bilinear')
-                    inpaint_worker.current_task.load_latent(latent=inpaint_latent, mask=inpaint_mask)
-
-                    progressbar(0, 'VAE inpaint encoding ...')
-
-                    inpaint_mask = (inpaint_worker.current_task.mask_ready > 0).astype(np.float32)
-                    inpaint_mask = torch.tensor(inpaint_mask).float()
-
-                    vae_dict = core.encode_vae_inpaint(
-                        mask=inpaint_mask, vae=pipeline.xl_base_patched.vae, pixels=inpaint_pixels)
-
-                    inpaint_latent = vae_dict['samples']
-                    inpaint_mask = vae_dict['noise_mask']
-                    inpaint_worker.current_task.load_inpaint_guidance(latent=inpaint_latent, mask=inpaint_mask, model_path=inpaint_head_model_path)
-
-                    B, C, H, W = inpaint_latent.shape
-                    height, width = inpaint_worker.current_task.image_raw.shape[:2]
-                    print(f'Final resolution is {str((height, width))}, latent is {str((H * 8, W * 8))}.')
-
-                    sampler_name = 'dpmpp_fooocus_2m_sde_inpaint_seamless'
-
-        print(f'[Parameters] Sampler = {sampler_name} - {scheduler_name}')
-
-        progressbar(1, 'Initializing ...')
+        skip_prompt_processing = False

         raw_prompt = prompt
         raw_negative_prompt = negative_prompt

-        prompts = remove_empty_str([safe_str(p) for p in prompt.split('\n')], default='')
-        negative_prompts = remove_empty_str([safe_str(p) for p in negative_prompt.split('\n')], default='')
-
-        prompt = prompts[0]
-        negative_prompt = negative_prompts[0]
-
-        extra_positive_prompts = prompts[1:] if len(prompts) > 1 else []
-        extra_negative_prompts = negative_prompts[1:] if len(negative_prompts) > 1 else []
+        inpaint_image = None
+        inpaint_mask = None
+        inpaint_head_model_path = None
+        controlnet_canny_path = None
+        controlnet_cpds_path = None
+        clip_vision_path, ip_negative_path, ip_adapter_path = None, None, None

         seed = image_seed
         max_seed = int(1024 * 1024 * 1024)
@@ -290,77 +132,329 @@ def worker():
             seed = - seed
         seed = seed % max_seed

-        progressbar(3, 'Loading models ...')
-
-        pipeline.refresh_everything(
-            refiner_model_name=refiner_model_name,
-            base_model_name=base_model_name,
-            loras=loras)
-        pipeline.prepare_text_encoder(async_call=False)
-
-        progressbar(3, 'Processing prompts ...')
-
-        positive_basic_workloads = []
-        negative_basic_workloads = []
-
-        if use_style:
-            for s in style_selections:
-                p, n = apply_style(s, positive=prompt)
-                positive_basic_workloads.append(p)
-                negative_basic_workloads.append(n)
-        else:
-            positive_basic_workloads.append(prompt)
+        if performance_selection == 'Speed':
+            steps = 30
+            switch = 20
+        else:
+            steps = 60
+            switch = 40

-        negative_basic_workloads.append(negative_prompt)  # Always use independent workload for negative.
+        sampler_name = advanced_parameters.sampler_name
+        scheduler_name = advanced_parameters.scheduler_name

-        positive_basic_workloads = positive_basic_workloads + extra_positive_prompts
-        negative_basic_workloads = negative_basic_workloads + extra_negative_prompts
+        goals = []
+        tasks = []

-        positive_basic_workloads = remove_empty_str(positive_basic_workloads, default=prompt)
-        negative_basic_workloads = remove_empty_str(negative_basic_workloads, default=negative_prompt)
+        if input_image_checkbox:
+            progressbar(13, 'Image processing ...')
+            if (current_tab == 'uov' or (current_tab == 'ip' and advanced_parameters.mixing_image_prompt_and_vary_upscale)) \
+                    and uov_method != flags.disabled and uov_input_image is not None:
+                uov_input_image = HWC3(uov_input_image)
+                if 'vary' in uov_method:
+                    goals.append('vary')
+                elif 'upscale' in uov_method:
+                    goals.append('upscale')
+                    if 'fast' in uov_method:
+                        skip_prompt_processing = True
+                    else:
+                        if performance_selection == 'Speed':
+                            steps = 18
+                            switch = 12
+                        else:
+                            steps = 36
+                            switch = 24
+            if (current_tab == 'inpaint' or (current_tab == 'ip' and advanced_parameters.mixing_image_prompt_and_inpaint))\
+                    and isinstance(inpaint_input_image, dict):
+                inpaint_image = inpaint_input_image['image']
+                inpaint_mask = inpaint_input_image['mask'][:, :, 0]
+                inpaint_image = HWC3(inpaint_image)
+                if isinstance(inpaint_image, np.ndarray) and isinstance(inpaint_mask, np.ndarray) \
+                        and (np.any(inpaint_mask > 127) or len(outpaint_selections) > 0):
+                    progressbar(1, 'Downloading inpainter ...')
+                    inpaint_head_model_path, inpaint_patch_model_path = modules.path.downloading_inpaint_models()
+                    loras += [(inpaint_patch_model_path, 1.0)]
+                    goals.append('inpaint')
+                    sampler_name = 'dpmpp_fooocus_2m_sde_inpaint_seamless'
+            if current_tab == 'ip' or \
+                    advanced_parameters.mixing_image_prompt_and_inpaint or \
+                    advanced_parameters.mixing_image_prompt_and_vary_upscale:
+                goals.append('cn')
+                progressbar(1, 'Downloading control models ...')
+                if len(cn_tasks[flags.cn_canny]) > 0:
+                    controlnet_canny_path = modules.path.downloading_controlnet_canny()
+                if len(cn_tasks[flags.cn_cpds]) > 0:
+                    controlnet_cpds_path = modules.path.downloading_controlnet_cpds()
+                if len(cn_tasks[flags.cn_ip]) > 0:
+                    clip_vision_path, ip_negative_path, ip_adapter_path = modules.path.downloading_ip_adapters()
+                progressbar(1, 'Loading control models ...')

-        positive_top_k = len(positive_basic_workloads)
-        negative_top_k = len(negative_basic_workloads)
+            # Load or unload CNs
+            pipeline.refresh_controlnets([controlnet_canny_path, controlnet_cpds_path])
+            ip_adapter.load_ip_adapter(clip_vision_path, ip_negative_path, ip_adapter_path)

-        tasks = [dict(
-            task_seed=seed + i,
-            positive=positive_basic_workloads,
-            negative=negative_basic_workloads,
-            expansion='',
-            c=[None, None],
-            uc=[None, None],
-        ) for i in range(image_number)]
+        if advanced_parameters.overwrite_step > 0:
+            steps = advanced_parameters.overwrite_step

-        if use_expansion:
-            for i, t in enumerate(tasks):
-                progressbar(5, f'Preparing Fooocus text #{i + 1} ...')
-                expansion = pipeline.expansion(prompt, t['task_seed'])
-                print(f'[Prompt Expansion] New suffix: {expansion}')
-                t['expansion'] = expansion
-                t['positive'] = copy.deepcopy(t['positive']) + [join_prompts(prompt, expansion)]  # Deep copy.
+        if advanced_parameters.overwrite_switch > 0:
+            switch = advanced_parameters.overwrite_switch

-        for i, t in enumerate(tasks):
-            progressbar(7, f'Encoding base positive #{i + 1} ...')
-            t['c'][0] = pipeline.clip_encode(sd=pipeline.xl_base_patched, texts=t['positive'],
-                                             pool_top_k=positive_top_k)
+        if advanced_parameters.overwrite_width > 0:
+            width = advanced_parameters.overwrite_width

-        for i, t in enumerate(tasks):
-            progressbar(9, f'Encoding base negative #{i + 1} ...')
-            t['uc'][0] = pipeline.clip_encode(sd=pipeline.xl_base_patched, texts=t['negative'],
-                                              pool_top_k=negative_top_k)
+        if advanced_parameters.overwrite_height > 0:
+            height = advanced_parameters.overwrite_height

-        if pipeline.xl_refiner is not None:
-            for i, t in enumerate(tasks):
-                progressbar(11, f'Encoding refiner positive #{i + 1} ...')
-                t['c'][1] = pipeline.clip_separate(t['c'][0])
+        print(f'[Parameters] Sampler = {sampler_name} - {scheduler_name}')
+        print(f'[Parameters] Steps = {steps} - {switch}')
+
+        progressbar(1, 'Initializing ...')
+
+        if not skip_prompt_processing:
+
+            prompts = remove_empty_str([safe_str(p) for p in prompt.split('\n')], default='')
+            negative_prompts = remove_empty_str([safe_str(p) for p in negative_prompt.split('\n')], default='')
+
+            prompt = prompts[0]
+            negative_prompt = negative_prompts[0]
+
+            extra_positive_prompts = prompts[1:] if len(prompts) > 1 else []
+            extra_negative_prompts = negative_prompts[1:] if len(negative_prompts) > 1 else []
+
+            progressbar(3, 'Loading models ...')
+            pipeline.refresh_everything(refiner_model_name=refiner_model_name, base_model_name=base_model_name, loras=loras)
+
+            progressbar(3, 'Processing prompts ...')
+            positive_basic_workloads = []
+            negative_basic_workloads = []
+
+            if use_style:
+                for s in style_selections:
+                    p, n = apply_style(s, positive=prompt)
+                    positive_basic_workloads.append(p)
+                    negative_basic_workloads.append(n)
+            else:
+                positive_basic_workloads.append(prompt)
+
+            negative_basic_workloads.append(negative_prompt)  # Always use independent workload for negative.
+
+            positive_basic_workloads = positive_basic_workloads + extra_positive_prompts
+            negative_basic_workloads = negative_basic_workloads + extra_negative_prompts
+
+            positive_basic_workloads = remove_empty_str(positive_basic_workloads, default=prompt)
+            negative_basic_workloads = remove_empty_str(negative_basic_workloads, default=negative_prompt)
+
+            positive_top_k = len(positive_basic_workloads)
+            negative_top_k = len(negative_basic_workloads)
+
+            tasks = [dict(
+                task_seed=seed + i,
+                positive=positive_basic_workloads,
+                negative=negative_basic_workloads,
+                expansion='',
+                c=None,
+                uc=None,
+            ) for i in range(image_number)]
+
+            if use_expansion:
+                for i, t in enumerate(tasks):
+                    progressbar(5, f'Preparing Fooocus text #{i + 1} ...')
+                    expansion = pipeline.final_expansion(prompt, t['task_seed'])
+                    print(f'[Prompt Expansion] New suffix: {expansion}')
+                    t['expansion'] = expansion
+                    t['positive'] = copy.deepcopy(t['positive']) + [join_prompts(prompt, expansion)]  # Deep copy.

             for i, t in enumerate(tasks):
-                progressbar(13, f'Encoding refiner negative #{i + 1} ...')
-                t['uc'][1] = pipeline.clip_separate(t['uc'][0])
+                progressbar(7, f'Encoding positive #{i + 1} ...')
+                t['c'] = pipeline.clip_encode(texts=t['positive'], pool_top_k=positive_top_k)
+
+            for i, t in enumerate(tasks):
+                progressbar(10, f'Encoding negative #{i + 1} ...')
+                t['uc'] = pipeline.clip_encode(texts=t['negative'], pool_top_k=negative_top_k)
+
+        if len(goals) > 0:
+            progressbar(13, 'Image processing ...')
+
+            if 'vary' in goals:
+                if not image_is_generated_in_current_ui(uov_input_image, ui_width=width, ui_height=height):
+                    uov_input_image = resize_image(uov_input_image, width=width, height=height)
+                    print(f'Resolution corrected - users are uploading their own images.')
+                else:
+                    print(f'Processing images generated by Fooocus.')
+                if 'subtle' in uov_method:
+                    denoising_strength = 0.5
+                if 'strong' in uov_method:
+                    denoising_strength = 0.85
+                if advanced_parameters.overwrite_vary_strength > 0:
+                    denoising_strength = advanced_parameters.overwrite_vary_strength
+                initial_pixels = core.numpy_to_pytorch(uov_input_image)
+                progressbar(13, 'VAE encoding ...')
+                initial_latent = core.encode_vae(vae=pipeline.final_vae, pixels=initial_pixels)
+                B, C, H, W = initial_latent['samples'].shape
+                width = W * 8
+                height = H * 8
+                print(f'Final resolution is {str((height, width))}.')
+
+            if 'upscale' in goals:
+                H, W, C = uov_input_image.shape
+                progressbar(13, f'Upscaling image from {str((H, W))} ...')
+
+                uov_input_image = core.numpy_to_pytorch(uov_input_image)
+                uov_input_image = perform_upscale(uov_input_image)
+                uov_input_image = core.pytorch_to_numpy(uov_input_image)[0]
+                print(f'Image upscaled.')
+
+                if '1.5x' in uov_method:
+                    f = 1.5
+                elif '2x' in uov_method:
+                    f = 2.0
+                else:
+                    f = 1.0
+
+                width_f = int(width * f)
+                height_f = int(height * f)
+
+                if image_is_generated_in_current_ui(uov_input_image, ui_width=width_f, ui_height=height_f):
+                    uov_input_image = resize_image(uov_input_image, width=int(W * f), height=int(H * f))
+                    print(f'Processing images generated by Fooocus.')
+                else:
+                    uov_input_image = resize_image(uov_input_image, width=width_f, height=height_f)
+                    print(f'Resolution corrected - users are uploading their own images.')
+
+                H, W, C = uov_input_image.shape
+                image_is_super_large = H * W > 2800 * 2800
+
+                if 'fast' in uov_method:
+                    direct_return = True
+                elif image_is_super_large:
+                    print('Image is too large. Directly returning the SR image. '
+                          'Usually, directly returning the SR image at 4K resolution '
+                          'yields better results than SDXL diffusion.')
+                    direct_return = True
+                else:
+                    direct_return = False
+
+                if direct_return:
+                    d = [('Upscale (Fast)', '2x')]
+                    log(uov_input_image, d, single_line_number=1)
+                    outputs.append(['results', [uov_input_image]])
+                    return
+
+                tiled = True
+                denoising_strength = 0.382
+
+                if advanced_parameters.overwrite_upscale_strength > 0:
+                    denoising_strength = advanced_parameters.overwrite_upscale_strength
+
+                initial_pixels = core.numpy_to_pytorch(uov_input_image)
+                progressbar(13, 'VAE encoding ...')
+
+                initial_latent = core.encode_vae(vae=pipeline.final_vae, pixels=initial_pixels, tiled=True)
+                B, C, H, W = initial_latent['samples'].shape
+                width = W * 8
+                height = H * 8
+                print(f'Final resolution is {str((height, width))}.')
+
+            if 'inpaint' in goals:
+                if len(outpaint_selections) > 0:
+                    H, W, C = inpaint_image.shape
+                    if 'top' in outpaint_selections:
+                        inpaint_image = np.pad(inpaint_image, [[int(H * 0.3), 0], [0, 0], [0, 0]], mode='edge')
+                        inpaint_mask = np.pad(inpaint_mask, [[int(H * 0.3), 0], [0, 0]], mode='constant',
+                                              constant_values=255)
+                    if 'bottom' in outpaint_selections:
+                        inpaint_image = np.pad(inpaint_image, [[0, int(H * 0.3)], [0, 0], [0, 0]], mode='edge')
+                        inpaint_mask = np.pad(inpaint_mask, [[0, int(H * 0.3)], [0, 0]], mode='constant',
+                                              constant_values=255)
+
+                    H, W, C = inpaint_image.shape
+                    if 'left' in outpaint_selections:
+                        inpaint_image = np.pad(inpaint_image, [[0, 0], [int(H * 0.3), 0], [0, 0]], mode='edge')
+                        inpaint_mask = np.pad(inpaint_mask, [[0, 0], [int(H * 0.3), 0]], mode='constant',
+                                              constant_values=255)
+                    if 'right' in outpaint_selections:
+                        inpaint_image = np.pad(inpaint_image, [[0, 0], [0, int(H * 0.3)], [0, 0]], mode='edge')
+                        inpaint_mask = np.pad(inpaint_mask, [[0, 0], [0, int(H * 0.3)]], mode='constant',
+                                              constant_values=255)
+
+                    inpaint_image = np.ascontiguousarray(inpaint_image.copy())
+                    inpaint_mask = np.ascontiguousarray(inpaint_mask.copy())
+
+                inpaint_worker.current_task = inpaint_worker.InpaintWorker(image=inpaint_image, mask=inpaint_mask,
+                                                                           is_outpaint=len(outpaint_selections) > 0)
+
+                # print(f'Inpaint task: {str((height, width))}')
+                # outputs.append(['results', inpaint_worker.current_task.visualize_mask_processing()])
+                # return
+
+                progressbar(13, 'VAE encoding ...')
+                inpaint_pixels = core.numpy_to_pytorch(inpaint_worker.current_task.image_ready)
+                initial_latent = core.encode_vae(vae=pipeline.final_vae, pixels=inpaint_pixels)
+                inpaint_latent = initial_latent['samples']
+                B, C, H, W = inpaint_latent.shape
+                inpaint_mask = core.numpy_to_pytorch(inpaint_worker.current_task.mask_ready[None])
+                inpaint_mask = torch.nn.functional.avg_pool2d(inpaint_mask, (8, 8))
+                inpaint_mask = torch.nn.functional.interpolate(inpaint_mask, (H, W), mode='bilinear')
+                inpaint_worker.current_task.load_latent(latent=inpaint_latent, mask=inpaint_mask)
+
+                progressbar(13, 'VAE inpaint encoding ...')
+
+                inpaint_mask = (inpaint_worker.current_task.mask_ready > 0).astype(np.float32)
+                inpaint_mask = torch.tensor(inpaint_mask).float()
+
+                vae_dict = core.encode_vae_inpaint(
+                    mask=inpaint_mask, vae=pipeline.final_vae, pixels=inpaint_pixels)
+
+                inpaint_latent = vae_dict['samples']
+                inpaint_mask = vae_dict['noise_mask']
+                inpaint_worker.current_task.load_inpaint_guidance(latent=inpaint_latent, mask=inpaint_mask,
+                                                                  model_path=inpaint_head_model_path)
+
+                B, C, H, W = inpaint_latent.shape
+                final_height, final_width = inpaint_worker.current_task.image_raw.shape[:2]
+                height, width = H * 8, W * 8
+                print(f'Final resolution is {str((final_height, final_width))}, latent is {str((height, width))}.')
+
+            if 'cn' in goals:
+                for task in cn_tasks[flags.cn_canny]:
+                    cn_img, cn_stop, cn_weight = task
+                    cn_img = resize_image(HWC3(cn_img), width=width, height=height)
+                    cn_img = preprocessors.canny_pyramid(cn_img)
+                    cn_img = HWC3(cn_img)
+                    task[0] = core.numpy_to_pytorch(cn_img)
+                    if advanced_parameters.debugging_cn_preprocessor:
+                        outputs.append(['results', [cn_img]])
+                        return
+                for task in cn_tasks[flags.cn_cpds]:
+                    cn_img, cn_stop, cn_weight = task
+                    cn_img = resize_image(HWC3(cn_img), width=width, height=height)
+                    cn_img = preprocessors.cpds(cn_img)
+                    cn_img = HWC3(cn_img)
+                    task[0] = core.numpy_to_pytorch(cn_img)
+                    if advanced_parameters.debugging_cn_preprocessor:
+                        outputs.append(['results', [cn_img]])
+                        return
+                for task in cn_tasks[flags.cn_ip]:
+                    cn_img, cn_stop, cn_weight = task
+                    cn_img = HWC3(cn_img)
+                    task[0] = ip_adapter.preprocess(cn_img)
+
+                if len(cn_tasks[flags.cn_ip]) > 0:
+                    pipeline.final_unet = ip_adapter.patch_model(pipeline.final_unet, cn_tasks[flags.cn_ip])

         results = []
         all_steps = steps * image_number
+
+        preparation_time = time.perf_counter() - execution_start_time
+        print(f'Preparation time: {preparation_time:.2f} seconds')
+
+        outputs.append(['preview', (13, 'Moving model to GPU ...', None)])
+        execution_start_time = time.perf_counter()
+        comfy.model_management.load_models_gpu([pipeline.final_unet])
+        moving_time = time.perf_counter() - execution_start_time
+        print(f'Moving model to GPU: {moving_time:.2f} seconds')
+
+        outputs.append(['preview', (13, 'Starting tasks ...', None)])

         def callback(step, x0, x, total_steps, y):
             done_steps = current_task_id * steps + step
             outputs.append(['preview', (
@@ -368,17 +462,25 @@ def worker():
                 f'Step {step}/{total_steps} in the {current_task_id + 1}-th Sampling',
                 y)])

-        preparation_time = time.perf_counter() - execution_start_time
-        print(f'Preparation time: {preparation_time:.2f} seconds')
-
-        outputs.append(['preview', (13, 'Starting tasks ...', None)])
         for current_task_id, task in enumerate(tasks):
             execution_start_time = time.perf_counter()

             try:
+                positive_cond, negative_cond = task['c'], task['uc']
+
+                if 'cn' in goals:
+                    for cn_flag, cn_path in [
+                        (flags.cn_canny, controlnet_canny_path),
+                        (flags.cn_cpds, controlnet_cpds_path)
+                    ]:
+                        for cn_img, cn_stop, cn_weight in cn_tasks[cn_flag]:
+                            positive_cond, negative_cond = core.apply_controlnet(
+                                positive_cond, negative_cond,
+                                pipeline.loaded_ControlNets[cn_path], cn_img, cn_weight, 0, cn_stop)
+
                 imgs = pipeline.process_diffusion(
-                    positive_cond=task['c'],
-                    negative_cond=task['uc'],
+                    positive_cond=positive_cond,
+                    negative_cond=negative_cond,
                     steps=steps,
                     switch=switch,
                     width=width,
@@ -393,6 +495,8 @@ def worker():
                     cfg_scale=cfg_scale
                 )

+                del task['c'], task['uc'], positive_cond, negative_cond  # Save memory
+
                 if inpaint_worker.current_task is not None:
                     imgs = [inpaint_worker.current_task.post_process(x) for x in imgs]

@@ -406,14 +510,14 @@ def worker():
                     ('Resolution', str((width, height))),
                     ('Sharpness', sharpness),
                     ('Guidance Scale', guidance_scale),
-                    ('ADM Guidance', str((adm_scaler_positive, adm_scaler_negative))),
+                    ('ADM Guidance', str((modules.patch.positive_adm_scale, modules.patch.negative_adm_scale))),
                     ('Base Model', base_model_name),
                     ('Refiner Model', refiner_model_name),
                     ('Sampler', sampler_name),
                     ('Scheduler', scheduler_name),
                     ('Seed', task['task_seed'])
                 ]
-                for n, w in loras_user_raw_input:
+                for n, w in loras_raw:
                     if n != 'None':
                         d.append((f'LoRA [{n}] weight', w))
                 log(x, d, single_line_number=3)
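The new `handler(args)` treats its input as a stack rather than a fixed tuple, which is what lets this commit append the four image-prompt rows without breaking the old signature: the UI appends widget values in declaration order, the worker reverses the list once, and each `pop()` then yields values in that same order. A minimal sketch of the contract (widget values are hypothetical):

args = ['a photo of a cat', '', ['Fooocus V2'], 'Speed']  # appended in UI declaration order

args.reverse()
prompt = args.pop()                 # pops now return values in declaration order
negative_prompt = args.pop()
style_selections = args.pop()
performance_selection = args.pop()
assert prompt == 'a photo of a cat' and performance_selection == 'Speed'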
modules/core.py (145 lines changed)
@ -13,13 +13,16 @@ import comfy.model_management
|
||||
import comfy.model_detection
|
||||
import comfy.model_patcher
|
||||
import comfy.utils
|
||||
import comfy.controlnet
|
||||
import modules.sample_hijack
|
||||
import comfy.samplers
|
||||
|
||||
from comfy.sd import load_checkpoint_guess_config
|
||||
from nodes import VAEDecode, EmptyLatentImage, VAEEncode, VAEEncodeTiled, VAEDecodeTiled, VAEEncodeForInpaint
|
||||
from comfy.sample import prepare_mask, broadcast_cond, get_additional_models, cleanup_additional_models
|
||||
from nodes import VAEDecode, EmptyLatentImage, VAEEncode, VAEEncodeTiled, VAEDecodeTiled, VAEEncodeForInpaint, \
|
||||
ControlNetApplyAdvanced
|
||||
from comfy.sample import prepare_mask
|
||||
from modules.patch import patched_sampler_cfg_function, patched_model_function_wrapper
|
||||
from comfy.lora import model_lora_keys_unet, model_lora_keys_clip, load_lora
|
||||
from modules.samplers_advanced import KSamplerBasic, KSamplerWithRefiner
|
||||
|
||||
|
||||
opEmptyLatentImage = EmptyLatentImage()
|
||||
@ -28,6 +31,7 @@ opVAEEncode = VAEEncode()
|
||||
opVAEDecodeTiled = VAEDecodeTiled()
|
||||
opVAEEncodeTiled = VAEEncodeTiled()
|
||||
opVAEEncodeForInpaint = VAEEncodeForInpaint()
|
||||
opControlNetApplyAdvanced = ControlNetApplyAdvanced()
|
||||
|
||||
|
||||
class StableDiffusionModel:
|
||||
@ -38,6 +42,19 @@ class StableDiffusionModel:
|
||||
self.clip_vision = clip_vision
|
||||
|
||||
|
||||
@torch.no_grad()
|
||||
@torch.inference_mode()
|
||||
def load_controlnet(ckpt_filename):
|
||||
return comfy.controlnet.load_controlnet(ckpt_filename)
|
||||
|
||||
|
||||
@torch.no_grad()
|
||||
@torch.inference_mode()
|
||||
def apply_controlnet(positive, negative, control_net, image, strength, start_percent, end_percent):
|
||||
return opControlNetApplyAdvanced.apply_controlnet(positive=positive, negative=negative, control_net=control_net,
|
||||
image=image, strength=strength, start_percent=start_percent, end_percent=end_percent)


@torch.no_grad()
@torch.inference_mode()
def load_unet_only(unet_path):
@@ -214,12 +231,8 @@ def get_previewer():
@torch.inference_mode()
def ksampler(model, positive, negative, latent, seed=None, steps=30, cfg=7.0, sampler_name='dpmpp_fooocus_2m_sde_inpaint_seamless',
             scheduler='karras', denoise=1.0, disable_noise=False, start_step=None, last_step=None,
             force_full_denoise=False, callback_function=None):
    seed = seed if isinstance(seed, int) else random.randint(0, 2**63 - 1)

    device = comfy.model_management.get_torch_device()
             force_full_denoise=False, callback_function=None, refiner=None, refiner_switch=-1):
    latent_image = latent["samples"]

    if disable_noise:
        noise = torch.zeros(latent_image.size(), dtype=latent_image.dtype, layout=latent_image.layout, device="cpu")
    else:
@@ -232,8 +245,6 @@ def ksampler(model, positive, negative, latent, seed=None, steps=30, cfg=7.0, sa

    previewer = get_previewer()

    pbar = comfy.utils.ProgressBar(steps)

    def callback(step, x0, x, total_steps):
        comfy.model_management.throw_exception_if_processing_interrupted()
        y = None
@@ -241,111 +252,23 @@ def ksampler(model, positive, negative, latent, seed=None, steps=30, cfg=7.0, sa
            y = previewer(x0, step, total_steps)
        if callback_function is not None:
            callback_function(step, x0, x, total_steps, y)
        pbar.update_absolute(step + 1, total_steps, None)

    sigmas = None
    disable_pbar = False
    modules.sample_hijack.current_refiner = refiner
    modules.sample_hijack.refiner_switch_step = refiner_switch
    comfy.samplers.sample = modules.sample_hijack.sample_hacked

    if noise_mask is not None:
        noise_mask = prepare_mask(noise_mask, noise.shape, device)
    try:
        samples = comfy.sample.sample(model, noise, steps, cfg, sampler_name, scheduler, positive, negative, latent_image,
                                      denoise=denoise, disable_noise=disable_noise, start_step=start_step,
                                      last_step=last_step,
                                      force_full_denoise=force_full_denoise, noise_mask=noise_mask, callback=callback,
                                      disable_pbar=disable_pbar, seed=seed)

    models, inference_memory = get_additional_models(positive, negative, model.model_dtype())
    comfy.model_management.load_models_gpu([model] + models, comfy.model_management.batch_area_memory(noise.shape[0] * noise.shape[2] * noise.shape[3]) + inference_memory)
    real_model = model.model

    noise = noise.to(device)
    latent_image = latent_image.to(device)

    positive_copy = broadcast_cond(positive, noise.shape[0], device)
    negative_copy = broadcast_cond(negative, noise.shape[0], device)

    sampler = KSamplerBasic(real_model, steps=steps, device=device, sampler=sampler_name, scheduler=scheduler,
                            denoise=denoise, model_options=model.model_options)

    samples = sampler.sample(noise, positive_copy, negative_copy, cfg=cfg, latent_image=latent_image,
                             start_step=start_step, last_step=last_step, force_full_denoise=force_full_denoise,
                             denoise_mask=noise_mask, sigmas=sigmas, callback=callback, disable_pbar=disable_pbar,
                             seed=seed)

    samples = samples.cpu()

    cleanup_additional_models(models)

    out = latent.copy()
    out["samples"] = samples

    return out


@torch.no_grad()
@torch.inference_mode()
def ksampler_with_refiner(model, positive, negative, refiner, refiner_positive, refiner_negative, latent,
                          seed=None, steps=30, refiner_switch_step=20, cfg=7.0, sampler_name='dpmpp_fooocus_2m_sde_inpaint_seamless',
                          scheduler='karras', denoise=1.0, disable_noise=False, start_step=None, last_step=None,
                          force_full_denoise=False, callback_function=None):
    seed = seed if isinstance(seed, int) else random.randint(0, 2**63 - 1)

    device = comfy.model_management.get_torch_device()
    latent_image = latent["samples"]

    if disable_noise:
        noise = torch.zeros(latent_image.size(), dtype=latent_image.dtype, layout=latent_image.layout, device="cpu")
    else:
        batch_inds = latent["batch_index"] if "batch_index" in latent else None
        noise = comfy.sample.prepare_noise(latent_image, seed, batch_inds)

    noise_mask = None
    if "noise_mask" in latent:
        noise_mask = latent["noise_mask"]

    previewer = get_previewer()

    pbar = comfy.utils.ProgressBar(steps)

    def callback(step, x0, x, total_steps):
        comfy.model_management.throw_exception_if_processing_interrupted()
        y = None
        if previewer is not None:
            y = previewer(x0, step, total_steps)
        if callback_function is not None:
            callback_function(step, x0, x, total_steps, y)
        pbar.update_absolute(step + 1, total_steps, None)

    sigmas = None
    disable_pbar = False

    if noise_mask is not None:
        noise_mask = prepare_mask(noise_mask, noise.shape, device)

    models, inference_memory = get_additional_models(positive, negative, model.model_dtype())
    comfy.model_management.load_models_gpu([model] + models, comfy.model_management.batch_area_memory(noise.shape[0] * noise.shape[2] * noise.shape[3]) + inference_memory)

    noise = noise.to(device)
    latent_image = latent_image.to(device)

    positive_copy = broadcast_cond(positive, noise.shape[0], device)
    negative_copy = broadcast_cond(negative, noise.shape[0], device)

    refiner_positive_copy = broadcast_cond(refiner_positive, noise.shape[0], device)
    refiner_negative_copy = broadcast_cond(refiner_negative, noise.shape[0], device)

    sampler = KSamplerWithRefiner(model=model, refiner_model=refiner, steps=steps, device=device,
                                  sampler=sampler_name, scheduler=scheduler,
                                  denoise=denoise, model_options=model.model_options)

    samples = sampler.sample(noise, positive_copy, negative_copy, refiner_positive=refiner_positive_copy,
                             refiner_negative=refiner_negative_copy, refiner_switch_step=refiner_switch_step,
                             cfg=cfg, latent_image=latent_image,
                             start_step=start_step, last_step=last_step, force_full_denoise=force_full_denoise,
                             denoise_mask=noise_mask, sigmas=sigmas, callback_function=callback, disable_pbar=disable_pbar,
                             seed=seed)

    samples = samples.cpu()

    cleanup_additional_models(models)

    out = latent.copy()
    out["samples"] = samples
        out = latent.copy()
        out["samples"] = samples
    finally:
        modules.sample_hijack.current_refiner = None

    return out

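For orientation, a minimal usage sketch of the merged entry point. This is not part of the commit; the final_unet/final_refiner names are assumptions borrowed from the default_pipeline hunks below.

# Sketch only: one ksampler call now covers both the base-only and the
# base-plus-refiner paths that previously needed two separate functions.
sampled_latent = core.ksampler(
    model=final_unet,          # base ModelPatcher (assumed name)
    refiner=final_refiner,     # may be None; refiner_switch is then never reached
    positive=positive_cond,
    negative=negative_cond,
    latent=empty_latent,
    steps=30,
    refiner_switch=20,         # hand over to the refiner at step 20 of 30
    seed=12345,
)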
@@ -18,6 +18,29 @@ xl_base_patched_hash = ''
xl_refiner: ModelPatcher = None
xl_refiner_hash = ''

final_expansion = None
final_unet = None
final_clip = None
final_vae = None
final_refiner = None

loaded_ControlNets = {}


@torch.no_grad()
@torch.inference_mode()
def refresh_controlnets(model_paths):
    global loaded_ControlNets
    cache = {}
    for p in model_paths:
        if p is not None:
            if p in loaded_ControlNets:
                cache[p] = loaded_ControlNets[p]
            else:
                cache[p] = core.load_controlnet(p)
    loaded_ControlNets = cache
    return


@torch.no_grad()
@torch.inference_mode()
@@ -137,31 +160,21 @@ def clip_encode_single(clip, text, verbose=False):

@torch.no_grad()
@torch.inference_mode()
def clip_separate(cond):
    c, p = cond[0]
    c = c[..., -1280:].clone()
    p = p["pooled_output"].clone()
    return [[c, {"pooled_output": p}]]
def clip_encode(texts, pool_top_k=1):
    global final_clip


@torch.no_grad()
@torch.inference_mode()
def clip_encode(sd, texts, pool_top_k=1):
    if sd is None:
        return None
    if sd.clip is None:
    if final_clip is None:
        return None
    if not isinstance(texts, list):
        return None
    if len(texts) == 0:
        return None

    clip = sd.clip
    cond_list = []
    pooled_acc = 0

    for i, text in enumerate(texts):
        cond, pooled = clip_encode_single(clip, text)
        cond, pooled = clip_encode_single(final_clip, text)
        cond_list.append(cond)
        if i < pool_top_k:
            pooled_acc += pooled
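A hedged illustration of the pooling rule above: every prompt contributes token embeddings to cond_list, but only the first pool_top_k prompts contribute to the accumulated pooled vector.

# Sketch only (new signature assumed from the hunk above): with pool_top_k=1,
# 'a cat' alone feeds pooled_acc, while both prompts feed the token embeddings.
cond = clip_encode(texts=['a cat', 'cinematic, highly detailed'], pool_top_k=1)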
@@ -176,13 +189,34 @@ def clear_all_caches():
    xl_base_patched.clip.fcs_cond_cache = {}


@torch.no_grad()
@torch.inference_mode()
def prepare_text_encoder(async_call=True):
    if async_call:
        # TODO: make sure that this is always called in an async way so that users cannot feel it.
        pass
    assert_model_integrity()
    comfy.model_management.load_models_gpu([final_clip.patcher, final_expansion.patcher])
    return


@torch.no_grad()
@torch.inference_mode()
def refresh_everything(refiner_model_name, base_model_name, loras):
    global final_unet, final_clip, final_vae, final_refiner, final_expansion

    refresh_refiner_model(refiner_model_name)
    refresh_base_model(base_model_name)
    refresh_loras(loras)
    assert_model_integrity()

    final_unet, final_clip, final_vae, final_refiner = \
        xl_base_patched.unet, xl_base_patched.clip, xl_base_patched.vae, xl_refiner

    if final_expansion is None:
        final_expansion = FooocusExpansion()

    prepare_text_encoder(async_call=True)
    clear_all_caches()
    return

@@ -193,22 +227,6 @@ refresh_everything(
    loras=[(modules.path.default_lora_name, 0.5), ('None', 0.5), ('None', 0.5), ('None', 0.5), ('None', 0.5)]
)

expansion = FooocusExpansion()


@torch.no_grad()
@torch.inference_mode()
def prepare_text_encoder(async_call=True):
    if async_call:
        # TODO: make sure that this is always called in an async way so that users cannot feel it.
        pass
    assert_model_integrity()
    comfy.model_management.load_models_gpu([xl_base_patched.clip.patcher, expansion.patcher])
    return


prepare_text_encoder(async_call=True)


@torch.no_grad()
@torch.inference_mode()
@@ -218,40 +236,23 @@ def process_diffusion(positive_cond, negative_cond, steps, switch, width, height
    else:
        empty_latent = latent

    if xl_refiner is not None:
        sampled_latent = core.ksampler_with_refiner(
            model=xl_base_patched.unet,
            positive=positive_cond[0],
            negative=negative_cond[0],
            refiner=xl_refiner,
            refiner_positive=positive_cond[1],
            refiner_negative=negative_cond[1],
            refiner_switch_step=switch,
            latent=empty_latent,
            steps=steps, start_step=0, last_step=steps, disable_noise=False, force_full_denoise=True,
            seed=image_seed,
            denoise=denoise,
            callback_function=callback,
            cfg=cfg_scale,
            sampler_name=sampler_name,
            scheduler=scheduler_name
        )
    else:
        sampled_latent = core.ksampler(
            model=xl_base_patched.unet,
            positive=positive_cond[0],
            negative=negative_cond[0],
            latent=empty_latent,
            steps=steps, start_step=0, last_step=steps, disable_noise=False, force_full_denoise=True,
            seed=image_seed,
            denoise=denoise,
            callback_function=callback,
            cfg=cfg_scale,
            sampler_name=sampler_name,
            scheduler=scheduler_name
        )
    sampled_latent = core.ksampler(
        model=final_unet,
        refiner=final_refiner,
        positive=positive_cond,
        negative=negative_cond,
        latent=empty_latent,
        steps=steps, start_step=0, last_step=steps, disable_noise=False, force_full_denoise=True,
        seed=image_seed,
        denoise=denoise,
        callback_function=callback,
        cfg=cfg_scale,
        sampler_name=sampler_name,
        scheduler=scheduler_name,
        refiner_switch=switch
    )

    decoded_latent = core.decode_vae(vae=xl_base_patched.vae, latent_image=sampled_latent, tiled=tiled)
    decoded_latent = core.decode_vae(vae=final_vae, latent_image=sampled_latent, tiled=tiled)
    images = core.pytorch_to_numpy(decoded_latent)

    comfy.model_management.soft_empty_cache()
@@ -1,3 +1,6 @@
import comfy.samplers


disabled = 'Disabled'
enabled = 'Enabled'
subtle_variation = 'Vary (Subtle)'
@@ -10,14 +13,19 @@ uov_list = [
    disabled, subtle_variation, strong_variation, upscale_15, upscale_2, upscale_fast
]

sampler_list = ["euler", "euler_ancestral", "heun", "dpm_2", "dpm_2_ancestral",
                "lms", "dpm_fast", "dpm_adaptive", "dpmpp_2s_ancestral", "dpmpp_sde", "dpmpp_sde_gpu",
                "dpmpp_2m", "dpmpp_2m_sde", "dpmpp_2m_sde_gpu", "dpmpp_3m_sde", "dpmpp_3m_sde_gpu", "ddpm",
                # "ddim",
                "uni_pc", "uni_pc_bh2",
                # "dpmpp_fooocus_2m_sde_inpaint_seamless"
                ]
sampler_list = comfy.samplers.SAMPLER_NAMES
default_sampler = 'dpmpp_2m_sde_gpu'

scheduler_list = ["normal", "karras", "exponential", "sgm_uniform", "simple", "ddim_uniform"]
scheduler_list = comfy.samplers.SCHEDULER_NAMES
default_scheduler = "karras"

cn_ip = "Image Prompt"
cn_canny = "PyraCanny"
cn_cpds = "CPDS"

ip_list = [cn_ip, cn_canny, cn_cpds]
default_ip = cn_ip

default_parameters = {
    cn_ip: (0.4, 0.6), cn_canny: (0.4, 1.0), cn_cpds: (0.4, 1.0)
}  # stop, weight
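Spelling out the tuple convention above (a sketch, not part of the diff): each ControlNet type maps to its default slider values.

# Each entry is (stop_at, weight): sample until 'stop_at' of the schedule,
# applied with the given weight.
stop_at, weight = default_parameters[cn_canny]   # -> (0.4, 1.0)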
@@ -91,6 +91,11 @@ progress::after {
    min-width: min(1px, 100%) !important;
}

.resizable_area {
    resize: vertical;
    overflow: auto !important;
}

'''
progress_html = '''
<div class="loader-container">
@@ -12,7 +12,10 @@ import modules.inpaint_worker as inpaint_worker
import comfy.ldm.modules.diffusionmodules.openaimodel
import comfy.ldm.modules.diffusionmodules.model
import comfy.sd
import comfy.cldm.cldm
import comfy.model_patcher
import comfy.samplers
import modules.advanced_parameters as advanced_parameters

from comfy.k_diffusion import utils
from comfy.k_diffusion.sampling import BrownianTreeNoiseSampler, trange
@@ -191,11 +194,9 @@ def patched_discrete_eps_ddpm_denoiser_forward(self, input, sigma, **kwargs):


def patched_model_function_wrapper(func, args):
    global cfg_cin
    x = args['input']
    t = args['timestep']
    c = args['c']
    # is_uncond = torch.tensor(args['cond_or_uncond'])[:, None, None, None].to(x)
    return func(x, t, **c)


@@ -271,6 +272,8 @@ def encode_token_weights_patched_with_a1111_method(self, token_weight_pairs):

@torch.no_grad()
def sample_dpmpp_fooocus_2m_sde_inpaint_seamless(model, x, sigmas, extra_args=None, callback=None, disable=None, eta=1., s_noise=1., noise_sampler=None, **kwargs):
    print('[Sampler] Inpaint sampler is activated.')

    sigma_min, sigma_max = sigmas[sigmas > 0].min(), sigmas.max()
    noise_sampler = BrownianTreeNoiseSampler(x, sigma_min, sigma_max, seed=extra_args.get("seed", None), cpu=False) if noise_sampler is None else noise_sampler

@@ -332,7 +335,54 @@ def sample_dpmpp_fooocus_2m_sde_inpaint_seamless(model, x, sigmas, extra_args=No
    return x


def timed_adm(y, timesteps):
    if isinstance(y, torch.Tensor) and int(y.dim()) == 2 and int(y.shape[1]) == 5632:
        y_mask = (timesteps > 999.0 * (1.0 - float(adm_scaler_end))).to(y)[..., None]
        y_with_adm = y[..., :2816].clone()
        y_without_adm = y[..., 2816:].clone()
        return y_with_adm * y_mask + y_without_adm * (1.0 - y_mask)
    return y
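A small worked example of the masking above, assuming adm_scaler_end = 0.3 (so the threshold is 999 * 0.7 = 699.3); the 5632-dim vector is treated as the ADM-scaled half (first 2816 channels) concatenated with the unscaled half.

# Sketch: a batch of two steps through timed_adm, with adm_scaler_end = 0.3.
y = torch.cat([torch.full((2, 2816), 2.0),           # scaled ("with adm") half
               torch.full((2, 2816), 1.0)], dim=1)   # unscaled half
timesteps = torch.tensor([800.0, 100.0])
# mask = (timesteps > 699.3) -> [1., 0.]: the early step (t=800) keeps the
# scaled half (all 2.0), the late step (t=100) falls back to the unscaled half.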
def patched_cldm_forward(self, x, hint, timesteps, context, y=None, **kwargs):
    t_emb = timestep_embedding(timesteps, self.model_channels, repeat_only=False).to(self.dtype)
    emb = self.time_embed(t_emb)

    guided_hint = self.input_hint_block(hint, emb, context)

    y = timed_adm(y, timesteps)

    outs = []

    hs = []
    if self.num_classes is not None:
        assert y.shape[0] == x.shape[0]
        emb = emb + self.label_emb(y)

    h = x.type(self.dtype)
    for module, zero_conv in zip(self.input_blocks, self.zero_convs):
        if guided_hint is not None:
            h = module(h, emb, context)
            h += guided_hint
            guided_hint = None
        else:
            h = module(h, emb, context)
        outs.append(zero_conv(h, emb, context))

    h = self.middle_block(h, emb, context)
    outs.append(self.middle_block_out(h, emb, context))

    if not advanced_parameters.disable_soft_cn:
        for i in range(10):
            k = float(i) / 9.0
            outs[i] = outs[i] * (0.1 + 0.9 * k)

    return outs


def patched_unet_forward(self, x, timesteps=None, context=None, y=None, control=None, transformer_options={}, **kwargs):
    self.current_step = 1.0 - timesteps.to(x) / 999.0

    inpaint_fix = None
    if inpaint_worker.current_task is not None:
        inpaint_fix = inpaint_worker.current_task.inpaint_head_feature
@@ -341,11 +391,7 @@ def patched_unet_forward(self, x, timesteps=None, context=None, y=None, control=
    transformer_options["current_index"] = 0
    transformer_patches = transformer_options.get("patches", {})

    if isinstance(y, torch.Tensor) and int(y.dim()) == 2 and int(y.shape[1]) == 5632:
        y_mask = (timesteps > 999.0 * (1.0 - float(adm_scaler_end))).to(y)[..., None]
        y_with_adm = y[..., :2816].clone()
        y_without_adm = y[..., 2816:].clone()
        y = y_with_adm * y_mask + y_without_adm * (1.0 - y_mask)
    y = timed_adm(y, timesteps)

    hs = []
    t_emb = timestep_embedding(timesteps, self.model_channels, repeat_only=False).to(self.dtype)
@@ -410,8 +456,10 @@ def text_encoder_device_patched():


def patch_all():
    comfy.samplers.SAMPLER_NAMES += ['dpmpp_fooocus_2m_sde_inpaint_seamless']
    comfy.model_management.text_encoder_device = text_encoder_device_patched
    comfy.model_patcher.ModelPatcher.calculate_weight = calculate_weight_patched
    comfy.cldm.cldm.ControlNet.forward = patched_cldm_forward
    comfy.ldm.modules.diffusionmodules.openaimodel.UNetModel.forward = patched_unet_forward
    comfy.k_diffusion.sampling.sample_dpmpp_fooocus_2m_sde_inpaint_seamless = sample_dpmpp_fooocus_2m_sde_inpaint_seamless
    comfy.k_diffusion.external.DiscreteEpsDDPMDenoiser.forward = patched_discrete_eps_ddpm_denoiser_forward
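For context, a sketch of how these hooks are expected to be armed, assuming patch_all() is invoked once at startup before any model loading or sampling; this call site is an assumption, not part of this hunk.

import comfy.samplers
import modules.patch

# Arm the monkey-patches; afterwards the custom inpaint sampler resolves
# like any built-in name.
modules.patch.patch_all()
assert 'dpmpp_fooocus_2m_sde_inpaint_seamless' in comfy.samplers.SAMPLER_NAMES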
@@ -33,6 +33,8 @@ lorafile_path = get_config_or_set_default('lorafile_path', '../models/loras/')
vae_approx_path = get_config_or_set_default('vae_approx_path', '../models/vae_approx/')
upscale_models_path = get_config_or_set_default('upscale_models_path', '../models/upscale_models/')
inpaint_models_path = get_config_or_set_default('inpaint_models_path', '../models/inpaint/')
controlnet_models_path = get_config_or_set_default('controlnet_models_path', '../models/controlnet/')
clip_vision_models_path = get_config_or_set_default('clip_vision_models_path', '../models/clip_vision/')
fooocus_expansion_path = get_config_or_set_default('fooocus_expansion_path',
                                                   '../models/prompt_expansion/fooocus_expansion')

@@ -89,4 +91,49 @@ def downloading_inpaint_models():
    return os.path.join(inpaint_models_path, 'fooocus_inpaint_head.pth'), os.path.join(inpaint_models_path, 'inpaint.fooocus.patch')


def downloading_controlnet_canny():
    load_file_from_url(
        url='https://huggingface.co/lllyasviel/misc/resolve/main/control-lora-canny-rank128.safetensors',
        model_dir=controlnet_models_path,
        file_name='control-lora-canny-rank128.safetensors'
    )
    return os.path.join(controlnet_models_path, 'control-lora-canny-rank128.safetensors')


def downloading_controlnet_cpds():
    load_file_from_url(
        url='https://huggingface.co/lllyasviel/misc/resolve/main/fooocus_xl_cpds_128.safetensors',
        model_dir=controlnet_models_path,
        file_name='fooocus_xl_cpds_128.safetensors'
    )
    return os.path.join(controlnet_models_path, 'fooocus_xl_cpds_128.safetensors')


def downloading_ip_adapters():
    results = []

    load_file_from_url(
        url='https://huggingface.co/lllyasviel/misc/resolve/main/clip_vision_vit_h.safetensors',
        model_dir=clip_vision_models_path,
        file_name='clip_vision_vit_h.safetensors'
    )
    results += [os.path.join(clip_vision_models_path, 'clip_vision_vit_h.safetensors')]

    load_file_from_url(
        url='https://huggingface.co/lllyasviel/misc/resolve/main/fooocus_ip_negative.safetensors',
        model_dir=controlnet_models_path,
        file_name='fooocus_ip_negative.safetensors'
    )
    results += [os.path.join(controlnet_models_path, 'fooocus_ip_negative.safetensors')]

    load_file_from_url(
        url='https://huggingface.co/lllyasviel/misc/resolve/main/ip-adapter-plus_sdxl_vit-h.bin',
        model_dir=controlnet_models_path,
        file_name='ip-adapter-plus_sdxl_vit-h.bin'
    )
    results += [os.path.join(controlnet_models_path, 'ip-adapter-plus_sdxl_vit-h.bin')]

    return results


update_all_model_names()
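A short usage sketch for the new download helpers: each downloads on first call only, then returns the local path(s) under the configured model directories.

canny_path = downloading_controlnet_canny()
cpds_path = downloading_controlnet_cpds()
clip_vision_path, ip_negative_path, ip_adapter_path = downloading_ip_adapters()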
109
modules/sample_hijack.py
Normal file
@@ -0,0 +1,109 @@
import torch
import comfy.samplers
import comfy.model_management

from comfy.sample import prepare_sampling, cleanup_additional_models, get_additional_models
from comfy.samplers import resolve_areas_and_cond_masks, wrap_model, calculate_start_end_timesteps, \
    create_cond_with_same_area_if_none, pre_run_control, apply_empty_x_to_equal_area, encode_adm, \
    blank_inpaint_image_like


current_refiner = None
refiner_switch_step = -1


@torch.no_grad()
@torch.inference_mode()
def clip_separate(cond):
    c, p = cond[0]
    c = c[..., -1280:].clone()
    p = p["pooled_output"].clone()
    return [[c, {"pooled_output": p}]]
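A worked example of the slice above, under the standard SDXL layout assumption: the 2048-dim base context is CLIP-L (768) concatenated with OpenCLIP bigG (1280), and the refiner consumes only the bigG half.

# Sketch: the refiner cond keeps only the trailing 1280 channels.
base_cond = [[torch.zeros(1, 77, 2048), {"pooled_output": torch.zeros(1, 1280)}]]
refiner_cond = clip_separate(base_cond)
assert refiner_cond[0][0].shape[-1] == 1280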
@torch.no_grad()
@torch.inference_mode()
def sample_hacked(model, noise, positive, negative, cfg, device, sampler, sigmas, model_options={}, latent_image=None, denoise_mask=None, callback=None, disable_pbar=False, seed=None):
    global current_refiner

    positive = positive[:]
    negative = negative[:]

    resolve_areas_and_cond_masks(positive, noise.shape[2], noise.shape[3], device)
    resolve_areas_and_cond_masks(negative, noise.shape[2], noise.shape[3], device)

    model_wrap = wrap_model(model)

    calculate_start_end_timesteps(model_wrap, negative)
    calculate_start_end_timesteps(model_wrap, positive)

    # make sure each cond area has an opposite one with the same area
    for c in positive:
        create_cond_with_same_area_if_none(negative, c)
    for c in negative:
        create_cond_with_same_area_if_none(positive, c)

    # pre_run_control(model_wrap, negative + positive)
    pre_run_control(model_wrap, positive)  # negative is not necessary in Fooocus, 0.5s faster.

    apply_empty_x_to_equal_area(list(filter(lambda c: c[1].get('control_apply_to_uncond', False) == True, positive)), negative, 'control', lambda cond_cnets, x: cond_cnets[x])
    apply_empty_x_to_equal_area(positive, negative, 'gligen', lambda cond_cnets, x: cond_cnets[x])

    if model.is_adm():
        positive = encode_adm(model, positive, noise.shape[0], noise.shape[3], noise.shape[2], device, "positive")
        negative = encode_adm(model, negative, noise.shape[0], noise.shape[3], noise.shape[2], device, "negative")

    if current_refiner is not None and current_refiner.model.is_adm():
        positive_refiner = encode_adm(current_refiner.model, clip_separate(positive), noise.shape[0], noise.shape[3], noise.shape[2], device, "positive")
        negative_refiner = encode_adm(current_refiner.model, clip_separate(negative), noise.shape[0], noise.shape[3], noise.shape[2], device, "negative")

        positive_refiner[0][1]['adm_encoded'].to(positive[0][1]['adm_encoded'])
        negative_refiner[0][1]['adm_encoded'].to(negative[0][1]['adm_encoded'])

    if latent_image is not None:
        latent_image = model.process_latent_in(latent_image)

    extra_args = {"cond": positive, "uncond": negative, "cond_scale": cfg, "model_options": model_options, "seed": seed}

    cond_concat = None
    if hasattr(model, 'concat_keys'):  # inpaint
        cond_concat = []
        for ck in model.concat_keys:
            if denoise_mask is not None:
                if ck == "mask":
                    cond_concat.append(denoise_mask[:, :1])
                elif ck == "masked_image":
                    cond_concat.append(latent_image)  # NOTE: the latent_image should be masked by the mask in pixel space
            else:
                if ck == "mask":
                    cond_concat.append(torch.ones_like(noise)[:, :1])
                elif ck == "masked_image":
                    cond_concat.append(blank_inpaint_image_like(noise))
        extra_args["cond_concat"] = cond_concat

    def refiner_switch():
        extra_args["cond"] = positive_refiner
        extra_args["uncond"] = negative_refiner

        # clear ip-adapter for refiner
        extra_args['model_options'] = {k: {} if k == 'transformer_options' else v for k, v in extra_args['model_options'].items()}

        models, inference_memory = get_additional_models(positive_refiner, negative_refiner, current_refiner.model_dtype())
        comfy.model_management.load_models_gpu([current_refiner] + models, comfy.model_management.batch_area_memory(
            noise.shape[0] * noise.shape[2] * noise.shape[3]) + inference_memory)

        model_wrap.inner_model.inner_model = current_refiner.model
        print('Refiner Swapped')
        return

    def callback_wrap(step, x0, x, total_steps):
        if step == refiner_switch_step and current_refiner is not None:
            refiner_switch()
        if callback is not None:
            callback(step, x0, x, total_steps)

    samples = sampler.sample(model_wrap, sigmas, extra_args, callback_wrap, noise, latent_image, denoise_mask, disable_pbar)
    return model.process_latent_out(samples.to(torch.float32))


comfy.samplers.sample = sample_hacked
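Putting the module together, a sketch of the intended control flow; the names come from core.ksampler above, and refiner_patcher is a hypothetical ModelPatcher, not code from this commit.

import modules.sample_hijack as hijack

# What core.ksampler does before delegating to comfy.sample.sample:
hijack.current_refiner = refiner_patcher      # hypothetical; may be None
hijack.refiner_switch_step = 20
comfy.samplers.sample = hijack.sample_hacked  # the module-level assignment above does this on import as well
# comfy.sample.sample(...) now runs sample_hacked, whose callback_wrap fires
# refiner_switch() exactly once when step == refiner_switch_step, swapping
# model_wrap.inner_model.inner_model to the refiner mid-run.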
@@ -1,443 +0,0 @@
from comfy.samplers import *

import comfy.model_management


class KSamplerBasic:
    SCHEDULERS = ["normal", "karras", "exponential", "sgm_uniform", "simple", "ddim_uniform"]
    SAMPLERS = ["euler", "euler_ancestral", "heun", "dpm_2", "dpm_2_ancestral",
                "lms", "dpm_fast", "dpm_adaptive", "dpmpp_2s_ancestral", "dpmpp_sde", "dpmpp_sde_gpu",
                "dpmpp_2m", "dpmpp_2m_sde", "dpmpp_2m_sde_gpu", "dpmpp_3m_sde", "dpmpp_3m_sde_gpu", "ddpm", "ddim", "uni_pc", "uni_pc_bh2", "dpmpp_fooocus_2m_sde_inpaint_seamless"]

    def __init__(self, model, steps, device, sampler=None, scheduler=None, denoise=None, model_options={}):
        self.model = model
        self.model_denoise = CFGNoisePredictor(self.model)
        if self.model.model_type == model_base.ModelType.V_PREDICTION:
            self.model_wrap = CompVisVDenoiser(self.model_denoise, quantize=True)
        else:
            self.model_wrap = k_diffusion_external.CompVisDenoiser(self.model_denoise, quantize=True)

        self.model_k = KSamplerX0Inpaint(self.model_wrap)
        self.device = device
        if scheduler not in self.SCHEDULERS:
            scheduler = self.SCHEDULERS[0]
        if sampler not in self.SAMPLERS:
            sampler = self.SAMPLERS[0]
        self.scheduler = scheduler
        self.sampler = sampler
        self.sigma_min = float(self.model_wrap.sigma_min)
        self.sigma_max = float(self.model_wrap.sigma_max)
        self.set_steps(steps, denoise)
        self.denoise = denoise
        self.model_options = model_options

    def calculate_sigmas(self, steps):
        sigmas = None

        discard_penultimate_sigma = False
        if self.sampler in ['dpm_2', 'dpm_2_ancestral']:
            steps += 1
            discard_penultimate_sigma = True

        if self.scheduler == "karras":
            sigmas = k_diffusion_sampling.get_sigmas_karras(n=steps, sigma_min=self.sigma_min, sigma_max=self.sigma_max)
        elif self.scheduler == "exponential":
            sigmas = k_diffusion_sampling.get_sigmas_exponential(n=steps, sigma_min=self.sigma_min, sigma_max=self.sigma_max)
        elif self.scheduler == "normal":
            sigmas = self.model_wrap.get_sigmas(steps)
        elif self.scheduler == "simple":
            sigmas = simple_scheduler(self.model_wrap, steps)
        elif self.scheduler == "ddim_uniform":
            sigmas = ddim_scheduler(self.model_wrap, steps)
        elif self.scheduler == "sgm_uniform":
            sigmas = sgm_scheduler(self.model_wrap, steps)
        else:
            print("error invalid scheduler", self.scheduler)

        if discard_penultimate_sigma:
            sigmas = torch.cat([sigmas[:-2], sigmas[-1:]])
        return sigmas

    def set_steps(self, steps, denoise=None):
        self.steps = steps
        if denoise is None or denoise > 0.9999:
            self.sigmas = self.calculate_sigmas(steps).to(self.device)
        else:
            new_steps = int(steps / denoise)
            sigmas = self.calculate_sigmas(new_steps).to(self.device)
            self.sigmas = sigmas[-(steps + 1):]

    def sample(self, noise, positive, negative, cfg, latent_image=None, start_step=None, last_step=None, force_full_denoise=False, denoise_mask=None, sigmas=None, callback=None, disable_pbar=False, seed=None):
        if sigmas is None:
            sigmas = self.sigmas
        sigma_min = self.sigma_min

        if last_step is not None and last_step < (len(sigmas) - 1):
            sigma_min = sigmas[last_step]
            sigmas = sigmas[:last_step + 1]
            if force_full_denoise:
                sigmas[-1] = 0

        if start_step is not None:
            if start_step < (len(sigmas) - 1):
                sigmas = sigmas[start_step:]
            else:
                if latent_image is not None:
                    return latent_image
                else:
                    return torch.zeros_like(noise)

        positive = positive[:]
        negative = negative[:]

        resolve_areas_and_cond_masks(positive, noise.shape[2], noise.shape[3], self.device)
        resolve_areas_and_cond_masks(negative, noise.shape[2], noise.shape[3], self.device)

        calculate_start_end_timesteps(self.model_wrap, negative)
        calculate_start_end_timesteps(self.model_wrap, positive)

        # make sure each cond area has an opposite one with the same area
        for c in positive:
            create_cond_with_same_area_if_none(negative, c)
        for c in negative:
            create_cond_with_same_area_if_none(positive, c)

        pre_run_control(self.model_wrap, negative + positive)

        apply_empty_x_to_equal_area(list(filter(lambda c: c[1].get('control_apply_to_uncond', False) == True, positive)), negative, 'control', lambda cond_cnets, x: cond_cnets[x])
        apply_empty_x_to_equal_area(positive, negative, 'gligen', lambda cond_cnets, x: cond_cnets[x])

        if self.model.is_adm():
            positive = encode_adm(self.model, positive, noise.shape[0], noise.shape[3], noise.shape[2], self.device, "positive")
            negative = encode_adm(self.model, negative, noise.shape[0], noise.shape[3], noise.shape[2], self.device, "negative")

        if latent_image is not None:
            latent_image = self.model.process_latent_in(latent_image)

        extra_args = {"cond": positive, "uncond": negative, "cond_scale": cfg, "model_options": self.model_options, "seed": seed}

        cond_concat = None
        if hasattr(self.model, 'concat_keys'):  # inpaint
            cond_concat = []
            for ck in self.model.concat_keys:
                if denoise_mask is not None:
                    if ck == "mask":
                        cond_concat.append(denoise_mask[:, :1])
                    elif ck == "masked_image":
                        cond_concat.append(latent_image)  # NOTE: the latent_image should be masked by the mask in pixel space
                else:
                    if ck == "mask":
                        cond_concat.append(torch.ones_like(noise)[:, :1])
                    elif ck == "masked_image":
                        cond_concat.append(blank_inpaint_image_like(noise))
            extra_args["cond_concat"] = cond_concat

        if sigmas[0] != self.sigmas[0] or (self.denoise is not None and self.denoise < 1.0):
            max_denoise = False
        else:
            max_denoise = True

        if self.sampler == "uni_pc":
            samples = uni_pc.sample_unipc(self.model_wrap, noise, latent_image, sigmas, sampling_function=sampling_function, max_denoise=max_denoise, extra_args=extra_args, noise_mask=denoise_mask, callback=callback, disable=disable_pbar)
        elif self.sampler == "uni_pc_bh2":
            samples = uni_pc.sample_unipc(self.model_wrap, noise, latent_image, sigmas, sampling_function=sampling_function, max_denoise=max_denoise, extra_args=extra_args, noise_mask=denoise_mask, callback=callback, variant='bh2', disable=disable_pbar)
        elif self.sampler == "ddim":
            timesteps = []
            for s in range(sigmas.shape[0]):
                timesteps.insert(0, self.model_wrap.sigma_to_discrete_timestep(sigmas[s]))
            noise_mask = None
            if denoise_mask is not None:
                noise_mask = 1.0 - denoise_mask

            ddim_callback = None
            if callback is not None:
                total_steps = len(timesteps) - 1
                ddim_callback = lambda pred_x0, i: callback(i, pred_x0, None, total_steps)

            sampler = DDIMSampler(self.model, device=self.device)
            sampler.make_schedule_timesteps(ddim_timesteps=timesteps, verbose=False)
            z_enc = sampler.stochastic_encode(latent_image, torch.tensor([len(timesteps) - 1] * noise.shape[0]).to(self.device), noise=noise, max_denoise=max_denoise)
            samples, _ = sampler.sample_custom(ddim_timesteps=timesteps,
                                               conditioning=positive,
                                               batch_size=noise.shape[0],
                                               shape=noise.shape[1:],
                                               verbose=False,
                                               unconditional_guidance_scale=cfg,
                                               unconditional_conditioning=negative,
                                               eta=0.0,
                                               x_T=z_enc,
                                               x0=latent_image,
                                               img_callback=ddim_callback,
                                               denoise_function=self.model_wrap.predict_eps_discrete_timestep,
                                               extra_args=extra_args,
                                               mask=noise_mask,
                                               to_zero=sigmas[-1] == 0,
                                               end_step=sigmas.shape[0] - 1,
                                               disable_pbar=disable_pbar)

        else:
            extra_args["denoise_mask"] = denoise_mask
            self.model_k.latent_image = latent_image
            self.model_k.noise = noise

            if max_denoise:
                noise = noise * torch.sqrt(1.0 + sigmas[0] ** 2.0)
            else:
                noise = noise * sigmas[0]

            k_callback = None
            total_steps = len(sigmas) - 1
            if callback is not None:
                k_callback = lambda x: callback(x["i"], x["denoised"], x["x"], total_steps)

            if latent_image is not None:
                noise += latent_image
            if self.sampler == "dpm_fast":
                samples = k_diffusion_sampling.sample_dpm_fast(self.model_k, noise, sigma_min, sigmas[0], total_steps, extra_args=extra_args, callback=k_callback, disable=disable_pbar)
            elif self.sampler == "dpm_adaptive":
                samples = k_diffusion_sampling.sample_dpm_adaptive(self.model_k, noise, sigma_min, sigmas[0], extra_args=extra_args, callback=k_callback, disable=disable_pbar)
            else:
                samples = getattr(k_diffusion_sampling, "sample_{}".format(self.sampler))(self.model_k, noise, sigmas, extra_args=extra_args, callback=k_callback, disable=disable_pbar)

        return self.model.process_latent_out(samples.to(torch.float32))


class KSamplerWithRefiner:
    SCHEDULERS = ["normal", "karras", "exponential", "sgm_uniform", "simple", "ddim_uniform"]
    SAMPLERS = ["euler", "euler_ancestral", "heun", "dpm_2", "dpm_2_ancestral",
                "lms", "dpm_fast", "dpm_adaptive", "dpmpp_2s_ancestral", "dpmpp_sde", "dpmpp_sde_gpu",
                "dpmpp_2m", "dpmpp_2m_sde", "dpmpp_2m_sde_gpu", "dpmpp_3m_sde", "dpmpp_3m_sde_gpu", "ddpm", "ddim", "uni_pc", "uni_pc_bh2", "dpmpp_fooocus_2m_sde_inpaint_seamless"]

    def __init__(self, model, refiner_model, steps, device, sampler=None, scheduler=None, denoise=None, model_options={}):
        self.model_patcher = model
        self.refiner_model_patcher = refiner_model

        self.model = model.model
        self.refiner_model = refiner_model.model

        self.model_denoise = CFGNoisePredictor(self.model)
        self.refiner_model_denoise = CFGNoisePredictor(self.refiner_model)

        if self.model.model_type == model_base.ModelType.V_PREDICTION:
            self.model_wrap = CompVisVDenoiser(self.model_denoise, quantize=True)
        else:
            self.model_wrap = k_diffusion_external.CompVisDenoiser(self.model_denoise, quantize=True)

        if self.refiner_model.model_type == model_base.ModelType.V_PREDICTION:
            self.refiner_model_wrap = CompVisVDenoiser(self.refiner_model_denoise, quantize=True)
        else:
            self.refiner_model_wrap = k_diffusion_external.CompVisDenoiser(self.refiner_model_denoise, quantize=True)

        self.model_k = KSamplerX0Inpaint(self.model_wrap)
        self.refiner_model_k = KSamplerX0Inpaint(self.refiner_model_wrap)

        self.device = device
        if scheduler not in self.SCHEDULERS:
            scheduler = self.SCHEDULERS[0]
        if sampler not in self.SAMPLERS:
            sampler = self.SAMPLERS[0]
        self.scheduler = scheduler
        self.sampler = sampler
        self.sigma_min = float(self.model_wrap.sigma_min)
        self.sigma_max = float(self.model_wrap.sigma_max)
        self.set_steps(steps, denoise)
        self.denoise = denoise
        self.model_options = model_options

    def calculate_sigmas(self, steps):
        sigmas = None

        discard_penultimate_sigma = False
        if self.sampler in ['dpm_2', 'dpm_2_ancestral']:
            steps += 1
            discard_penultimate_sigma = True

        if self.scheduler == "karras":
            sigmas = k_diffusion_sampling.get_sigmas_karras(n=steps, sigma_min=self.sigma_min, sigma_max=self.sigma_max)
        elif self.scheduler == "exponential":
            sigmas = k_diffusion_sampling.get_sigmas_exponential(n=steps, sigma_min=self.sigma_min,
                                                                 sigma_max=self.sigma_max)
        elif self.scheduler == "normal":
            sigmas = self.model_wrap.get_sigmas(steps)
        elif self.scheduler == "simple":
            sigmas = simple_scheduler(self.model_wrap, steps)
        elif self.scheduler == "ddim_uniform":
            sigmas = ddim_scheduler(self.model_wrap, steps)
        elif self.scheduler == "sgm_uniform":
            sigmas = sgm_scheduler(self.model_wrap, steps)
        else:
            print("error invalid scheduler", self.scheduler)

        if discard_penultimate_sigma:
            sigmas = torch.cat([sigmas[:-2], sigmas[-1:]])
        return sigmas

    def set_steps(self, steps, denoise=None):
        self.steps = steps
        if denoise is None or denoise > 0.9999:
            self.sigmas = self.calculate_sigmas(steps).to(self.device)
        else:
            new_steps = int(steps / denoise)
            sigmas = self.calculate_sigmas(new_steps).to(self.device)
            self.sigmas = sigmas[-(steps + 1):]

    def sample(self, noise, positive, negative, refiner_positive, refiner_negative, cfg, latent_image=None,
               start_step=None, last_step=None, refiner_switch_step=None,
               force_full_denoise=False, denoise_mask=None, sigmas=None, callback_function=None, disable_pbar=False, seed=None):
        if sigmas is None:
            sigmas = self.sigmas
        sigma_min = self.sigma_min

        if last_step is not None and last_step < (len(sigmas) - 1):
            sigma_min = sigmas[last_step]
            sigmas = sigmas[:last_step + 1]
            if force_full_denoise:
                sigmas[-1] = 0

        if start_step is not None:
            if start_step < (len(sigmas) - 1):
                sigmas = sigmas[start_step:]
            else:
                if latent_image is not None:
                    return latent_image
                else:
                    return torch.zeros_like(noise)

        positive = positive[:]
        negative = negative[:]

        resolve_areas_and_cond_masks(positive, noise.shape[2], noise.shape[3], self.device)
        resolve_areas_and_cond_masks(negative, noise.shape[2], noise.shape[3], self.device)

        calculate_start_end_timesteps(self.model_wrap, negative)
        calculate_start_end_timesteps(self.model_wrap, positive)

        # make sure each cond area has an opposite one with the same area
        for c in positive:
            create_cond_with_same_area_if_none(negative, c)
        for c in negative:
            create_cond_with_same_area_if_none(positive, c)

        pre_run_control(self.model_wrap, negative + positive)

        apply_empty_x_to_equal_area(
            list(filter(lambda c: c[1].get('control_apply_to_uncond', False) == True, positive)), negative, 'control',
            lambda cond_cnets, x: cond_cnets[x])
        apply_empty_x_to_equal_area(positive, negative, 'gligen', lambda cond_cnets, x: cond_cnets[x])

        if self.model.is_adm():
            positive = encode_adm(self.model, positive, noise.shape[0], noise.shape[3], noise.shape[2], self.device,
                                  "positive")
            negative = encode_adm(self.model, negative, noise.shape[0], noise.shape[3], noise.shape[2], self.device,
                                  "negative")

        refiner_positive = refiner_positive[:]
        refiner_negative = refiner_negative[:]

        resolve_areas_and_cond_masks(refiner_positive, noise.shape[2], noise.shape[3], self.device)
        resolve_areas_and_cond_masks(refiner_negative, noise.shape[2], noise.shape[3], self.device)

        calculate_start_end_timesteps(self.refiner_model_wrap, refiner_positive)
        calculate_start_end_timesteps(self.refiner_model_wrap, refiner_negative)

        # make sure each cond area has an opposite one with the same area
        for c in refiner_positive:
            create_cond_with_same_area_if_none(refiner_negative, c)
        for c in refiner_negative:
            create_cond_with_same_area_if_none(refiner_positive, c)

        if self.model.is_adm():
            refiner_positive = encode_adm(self.refiner_model, refiner_positive, noise.shape[0],
                                          noise.shape[3], noise.shape[2], self.device, "positive")
            refiner_negative = encode_adm(self.refiner_model, refiner_negative, noise.shape[0],
                                          noise.shape[3], noise.shape[2], self.device, "negative")

        def refiner_switch():
            comfy.model_management.load_model_gpu(self.refiner_model_patcher)
            self.model_denoise.inner_model = self.refiner_model_denoise.inner_model
            for i in range(len(positive)):
                positive[i] = refiner_positive[i]
            for i in range(len(negative)):
                negative[i] = refiner_negative[i]
            print('Refiner swapped.')
            return

        def callback(step, x0, x, total_steps):
            if step == refiner_switch_step:
                refiner_switch()
            if callback_function is not None:
                callback_function(step, x0, x, total_steps)

        if latent_image is not None:
            latent_image = self.model.process_latent_in(latent_image)

        extra_args = {"cond": positive, "uncond": negative, "cond_scale": cfg, "model_options": self.model_options,
                      "seed": seed}

        cond_concat = None
        if hasattr(self.model, 'concat_keys'):  # inpaint
            cond_concat = []
            for ck in self.model.concat_keys:
                if denoise_mask is not None:
                    if ck == "mask":
                        cond_concat.append(denoise_mask[:, :1])
                    elif ck == "masked_image":
                        cond_concat.append(
                            latent_image)  # NOTE: the latent_image should be masked by the mask in pixel space
                else:
                    if ck == "mask":
                        cond_concat.append(torch.ones_like(noise)[:, :1])
                    elif ck == "masked_image":
                        cond_concat.append(blank_inpaint_image_like(noise))
            extra_args["cond_concat"] = cond_concat

        if sigmas[0] != self.sigmas[0] or (self.denoise is not None and self.denoise < 1.0):
            max_denoise = False
        else:
            max_denoise = True

        if self.sampler == "uni_pc":
            samples = uni_pc.sample_unipc(self.model_wrap, noise, latent_image, sigmas,
                                          sampling_function=sampling_function, max_denoise=max_denoise,
                                          extra_args=extra_args, noise_mask=denoise_mask, callback=callback,
                                          disable=disable_pbar)
        elif self.sampler == "uni_pc_bh2":
            samples = uni_pc.sample_unipc(self.model_wrap, noise, latent_image, sigmas,
                                          sampling_function=sampling_function, max_denoise=max_denoise,
                                          extra_args=extra_args, noise_mask=denoise_mask, callback=callback,
                                          variant='bh2', disable=disable_pbar)
        elif self.sampler == "ddim":
            raise NotImplementedError('Swapped Refiner Does not support DDIM.')
        else:
            extra_args["denoise_mask"] = denoise_mask
            self.model_k.latent_image = latent_image
            self.model_k.noise = noise

            if max_denoise:
                noise = noise * torch.sqrt(1.0 + sigmas[0] ** 2.0)
            else:
                noise = noise * sigmas[0]

            k_callback = None
            total_steps = len(sigmas) - 1
            if callback is not None:
                k_callback = lambda x: callback(x["i"], x["denoised"], x["x"], total_steps)

            if latent_image is not None:
                noise += latent_image
            if self.sampler == "dpm_fast":
                samples = k_diffusion_sampling.sample_dpm_fast(self.model_k, noise, sigma_min, sigmas[0], total_steps,
                                                               extra_args=extra_args, callback=k_callback,
                                                               disable=disable_pbar)
            elif self.sampler == "dpm_adaptive":
                samples = k_diffusion_sampling.sample_dpm_adaptive(self.model_k, noise, sigma_min, sigmas[0],
                                                                   extra_args=extra_args, callback=k_callback,
                                                                   disable=disable_pbar)
            else:
                samples = getattr(k_diffusion_sampling, "sample_{}".format(self.sampler))(self.model_k, noise, sigmas,
                                                                                          extra_args=extra_args,
                                                                                          callback=k_callback,
                                                                                          disable=disable_pbar)

        return self.model.process_latent_out(samples.to(torch.float32))
@@ -13,3 +13,4 @@ pytorch_lightning==1.9.4
omegaconf==2.2.3
gradio==3.39.0
pygit2==1.12.2
opencv-contrib-python==4.8.0.74
52
webui.py
@@ -9,6 +9,7 @@ import modules.html
import modules.async_worker as worker
import modules.flags as flags
import modules.gradio_hijack as grh
import modules.advanced_parameters as advanced_parameters
import comfy.model_management as model_management

from modules.sdxl_styles import style_keys, aspect_ratios, fooocus_expansion, default_styles
@@ -50,7 +51,7 @@ with shared.gradio_root:
        with gr.Column():
            progress_window = grh.Image(label='Preview', show_label=True, height=640, visible=False)
            progress_html = gr.HTML(value=modules.html.make_progress_html(32, 'Progress 32%'), visible=False, elem_id='progress-bar', elem_classes='progress-bar')
            gallery = gr.Gallery(label='Gallery', show_label=False, object_fit='contain', height=720, visible=True)
            gallery = gr.Gallery(label='Gallery', show_label=False, object_fit='contain', height=720, visible=True, elem_classes='resizable_area')
            with gr.Row(elem_classes='type_row'):
                with gr.Column(scale=0.85):
                    prompt = gr.Textbox(show_label=False, placeholder="Type prompt here.", container=False, autofocus=True, elem_classes='type_row', lines=1024)
@@ -75,16 +76,40 @@ with shared.gradio_root:
                        with gr.Column():
                            uov_method = gr.Radio(label='Upscale or Variation:', choices=flags.uov_list, value=flags.disabled)
                            gr.HTML('<a href="https://github.com/lllyasviel/Fooocus/discussions/390">\U0001F4D4 Document</a>')
                with gr.TabItem(label='Image Prompt') as ip_tab:
                    with gr.Row():
                        ip_ctrls = []
                        ip_ad_cols = []
                        for _ in range(4):
                            with gr.Column():
                                ip_ctrls.append(grh.Image(label='Image', source='upload', type='numpy', show_label=False, height=300))
                                with gr.Column(visible=False) as ad_col:
                                    with gr.Row():
                                        default_end, default_weight = flags.default_parameters[flags.default_ip]
                                        ip_ctrls.append(gr.Slider(label='Stop At', minimum=0.0, maximum=1.0, step=0.001, value=default_end))
                                        ip_ctrls.append(gr.Slider(label='Weight', minimum=0.0, maximum=2.0, step=0.001, value=default_weight))
                                    ip_ctrls.append(gr.Radio(label='Type', choices=flags.ip_list, value=flags.default_ip, container=False))
                                    ip_ctrls[-1].change(lambda x: flags.default_parameters[x], inputs=ip_ctrls[-1], outputs=ip_ctrls[-3:-1], queue=False, show_progress=False)
                                ip_ad_cols.append(ad_col)
                    ip_advanced = gr.Checkbox(label='Advanced', value=False, container=False)
                    gr.HTML('* \"Image Prompt\" is powered by Fooocus Image Mixture Engine (v1.0.1). <a href="https://github.com/lllyasviel/Fooocus/discussions/557">\U0001F4D4 Document</a>')

                    def ip_advance_checked(x):
                        return [gr.update(visible=x)] * len(ip_ad_cols)

                    ip_advanced.change(ip_advance_checked, inputs=ip_advanced, outputs=ip_ad_cols, queue=False)
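The two wiring idioms above in isolation, as a hedged standalone sketch (Gradio 3.x semantics: a returned tuple fills multiple outputs positionally, and gr.update toggles visibility); the names here are illustrative, not part of the diff.

import gradio as gr

defaults = {'Image Prompt': (0.4, 0.6), 'PyraCanny': (0.4, 1.0), 'CPDS': (0.4, 1.0)}

with gr.Blocks() as demo:
    kind = gr.Radio(list(defaults), value='Image Prompt', label='Type')
    stop = gr.Slider(0.0, 1.0, value=0.4, label='Stop At')
    weight = gr.Slider(0.0, 2.0, value=0.6, label='Weight')
    show = gr.Checkbox(label='Advanced', value=False)
    # tuple return -> fills [stop, weight], mirroring ip_ctrls[-1].change(...) above
    kind.change(lambda x: defaults[x], inputs=kind, outputs=[stop, weight], queue=False)
    # gr.update(visible=...) mirrors ip_advance_checked(...) above
    show.change(lambda x: [gr.update(visible=x)] * 2, inputs=show, outputs=[stop, weight], queue=False)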
                with gr.TabItem(label='Inpaint or Outpaint (beta)') as inpaint_tab:
                    inpaint_input_image = grh.Image(label='Drag above image to here', source='upload', type='numpy', tool='sketch', height=500, brush_color="#FFFFFF")
                    gr.HTML('Outpaint Expansion (<a href="https://github.com/lllyasviel/Fooocus/discussions/414">\U0001F4D4 Document</a>):')
                    outpaint_selections = gr.CheckboxGroup(choices=['Left', 'Right', 'Top', 'Bottom'], value=[], label='Outpaint', show_label=False, container=False)
                    gr.HTML('* \"Inpaint or Outpaint\" is powered by the sampler \"DPMPP Fooocus Seamless 2M SDE Karras Inpaint Sampler\" (beta)')

            switch_js = "(x) => {if(x){setTimeout(() => window.scrollTo({ top: 700, behavior: 'smooth' }), 50);}else{setTimeout(() => window.scrollTo({ top: 0, behavior: 'smooth' }), 50);} return x}"
            down_js = "() => {setTimeout(() => window.scrollTo({ top: 700, behavior: 'smooth' }), 50);}"
            switch_js = "(x) => {if(x){setTimeout(() => window.scrollTo({ top: 850, behavior: 'smooth' }), 50);}else{setTimeout(() => window.scrollTo({ top: 0, behavior: 'smooth' }), 50);} return x}"
            down_js = "() => {setTimeout(() => window.scrollTo({ top: 850, behavior: 'smooth' }), 50);}"

            input_image_checkbox.change(lambda x: gr.update(visible=x), inputs=input_image_checkbox, outputs=image_input_panel, queue=False, _js=switch_js)
            ip_advanced.change(lambda: None, queue=False, _js=down_js)

            current_tab = gr.Textbox(value='uov', visible=False)

@@ -111,6 +136,7 @@ with shared.gradio_root:

            uov_tab.select(lambda: ['uov', default_image], outputs=[current_tab, uov_input_image], queue=False, _js=down_js)
            inpaint_tab.select(lambda: ['inpaint', default_image], outputs=[current_tab, inpaint_input_image], queue=False, _js=down_js)
            ip_tab.select(lambda: 'ip', outputs=[current_tab], queue=False, _js=down_js)

        with gr.Column(scale=0.5, visible=False) as right_col:
            with gr.Tab(label='Setting'):
@@ -195,7 +221,18 @@ with shared.gradio_root:
                                              minimum=-1, maximum=1.0, step=0.001, value=-1,
                                              info='Set as negative number to disable. For developer debugging.')

                overwrite_ctrls = [overwrite_step, overwrite_switch, overwrite_width, overwrite_height, overwrite_vary_strength, overwrite_upscale_strength]
                mixing_image_prompt_and_vary_upscale = gr.Checkbox(label='Mixing Image Prompt and Vary/Upscale', value=False)
                mixing_image_prompt_and_inpaint = gr.Checkbox(label='Mixing Image Prompt and Inpaint', value=False)

                debugging_cn_preprocessor = gr.Checkbox(label='Debug Preprocessor of ControlNets', value=False)

                disable_soft_cn = gr.Checkbox(label='Do not use soft weighting in ControlNets', value=False)

                adps = [adm_scaler_positive, adm_scaler_negative, adm_scaler_end, adaptive_cfg, sampler_name,
                        scheduler_name, overwrite_step, overwrite_switch, overwrite_width, overwrite_height,
                        overwrite_vary_strength, overwrite_upscale_strength,
                        mixing_image_prompt_and_vary_upscale, mixing_image_prompt_and_inpaint,
                        debugging_cn_preprocessor, disable_soft_cn]

                def dev_mode_checked(r):
                    return gr.update(visible=r)
@@ -214,18 +251,21 @@ with shared.gradio_root:
        model_refresh.click(model_refresh_clicked, [], [base_model, refiner_model] + lora_ctrls, queue=False)

        advanced_checkbox.change(lambda x: gr.update(visible=x), advanced_checkbox, right_col, queue=False)

    ctrls = [
        prompt, negative_prompt, style_selections,
        performance_selection, aspect_ratios_selection, image_number, image_seed, sharpness, adm_scaler_positive, adm_scaler_negative, adm_scaler_end, guidance_scale, adaptive_cfg, sampler_name, scheduler_name
        performance_selection, aspect_ratios_selection, image_number, image_seed, sharpness, guidance_scale
    ]
    ctrls += overwrite_ctrls

    ctrls += [base_model, refiner_model] + lora_ctrls
    ctrls += [input_image_checkbox, current_tab]
    ctrls += [uov_method, uov_input_image]
    ctrls += [outpaint_selections, inpaint_input_image]
    ctrls += ip_ctrls

    run_button.click(lambda: (gr.update(visible=True, interactive=True), gr.update(visible=False), []), outputs=[stop_button, run_button, gallery])\
        .then(fn=refresh_seed, inputs=[seed_random, image_seed], outputs=image_seed)\
        .then(advanced_parameters.set_all_advanced_parameters, inputs=adps)\
        .then(fn=generate_clicked, inputs=ctrls, outputs=[progress_html, progress_window, gallery])\
        .then(lambda: (gr.update(visible=True), gr.update(visible=False)), outputs=[run_button, stop_button])