improve anime

Improve Fooocus Anime a bit by using better SD1.5 refining formulation.

parent 60c05342b2
commit 736a5aa3ac
fooocus_extras/vae_interpose.py

@@ -69,7 +69,7 @@ vae_approx_filename = os.path.join(vae_approx_path, 'xl-to-v1_interposer-v3.1.sa

 def parse(x):
     global vae_approx_model

-    x_origin = x['samples'].clone()
+    x_origin = x.clone()

     if vae_approx_model is None:
         model = Interposer()
@@ -89,6 +89,5 @@ def parse(x):
     fcbh.model_management.load_model_gpu(vae_approx_model)

     x = x_origin.to(device=vae_approx_model.load_device, dtype=vae_approx_model.dtype)
-    x = vae_approx_model.model(x)
-
-    return {'samples': x.to(x_origin)}
+    x = vae_approx_model.model(x).to(x_origin)
+    return x
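Note: parse() now takes and returns a raw latent tensor instead of a {'samples': ...} dict, and the result is cast back to the input's device and dtype via .to(x_origin). A minimal sketch of the new calling convention (the tensor shape is illustrative; running it requires the Fooocus repo and the interposer weights):

    import torch
    import fooocus_extras.vae_interpose as vae_interpose

    # A hypothetical SDXL latent batch: [batch, 4 channels, height/8, width/8].
    sdxl_latent = torch.randn(1, 4, 128, 128)

    # New API: pass the tensor directly rather than {'samples': tensor}.
    sd15_latent = vae_interpose.parse(sdxl_latent)

    # .to(x_origin) guarantees the output matches the input device and dtype.
    assert sd15_latent.device == sdxl_latent.device
    assert sd15_latent.dtype == sdxl_latent.dtype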
fooocus_version.py

@@ -1 +1 @@
-version = '2.1.722'
+version = '2.1.723'
modules/core.py

@@ -218,19 +218,21 @@ def get_previewer(model):

 def ksampler(model, positive, negative, latent, seed=None, steps=30, cfg=7.0, sampler_name='dpmpp_2m_sde_gpu',
              scheduler='karras', denoise=1.0, disable_noise=False, start_step=None, last_step=None,
              force_full_denoise=False, callback_function=None, refiner=None, refiner_switch=-1,
-             previewer_start=None, previewer_end=None, sigmas=None, noise=None):
+             previewer_start=None, previewer_end=None, sigmas=None, extra_noise=0.0):

     if sigmas is not None:
         sigmas = sigmas.clone().to(fcbh.model_management.get_torch_device())

     latent_image = latent["samples"]

-    if noise is None:
-        if disable_noise:
-            noise = torch.zeros(latent_image.size(), dtype=latent_image.dtype, layout=latent_image.layout, device="cpu")
-        else:
-            batch_inds = latent["batch_index"] if "batch_index" in latent else None
-            noise = fcbh.sample.prepare_noise(latent_image, seed, batch_inds)
+    if disable_noise:
+        noise = torch.zeros(latent_image.size(), dtype=latent_image.dtype, layout=latent_image.layout, device="cpu")
+    else:
+        batch_inds = latent["batch_index"] if "batch_index" in latent else None
+        noise = fcbh.sample.prepare_noise(latent_image, seed, batch_inds)
+
+    if extra_noise > 0.0:
+        noise = noise * (1.0 + extra_noise)

     noise_mask = None
     if "noise_mask" in latent:
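Note: the noise parameter, which let callers inject a precomputed noise tensor, is replaced by a scalar extra_noise that simply inflates the freshly prepared noise. A self-contained sketch of the effect, with plain torch.randn standing in for fcbh.sample.prepare_noise:

    import torch

    latent_image = torch.zeros(1, 4, 64, 64)
    extra_noise = 0.15  # k_noise passed by process_diffusion below

    # Seeded standard-normal noise, as prepare_noise would produce.
    noise = torch.randn(latent_image.size(), generator=torch.manual_seed(1234))

    # extra_noise > 0 scales the amplitude, here by 15%.
    if extra_noise > 0.0:
        noise = noise * (1.0 + extra_noise)

    print(float(noise.std()))  # ~1.15 instead of ~1.0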
modules/default_pipeline.py

@@ -6,7 +6,7 @@ import modules.path
 import fcbh.model_management
 import fcbh.latent_formats
 import modules.inpaint_worker
-import modules.sample_hijack as sample_hijack
+import fooocus_extras.vae_interpose as vae_interpose

 from fcbh.model_base import SDXL, SDXLRefiner
 from modules.expansion import FooocusExpansion
@@ -270,22 +270,14 @@ refresh_everything(

 @torch.no_grad()
 @torch.inference_mode()
-def vae_parse(x, tiled=False, use_interpose=True):
-    if final_vae is None or final_refiner_vae is None:
-        return x
-
-    if use_interpose:
-        print('VAE interposing ...')
-        import fooocus_extras.vae_interpose
-        x = fooocus_extras.vae_interpose.parse(x)
-        print('VAE interposed ...')
+def vae_parse(latent, k=1.0):
+    if final_refiner_vae is None:
+        result = latent["samples"]
     else:
-        print('VAE parsing ...')
-        x = core.decode_vae(vae=final_vae, latent_image=x, tiled=tiled)
-        x = core.encode_vae(vae=final_refiner_vae, pixels=x, tiled=tiled)
-        print('VAE parsed ...')
+        result = vae_interpose.parse(latent["samples"])

-    return x
+    if k != 1.0:
+        result = result * k
+
+    return {'samples': result}


 @torch.no_grad()
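Note: the rewritten vae_parse keeps the {'samples': ...} dict convention at its own boundary: it unwraps the latent, runs the interposer only when an SD1.5 refiner VAE is loaded, optionally scales the data by k, and rewraps the result. A standalone sketch; the interpose and final_refiner_vae parameters are stand-ins for the module-level vae_interpose.parse and the pipeline global:

    import torch

    def vae_parse(latent, k=1.0, final_refiner_vae=None, interpose=lambda t: t):
        if final_refiner_vae is None:
            result = latent["samples"]             # no refiner VAE: pass through
        else:
            result = interpose(latent["samples"])  # SDXL latent -> SD1.5 latent

        if k != 1.0:
            result = result * k                    # k_data = 1.05 in the caller

        return {'samples': result}

    out = vae_parse({'samples': torch.randn(1, 4, 128, 128)}, k=1.05)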
@@ -444,8 +436,7 @@ def process_diffusion(positive_cond, negative_cond, steps, switch, width, height
        if modules.inpaint_worker.current_task is not None:
            modules.inpaint_worker.current_task.unswap()

-       sample_hijack.history_record = []
-       core.ksampler(
+       sampled_latent = core.ksampler(
            model=final_unet,
            positive=positive_cond,
            negative=negative_cond,
@@ -467,34 +458,20 @@ def process_diffusion(positive_cond, negative_cond, steps, switch, width, height
            target_model = final_unet
            print('Use base model to refine itself - this may because of developer mode.')

+       # Fooocus' vae parameters
+       k_data = 1.05
+       k_noise = 0.15
+       k_sigmas = 1.4
+
+       sampled_latent = vae_parse(sampled_latent, k=k_data)
+
        sigmas = calculate_sigmas(sampler=sampler_name,
                                  scheduler=scheduler_name,
                                  model=target_model.model,
                                  steps=steps,
-                                 denoise=denoise)[switch:]
-       k1 = target_model.model.latent_format.scale_factor
-       k2 = final_unet.model.latent_format.scale_factor
-       k_sigmas = float(k1) / float(k2)
-       sigmas = sigmas * k_sigmas
+                                 denoise=denoise)[switch:] * k_sigmas
        len_sigmas = len(sigmas) - 1

-       last_step, last_clean_latent, last_noisy_latent = sample_hijack.history_record[-1]
-       last_clean_latent = final_unet.model.process_latent_out(last_clean_latent.cpu().to(torch.float32))
-       last_noisy_latent = final_unet.model.process_latent_out(last_noisy_latent.cpu().to(torch.float32))
-       last_noise = last_noisy_latent - last_clean_latent
-       last_noise = last_noise / last_noise.std()
-
-       noise_mean = torch.mean(last_noise, dim=1, keepdim=True).repeat(1, 4, 1, 1) / k_sigmas
-
-       refiner_noise = torch.normal(
-           mean=noise_mean,
-           std=torch.ones_like(noise_mean),
-           generator=torch.manual_seed(image_seed+1) # Avoid artifacts
-       ).to(last_noise)
-
-       sampled_latent = {'samples': last_clean_latent}
-       sampled_latent = vae_parse(sampled_latent)
-
        if modules.inpaint_worker.current_task is not None:
            modules.inpaint_worker.current_task.swap()
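Note: this hunk is the core of the new formulation. The old code reconstructed the refiner's starting point from the recorded sampling history (the last clean/noisy latent pair), normalized the residual noise, and drew a custom refiner_noise around its per-channel mean; the new code replaces all of that with three fixed constants: k_data = 1.05 (latent magnitude boost applied in vae_parse), k_noise = 0.15 (extra noise injected inside core.ksampler via the extra_noise hunk above), and k_sigmas = 1.4 (sigma schedule scaling). For the SD1.5-refiner case the removed k1/k2 computation gave nearly the same sigma scale, assuming the stock latent scale factors (SD1.5 = 0.18215, SDXL = 0.13025):

    # Worked check of the removed dynamic computation against the new constant.
    sd15_scale_factor = 0.18215  # SD1.5 latent_format.scale_factor
    sdxl_scale_factor = 0.13025  # SDXL latent_format.scale_factor

    k_sigmas_old = sd15_scale_factor / sdxl_scale_factor
    print(round(k_sigmas_old, 3))  # 1.398 -> effectively hard-coded as 1.4

The refiner pass also reverts to plain seeding (seed=image_seed instead of the old image_seed+2 workaround), as the next hunk shows.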
@@ -504,7 +481,7 @@ def process_diffusion(positive_cond, negative_cond, steps, switch, width, height
            negative=clip_separate(negative_cond, target_model=target_model.model, target_clip=final_clip),
            latent=sampled_latent,
            steps=len_sigmas, start_step=0, last_step=len_sigmas, disable_noise=False, force_full_denoise=True,
-           seed=image_seed+2, # Avoid artifacts
+           seed=image_seed,
            denoise=denoise,
            callback_function=callback,
            cfg=cfg_scale,
@@ -513,7 +490,7 @@ def process_diffusion(positive_cond, negative_cond, steps, switch, width, height
            previewer_start=switch,
            previewer_end=steps,
            sigmas=sigmas,
-           noise=refiner_noise
+           extra_noise=k_noise
        )

        target_model = final_refiner_vae
@@ -522,5 +499,4 @@ def process_diffusion(positive_cond, negative_cond, steps, switch, width, height
        decoded_latent = core.decode_vae(vae=target_model, latent_image=sampled_latent, tiled=tiled)

    images = core.pytorch_to_numpy(decoded_latent)
-   sample_hijack.history_record = None
    return images
modules/sample_hijack.py

@@ -11,7 +11,6 @@ from fcbh.samplers import resolve_areas_and_cond_masks, wrap_model, calculate_st

 current_refiner = None
 refiner_switch_step = -1
-history_record = None


 @torch.no_grad()
@@ -118,9 +117,6 @@ def sample_hacked(model, noise, positive, negative, cfg, device, sampler, sigmas
        return

    def callback_wrap(step, x0, x, total_steps):
-       global history_record
-       if isinstance(history_record, list):
-           history_record.append((step, x0, x))
        if step == refiner_switch_step and current_refiner is not None:
            refiner_switch()
        if callback is not None:
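Note: with refiner noise no longer derived from sampling history, the history_record hook is removed: callback_wrap used to append (step, x0, x) at every step so process_diffusion could read back the last clean/noisy latent pair. A minimal illustration of what the removed hook did, with strings standing in for latent tensors:

    # The deleted mechanism in isolation: record tuples during sampling,
    # then read the final pair back after the loop.
    history_record = []

    def callback_wrap(step, x0, x, total_steps):
        if isinstance(history_record, list):
            history_record.append((step, x0, x))

    for step in range(3):  # stub loop standing in for the sampler
        callback_wrap(step, f"clean_{step}", f"noisy_{step}", total_steps=3)

    last_step, last_clean_latent, last_noisy_latent = history_record[-1]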
update_log.md

@@ -1,3 +1,7 @@
+# 2.1.723
+
+* Improve Fooocus Anime a bit by using better SD1.5 refining formulation.
+
 # 2.1.722

 * Now it is possible to translate 100% all texts in the UI.