From 736a5aa3ac4be5aeaa35697d6f1d160710c718e9 Mon Sep 17 00:00:00 2001 From: lllyasviel Date: Sat, 21 Oct 2023 13:36:02 -0700 Subject: [PATCH] improve anime Improve Fooocus Anime a bit by using better SD1.5 refining formulation. --- fooocus_extras/vae_interpose.py | 7 ++-- fooocus_version.py | 2 +- modules/core.py | 16 +++++---- modules/default_pipeline.py | 62 ++++++++++----------------------- modules/sample_hijack.py | 4 --- update_log.md | 4 +++ 6 files changed, 36 insertions(+), 59 deletions(-) diff --git a/fooocus_extras/vae_interpose.py b/fooocus_extras/vae_interpose.py index 41f8192..b069b2f 100644 --- a/fooocus_extras/vae_interpose.py +++ b/fooocus_extras/vae_interpose.py @@ -69,7 +69,7 @@ vae_approx_filename = os.path.join(vae_approx_path, 'xl-to-v1_interposer-v3.1.sa def parse(x): global vae_approx_model - x_origin = x['samples'].clone() + x_origin = x.clone() if vae_approx_model is None: model = Interposer() @@ -89,6 +89,5 @@ def parse(x): fcbh.model_management.load_model_gpu(vae_approx_model) x = x_origin.to(device=vae_approx_model.load_device, dtype=vae_approx_model.dtype) - x = vae_approx_model.model(x) - - return {'samples': x.to(x_origin)} + x = vae_approx_model.model(x).to(x_origin) + return x diff --git a/fooocus_version.py b/fooocus_version.py index 8d20af6..c264cd8 100644 --- a/fooocus_version.py +++ b/fooocus_version.py @@ -1 +1 @@ -version = '2.1.722' +version = '2.1.723' diff --git a/modules/core.py b/modules/core.py index c58b0fa..8cad32a 100644 --- a/modules/core.py +++ b/modules/core.py @@ -218,19 +218,21 @@ def get_previewer(model): def ksampler(model, positive, negative, latent, seed=None, steps=30, cfg=7.0, sampler_name='dpmpp_2m_sde_gpu', scheduler='karras', denoise=1.0, disable_noise=False, start_step=None, last_step=None, force_full_denoise=False, callback_function=None, refiner=None, refiner_switch=-1, - previewer_start=None, previewer_end=None, sigmas=None, noise=None): + previewer_start=None, previewer_end=None, sigmas=None, extra_noise=0.0): if sigmas is not None: sigmas = sigmas.clone().to(fcbh.model_management.get_torch_device()) latent_image = latent["samples"] - if noise is None: - if disable_noise: - noise = torch.zeros(latent_image.size(), dtype=latent_image.dtype, layout=latent_image.layout, device="cpu") - else: - batch_inds = latent["batch_index"] if "batch_index" in latent else None - noise = fcbh.sample.prepare_noise(latent_image, seed, batch_inds) + if disable_noise: + noise = torch.zeros(latent_image.size(), dtype=latent_image.dtype, layout=latent_image.layout, device="cpu") + else: + batch_inds = latent["batch_index"] if "batch_index" in latent else None + noise = fcbh.sample.prepare_noise(latent_image, seed, batch_inds) + + if extra_noise > 0.0: + noise = noise * (1.0 + extra_noise) noise_mask = None if "noise_mask" in latent: diff --git a/modules/default_pipeline.py b/modules/default_pipeline.py index 5322e84..d20a1ec 100644 --- a/modules/default_pipeline.py +++ b/modules/default_pipeline.py @@ -6,7 +6,7 @@ import modules.path import fcbh.model_management import fcbh.latent_formats import modules.inpaint_worker -import modules.sample_hijack as sample_hijack +import fooocus_extras.vae_interpose as vae_interpose from fcbh.model_base import SDXL, SDXLRefiner from modules.expansion import FooocusExpansion @@ -270,22 +270,14 @@ refresh_everything( @torch.no_grad() @torch.inference_mode() -def vae_parse(x, tiled=False, use_interpose=True): - if final_vae is None or final_refiner_vae is None: - return x - - if use_interpose: - print('VAE interposing ...') - import fooocus_extras.vae_interpose - x = fooocus_extras.vae_interpose.parse(x) - print('VAE interposed ...') +def vae_parse(latent, k=1.0): + if final_refiner_vae is None: + result = latent["samples"] else: - print('VAE parsing ...') - x = core.decode_vae(vae=final_vae, latent_image=x, tiled=tiled) - x = core.encode_vae(vae=final_refiner_vae, pixels=x, tiled=tiled) - print('VAE parsed ...') - - return x + result = vae_interpose.parse(latent["samples"]) + if k != 1.0: + result = result * k + return {'samples': result} @torch.no_grad() @@ -444,8 +436,7 @@ def process_diffusion(positive_cond, negative_cond, steps, switch, width, height if modules.inpaint_worker.current_task is not None: modules.inpaint_worker.current_task.unswap() - sample_hijack.history_record = [] - core.ksampler( + sampled_latent = core.ksampler( model=final_unet, positive=positive_cond, negative=negative_cond, @@ -467,34 +458,20 @@ def process_diffusion(positive_cond, negative_cond, steps, switch, width, height target_model = final_unet print('Use base model to refine itself - this may because of developer mode.') + # Fooocus' vae parameters + k_data = 1.05 + k_noise = 0.15 + k_sigmas = 1.4 + + sampled_latent = vae_parse(sampled_latent, k=k_data) + sigmas = calculate_sigmas(sampler=sampler_name, scheduler=scheduler_name, model=target_model.model, steps=steps, - denoise=denoise)[switch:] - k1 = target_model.model.latent_format.scale_factor - k2 = final_unet.model.latent_format.scale_factor - k_sigmas = float(k1) / float(k2) - sigmas = sigmas * k_sigmas + denoise=denoise)[switch:] * k_sigmas len_sigmas = len(sigmas) - 1 - last_step, last_clean_latent, last_noisy_latent = sample_hijack.history_record[-1] - last_clean_latent = final_unet.model.process_latent_out(last_clean_latent.cpu().to(torch.float32)) - last_noisy_latent = final_unet.model.process_latent_out(last_noisy_latent.cpu().to(torch.float32)) - last_noise = last_noisy_latent - last_clean_latent - last_noise = last_noise / last_noise.std() - - noise_mean = torch.mean(last_noise, dim=1, keepdim=True).repeat(1, 4, 1, 1) / k_sigmas - - refiner_noise = torch.normal( - mean=noise_mean, - std=torch.ones_like(noise_mean), - generator=torch.manual_seed(image_seed+1) # Avoid artifacts - ).to(last_noise) - - sampled_latent = {'samples': last_clean_latent} - sampled_latent = vae_parse(sampled_latent) - if modules.inpaint_worker.current_task is not None: modules.inpaint_worker.current_task.swap() @@ -504,7 +481,7 @@ def process_diffusion(positive_cond, negative_cond, steps, switch, width, height negative=clip_separate(negative_cond, target_model=target_model.model, target_clip=final_clip), latent=sampled_latent, steps=len_sigmas, start_step=0, last_step=len_sigmas, disable_noise=False, force_full_denoise=True, - seed=image_seed+2, # Avoid artifacts + seed=image_seed, denoise=denoise, callback_function=callback, cfg=cfg_scale, @@ -513,7 +490,7 @@ def process_diffusion(positive_cond, negative_cond, steps, switch, width, height previewer_start=switch, previewer_end=steps, sigmas=sigmas, - noise=refiner_noise + extra_noise=k_noise ) target_model = final_refiner_vae @@ -522,5 +499,4 @@ def process_diffusion(positive_cond, negative_cond, steps, switch, width, height decoded_latent = core.decode_vae(vae=target_model, latent_image=sampled_latent, tiled=tiled) images = core.pytorch_to_numpy(decoded_latent) - sample_hijack.history_record = None return images diff --git a/modules/sample_hijack.py b/modules/sample_hijack.py index ed184dd..bf7ea09 100644 --- a/modules/sample_hijack.py +++ b/modules/sample_hijack.py @@ -11,7 +11,6 @@ from fcbh.samplers import resolve_areas_and_cond_masks, wrap_model, calculate_st current_refiner = None refiner_switch_step = -1 -history_record = None @torch.no_grad() @@ -118,9 +117,6 @@ def sample_hacked(model, noise, positive, negative, cfg, device, sampler, sigmas return def callback_wrap(step, x0, x, total_steps): - global history_record - if isinstance(history_record, list): - history_record.append((step, x0, x)) if step == refiner_switch_step and current_refiner is not None: refiner_switch() if callback is not None: diff --git a/update_log.md b/update_log.md index 1a6bfa3..ff88b3f 100644 --- a/update_log.md +++ b/update_log.md @@ -1,3 +1,7 @@ +# 2.1.723 + +* Improve Fooocus Anime a bit by using better SD1.5 refining formulation. + # 2.1.722 * Now it is possible to translate 100% all texts in the UI.