From 3bc9ac88fd8aa05e157aeff78eaa3631552119e0 Mon Sep 17 00:00:00 2001
From: lllyasviel
Date: Thu, 23 Nov 2023 13:46:50 -0800
Subject: [PATCH] maintain

---
 .../modules/diffusionmodules/openaimodel.py   | 31 +++-----
 backend/headless/fcbh/model_sampling.py       |  2 +-
 backend/headless/fcbh/sd.py                   | 18 +++--
 .../headless/fcbh/supported_models_base.py    |  5 +-
 backend/headless/fcbh/taesd/taesd.py          | 19 +++--
 backend/headless/fcbh/utils.py                |  6 +-
 backend/headless/fcbh_extras/nodes_images.py  | 76 +++++++++++++++++++
 .../fcbh_extras/nodes_model_downscale.py      | 10 ++-
 backend/headless/latent_preview.py            |  5 +-
 backend/headless/nodes.py                     | 55 +++++++++++++-
 fooocus_version.py                            |  2 +-
 modules/inpaint_worker.py                     |  2 -
 modules/patch.py                              | 12 ++-
 13 files changed, 189 insertions(+), 54 deletions(-)

diff --git a/backend/headless/fcbh/ldm/modules/diffusionmodules/openaimodel.py b/backend/headless/fcbh/ldm/modules/diffusionmodules/openaimodel.py
index 69fef0d..1c6ebd3 100644
--- a/backend/headless/fcbh/ldm/modules/diffusionmodules/openaimodel.py
+++ b/backend/headless/fcbh/ldm/modules/diffusionmodules/openaimodel.py
@@ -28,25 +28,6 @@ class TimestepBlock(nn.Module):
         Apply the module to `x` given `emb` timestep embeddings.
         """

-
-class TimestepEmbedSequential(nn.Sequential, TimestepBlock):
-    """
-    A sequential module that passes timestep embeddings to the children that
-    support it as an extra input.
-    """
-
-    def forward(self, x, emb, context=None, transformer_options={}, output_shape=None):
-        for layer in self:
-            if isinstance(layer, TimestepBlock):
-                x = layer(x, emb)
-            elif isinstance(layer, SpatialTransformer):
-                x = layer(x, context, transformer_options)
-            elif isinstance(layer, Upsample):
-                x = layer(x, output_shape=output_shape)
-            else:
-                x = layer(x)
-        return x
-
 #This is needed because accelerate makes a copy of transformer_options which breaks "current_index"
 def forward_timestep_embed(ts, x, emb, context=None, transformer_options={}, output_shape=None):
     for layer in ts:
@@ -54,13 +35,23 @@ def forward_timestep_embed(ts, x, emb, context=None, transformer_options={}, out
             x = layer(x, emb)
         elif isinstance(layer, SpatialTransformer):
             x = layer(x, context, transformer_options)
-            transformer_options["current_index"] += 1
+            if "current_index" in transformer_options:
+                transformer_options["current_index"] += 1
         elif isinstance(layer, Upsample):
             x = layer(x, output_shape=output_shape)
         else:
             x = layer(x)
     return x

+class TimestepEmbedSequential(nn.Sequential, TimestepBlock):
+    """
+    A sequential module that passes timestep embeddings to the children that
+    support it as an extra input.
+    """
+
+    def forward(self, *args, **kwargs):
+        return forward_timestep_embed(self, *args, **kwargs)
+
 class Upsample(nn.Module):
     """
     An upsampling layer with an optional convolution.
diff --git a/backend/headless/fcbh/model_sampling.py b/backend/headless/fcbh/model_sampling.py
index e1ea702..01a54a4 100644
--- a/backend/headless/fcbh/model_sampling.py
+++ b/backend/headless/fcbh/model_sampling.py
@@ -24,7 +24,7 @@ class ModelSamplingDiscrete(torch.nn.Module):
         super().__init__()
         beta_schedule = "linear"
         if model_config is not None:
-            beta_schedule = model_config.beta_schedule
+            beta_schedule = model_config.sampling_settings.get("beta_schedule", beta_schedule)
         self._register_schedule(given_betas=None, beta_schedule=beta_schedule, timesteps=1000, linear_start=0.00085, linear_end=0.012, cosine_s=8e-3)
         self.sigma_data = 1.0

diff --git a/backend/headless/fcbh/sd.py b/backend/headless/fcbh/sd.py
index a24ce7c..5df89bb 100644
--- a/backend/headless/fcbh/sd.py
+++ b/backend/headless/fcbh/sd.py
@@ -23,6 +23,7 @@ import fcbh.model_patcher
 import fcbh.lora
 import fcbh.t2i_adapter.adapter
 import fcbh.supported_models_base
+import fcbh.taesd.taesd

 def load_model_weights(model, sd):
     m, u = model.load_state_dict(sd, strict=False)
@@ -154,10 +155,16 @@ class VAE:
         if 'decoder.up_blocks.0.resnets.0.norm1.weight' in sd.keys(): #diffusers format
             sd = diffusers_convert.convert_vae_state_dict(sd)

+        self.memory_used_encode = lambda shape, dtype: (1767 * shape[2] * shape[3]) * model_management.dtype_size(dtype) #These are for AutoencoderKL and need tweaking (should be lower)
+        self.memory_used_decode = lambda shape, dtype: (2178 * shape[2] * shape[3] * 64) * model_management.dtype_size(dtype)
+
         if config is None:
-            #default SD1.x/SD2.x VAE parameters
-            ddconfig = {'double_z': True, 'z_channels': 4, 'resolution': 256, 'in_channels': 3, 'out_ch': 3, 'ch': 128, 'ch_mult': [1, 2, 4, 4], 'num_res_blocks': 2, 'attn_resolutions': [], 'dropout': 0.0}
-            self.first_stage_model = AutoencoderKL(ddconfig=ddconfig, embed_dim=4)
+            if "taesd_decoder.1.weight" in sd:
+                self.first_stage_model = fcbh.taesd.taesd.TAESD()
+            else:
+                #default SD1.x/SD2.x VAE parameters
+                ddconfig = {'double_z': True, 'z_channels': 4, 'resolution': 256, 'in_channels': 3, 'out_ch': 3, 'ch': 128, 'ch_mult': [1, 2, 4, 4], 'num_res_blocks': 2, 'attn_resolutions': [], 'dropout': 0.0}
+                self.first_stage_model = AutoencoderKL(ddconfig=ddconfig, embed_dim=4)
         else:
             self.first_stage_model = AutoencoderKL(**(config['params']))
         self.first_stage_model = self.first_stage_model.eval()
@@ -206,7 +213,7 @@ class VAE:
     def decode(self, samples_in):
         self.first_stage_model = self.first_stage_model.to(self.device)
         try:
-            memory_used = (2562 * samples_in.shape[2] * samples_in.shape[3] * 64) * 1.7
+            memory_used = self.memory_used_decode(samples_in.shape, self.vae_dtype)
             model_management.free_memory(memory_used, self.device)
             free_memory = model_management.get_free_memory(self.device)
             batch_number = int(free_memory / memory_used)
@@ -234,7 +241,7 @@ class VAE:
         self.first_stage_model = self.first_stage_model.to(self.device)
         pixel_samples = pixel_samples.movedim(-1,1)
         try:
-            memory_used = (2078 * pixel_samples.shape[2] * pixel_samples.shape[3]) * 1.7 #NOTE: this constant along with the one in the decode above are estimated from the mem usage for the VAE and could change.
+            memory_used = self.memory_used_encode(pixel_samples.shape, self.vae_dtype)
             model_management.free_memory(memory_used, self.device)
             free_memory = model_management.get_free_memory(self.device)
             batch_number = int(free_memory / memory_used)
@@ -441,6 +448,7 @@ def load_checkpoint_guess_config(ckpt_path, output_vae=True, output_clip=True, o

     if output_vae:
         vae_sd = fcbh.utils.state_dict_prefix_replace(sd, {"first_stage_model.": ""}, filter_keys=True)
+        vae_sd = model_config.process_vae_state_dict(vae_sd)
         vae = VAE(sd=vae_sd)

     if output_clip:
diff --git a/backend/headless/fcbh/supported_models_base.py b/backend/headless/fcbh/supported_models_base.py
index 6dfae03..3412cfe 100644
--- a/backend/headless/fcbh/supported_models_base.py
+++ b/backend/headless/fcbh/supported_models_base.py
@@ -19,7 +19,7 @@ class BASE:
     clip_prefix = []
     clip_vision_prefix = None
     noise_aug_config = None
-    beta_schedule = "linear"
+    sampling_settings = {}
     latent_format = latent_formats.LatentFormat

     @classmethod
@@ -56,6 +56,9 @@ class BASE:
     def process_unet_state_dict(self, state_dict):
         return state_dict

+    def process_vae_state_dict(self, state_dict):
+        return state_dict
+
     def process_clip_state_dict_for_saving(self, state_dict):
         replace_prefix = {"": "cond_stage_model."}
         return utils.state_dict_prefix_replace(state_dict, replace_prefix)
diff --git a/backend/headless/fcbh/taesd/taesd.py b/backend/headless/fcbh/taesd/taesd.py
index 02489cd..cb61fcc 100644
--- a/backend/headless/fcbh/taesd/taesd.py
+++ b/backend/headless/fcbh/taesd/taesd.py
@@ -46,15 +46,16 @@ class TAESD(nn.Module):
     latent_magnitude = 3
     latent_shift = 0.5

-    def __init__(self, encoder_path="taesd_encoder.pth", decoder_path="taesd_decoder.pth"):
+    def __init__(self, encoder_path=None, decoder_path=None):
         """Initialize pretrained TAESD on the given device from the given checkpoints."""
         super().__init__()
-        self.encoder = Encoder()
-        self.decoder = Decoder()
+        self.taesd_encoder = Encoder()
+        self.taesd_decoder = Decoder()
+        self.vae_scale = torch.nn.Parameter(torch.tensor(1.0))
         if encoder_path is not None:
-            self.encoder.load_state_dict(fcbh.utils.load_torch_file(encoder_path, safe_load=True))
+            self.taesd_encoder.load_state_dict(fcbh.utils.load_torch_file(encoder_path, safe_load=True))
         if decoder_path is not None:
-            self.decoder.load_state_dict(fcbh.utils.load_torch_file(decoder_path, safe_load=True))
+            self.taesd_decoder.load_state_dict(fcbh.utils.load_torch_file(decoder_path, safe_load=True))

     @staticmethod
     def scale_latents(x):
@@ -65,3 +66,11 @@ class TAESD(nn.Module):
     def unscale_latents(x):
         """[0, 1] -> raw latents"""
         return x.sub(TAESD.latent_shift).mul(2 * TAESD.latent_magnitude)
+
+    def decode(self, x):
+        x_sample = self.taesd_decoder(x * self.vae_scale)
+        x_sample = x_sample.sub(0.5).mul(2)
+        return x_sample
+
+    def encode(self, x):
+        return self.taesd_encoder(x * 0.5 + 0.5) / self.vae_scale
diff --git a/backend/headless/fcbh/utils.py b/backend/headless/fcbh/utils.py
index 194f5eb..87e1267 100644
--- a/backend/headless/fcbh/utils.py
+++ b/backend/headless/fcbh/utils.py
@@ -318,7 +318,9 @@ def bislerp(samples, width, height):
         coords_2 = torch.nn.functional.interpolate(coords_2, size=(1, length_new), mode="bilinear")
         coords_2 = coords_2.to(torch.int64)
         return ratios, coords_1, coords_2
-
+
+    orig_dtype = samples.dtype
+    samples = samples.float()
     n,c,h,w = samples.shape
     h_new, w_new = (height, width)

@@ -347,7 +349,7 @@ def bislerp(samples, width, height):

     result = slerp(pass_1, pass_2, ratios)
     result = result.reshape(n, h_new, w_new, c).movedim(-1, 1)
-    return result
+    return result.to(orig_dtype)

 def lanczos(samples, width, height):
     images = [Image.fromarray(np.clip(255. * image.movedim(0, -1).cpu().numpy(), 0, 255).astype(np.uint8)) for image in samples]
diff --git a/backend/headless/fcbh_extras/nodes_images.py b/backend/headless/fcbh_extras/nodes_images.py
index 8cb3223..549b28a 100644
--- a/backend/headless/fcbh_extras/nodes_images.py
+++ b/backend/headless/fcbh_extras/nodes_images.py
@@ -1,4 +1,12 @@
 import nodes
+import folder_paths
+from fcbh.cli_args import args
+
+from PIL import Image
+import numpy as np
+import json
+import os
+
 MAX_RESOLUTION = nodes.MAX_RESOLUTION

 class ImageCrop:
@@ -38,7 +46,75 @@ class RepeatImageBatch:
         s = image.repeat((amount, 1,1,1))
         return (s,)

+class SaveAnimatedWEBP:
+    def __init__(self):
+        self.output_dir = folder_paths.get_output_directory()
+        self.type = "output"
+        self.prefix_append = ""
+
+    methods = {"default": 4, "fastest": 0, "slowest": 6}
+    @classmethod
+    def INPUT_TYPES(s):
+        return {"required":
+                    {"images": ("IMAGE", ),
+                     "filename_prefix": ("STRING", {"default": "fcbh_backend"}),
+                     "fps": ("FLOAT", {"default": 6.0, "min": 0.01, "max": 1000.0, "step": 0.01}),
+                     "lossless": ("BOOLEAN", {"default": True}),
+                     "quality": ("INT", {"default": 80, "min": 0, "max": 100}),
+                     "method": (list(s.methods.keys()),),
+                     # "num_frames": ("INT", {"default": 0, "min": 0, "max": 8192}),
+                     },
+                "hidden": {"prompt": "PROMPT", "extra_pnginfo": "EXTRA_PNGINFO"},
+                }
+
+    RETURN_TYPES = ()
+    FUNCTION = "save_images"
+
+    OUTPUT_NODE = True
+
+    CATEGORY = "_for_testing"
+
+    def save_images(self, images, fps, filename_prefix, lossless, quality, method, num_frames=0, prompt=None, extra_pnginfo=None):
+        method = self.methods.get(method, "aoeu")
+        filename_prefix += self.prefix_append
+        full_output_folder, filename, counter, subfolder, filename_prefix = folder_paths.get_save_image_path(filename_prefix, self.output_dir, images[0].shape[1], images[0].shape[0])
+        results = list()
+        pil_images = []
+        for image in images:
+            i = 255. * image.cpu().numpy()
+            img = Image.fromarray(np.clip(i, 0, 255).astype(np.uint8))
+            pil_images.append(img)
+
+        metadata = None
+        if not args.disable_metadata:
+            metadata = pil_images[0].getexif()
+            if prompt is not None:
+                metadata[0x0110] = "prompt:{}".format(json.dumps(prompt))
+            if extra_pnginfo is not None:
+                inital_exif = 0x010f
+                for x in extra_pnginfo:
+                    metadata[inital_exif] = "{}:{}".format(x, json.dumps(extra_pnginfo[x]))
+                    inital_exif -= 1
+
+        if num_frames == 0:
+            num_frames = len(pil_images)
+
+        c = len(pil_images)
+        for i in range(0, c, num_frames):
+            file = f"{filename}_{counter:05}_.webp"
+            pil_images[i].save(os.path.join(full_output_folder, file), save_all=True, duration=int(1000.0/fps), append_images=pil_images[i + 1:i + num_frames], exif=metadata, lossless=lossless, quality=quality, method=method)
+            results.append({
+                "filename": file,
+                "subfolder": subfolder,
+                "type": self.type
+            })
+            counter += 1
+
+        animated = num_frames != 1
+        return { "ui": { "images": results, "animated": (animated,) } }
+
 NODE_CLASS_MAPPINGS = {
     "ImageCrop": ImageCrop,
     "RepeatImageBatch": RepeatImageBatch,
+    "SaveAnimatedWEBP": SaveAnimatedWEBP,
 }
diff --git a/backend/headless/fcbh_extras/nodes_model_downscale.py b/backend/headless/fcbh_extras/nodes_model_downscale.py
index f65ef05..28f6893 100644
--- a/backend/headless/fcbh_extras/nodes_model_downscale.py
+++ b/backend/headless/fcbh_extras/nodes_model_downscale.py
@@ -1,6 +1,8 @@
 import torch
+import fcbh.utils

 class PatchModelAddDownscale:
+    upscale_methods = ["bicubic", "nearest-exact", "bilinear", "area", "bislerp"]
     @classmethod
     def INPUT_TYPES(s):
         return {"required": { "model": ("MODEL",),
@@ -9,13 +11,15 @@ class PatchModelAddDownscale:
                               "start_percent": ("FLOAT", {"default": 0.0, "min": 0.0, "max": 1.0, "step": 0.001}),
                               "end_percent": ("FLOAT", {"default": 0.35, "min": 0.0, "max": 1.0, "step": 0.001}),
                               "downscale_after_skip": ("BOOLEAN", {"default": True}),
+                              "downscale_method": (s.upscale_methods,),
+                              "upscale_method": (s.upscale_methods,),
                               }}
     RETURN_TYPES = ("MODEL",)
     FUNCTION = "patch"

     CATEGORY = "_for_testing"

-    def patch(self, model, block_number, downscale_factor, start_percent, end_percent, downscale_after_skip):
+    def patch(self, model, block_number, downscale_factor, start_percent, end_percent, downscale_after_skip, downscale_method, upscale_method):
         sigma_start = model.model.model_sampling.percent_to_sigma(start_percent)
         sigma_end = model.model.model_sampling.percent_to_sigma(end_percent)

@@ -23,12 +27,12 @@ class PatchModelAddDownscale:
             if transformer_options["block"][1] == block_number:
                 sigma = transformer_options["sigmas"][0].item()
                 if sigma <= sigma_start and sigma >= sigma_end:
-                    h = torch.nn.functional.interpolate(h, scale_factor=(1.0 / downscale_factor), mode="bicubic", align_corners=False)
+                    h = fcbh.utils.common_upscale(h, round(h.shape[-1] * (1.0 / downscale_factor)), round(h.shape[-2] * (1.0 / downscale_factor)), downscale_method, "disabled")
             return h

         def output_block_patch(h, hsp, transformer_options):
             if h.shape[2] != hsp.shape[2]:
-                h = torch.nn.functional.interpolate(h, size=(hsp.shape[2], hsp.shape[3]), mode="bicubic", align_corners=False)
+                h = fcbh.utils.common_upscale(h, hsp.shape[-1], hsp.shape[-2], upscale_method, "disabled")
             return h, hsp

         m = model.clone()
diff --git a/backend/headless/latent_preview.py b/backend/headless/latent_preview.py
index 798c3aa..13f1893 100644
--- a/backend/headless/latent_preview.py
+++ b/backend/headless/latent_preview.py
@@ -22,10 +22,7 @@ class TAESDPreviewerImpl(LatentPreviewer):
         self.taesd = taesd

     def decode_latent_to_preview(self, x0):
-        x_sample = self.taesd.decoder(x0[:1])[0].detach()
-        # x_sample = self.taesd.unscale_latents(x_sample).div(4).add(0.5) # returns value in [-2, 2]
-        x_sample = x_sample.sub(0.5).mul(2)
-
+        x_sample = self.taesd.decode(x0[:1])[0].detach()
         x_sample = torch.clamp((x_sample + 1.0) / 2.0, min=0.0, max=1.0)
         x_sample = 255. * np.moveaxis(x_sample.cpu().numpy(), 0, 2)
         x_sample = x_sample.astype(np.uint8)
diff --git a/backend/headless/nodes.py b/backend/headless/nodes.py
index 4ab7a1a..e702eae 100644
--- a/backend/headless/nodes.py
+++ b/backend/headless/nodes.py
@@ -573,9 +573,55 @@ class LoraLoader:
         return (model_lora, clip_lora)

 class VAELoader:
+    @staticmethod
+    def vae_list():
+        vaes = folder_paths.get_filename_list("vae")
+        approx_vaes = folder_paths.get_filename_list("vae_approx")
+        sdxl_taesd_enc = False
+        sdxl_taesd_dec = False
+        sd1_taesd_enc = False
+        sd1_taesd_dec = False
+
+        for v in approx_vaes:
+            if v.startswith("taesd_decoder."):
+                sd1_taesd_dec = True
+            elif v.startswith("taesd_encoder."):
+                sd1_taesd_enc = True
+            elif v.startswith("taesdxl_decoder."):
+                sdxl_taesd_dec = True
+            elif v.startswith("taesdxl_encoder."):
+                sdxl_taesd_enc = True
+        if sd1_taesd_dec and sd1_taesd_enc:
+            vaes.append("taesd")
+        if sdxl_taesd_dec and sdxl_taesd_enc:
+            vaes.append("taesdxl")
+        return vaes
+
+    @staticmethod
+    def load_taesd(name):
+        sd = {}
+        approx_vaes = folder_paths.get_filename_list("vae_approx")
+
+        encoder = next(filter(lambda a: a.startswith("{}_encoder.".format(name)), approx_vaes))
+        decoder = next(filter(lambda a: a.startswith("{}_decoder.".format(name)), approx_vaes))
+
+        enc = fcbh.utils.load_torch_file(folder_paths.get_full_path("vae_approx", encoder))
+        for k in enc:
+            sd["taesd_encoder.{}".format(k)] = enc[k]
+
+        dec = fcbh.utils.load_torch_file(folder_paths.get_full_path("vae_approx", decoder))
+        for k in dec:
+            sd["taesd_decoder.{}".format(k)] = dec[k]
+
+        if name == "taesd":
+            sd["vae_scale"] = torch.tensor(0.18215)
+        elif name == "taesdxl":
+            sd["vae_scale"] = torch.tensor(0.13025)
+        return sd
+
     @classmethod
     def INPUT_TYPES(s):
-        return {"required": { "vae_name": (folder_paths.get_filename_list("vae"), )}}
+        return {"required": { "vae_name": (s.vae_list(), )}}
     RETURN_TYPES = ("VAE",)
     FUNCTION = "load_vae"

@@ -583,8 +629,11 @@ class VAELoader:

     #TODO: scale factor?
     def load_vae(self, vae_name):
-        vae_path = folder_paths.get_full_path("vae", vae_name)
-        sd = fcbh.utils.load_torch_file(vae_path)
+        if vae_name in ["taesd", "taesdxl"]:
+            sd = self.load_taesd(vae_name)
+        else:
+            vae_path = folder_paths.get_full_path("vae", vae_name)
+            sd = fcbh.utils.load_torch_file(vae_path)
         vae = fcbh.sd.VAE(sd=sd)
         return (vae,)
diff --git a/fooocus_version.py b/fooocus_version.py
index 7444f16..0a0bc34 100644
--- a/fooocus_version.py
+++ b/fooocus_version.py
@@ -1 +1 @@
-version = '2.1.823'
+version = '2.1.824'
diff --git a/modules/inpaint_worker.py b/modules/inpaint_worker.py
index 9aa9adc..88ec39d 100644
--- a/modules/inpaint_worker.py
+++ b/modules/inpaint_worker.py
@@ -176,8 +176,6 @@ class InpaintWorker:
         self.swapped = False
         self.latent_mask = None
         self.inpaint_head_feature = None
-        self.processing_sampler_in = True
-        self.processing_sampler_out = True
         return

     def load_latent(self, latent_fill, latent_mask, latent_swap=None):
diff --git a/modules/patch.py b/modules/patch.py
index 6ccaf0f..a65e171 100644
--- a/modules/patch.py
+++ b/modules/patch.py
@@ -312,11 +312,10 @@ def patched_KSamplerX0Inpaint_forward(self, x, sigma, uncond, cond, cond_scale,
             # avoid bad results by using different seeds.
             self.energy_generator = torch.Generator(device='cpu').manual_seed((seed + 1) % constants.MAX_SEED)

-        if inpaint_worker.current_task.processing_sampler_in:
-            energy_sigma = sigma.reshape([sigma.shape[0]] + [1] * (len(x.shape) - 1))
-            current_energy = torch.randn(
-                x.size(), dtype=x.dtype, generator=self.energy_generator, device="cpu").to(x) * energy_sigma
-            x = x * inpaint_mask + (inpaint_latent + current_energy) * (1.0 - inpaint_mask)
+        energy_sigma = sigma.reshape([sigma.shape[0]] + [1] * (len(x.shape) - 1))
+        current_energy = torch.randn(
+            x.size(), dtype=x.dtype, generator=self.energy_generator, device="cpu").to(x) * energy_sigma
+        x = x * inpaint_mask + (inpaint_latent + current_energy) * (1.0 - inpaint_mask)

         out = self.inner_model(x, sigma,
                                cond=cond,
@@ -325,8 +324,7 @@ def patched_KSamplerX0Inpaint_forward(self, x, sigma, uncond, cond, cond_scale,
                                model_options=model_options,
                                seed=seed)

-        if inpaint_worker.current_task.processing_sampler_out:
-            out = out * inpaint_mask + inpaint_latent * (1.0 - inpaint_mask)
+        out = out * inpaint_mask + inpaint_latent * (1.0 - inpaint_mask)
     else:
         out = self.inner_model(x, sigma,
                                cond=cond,