diff --git a/fooocus_version.py b/fooocus_version.py index 67d421f..a6e027d 100644 --- a/fooocus_version.py +++ b/fooocus_version.py @@ -1 +1 @@ -version = '2.1.702' +version = '2.1.703' diff --git a/modules/async_worker.py b/modules/async_worker.py index e8b35ab..15b2408 100644 --- a/modules/async_worker.py +++ b/modules/async_worker.py @@ -400,43 +400,42 @@ def worker(): pipeline.final_unet.model.diffusion_model.in_inpaint = True - # print(f'Inpaint task: {str((height, width))}') # outputs.append(['results', inpaint_worker.current_task.visualize_mask_processing()]) # return - progressbar(13, 'VAE encoding ...') - inpaint_pixels = core.numpy_to_pytorch(inpaint_worker.current_task.image_ready) - initial_latent = core.encode_vae(vae=pipeline.final_vae, pixels=inpaint_pixels) - inpaint_latent = initial_latent['samples'] - B, C, H, W = inpaint_latent.shape - inpaint_mask = core.numpy_to_pytorch(inpaint_worker.current_task.mask_ready[None]) - inpaint_mask = torch.nn.functional.avg_pool2d(inpaint_mask, (8, 8)) - inpaint_mask = torch.nn.functional.interpolate(inpaint_mask, (H, W), mode='bilinear') + progressbar(13, 'VAE Inpaint encoding ...') - latent_after_swap = None + inpaint_pixel_fill = core.numpy_to_pytorch(inpaint_worker.current_task.interested_fill) + inpaint_pixel_image = core.numpy_to_pytorch(inpaint_worker.current_task.interested_image) + inpaint_pixel_mask = core.numpy_to_pytorch(inpaint_worker.current_task.interested_mask) + + latent_inpaint, latent_mask = core.encode_vae_inpaint( + mask=inpaint_pixel_mask, + vae=pipeline.final_vae, + pixels=inpaint_pixel_image) + + latent_swap = None if pipeline.final_refiner_vae is not None: - progressbar(13, 'VAE SD15 encoding ...') - latent_after_swap = core.encode_vae(vae=pipeline.final_refiner_vae, pixels=inpaint_pixels)['samples'] + progressbar(13, 'VAE Inpaint SD15 encoding ...') + latent_swap = core.encode_vae( + vae=pipeline.final_refiner_vae, + pixels=inpaint_pixel_fill)['samples'] - inpaint_worker.current_task.load_latent(latent=inpaint_latent, mask=inpaint_mask, - latent_after_swap=latent_after_swap) + progressbar(13, 'VAE encoding ...') + latent_fill = core.encode_vae( + vae=pipeline.final_vae, + pixels=inpaint_pixel_fill)['samples'] - progressbar(13, 'VAE inpaint encoding ...') + inpaint_worker.current_task.load_latent(latent_fill=latent_fill, + latent_inpaint=latent_inpaint, + latent_mask=latent_mask, + latent_swap=latent_swap, + inpaint_head_model_path=inpaint_head_model_path) - inpaint_mask = (inpaint_worker.current_task.mask_ready > 0).astype(np.float32) - inpaint_mask = torch.tensor(inpaint_mask).float() - - vae_dict = core.encode_vae_inpaint( - mask=inpaint_mask, vae=pipeline.final_vae, pixels=inpaint_pixels) - - inpaint_latent = vae_dict['samples'] - inpaint_mask = vae_dict['noise_mask'] - inpaint_worker.current_task.load_inpaint_guidance(latent=inpaint_latent, mask=inpaint_mask, - model_path=inpaint_head_model_path) - - B, C, H, W = inpaint_latent.shape - final_height, final_width = inpaint_worker.current_task.image_raw.shape[:2] + B, C, H, W = latent_fill.shape height, width = H * 8, W * 8 + final_height, final_width = inpaint_worker.current_task.image.shape[:2] + initial_latent = {'samples': latent_fill} print(f'Final resolution is {str((final_height, final_width))}, latent is {str((height, width))}.') if 'cn' in goals: diff --git a/modules/core.py b/modules/core.py index 8907c4b..c58b0fa 100644 --- a/modules/core.py +++ b/modules/core.py @@ -18,7 +18,7 @@ import fcbh.samplers import fcbh.latent_formats from fcbh.sd import load_checkpoint_guess_config -from nodes import VAEDecode, EmptyLatentImage, VAEEncode, VAEEncodeTiled, VAEDecodeTiled, VAEEncodeForInpaint, \ +from nodes import VAEDecode, EmptyLatentImage, VAEEncode, VAEEncodeTiled, VAEDecodeTiled, \ ControlNetApplyAdvanced from fcbh_extras.nodes_freelunch import FreeU_V2 from fcbh.sample import prepare_mask @@ -32,7 +32,6 @@ opVAEDecode = VAEDecode() opVAEEncode = VAEEncode() opVAEDecodeTiled = VAEDecodeTiled() opVAEEncodeTiled = VAEEncodeTiled() -opVAEEncodeForInpaint = VAEEncodeForInpaint() opControlNetApplyAdvanced = ControlNetApplyAdvanced() opFreeU = FreeU_V2() @@ -130,7 +129,21 @@ def encode_vae(vae, pixels, tiled=False): @torch.no_grad() @torch.inference_mode() def encode_vae_inpaint(vae, pixels, mask): - return opVAEEncodeForInpaint.encode(pixels=pixels, vae=vae, mask=mask)[0] + assert mask.ndim == 3 and pixels.ndim == 4 + assert mask.shape[-1] == pixels.shape[-2] + assert mask.shape[-2] == pixels.shape[-3] + + w = mask.round()[..., None] + pixels = pixels * (1 - w) + 0.5 * w + + latent = vae.encode(pixels) + B, C, H, W = latent.shape + + latent_mask = mask[:, None, :, :] + latent_mask = torch.nn.functional.interpolate(latent_mask, size=(H * 8, W * 8), mode="bilinear").round() + latent_mask = torch.nn.functional.max_pool2d(latent_mask, (8, 8)).round() + + return latent, latent_mask class VAEApprox(torch.nn.Module): diff --git a/modules/default_pipeline.py b/modules/default_pipeline.py index 21452cb..4903175 100644 --- a/modules/default_pipeline.py +++ b/modules/default_pipeline.py @@ -445,6 +445,9 @@ def process_diffusion(positive_cond, negative_cond, steps, switch, width, height decoded_latent = core.decode_vae(vae=target_model, latent_image=sampled_latent, tiled=tiled) if refiner_swap_method == 'vae': + if modules.inpaint_worker.current_task is not None: + modules.inpaint_worker.current_task.unswap() + sample_hijack.history_record = [] core.ksampler( model=final_unet, @@ -517,9 +520,6 @@ def process_diffusion(positive_cond, negative_cond, steps, switch, width, height noise=refiner_noise ) - if modules.inpaint_worker.current_task is not None: - modules.inpaint_worker.current_task.swap() - target_model = final_refiner_vae if target_model is None: target_model = final_vae diff --git a/modules/inpaint_worker.py b/modules/inpaint_worker.py index 0568aeb..9cc6ab1 100644 --- a/modules/inpaint_worker.py +++ b/modules/inpaint_worker.py @@ -43,6 +43,12 @@ def morphological_open(x): return x_int32.clip(0, 255).astype(np.uint8) +def up255(x, t=0): + y = np.zeros_like(x).astype(np.uint8) + y[x > t] = 255 + return y + + def imsave(x, path): x = Image.fromarray(x) x.save(path) @@ -75,21 +81,25 @@ def compute_initial_abcd(x): b = np.max(indices[0]) + 65 c = np.min(indices[1]) - 64 d = np.max(indices[1]) + 65 + abp = (b + a) // 2 + abm = (b - a) // 2 + cdp = (d + c) // 2 + cdm = (d - c) // 2 + l = max(abm, cdm) + a = abp - l + b = abp + l + c = cdp - l + d = cdp + l a, b, c, d = regulate_abcd(x, a, b, c, d) return a, b, c, d -def area_abcd(a, b, c, d): - return (b - a) * (d - c) - - def solve_abcd(x, a, b, c, d, outpaint): H, W = x.shape[:2] if outpaint: return 0, H, 0, W - min_area = (min(H, W) ** 2) * 0.5 while True: - if area_abcd(a, b, c, d) >= min_area: + if b - a > H * 0.618 and d - c > W * 0.618: break add_h = (b - a) < (d - c) @@ -119,7 +129,7 @@ def fooocus_fill(image, mask): area = np.where(mask < 127) store = raw_image[area] - for k, repeats in [(64, 4), (32, 4), (16, 4), (4, 4), (2, 4)]: + for k, repeats in [(512, 2), (256, 2), (128, 4), (64, 4), (33, 8), (15, 8), (5, 16), (3, 16)]: for _ in range(repeats): current_image = box_blur(current_image, k) current_image[area] = store @@ -129,98 +139,107 @@ def fooocus_fill(image, mask): class InpaintWorker: def __init__(self, image, mask, is_outpaint): - # mask processing - self.mask_raw_soft = morphological_open(mask) - self.mask_raw_fg = (self.mask_raw_soft == 255).astype(np.uint8) * 255 - self.mask_raw_bg = (self.mask_raw_soft == 0).astype(np.uint8) * 255 - self.mask_raw_trim = 255 - np.maximum(self.mask_raw_fg, self.mask_raw_bg) - - # image processing - self.image_raw = fooocus_fill(image, self.mask_raw_fg) - - # log all images - # imsave(self.image_raw, 'image_raw.png') - # imsave(self.mask_raw_soft, 'mask_raw_soft.png') - # imsave(self.mask_raw_fg, 'mask_raw_fg.png') - # imsave(self.mask_raw_bg, 'mask_raw_bg.png') - # imsave(self.mask_raw_trim, 'mask_raw_trim.png') - - # compute abcd - a, b, c, d = compute_initial_abcd(self.mask_raw_bg < 127) - a, b, c, d = solve_abcd(self.mask_raw_bg, a, b, c, d, outpaint=is_outpaint) + a, b, c, d = compute_initial_abcd(mask > 0) + a, b, c, d = solve_abcd(mask, a, b, c, d, outpaint=is_outpaint) # interested area self.interested_area = (a, b, c, d) - self.mask_interested_soft = self.mask_raw_soft[a:b, c:d] - self.mask_interested_fg = self.mask_raw_fg[a:b, c:d] - self.mask_interested_bg = self.mask_raw_bg[a:b, c:d] - self.mask_interested_trim = self.mask_raw_trim[a:b, c:d] - self.image_interested = self.image_raw[a:b, c:d] + self.interested_mask = mask[a:b, c:d] + self.interested_image = image[a:b, c:d] # resize to make images ready for diffusion - H, W, C = self.image_interested.shape - k = (1024.0 ** 2.0 / float(H * W)) ** 0.5 + H, W, C = self.interested_image.shape + k = ((1024.0 ** 2.0) / float(H * W)) ** 0.5 H = int(np.ceil(float(H) * k / 16.0)) * 16 W = int(np.ceil(float(W) * k / 16.0)) * 16 - self.image_ready = resample_image(self.image_interested, W, H) - self.mask_ready = resample_image(self.mask_interested_soft, W, H) + self.interested_mask = up255(resample_image(self.interested_mask, W, H), t=127) + self.interested_image = resample_image(self.interested_image, W, H) + self.interested_fill = fooocus_fill(self.interested_image, self.interested_mask) + + # soft pixels + self.mask = morphological_open(mask) + self.image = image # ending self.latent = None self.latent_after_swap = None + self.swapped = False self.latent_mask = None self.inpaint_head_feature = None return - def load_inpaint_guidance(self, latent, mask, model_path): + def load_latent(self, + latent_fill, + latent_inpaint, + latent_mask, + latent_swap=None, + inpaint_head_model_path=None): + global inpaint_head + assert inpaint_head_model_path is not None + + self.latent = latent_fill + self.latent_mask = latent_mask + self.latent_after_swap = latent_swap + if inpaint_head is None: inpaint_head = InpaintHead() - sd = torch.load(model_path, map_location='cpu') + sd = torch.load(inpaint_head_model_path, map_location='cpu') inpaint_head.load_state_dict(sd) - process_latent_in = pipeline.xl_base_patched.unet.model.process_latent_in - latent = process_latent_in(latent) - B, C, H, W = latent.shape - - mask = torch.nn.functional.interpolate(mask, size=(H, W), mode="bilinear") - mask = mask.round() - - feed = torch.cat([mask, latent], dim=1) + feed = torch.cat([ + latent_mask, + pipeline.xl_base_patched.unet.model.process_latent_in(latent_inpaint) + ], dim=1) inpaint_head.to(device=feed.device, dtype=feed.dtype) self.inpaint_head_feature = inpaint_head(feed) + return - def load_latent(self, latent, mask, latent_after_swap=None): - self.latent = latent - self.latent_mask = mask - self.latent_after_swap = latent_after_swap - def swap(self): - if self.latent_after_swap is not None: - self.latent, self.latent_after_swap = self.latent_after_swap, self.latent + if self.swapped: + return + + if self.latent is None: + return + + if self.latent_after_swap is None: + return + + self.latent, self.latent_after_swap = self.latent_after_swap, self.latent + self.swapped = True + return + + def unswap(self): + if not self.swapped: + return + + if self.latent is None: + return + + if self.latent_after_swap is None: + return + + self.latent, self.latent_after_swap = self.latent_after_swap, self.latent + self.swapped = False + return def color_correction(self, img): fg = img.astype(np.float32) - bg = self.image_raw.copy().astype(np.float32) - w = self.mask_raw_soft[:, :, None].astype(np.float32) / 255.0 + bg = self.image.copy().astype(np.float32) + w = self.mask[:, :, None].astype(np.float32) / 255.0 y = fg * w + bg * (1 - w) return y.clip(0, 255).astype(np.uint8) def post_process(self, img): a, b, c, d = self.interested_area content = resample_image(img, d - c, b - a) - result = self.image_raw.copy() + result = self.image.copy() result[a:b, c:d] = content result = self.color_correction(result) return result def visualize_mask_processing(self): - result = self.image_raw // 4 - a, b, c, d = self.interested_area - result[a:b, c:d] += 64 - result[self.mask_raw_trim > 127] += 64 - result[self.mask_raw_fg > 127] += 128 - return [result, self.mask_raw_soft, self.image_ready, self.mask_ready] + return [self.interested_fill, self.interested_mask, self.image, self.mask] diff --git a/update_log.md b/update_log.md index 9efd97d..197c0ef 100644 --- a/update_log.md +++ b/update_log.md @@ -1,3 +1,7 @@ +# 2.1.703 + +* Fixed many previous problems related to inpaint. + # 2.1.702 * Corrected reading empty negative prompt from config (it shouldn't turn into None).