From bbae307ef2775bcc1757192f17ef28000bd9849c Mon Sep 17 00:00:00 2001 From: lllyasviel Date: Tue, 3 Oct 2023 10:36:42 -0700 Subject: [PATCH] 2.0.80 (#520) * Rework many patches and some UI details. * Speed up processing. * Move Colab to independent branch. * Implemented CFG Scale and TSNR correction when CFG is bigger than 10. * Implemented Developer Mode with more options to debug. --- colab.ipynb | 35 ------ fooocus_version.py | 2 +- launch.py | 18 +-- modules/async_worker.py | 53 +++++--- modules/core.py | 118 +++++++++-------- modules/default_pipeline.py | 143 +++++++++------------ modules/expansion.py | 2 +- modules/flags.py | 9 ++ modules/inpaint_worker.py | 3 - modules/launch_util.py | 33 +++-- modules/model_loader.py | 1 + modules/patch.py | 237 +++++++++++++++-------------------- modules/samplers_advanced.py | 27 ++-- modules/sdxl_styles.py | 3 - modules/virtual_memory.py | 185 --------------------------- readme.md | 11 +- update_log.md | 10 ++ webui.py | 31 ++++- 18 files changed, 369 insertions(+), 552 deletions(-) delete mode 100644 colab.ipynb delete mode 100644 modules/virtual_memory.py diff --git a/colab.ipynb b/colab.ipynb deleted file mode 100644 index 8e8933a..0000000 --- a/colab.ipynb +++ /dev/null @@ -1,35 +0,0 @@ -{ - "cells": [ - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "VjYy0F2gZIPR" - }, - "outputs": [], - "source": [ - "%cd /content\n", - "!git clone https://github.com/lllyasviel/Fooocus\n", - "%cd /content/Fooocus\n", - "!pip install pygit2==1.12.2\n", - "!python entry_with_update.py --share\n" - ] - } - ], - "metadata": { - "accelerator": "GPU", - "colab": { - "gpuType": "T4", - "provenance": [] - }, - "kernelspec": { - "display_name": "Python 3", - "name": "python3" - }, - "language_info": { - "name": "python" - } - }, - "nbformat": 4, - "nbformat_minor": 0 -} diff --git a/fooocus_version.py b/fooocus_version.py index 88584bd..14318f5 100644 --- a/fooocus_version.py +++ b/fooocus_version.py @@ -1 +1 @@ -version = '2.0.78' +version = '2.0.80' diff --git a/launch.py b/launch.py index 18a67a7..900efc3 100644 --- a/launch.py +++ b/launch.py @@ -12,6 +12,7 @@ from modules.path import modelfile_path, lorafile_path, vae_approx_path, fooocus REINSTALL_ALL = False + def prepare_environment(): torch_index_url = os.environ.get('TORCH_INDEX_URL', "https://download.pytorch.org/whl/cu118") torch_command = os.environ.get('TORCH_COMMAND', @@ -20,8 +21,8 @@ def prepare_environment(): xformers_package = os.environ.get('XFORMERS_PACKAGE', 'xformers==0.0.20') - comfy_repo = os.environ.get('COMFY_REPO', "https://github.com/lllyasviel/ComfyUI_2bc12d.git") - comfy_commit_hash = os.environ.get('COMFY_COMMIT_HASH', "2bc12d3d22efb5c63ae3a7fc342bb2dd16b31735") + comfy_repo = os.environ.get('COMFY_REPO', "https://github.com/comfyanonymous/ComfyUI") + comfy_commit_hash = os.environ.get('COMFY_COMMIT_HASH', "2ef459b1d4d627929c84d11e5e0cbe3ded9c9f48") print(f"Python {sys.version}") print(f"Fooocus version: {fooocus_version.version}") @@ -95,22 +96,21 @@ def download_models(): return -def clear_comfy_args(): +def ini_comfy_args(): argv = sys.argv sys.argv = [sys.argv[0]] + from comfy.cli_args import args as comfy_args comfy_args.disable_cuda_malloc = True + comfy_args.disable_smart_memory = True + comfy_args.auto_launch = False + sys.argv = argv -def cuda_malloc(): - import cuda_malloc - - prepare_environment() -clear_comfy_args() -# cuda_malloc() +ini_comfy_args() download_models() diff --git a/modules/async_worker.py b/modules/async_worker.py index 
5eebd02..bf4ce9b 100644 --- a/modules/async_worker.py +++ b/modules/async_worker.py @@ -19,7 +19,6 @@ def worker(): import modules.flags as flags import modules.path import modules.patch - import modules.virtual_memory as virtual_memory import comfy.model_management import modules.inpaint_worker as inpaint_worker @@ -45,8 +44,10 @@ def worker(): @torch.no_grad() @torch.inference_mode() def handler(task): + execution_start_time = time.perf_counter() + prompt, negative_prompt, style_selections, performance_selction, \ - aspect_ratios_selction, image_number, image_seed, sharpness, \ + aspect_ratios_selction, image_number, image_seed, sharpness, adm_scaler_positive, adm_scaler_negative, guidance_scale, adaptive_cfg, sampler_name, \ base_model_name, refiner_model_name, \ l1, w1, l2, w2, l3, w3, l4, w4, l5, w5, \ input_image_checkbox, current_tab, \ @@ -68,8 +69,20 @@ def worker(): use_expansion = False use_style = len(style_selections) > 0 + + modules.patch.adaptive_cfg = adaptive_cfg + print(f'[Parameters] Adaptive CFG = {modules.patch.adaptive_cfg}') + modules.patch.sharpness = sharpness - modules.patch.negative_adm = True + print(f'[Parameters] Sharpness = {modules.patch.sharpness}') + + modules.patch.positive_adm_scale = adm_scaler_positive + modules.patch.negative_adm_scale = adm_scaler_negative + print(f'[Parameters] ADM Scale = {modules.patch.positive_adm_scale} / {modules.patch.negative_adm_scale}') + + cfg_scale = float(guidance_scale) + print(f'[Parameters] CFG = {cfg_scale}') + initial_latent = None denoising_strength = 1.0 tiled = False @@ -226,6 +239,10 @@ def worker(): height, width = inpaint_worker.current_task.image_raw.shape[:2] print(f'Final resolution is {str((height, width))}, latent is {str((H * 8, W * 8))}.') + sampler_name = 'dpmpp_fooocus_2m_sde_inpaint_seamless' + + print(f'[Parameters] Sampler = {sampler_name}') + progressbar(1, 'Initializing ...') raw_prompt = prompt @@ -307,19 +324,13 @@ def worker(): pool_top_k=negative_top_k) if pipeline.xl_refiner is not None: - virtual_memory.load_from_virtual_memory(pipeline.xl_refiner.clip.cond_stage_model) - for i, t in enumerate(tasks): progressbar(11, f'Encoding refiner positive #{i + 1} ...') - t['c'][1] = pipeline.clip_encode(sd=pipeline.xl_refiner, texts=t['positive'], - pool_top_k=positive_top_k) + t['c'][1] = pipeline.clip_separate(t['c'][0]) for i, t in enumerate(tasks): progressbar(13, f'Encoding refiner negative #{i + 1} ...') - t['uc'][1] = pipeline.clip_encode(sd=pipeline.xl_refiner, texts=t['negative'], - pool_top_k=negative_top_k) - - virtual_memory.try_move_to_virtual_memory(pipeline.xl_refiner.clip.cond_stage_model) + t['uc'][1] = pipeline.clip_separate(t['uc'][0]) results = [] all_steps = steps * image_number @@ -331,13 +342,14 @@ def worker(): f'Step {step}/{total_steps} in the {current_task_id + 1}-th Sampling', y)]) - print(f'[ADM] Negative ADM = {modules.patch.negative_adm}') + preparation_time = time.perf_counter() - execution_start_time + print(f'Preparation time: {preparation_time:.2f} seconds') outputs.append(['preview', (13, 'Starting tasks ...', None)]) for current_task_id, task in enumerate(tasks): - try: - execution_start_time = time.perf_counter() + execution_start_time = time.perf_counter() + try: imgs = pipeline.process_diffusion( positive_cond=task['c'], negative_cond=task['uc'], @@ -347,17 +359,16 @@ def worker(): height=height, image_seed=task['task_seed'], callback=callback, + sampler_name=sampler_name, latent=initial_latent, denoise=denoising_strength, - tiled=tiled + tiled=tiled, + 
cfg_scale=cfg_scale ) if inpaint_worker.current_task is not None: imgs = [inpaint_worker.current_task.post_process(x) for x in imgs] - execution_time = time.perf_counter() - execution_start_time - print(f'Diffusion time: {execution_time:.2f} seconds') - for x in imgs: d = [ ('Prompt', raw_prompt), @@ -367,8 +378,11 @@ def worker(): ('Performance', performance_selction), ('Resolution', str((width, height))), ('Sharpness', sharpness), + ('Guidance Scale', guidance_scale), + ('ADM Guidance', str((adm_scaler_positive, adm_scaler_negative))), ('Base Model', base_model_name), ('Refiner Model', refiner_model_name), + ('Sampler', sampler_name), ('Seed', task['task_seed']) ] for n, w in loras_user_raw_input: @@ -381,6 +395,9 @@ def worker(): print('User stopped') break + execution_time = time.perf_counter() - execution_start_time + print(f'Generating and saving time: {execution_time:.2f} seconds') + outputs.append(['results', results]) return diff --git a/modules/core.py b/modules/core.py index 82f8fad..cf5128f 100644 --- a/modules/core.py +++ b/modules/core.py @@ -10,13 +10,15 @@ import torch import numpy as np import comfy.model_management +import comfy.model_detection +import comfy.model_patcher import comfy.utils from comfy.sd import load_checkpoint_guess_config from nodes import VAEDecode, EmptyLatentImage, VAEEncode, VAEEncodeTiled, VAEDecodeTiled, VAEEncodeForInpaint -from comfy.sample import prepare_mask, broadcast_cond, load_additional_models, cleanup_additional_models -from comfy.model_base import SDXLRefiner -from comfy.sd import model_lora_keys_unet, model_lora_keys_clip, load_lora +from comfy.sample import prepare_mask, broadcast_cond, get_additional_models, cleanup_additional_models +from modules.patch import patched_sampler_cfg_function, patched_model_function_wrapper +from comfy.lora import model_lora_keys_unet, model_lora_keys_clip, load_lora from modules.samplers_advanced import KSamplerBasic, KSamplerWithRefiner @@ -29,34 +31,61 @@ opVAEEncodeForInpaint = VAEEncodeForInpaint() class StableDiffusionModel: - def __init__(self, unet, vae, clip, clip_vision, model_filename=None): - if isinstance(model_filename, str): - is_refiner = isinstance(unet.model, SDXLRefiner) - if unet is not None: - unet.model.model_file = dict(filename=model_filename, prefix='model') - if clip is not None: - clip.cond_stage_model.model_file = dict(filename=model_filename, prefix='refiner_clip' if is_refiner else 'base_clip') - if vae is not None: - vae.first_stage_model.model_file = dict(filename=model_filename, prefix='first_stage_model') + def __init__(self, unet, vae, clip, clip_vision): self.unet = unet self.vae = vae self.clip = clip self.clip_vision = clip_vision - def to_meta(self): - if self.unet is not None: - self.unet.model.to('meta') - if self.clip is not None: - self.clip.cond_stage_model.to('meta') - if self.vae is not None: - self.vae.first_stage_model.to('meta') + +@torch.no_grad() +@torch.inference_mode() +def load_unet_only(unet_path): + sd_raw = comfy.utils.load_torch_file(unet_path) + sd = {} + flag = 'model.diffusion_model.' 
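    # The loop below keeps only the UNet tensors: a full SDXL checkpoint stores its UNet
    # weights under the 'model.diffusion_model.' prefix alongside CLIP and VAE tensors,
    # so stripping that prefix and dropping everything else lets the refiner be loaded
    # as a bare UNet ModelPatcher without ever instantiating its CLIP or VAE.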
+ for k in list(sd_raw.keys()): + if k.startswith(flag): + sd[k[len(flag):]] = sd_raw[k] + del sd_raw[k] + + parameters = comfy.utils.calculate_parameters(sd) + fp16 = comfy.model_management.should_use_fp16(model_params=parameters) + if "input_blocks.0.0.weight" in sd: + # ldm + model_config = comfy.model_detection.model_config_from_unet(sd, "", fp16) + if model_config is None: + raise RuntimeError("ERROR: Could not detect model type of: {}".format(unet_path)) + new_sd = sd + else: + # diffusers + model_config = comfy.model_detection.model_config_from_diffusers_unet(sd, fp16) + if model_config is None: + print("ERROR UNSUPPORTED UNET", unet_path) + return None + + diffusers_keys = comfy.utils.unet_to_diffusers(model_config.unet_config) + + new_sd = {} + for k in diffusers_keys: + if k in sd: + new_sd[diffusers_keys[k]] = sd.pop(k) + else: + print(diffusers_keys[k], k) + offload_device = comfy.model_management.unet_offload_device() + model = model_config.get_model(new_sd, "") + model = model.to(offload_device) + model.load_model_weights(new_sd, "") + return comfy.model_patcher.ModelPatcher(model, load_device=comfy.model_management.get_torch_device(), offload_device=offload_device) @torch.no_grad() @torch.inference_mode() def load_model(ckpt_filename): unet, clip, vae, clip_vision = load_checkpoint_guess_config(ckpt_filename) - return StableDiffusionModel(unet=unet, clip=clip, vae=vae, clip_vision=clip_vision, model_filename=ckpt_filename) + unet.model_options['sampler_cfg_function'] = patched_sampler_cfg_function + unet.model_options['model_function_wrapper'] = patched_model_function_wrapper + return StableDiffusionModel(unet=unet, clip=clip, vae=vae, clip_vision=clip_vision) @torch.no_grad() @@ -74,20 +103,19 @@ def load_sd_lora(model, lora_filename, strength_model=1.0, strength_clip=1.0): key_map = model_lora_keys_clip(model.clip.cond_stage_model, key_map) loaded = load_lora(lora, key_map) - new_modelpatcher = model.unet.clone() - k = new_modelpatcher.add_patches(loaded, strength_model) + new_unet = model.unet.clone() + loaded_unet_keys = new_unet.add_patches(loaded, strength_model) new_clip = model.clip.clone() - k1 = new_clip.add_patches(loaded, strength_clip) + loaded_clip_keys = new_clip.add_patches(loaded, strength_clip) + + loaded_keys = set(list(loaded_unet_keys) + list(loaded_clip_keys)) - k = set(k) - k1 = set(k1) for x in loaded: - if (x not in k) and (x not in k1): - print("Lora missed: ", x) + if x not in loaded_keys: + print("Lora key not loaded: ", x) - unet, clip = new_modelpatcher, new_clip - return StableDiffusionModel(unet=unet, clip=clip, vae=model.vae, clip_vision=model.clip_vision) + return StableDiffusionModel(unet=new_unet, clip=new_clip, vae=model.vae, clip_vision=model.clip_vision) @torch.no_grad() @@ -142,7 +170,7 @@ VAE_approx_model = None @torch.no_grad() @torch.inference_mode() -def get_previewer(device, latent_format): +def get_previewer(): global VAE_approx_model if VAE_approx_model is None: @@ -181,12 +209,7 @@ def get_previewer(device, latent_format): def ksampler(model, positive, negative, latent, seed=None, steps=30, cfg=7.0, sampler_name='dpmpp_fooocus_2m_sde_inpaint_seamless', scheduler='karras', denoise=1.0, disable_noise=False, start_step=None, last_step=None, force_full_denoise=False, callback_function=None): - # SCHEDULERS = ["normal", "karras", "exponential", "simple", "ddim_uniform"] - # SAMPLERS = ["euler", "euler_ancestral", "heun", "dpm_2", "dpm_2_ancestral", - # "lms", "dpm_fast", "dpm_adaptive", "dpmpp_2s_ancestral", "dpmpp_sde", 
"dpmpp_sde_gpu", - # "dpmpp_2m", "dpmpp_2m_sde", "dpmpp_2m_sde_gpu", "ddim", "uni_pc", "uni_pc_bh2"] - - seed = seed if isinstance(seed, int) else random.randint(1, 2 ** 64) + seed = seed if isinstance(seed, int) else random.randint(0, 2**63 - 1) device = comfy.model_management.get_torch_device() latent_image = latent["samples"] @@ -201,11 +224,12 @@ def ksampler(model, positive, negative, latent, seed=None, steps=30, cfg=7.0, sa if "noise_mask" in latent: noise_mask = latent["noise_mask"] - previewer = get_previewer(device, model.model.latent_format) + previewer = get_previewer() pbar = comfy.utils.ProgressBar(steps) def callback(step, x0, x, total_steps): + comfy.model_management.throw_exception_if_processing_interrupted() y = None if previewer is not None: y = previewer(x0, step, total_steps) @@ -219,7 +243,8 @@ def ksampler(model, positive, negative, latent, seed=None, steps=30, cfg=7.0, sa if noise_mask is not None: noise_mask = prepare_mask(noise_mask, noise.shape, device) - comfy.model_management.load_model_gpu(model) + models, inference_memory = get_additional_models(positive, negative, model.model_dtype()) + comfy.model_management.load_models_gpu([model] + models, comfy.model_management.batch_area_memory(noise.shape[0] * noise.shape[2] * noise.shape[3]) + inference_memory) real_model = model.model noise = noise.to(device) @@ -228,8 +253,6 @@ def ksampler(model, positive, negative, latent, seed=None, steps=30, cfg=7.0, sa positive_copy = broadcast_cond(positive, noise.shape[0], device) negative_copy = broadcast_cond(negative, noise.shape[0], device) - models = load_additional_models(positive, negative, model.model_dtype()) - sampler = KSamplerBasic(real_model, steps=steps, device=device, sampler=sampler_name, scheduler=scheduler, denoise=denoise, model_options=model.model_options) @@ -254,12 +277,7 @@ def ksampler_with_refiner(model, positive, negative, refiner, refiner_positive, seed=None, steps=30, refiner_switch_step=20, cfg=7.0, sampler_name='dpmpp_fooocus_2m_sde_inpaint_seamless', scheduler='karras', denoise=1.0, disable_noise=False, start_step=None, last_step=None, force_full_denoise=False, callback_function=None): - # SCHEDULERS = ["normal", "karras", "exponential", "simple", "ddim_uniform"] - # SAMPLERS = ["euler", "euler_ancestral", "heun", "dpm_2", "dpm_2_ancestral", - # "lms", "dpm_fast", "dpm_adaptive", "dpmpp_2s_ancestral", "dpmpp_sde", "dpmpp_sde_gpu", - # "dpmpp_2m", "dpmpp_2m_sde", "dpmpp_2m_sde_gpu", "ddim", "uni_pc", "uni_pc_bh2"] - - seed = seed if isinstance(seed, int) else random.randint(1, 2 ** 64) + seed = seed if isinstance(seed, int) else random.randint(0, 2**63 - 1) device = comfy.model_management.get_torch_device() latent_image = latent["samples"] @@ -274,11 +292,12 @@ def ksampler_with_refiner(model, positive, negative, refiner, refiner_positive, if "noise_mask" in latent: noise_mask = latent["noise_mask"] - previewer = get_previewer(device, model.model.latent_format) + previewer = get_previewer() pbar = comfy.utils.ProgressBar(steps) def callback(step, x0, x, total_steps): + comfy.model_management.throw_exception_if_processing_interrupted() y = None if previewer is not None: y = previewer(x0, step, total_steps) @@ -292,7 +311,8 @@ def ksampler_with_refiner(model, positive, negative, refiner, refiner_positive, if noise_mask is not None: noise_mask = prepare_mask(noise_mask, noise.shape, device) - comfy.model_management.load_model_gpu(model) + models, inference_memory = get_additional_models(positive, negative, model.model_dtype()) + 
comfy.model_management.load_models_gpu([model] + models, comfy.model_management.batch_area_memory(noise.shape[0] * noise.shape[2] * noise.shape[3]) + inference_memory) noise = noise.to(device) latent_image = latent_image.to(device) @@ -303,8 +323,6 @@ def ksampler_with_refiner(model, positive, negative, refiner, refiner_positive, refiner_positive_copy = broadcast_cond(refiner_positive, noise.shape[0], device) refiner_negative_copy = broadcast_cond(refiner_negative, noise.shape[0], device) - models = load_additional_models(positive, negative, model.model_dtype()) - sampler = KSamplerWithRefiner(model=model, refiner_model=refiner, steps=steps, device=device, sampler=sampler_name, scheduler=scheduler, denoise=denoise, model_options=model.model_options) diff --git a/modules/default_pipeline.py b/modules/default_pipeline.py index 70d75e1..c84b7a5 100644 --- a/modules/default_pipeline.py +++ b/modules/default_pipeline.py @@ -2,23 +2,49 @@ import modules.core as core import os import torch import modules.path -import modules.virtual_memory as virtual_memory import comfy.model_management +from comfy.model_patcher import ModelPatcher from comfy.model_base import SDXL, SDXLRefiner -from modules.patch import cfg_patched, patched_model_function from modules.expansion import FooocusExpansion xl_base: core.StableDiffusionModel = None xl_base_hash = '' -xl_refiner: core.StableDiffusionModel = None -xl_refiner_hash = '' - xl_base_patched: core.StableDiffusionModel = None xl_base_patched_hash = '' +xl_refiner: ModelPatcher = None +xl_refiner_hash = '' + + +@torch.no_grad() +@torch.inference_mode() +def assert_model_integrity(): + error_message = None + + if xl_base is None: + error_message = 'You have not selected SDXL base model.' + + if xl_base_patched is None: + error_message = 'You have not selected SDXL base model.' + + if not isinstance(xl_base.unet.model, SDXL): + error_message = 'You have selected base model other than SDXL. This is not supported yet.' + + if not isinstance(xl_base_patched.unet.model, SDXL): + error_message = 'You have selected base model other than SDXL. This is not supported yet.' + + if xl_refiner is not None: + if not isinstance(xl_refiner.model, SDXLRefiner): + error_message = 'You have selected refiner model other than SDXL refiner. This is not supported yet.' + + if error_message is not None: + raise NotImplementedError(error_message) + + return True + @torch.no_grad() @torch.inference_mode() @@ -31,24 +57,13 @@ def refresh_base_model(name): if xl_base_hash == model_hash: return - if xl_base is not None: - xl_base.to_meta() - xl_base = None + xl_base = None + xl_base_hash = '' + xl_base_patched = None + xl_base_patched_hash = '' xl_base = core.load_model(filename) - if not isinstance(xl_base.unet.model, SDXL): - print('Model not supported. 
Fooocus only support SDXL model as the base model.') - xl_base = None - xl_base_hash = '' - refresh_base_model(modules.path.default_base_model_name) - xl_base_hash = model_hash - xl_base_patched = xl_base - xl_base_patched_hash = '' - return - xl_base_hash = model_hash - xl_base_patched = xl_base - xl_base_patched_hash = '' print(f'Base model loaded: {model_hash}') return @@ -64,28 +79,18 @@ def refresh_refiner_model(name): if xl_refiner_hash == model_hash: return + xl_refiner = None + xl_refiner_hash = '' + if name == 'None': - xl_refiner = None - xl_refiner_hash = '' - print(f'Refiner unloaded.') - return - - if xl_refiner is not None: - xl_refiner.to_meta() - xl_refiner = None - - xl_refiner = core.load_model(filename) - if not isinstance(xl_refiner.unet.model, SDXLRefiner): - print('Model not supported. Fooocus only support SDXL refiner as the refiner.') - xl_refiner = None - xl_refiner_hash = '' print(f'Refiner unloaded.') return + xl_refiner = core.load_unet_only(filename) xl_refiner_hash = model_hash print(f'Refiner model loaded: {model_hash}') - xl_refiner.vae.first_stage_model.to('meta') + # Remove VAE xl_refiner.vae = None return @@ -133,6 +138,15 @@ def clip_encode_single(clip, text, verbose=False): return result +@torch.no_grad() +@torch.inference_mode() +def clip_separate(cond): + c, p = cond[0] + c = c[..., -1280:].clone() + p = p["pooled_output"].clone() + return [[c, {"pooled_output": p}]] + + @torch.no_grad() @torch.inference_mode() def clip_encode(sd, texts, pool_top_k=1): @@ -158,36 +172,20 @@ def clip_encode(sd, texts, pool_top_k=1): return [[torch.cat(cond_list, dim=1), {"pooled_output": pooled_acc}]] -@torch.no_grad() -@torch.inference_mode() -def clear_sd_cond_cache(sd): - if sd is None: - return None - if sd.clip is None: - return None - sd.clip.fcs_cond_cache = {} - return - - @torch.no_grad() @torch.inference_mode() def clear_all_caches(): - clear_sd_cond_cache(xl_base_patched) - clear_sd_cond_cache(xl_refiner) + xl_base.clip.fcs_cond_cache = {} + xl_base_patched.clip.fcs_cond_cache = {} @torch.no_grad() @torch.inference_mode() def refresh_everything(refiner_model_name, base_model_name, loras): refresh_refiner_model(refiner_model_name) - if xl_refiner is not None: - virtual_memory.try_move_to_virtual_memory(xl_refiner.unet.model) - virtual_memory.try_move_to_virtual_memory(xl_refiner.clip.cond_stage_model) - refresh_base_model(base_model_name) - virtual_memory.load_from_virtual_memory(xl_base.unet.model) - refresh_loras(loras) + assert_model_integrity() clear_all_caches() return @@ -203,32 +201,7 @@ expansion = FooocusExpansion() @torch.no_grad() @torch.inference_mode() -def patch_all_models(): - assert xl_base is not None - assert xl_base_patched is not None - - xl_base.unet.model_options['sampler_cfg_function'] = cfg_patched - xl_base.unet.model_options['model_function_wrapper'] = patched_model_function - - xl_base_patched.unet.model_options['sampler_cfg_function'] = cfg_patched - xl_base_patched.unet.model_options['model_function_wrapper'] = patched_model_function - - if xl_refiner is not None: - xl_refiner.unet.model_options['sampler_cfg_function'] = cfg_patched - xl_refiner.unet.model_options['model_function_wrapper'] = patched_model_function - - return - - -@torch.no_grad() -@torch.inference_mode() -def process_diffusion(positive_cond, negative_cond, steps, switch, width, height, image_seed, callback, latent=None, denoise=1.0, tiled=False): - patch_all_models() - - if xl_refiner is not None: - virtual_memory.try_move_to_virtual_memory(xl_refiner.unet.model) 
- virtual_memory.load_from_virtual_memory(xl_base.unet.model) - +def process_diffusion(positive_cond, negative_cond, steps, switch, width, height, image_seed, callback, sampler_name, latent=None, denoise=1.0, tiled=False, cfg_scale=7.0): if latent is None: empty_latent = core.generate_empty_latent(width=width, height=height, batch_size=1) else: @@ -239,7 +212,7 @@ def process_diffusion(positive_cond, negative_cond, steps, switch, width, height model=xl_base_patched.unet, positive=positive_cond[0], negative=negative_cond[0], - refiner=xl_refiner.unet, + refiner=xl_refiner, refiner_positive=positive_cond[1], refiner_negative=negative_cond[1], refiner_switch_step=switch, @@ -247,7 +220,9 @@ def process_diffusion(positive_cond, negative_cond, steps, switch, width, height steps=steps, start_step=0, last_step=steps, disable_noise=False, force_full_denoise=True, seed=image_seed, denoise=denoise, - callback_function=callback + callback_function=callback, + cfg=cfg_scale, + sampler_name=sampler_name ) else: sampled_latent = core.ksampler( @@ -258,7 +233,9 @@ def process_diffusion(positive_cond, negative_cond, steps, switch, width, height steps=steps, start_step=0, last_step=steps, disable_noise=False, force_full_denoise=True, seed=image_seed, denoise=denoise, - callback_function=callback + callback_function=callback, + cfg=cfg_scale, + sampler_name=sampler_name ) decoded_latent = core.decode_vae(vae=xl_base_patched.vae, latent_image=sampled_latent, tiled=tiled) diff --git a/modules/expansion.py b/modules/expansion.py index 8b339f8..a07f0cf 100644 --- a/modules/expansion.py +++ b/modules/expansion.py @@ -4,7 +4,7 @@ import comfy.model_management as model_management from transformers import AutoTokenizer, AutoModelForCausalLM, set_seed from modules.path import fooocus_expansion_path -from comfy.sd import ModelPatcher +from comfy.model_patcher import ModelPatcher fooocus_magic_split = [ diff --git a/modules/flags.py b/modules/flags.py index cb47e61..ca7af78 100644 --- a/modules/flags.py +++ b/modules/flags.py @@ -9,3 +9,12 @@ upscale_fast = 'Upscale (Fast 2x)' uov_list = [ disabled, subtle_variation, strong_variation, upscale_15, upscale_2, upscale_fast ] + +sampler_list = ["euler", "euler_ancestral", "heun", "dpm_2", "dpm_2_ancestral", + "lms", "dpm_fast", "dpm_adaptive", "dpmpp_2s_ancestral", "dpmpp_sde", "dpmpp_sde_gpu", + "dpmpp_2m", "dpmpp_2m_sde", "dpmpp_2m_sde_gpu", "dpmpp_3m_sde", "dpmpp_3m_sde_gpu", "ddpm", + # "ddim", + "uni_pc", "uni_pc_bh2", + # "dpmpp_fooocus_2m_sde_inpaint_seamless" + ] +default_sampler = 'dpmpp_2m_sde_gpu' diff --git a/modules/inpaint_worker.py b/modules/inpaint_worker.py index f83752e..66b642c 100644 --- a/modules/inpaint_worker.py +++ b/modules/inpaint_worker.py @@ -1,12 +1,9 @@ -import os.path - import torch import numpy as np import modules.default_pipeline as pipeline from PIL import Image, ImageFilter from modules.util import resample_image -from modules.path import inpaint_models_path inpaint_head = None diff --git a/modules/launch_util.py b/modules/launch_util.py index 71a64ff..aadafc7 100644 --- a/modules/launch_util.py +++ b/modules/launch_util.py @@ -7,12 +7,15 @@ import sys import re import logging import pygit2 -pygit2.option(pygit2.GIT_OPT_SET_OWNER_VALIDATION, 0) +pygit2.option(pygit2.GIT_OPT_SET_OWNER_VALIDATION, 0) + logging.getLogger("torch.distributed.nn").setLevel(logging.ERROR) # sshh... 
logging.getLogger("xformers").addFilter(lambda record: 'A matching Triton is not available' not in record.getMessage()) +re_requirement = re.compile(r"\s*([-_a-zA-Z0-9]+)\s*(?:==\s*([-+_.a-zA-Z0-9]+))?\s*") + python = sys.executable default_command_live = (os.environ.get('LAUNCH_LIVE_OUTPUT') == "1") index_url = os.environ.get('INDEX_URL', "") @@ -22,17 +25,32 @@ script_path = os.path.dirname(modules_path) dir_repos = "repositories" +def onerror(func, path, exc_info): + import stat + if not os.access(path, os.W_OK): + os.chmod(path, stat.S_IWUSR) + func(path) + else: + raise 'Failed to invoke "shutil.rmtree", git management failed.' + + def git_clone(url, dir, name, hash=None): try: try: repo = pygit2.Repository(dir) - print(f'{name} exists.') + remote_url = repo.remotes['origin'].url + if remote_url != url: + print(f'{name} exists but remote URL will be updated.') + del repo + raise url + else: + print(f'{name} exists and URL is correct.') except: - if os.path.exists(dir): - shutil.rmtree(dir, ignore_errors=True) + if os.path.isdir(dir) or os.path.exists(dir): + shutil.rmtree(dir, onerror=onerror) os.makedirs(dir, exist_ok=True) repo = pygit2.clone_repository(url, dir) - print(f'{name} cloned.') + print(f'{name} cloned from {url}.') remote = repo.remotes['origin'] remote.fetch() @@ -40,7 +58,7 @@ def git_clone(url, dir, name, hash=None): commit = repo.get(hash) repo.checkout_tree(commit, strategy=pygit2.GIT_CHECKOUT_FORCE) - print(f'{name} checkout finished.') + print(f'{name} checkout finished for {hash}.') except Exception as e: print(f'Git clone failed for {name}: {str(e)}') @@ -101,9 +119,6 @@ def run_pip(command, desc=None, live=default_command_live): return None -re_requirement = re.compile(r"\s*([-_a-zA-Z0-9]+)\s*(?:==\s*([-+_.a-zA-Z0-9]+))?\s*") - - def requirements_met(requirements_file): """ Does a simple parse of a requirements.txt file to determine if all rerqirements in it diff --git a/modules/model_loader.py b/modules/model_loader.py index 3cf0bc9..8ba336a 100644 --- a/modules/model_loader.py +++ b/modules/model_loader.py @@ -2,6 +2,7 @@ import os from urllib.parse import urlparse from typing import Optional + def load_file_from_url( url: str, *, diff --git a/modules/patch.py b/modules/patch.py index 7344b5a..374eb9d 100644 --- a/modules/patch.py +++ b/modules/patch.py @@ -1,5 +1,4 @@ import torch -import contextlib import comfy.model_base import comfy.ldm.modules.diffusionmodules.openaimodel import comfy.samplers @@ -13,6 +12,7 @@ import modules.inpaint_worker as inpaint_worker import comfy.ldm.modules.diffusionmodules.openaimodel import comfy.ldm.modules.diffusionmodules.model import comfy.sd +import comfy.model_patcher from comfy.k_diffusion import utils from comfy.k_diffusion.sampling import BrownianTreeNoiseSampler, trange @@ -20,11 +20,13 @@ from comfy.ldm.modules.diffusionmodules.openaimodel import timestep_embedding, f sharpness = 2.0 -negative_adm = True +positive_adm_scale = 1.5 +negative_adm_scale = 0.8 cfg_x0 = 0.0 cfg_s = 1.0 cfg_cin = 1.0 +adaptive_cfg = 0.7 def calculate_weight_patched(self, patches, weight, key): @@ -45,25 +47,26 @@ def calculate_weight_patched(self, patches, weight, key): if w1.shape != weight.shape: print("WARNING SHAPE MISMATCH {} WEIGHT NOT MERGED {} != {}".format(key, w1.shape, weight.shape)) else: - weight += alpha * w1.type(weight.dtype).to(weight.device) + weight += alpha * comfy.model_management.cast_to_device(w1, weight.device, weight.dtype) elif len(v) == 3: # fooocus - w1 = v[0].float() - w_min = v[1].float() - w_max = 
v[2].float() + w1 = comfy.model_management.cast_to_device(v[0], weight.device, torch.float32) + w_min = comfy.model_management.cast_to_device(v[1], weight.device, torch.float32) + w_max = comfy.model_management.cast_to_device(v[2], weight.device, torch.float32) w1 = (w1 / 255.0) * (w_max - w_min) + w_min if alpha != 0.0: if w1.shape != weight.shape: print("WARNING SHAPE MISMATCH {} FOOOCUS WEIGHT NOT MERGED {} != {}".format(key, w1.shape, weight.shape)) else: - weight += alpha * w1.type(weight.dtype).to(weight.device) + weight += alpha * comfy.model_management.cast_to_device(w1, weight.device, weight.dtype) elif len(v) == 4: # lora/locon - mat1 = v[0].float().to(weight.device) - mat2 = v[1].float().to(weight.device) + mat1 = comfy.model_management.cast_to_device(v[0], weight.device, torch.float32) + mat2 = comfy.model_management.cast_to_device(v[1], weight.device, torch.float32) if v[2] is not None: alpha *= v[2] / mat2.shape[0] if v[3] is not None: - mat3 = v[3].float().to(weight.device) + # locon mid weights, hopefully the math is fine because I didn't properly test it + mat3 = comfy.model_management.cast_to_device(v[3], weight.device, torch.float32) final_shape = [mat2.shape[1], mat2.shape[0], mat3.shape[2], mat3.shape[3]] mat2 = torch.mm(mat2.transpose(0, 1).flatten(start_dim=1), mat3.transpose(0, 1).flatten(start_dim=1)).reshape(final_shape).transpose(0, 1) @@ -84,19 +87,23 @@ def calculate_weight_patched(self, patches, weight, key): if w1 is None: dim = w1_b.shape[0] - w1 = torch.mm(w1_a.float(), w1_b.float()) + w1 = torch.mm(comfy.model_management.cast_to_device(w1_a, weight.device, torch.float32), + comfy.model_management.cast_to_device(w1_b, weight.device, torch.float32)) else: - w1 = w1.float().to(weight.device) + w1 = comfy.model_management.cast_to_device(w1, weight.device, torch.float32) if w2 is None: dim = w2_b.shape[0] if t2 is None: - w2 = torch.mm(w2_a.float().to(weight.device), w2_b.float().to(weight.device)) + w2 = torch.mm(comfy.model_management.cast_to_device(w2_a, weight.device, torch.float32), + comfy.model_management.cast_to_device(w2_b, weight.device, torch.float32)) else: - w2 = torch.einsum('i j k l, j r, i p -> p r k l', t2.float().to(weight.device), - w2_b.float().to(weight.device), w2_a.float().to(weight.device)) + w2 = torch.einsum('i j k l, j r, i p -> p r k l', + comfy.model_management.cast_to_device(t2, weight.device, torch.float32), + comfy.model_management.cast_to_device(w2_b, weight.device, torch.float32), + comfy.model_management.cast_to_device(w2_a, weight.device, torch.float32)) else: - w2 = w2.float().to(weight.device) + w2 = comfy.model_management.cast_to_device(w2, weight.device, torch.float32) if len(w2.shape) == 4: w1 = w1.unsqueeze(2).unsqueeze(2) @@ -117,13 +124,20 @@ def calculate_weight_patched(self, patches, weight, key): if v[5] is not None: # cp decomposition t1 = v[5] t2 = v[6] - m1 = torch.einsum('i j k l, j r, i p -> p r k l', t1.float().to(weight.device), - w1b.float().to(weight.device), w1a.float().to(weight.device)) - m2 = torch.einsum('i j k l, j r, i p -> p r k l', t2.float().to(weight.device), - w2b.float().to(weight.device), w2a.float().to(weight.device)) + m1 = torch.einsum('i j k l, j r, i p -> p r k l', + comfy.model_management.cast_to_device(t1, weight.device, torch.float32), + comfy.model_management.cast_to_device(w1b, weight.device, torch.float32), + comfy.model_management.cast_to_device(w1a, weight.device, torch.float32)) + + m2 = torch.einsum('i j k l, j r, i p -> p r k l', + comfy.model_management.cast_to_device(t2, 
weight.device, torch.float32), + comfy.model_management.cast_to_device(w2b, weight.device, torch.float32), + comfy.model_management.cast_to_device(w2a, weight.device, torch.float32)) else: - m1 = torch.mm(w1a.float().to(weight.device), w1b.float().to(weight.device)) - m2 = torch.mm(w2a.float().to(weight.device), w2b.float().to(weight.device)) + m1 = torch.mm(comfy.model_management.cast_to_device(w1a, weight.device, torch.float32), + comfy.model_management.cast_to_device(w1b, weight.device, torch.float32)) + m2 = torch.mm(comfy.model_management.cast_to_device(w2a, weight.device, torch.float32), + comfy.model_management.cast_to_device(w2b, weight.device, torch.float32)) try: weight += (alpha * m1 * m2).reshape(weight.shape).type(weight.dtype) @@ -133,47 +147,67 @@ def calculate_weight_patched(self, patches, weight, key): return weight -def cfg_patched(args): +def get_adaptive_weight_k(cfg_scale): + w = float(cfg_scale) + w -= 7.0 + w /= 3.0 + w = max(w, 0.01) + w = min(w, 0.99) + return w + + +def compute_cfg(uncond, cond, cfg_scale): + global adaptive_cfg + + k = adaptive_cfg * get_adaptive_weight_k(cfg_scale) + x_cfg = uncond + cfg_scale * (cond - uncond) + ro_pos = torch.std(cond, dim=(1, 2, 3), keepdim=True) + ro_cfg = torch.std(x_cfg, dim=(1, 2, 3), keepdim=True) + + x_rescaled = x_cfg * (ro_pos / ro_cfg) + x_final = k * x_rescaled + (1.0 - k) * x_cfg + + return x_final + + +def patched_sampler_cfg_function(args): global cfg_x0, cfg_s - positive_eps = args['cond'].clone() + + positive_eps = args['cond'] + negative_eps = args['uncond'] + cfg_scale = args['cond_scale'] + positive_x0 = args['cond'] * cfg_s + cfg_x0 - uncond = args['uncond'] * cfg_s + cfg_x0 - cond_scale = args['cond_scale'] - t = args['timestep'] + t = 1.0 - (args['timestep'] / 999.0)[:, None, None, None].clone() + alpha = 0.001 * sharpness * t - alpha = 1.0 - (t / 999.0)[:, None, None, None].clone() - alpha *= 0.001 * sharpness + positive_eps_degraded = anisotropic.adaptive_anisotropic_filter(x=positive_eps, g=positive_x0) + positive_eps_degraded_weighted = positive_eps_degraded * alpha + positive_eps * (1.0 - alpha) - eps_degraded = anisotropic.adaptive_anisotropic_filter(x=positive_eps, g=positive_x0) - eps_degraded_weighted = eps_degraded * alpha + positive_eps * (1.0 - alpha) - - cond = eps_degraded_weighted * cfg_s + cfg_x0 - - return uncond + (cond - uncond) * cond_scale + return compute_cfg(uncond=negative_eps, cond=positive_eps_degraded_weighted, cfg_scale=cfg_scale) def patched_discrete_eps_ddpm_denoiser_forward(self, input, sigma, **kwargs): global cfg_x0, cfg_s, cfg_cin c_out, c_in = [utils.append_dims(x, input.ndim) for x in self.get_scalings(sigma)] - cfg_x0 = input - cfg_s = c_out - cfg_cin = c_in - return self.get_eps(input * c_in, self.sigma_to_t(sigma), **kwargs) + cfg_x0, cfg_s, cfg_cin = input, c_out, c_in + eps = self.get_eps(input * c_in, self.sigma_to_t(sigma), **kwargs) + return input + eps * c_out -def patched_model_function(func, args): +def patched_model_function_wrapper(func, args): global cfg_cin x = args['input'] t = args['timestep'] c = args['c'] - # is_uncond = torch.tensor(args['cond_or_uncond'])[:, None, None, None].to(x) * 5e-3 + # is_uncond = torch.tensor(args['cond_or_uncond'])[:, None, None, None].to(x) return func(x, t, **c) def sdxl_encode_adm_patched(self, **kwargs): - global negative_adm + global positive_adm_scale, negative_adm_scale - clip_pooled = kwargs["pooled_output"] + clip_pooled = comfy.model_base.sdxl_pooled(kwargs, self.noise_augmentor) width = kwargs.get("width", 768) 
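    # The block below is the new per-prompt ADM guidance: SDXL's "original size"
    # conditioning (width/height) is multiplied by positive_adm_scale (default 1.5)
    # for positive prompts and by negative_adm_scale (default 0.8) for negative prompts,
    # replacing the old boolean negative_adm switch; both factors are exposed as sliders
    # in the new Developer Debug Mode.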
height = kwargs.get("height", 768) crop_w = kwargs.get("crop_w", 0) @@ -181,13 +215,20 @@ def sdxl_encode_adm_patched(self, **kwargs): target_width = kwargs.get("target_width", width) target_height = kwargs.get("target_height", height) - if negative_adm: - if kwargs.get("prompt_type", "") == "negative": - width *= 0.8 - height *= 0.8 - elif kwargs.get("prompt_type", "") == "positive": - width *= 1.5 - height *= 1.5 + if kwargs.get("prompt_type", "") == "negative": + width = float(width) * negative_adm_scale + height = float(height) * negative_adm_scale + elif kwargs.get("prompt_type", "") == "positive": + width = float(width) * positive_adm_scale + height = float(height) * positive_adm_scale + + # Avoid artifacts + width = int(width) + height = int(height) + crop_w = int(crop_w) + crop_h = int(crop_h) + target_width = int(target_width) + target_height = int(target_height) out = [] out.append(self.embedder(torch.Tensor([height]))) @@ -196,15 +237,10 @@ def sdxl_encode_adm_patched(self, **kwargs): out.append(self.embedder(torch.Tensor([crop_w]))) out.append(self.embedder(torch.Tensor([target_height]))) out.append(self.embedder(torch.Tensor([target_width]))) - flat = torch.flatten(torch.cat(out))[None, ] + flat = torch.flatten(torch.cat(out)).unsqueeze(dim=0).repeat(clip_pooled.shape[0], 1) return torch.cat((clip_pooled.to(flat.device), flat), dim=1) -def text_encoder_device_patched(): - # Fooocus's style system uses text encoder much more times than comfy so this makes things much faster. - return comfy.model_management.get_torch_device() - - def encode_token_weights_patched_with_a1111_method(self, token_weight_pairs): to_encode = list(self.empty_tokens) for x in token_weight_pairs: @@ -308,6 +344,7 @@ def patched_unet_forward(self, x, timesteps=None, context=None, y=None, control= transformer_options["original_shape"] = list(x.shape) transformer_options["current_index"] = 0 + transformer_patches = transformer_options.get("patches", {}) assert (y is not None) == ( self.num_classes is not None @@ -338,7 +375,9 @@ def patched_unet_forward(self, x, timesteps=None, context=None, y=None, control= transformer_options["block"] = ("middle", 0) h = forward_timestep_embed(self.middle_block, h, emb, context, transformer_options) if control is not None and 'middle' in control and len(control['middle']) > 0: - h += control['middle'].pop() + ctrl = control['middle'].pop() + if ctrl is not None: + h += ctrl for id, module in enumerate(self.output_blocks): transformer_options["block"] = ("output", id) @@ -348,6 +387,11 @@ def patched_unet_forward(self, x, timesteps=None, context=None, y=None, control= if ctrl is not None: hsp += ctrl + if "output_block_patch" in transformer_patches: + patch = transformer_patches["output_block_patch"] + for p in patch: + h, hsp = p(h, hsp, transformer_options) + h = torch.cat([h, hsp], dim=1) del hsp if len(hs) > 0: @@ -362,88 +406,11 @@ def patched_unet_forward(self, x, timesteps=None, context=None, y=None, control= return self.out(h) -def patched_SD1ClipModel_forward(self, tokens): - backup_embeds = self.transformer.get_input_embeddings() - device = backup_embeds.weight.device - tokens = self.set_up_textual_embeddings(tokens, backup_embeds) - tokens = torch.LongTensor(tokens).to(device) - - if backup_embeds.weight.dtype != torch.float32: - precision_scope = torch.autocast - else: - precision_scope = contextlib.nullcontext - - with precision_scope(comfy.model_management.get_autocast_device(device)): - outputs = self.transformer(input_ids=tokens, 
output_hidden_states=self.layer=="hidden") - self.transformer.set_input_embeddings(backup_embeds) - - if self.layer == "last": - z = outputs.last_hidden_state - elif self.layer == "pooled": - z = outputs.pooler_output[:, None, :] - else: - z = outputs.hidden_states[self.layer_idx] - if self.layer_norm_hidden_state: - z = self.transformer.text_model.final_layer_norm(z) - - pooled_output = outputs.pooler_output - if self.text_projection is not None: - pooled_output = pooled_output.float().to(self.text_projection.device) @ self.text_projection.float() - return z.float(), pooled_output.float() - - -VAE_DTYPE = None - - -def vae_dtype_patched(): - global VAE_DTYPE - if VAE_DTYPE is None: - VAE_DTYPE = torch.float32 - if comfy.model_management.is_nvidia(): - torch_version = torch.version.__version__ - if int(torch_version[0]) >= 2: - if torch.cuda.is_bf16_supported(): - VAE_DTYPE = torch.bfloat16 - print('BFloat16 VAE: Enabled') - return VAE_DTYPE - - -def vae_bf16_upsample_forward(self, x): - try: - x = torch.nn.functional.interpolate(x, scale_factor=2.0, mode="nearest") - except: # operation not implemented for bf16 - b, c, h, w = x.shape - out = torch.empty((b, c, h * 2, w * 2), dtype=x.dtype, layout=x.layout, device=x.device) - split = 8 - l = out.shape[1] // split - for i in range(0, out.shape[1], l): - out[:, i:i + l] = torch.nn.functional.interpolate(x[:, i:i + l].to(torch.float32), scale_factor=2.0, - mode="nearest").to(x.dtype) - del x - x = out - - if self.with_conv: - x = self.conv(x) - return x - - def patch_all(): - comfy.model_management.vae_dtype = vae_dtype_patched - comfy.ldm.modules.diffusionmodules.model.Upsample.forward = vae_bf16_upsample_forward - - comfy.sd1_clip.SD1ClipModel.forward = patched_SD1ClipModel_forward - - comfy.sd.ModelPatcher.calculate_weight = calculate_weight_patched + comfy.model_patcher.ModelPatcher.calculate_weight = calculate_weight_patched comfy.ldm.modules.diffusionmodules.openaimodel.UNetModel.forward = patched_unet_forward - - comfy.ldm.modules.attention.print = lambda x: None comfy.k_diffusion.sampling.sample_dpmpp_fooocus_2m_sde_inpaint_seamless = sample_dpmpp_fooocus_2m_sde_inpaint_seamless - - comfy.model_management.text_encoder_device = text_encoder_device_patched - print(f'Fooocus Text Processing Pipelines are retargeted to {str(comfy.model_management.text_encoder_device())}') - comfy.k_diffusion.external.DiscreteEpsDDPMDenoiser.forward = patched_discrete_eps_ddpm_denoiser_forward comfy.model_base.SDXL.encode_adm = sdxl_encode_adm_patched - comfy.sd1_clip.ClipTokenWeightEncoder.encode_token_weights = encode_token_weights_patched_with_a1111_method return diff --git a/modules/samplers_advanced.py b/modules/samplers_advanced.py index 56850eb..4eec67a 100644 --- a/modules/samplers_advanced.py +++ b/modules/samplers_advanced.py @@ -1,14 +1,13 @@ from comfy.samplers import * import comfy.model_management -import modules.virtual_memory class KSamplerBasic: - SCHEDULERS = ["normal", "karras", "exponential", "simple", "ddim_uniform"] + SCHEDULERS = ["normal", "karras", "exponential", "sgm_uniform", "simple", "ddim_uniform"] SAMPLERS = ["euler", "euler_ancestral", "heun", "dpm_2", "dpm_2_ancestral", "lms", "dpm_fast", "dpm_adaptive", "dpmpp_2s_ancestral", "dpmpp_sde", "dpmpp_sde_gpu", - "dpmpp_2m", "dpmpp_2m_sde", "dpmpp_2m_sde_gpu", "ddim", "uni_pc", "uni_pc_bh2", "dpmpp_fooocus_2m_sde_inpaint_seamless"] + "dpmpp_2m", "dpmpp_2m_sde", "dpmpp_2m_sde_gpu", "dpmpp_3m_sde", "dpmpp_3m_sde_gpu", "ddpm", "ddim", "uni_pc", "uni_pc_bh2", 
"dpmpp_fooocus_2m_sde_inpaint_seamless"] def __init__(self, model, steps, device, sampler=None, scheduler=None, denoise=None, model_options={}): self.model = model @@ -50,6 +49,8 @@ class KSamplerBasic: sigmas = simple_scheduler(self.model_wrap, steps) elif self.scheduler == "ddim_uniform": sigmas = ddim_scheduler(self.model_wrap, steps) + elif self.scheduler == "sgm_uniform": + sigmas = sgm_scheduler(self.model_wrap, steps) else: print("error invalid scheduler", self.scheduler) @@ -89,8 +90,8 @@ class KSamplerBasic: positive = positive[:] negative = negative[:] - resolve_cond_masks(positive, noise.shape[2], noise.shape[3], self.device) - resolve_cond_masks(negative, noise.shape[2], noise.shape[3], self.device) + resolve_areas_and_cond_masks(positive, noise.shape[2], noise.shape[3], self.device) + resolve_areas_and_cond_masks(negative, noise.shape[2], noise.shape[3], self.device) calculate_start_end_timesteps(self.model_wrap, negative) calculate_start_end_timesteps(self.model_wrap, positive) @@ -203,10 +204,10 @@ class KSamplerBasic: class KSamplerWithRefiner: - SCHEDULERS = ["normal", "karras", "exponential", "simple", "ddim_uniform"] + SCHEDULERS = ["normal", "karras", "exponential", "sgm_uniform", "simple", "ddim_uniform"] SAMPLERS = ["euler", "euler_ancestral", "heun", "dpm_2", "dpm_2_ancestral", "lms", "dpm_fast", "dpm_adaptive", "dpmpp_2s_ancestral", "dpmpp_sde", "dpmpp_sde_gpu", - "dpmpp_2m", "dpmpp_2m_sde", "dpmpp_2m_sde_gpu", "ddim", "uni_pc", "uni_pc_bh2", "dpmpp_fooocus_2m_sde_inpaint_seamless"] + "dpmpp_2m", "dpmpp_2m_sde", "dpmpp_2m_sde_gpu", "dpmpp_3m_sde", "dpmpp_3m_sde_gpu", "ddpm", "ddim", "uni_pc", "uni_pc_bh2", "dpmpp_fooocus_2m_sde_inpaint_seamless"] def __init__(self, model, refiner_model, steps, device, sampler=None, scheduler=None, denoise=None, model_options={}): self.model_patcher = model @@ -263,6 +264,8 @@ class KSamplerWithRefiner: sigmas = simple_scheduler(self.model_wrap, steps) elif self.scheduler == "ddim_uniform": sigmas = ddim_scheduler(self.model_wrap, steps) + elif self.scheduler == "sgm_uniform": + sigmas = sgm_scheduler(self.model_wrap, steps) else: print("error invalid scheduler", self.scheduler) @@ -304,8 +307,8 @@ class KSamplerWithRefiner: positive = positive[:] negative = negative[:] - resolve_cond_masks(positive, noise.shape[2], noise.shape[3], self.device) - resolve_cond_masks(negative, noise.shape[2], noise.shape[3], self.device) + resolve_areas_and_cond_masks(positive, noise.shape[2], noise.shape[3], self.device) + resolve_areas_and_cond_masks(negative, noise.shape[2], noise.shape[3], self.device) calculate_start_end_timesteps(self.model_wrap, negative) calculate_start_end_timesteps(self.model_wrap, positive) @@ -332,8 +335,8 @@ class KSamplerWithRefiner: refiner_positive = refiner_positive[:] refiner_negative = refiner_negative[:] - resolve_cond_masks(refiner_positive, noise.shape[2], noise.shape[3], self.device) - resolve_cond_masks(refiner_negative, noise.shape[2], noise.shape[3], self.device) + resolve_areas_and_cond_masks(refiner_positive, noise.shape[2], noise.shape[3], self.device) + resolve_areas_and_cond_masks(refiner_negative, noise.shape[2], noise.shape[3], self.device) calculate_start_end_timesteps(self.refiner_model_wrap, refiner_positive) calculate_start_end_timesteps(self.refiner_model_wrap, refiner_negative) @@ -351,8 +354,6 @@ class KSamplerWithRefiner: noise.shape[3], noise.shape[2], self.device, "negative") def refiner_switch(): - modules.virtual_memory.try_move_to_virtual_memory(self.model_denoise.inner_model) - 
modules.virtual_memory.load_from_virtual_memory(self.refiner_model_denoise.inner_model) comfy.model_management.load_model_gpu(self.refiner_model_patcher) self.model_denoise.inner_model = self.refiner_model_denoise.inner_model for i in range(len(positive)): diff --git a/modules/sdxl_styles.py b/modules/sdxl_styles.py index 614f985..f5a75bb 100644 --- a/modules/sdxl_styles.py +++ b/modules/sdxl_styles.py @@ -1,6 +1,3 @@ -from modules.util import join_prompts - - fooocus_expansion = "Fooocus V2" default_styles = ["Default (Slightly Cinematic)"] diff --git a/modules/virtual_memory.py b/modules/virtual_memory.py deleted file mode 100644 index 3c590c0..0000000 --- a/modules/virtual_memory.py +++ /dev/null @@ -1,185 +0,0 @@ -import torch -import time -import gc - -from safetensors import safe_open -from comfy import model_management -from comfy.diffusers_convert import textenc_conversion_lst - - -ALWAYS_USE_VM = None - -if isinstance(ALWAYS_USE_VM, bool): - print(f'[Virtual Memory System] Forced = {ALWAYS_USE_VM}') - -if 'cpu' in model_management.unet_offload_device().type.lower(): - logic_memory = model_management.total_ram - global_virtual_memory_activated = ALWAYS_USE_VM if isinstance(ALWAYS_USE_VM, bool) else logic_memory < 30000 - print(f'[Virtual Memory System] Logic target is CPU, memory = {logic_memory}') -else: - logic_memory = model_management.total_vram - global_virtual_memory_activated = ALWAYS_USE_VM if isinstance(ALWAYS_USE_VM, bool) else logic_memory < 22000 - print(f'[Virtual Memory System] Logic target is GPU, memory = {logic_memory}') - - -print(f'[Virtual Memory System] Activated = {global_virtual_memory_activated}') - - -@torch.no_grad() -def recursive_set(obj, key, value): - if obj is None: - return - if '.' in key: - k1, k2 = key.split('.', 1) - recursive_set(getattr(obj, k1, None), k2, value) - else: - setattr(obj, key, value) - - -@torch.no_grad() -def recursive_del(obj, key): - if obj is None: - return - if '.' in key: - k1, k2 = key.split('.', 1) - recursive_del(getattr(obj, k1, None), k2) - else: - delattr(obj, key) - - -@torch.no_grad() -def force_load_state_dict(model, state_dict): - for k in list(state_dict.keys()): - p = torch.nn.Parameter(state_dict[k], requires_grad=False) - recursive_set(model, k, p) - del state_dict[k] - return - - -@torch.no_grad() -def only_load_safetensors_keys(filename): - try: - with safe_open(filename, framework="pt", device='cpu') as f: - result = list(f.keys()) - assert len(result) > 0 - return result - except: - return None - - -@torch.no_grad() -def move_to_virtual_memory(model, comfy_unload=True): - timer = time.time() - - if comfy_unload: - model_management.unload_model() - - virtual_memory_dict = getattr(model, 'virtual_memory_dict', None) - if isinstance(virtual_memory_dict, dict): - # Already in virtual memory. 
- return - - model_file = getattr(model, 'model_file', None) - assert isinstance(model_file, dict) - - filename = model_file['filename'] - prefix = model_file['prefix'] - - if hasattr(model, 'virtual_memory_safetensors_keys'): - safetensors_keys = model.virtual_memory_safetensors_keys - else: - safetensors_keys = only_load_safetensors_keys(filename) - model.virtual_memory_safetensors_keys = safetensors_keys - - if safetensors_keys is None: - print(f'[Virtual Memory System] Error: The Virtual Memory System currently only support safetensors models!') - return - - sd = model.state_dict() - original_device = list(sd.values())[0].device.type - model_file['original_device'] = original_device - - virtual_memory_dict = {} - - for k, v in sd.items(): - current_key = k - current_flag = None - if prefix == 'refiner_clip': - current_key_in_safetensors = k - - for a, b in textenc_conversion_lst: - current_key_in_safetensors = current_key_in_safetensors.replace(b, a) - - current_key_in_safetensors = current_key_in_safetensors.replace('clip_g.transformer.text_model.encoder.layers.', 'conditioner.embedders.0.model.transformer.resblocks.') - current_key_in_safetensors = current_key_in_safetensors.replace('clip_g.text_projection', 'conditioner.embedders.0.model.text_projection') - current_key_in_safetensors = current_key_in_safetensors.replace('clip_g.logit_scale', 'conditioner.embedders.0.model.logit_scale') - current_key_in_safetensors = current_key_in_safetensors.replace('clip_g.', 'conditioner.embedders.0.model.') - - for e in ["weight", "bias"]: - for i, k in enumerate(['q', 'k', 'v']): - e_flag = f'.{k}_proj.{e}' - if current_key_in_safetensors.endswith(e_flag): - current_key_in_safetensors = current_key_in_safetensors[:-len(e_flag)] + f'.in_proj_{e}' - current_flag = (1280 * i, 1280 * (i + 1)) - else: - current_key_in_safetensors = prefix + '.' + k - current_device = torch.device(index=v.device.index, type=v.device.type) - if current_key_in_safetensors in safetensors_keys: - virtual_memory_dict[current_key] = (current_key_in_safetensors, current_device, current_flag) - recursive_del(model, current_key) - else: - # print(f'[Virtual Memory System] Missed key: {current_key}') - pass - - del sd - gc.collect() - model_management.soft_empty_cache() - - model.virtual_memory_dict = virtual_memory_dict - - print(f'[Virtual Memory System] time = {str("%.5f" % (time.time() - timer))}s: {prefix} released from {original_device}: {filename}') - return - - -@torch.no_grad() -def load_from_virtual_memory(model): - timer = time.time() - - virtual_memory_dict = getattr(model, 'virtual_memory_dict', None) - if not isinstance(virtual_memory_dict, dict): - # Not in virtual memory. 
- return - - model_file = getattr(model, 'model_file', None) - assert isinstance(model_file, dict) - - filename = model_file['filename'] - prefix = model_file['prefix'] - original_device = model_file['original_device'] - - with safe_open(filename, framework="pt", device=original_device) as f: - for current_key, (current_key_in_safetensors, current_device, current_flag) in virtual_memory_dict.items(): - tensor = f.get_tensor(current_key_in_safetensors).to(current_device) - if isinstance(current_flag, tuple) and len(current_flag) == 2: - a, b = current_flag - tensor = tensor[a:b] - parameter = torch.nn.Parameter(tensor, requires_grad=False) - recursive_set(model, current_key, parameter) - - print(f'[Virtual Memory System] time = {str("%.5f" % (time.time() - timer))}s: {prefix} loaded to {original_device}: {filename}') - del model.virtual_memory_dict - return - - -@torch.no_grad() -def try_move_to_virtual_memory(model, comfy_unload=True): - if not global_virtual_memory_activated: - return - - import modules.default_pipeline as pipeline - - if pipeline.xl_refiner is None: - # If users do not use refiner, no need to use this. - return - - move_to_virtual_memory(model, comfy_unload) diff --git a/readme.md b/readme.md index 0b9cfdf..a16cfdb 100644 --- a/readme.md +++ b/readme.md @@ -67,7 +67,9 @@ Please open an issue if you use similar devices but still cannot achieve accepta | Colab | Info | --- | --- | -[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/lllyasviel/Fooocus/blob/main/colab.ipynb) | Fooocus Colab (Official Version) +[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/lllyasviel/Fooocus/blob/colab/colab.ipynb) | Fooocus Colab Version + +Note that this link is optimized for Google Colab - the codes may be different from offline Fooocus for better Colab experience. Thanks to [camenduru](https://github.com/camenduru)! @@ -129,7 +131,7 @@ Or if you want to open a remote port, use ### Linux (AMD GPUs) -Installation is the same as Linux part. It has been tested for 6700XT. Works for both Pytorch 1.13 and Pytorch 2. +Coming soon ... ### Mac/Windows(AMD GPUs) @@ -153,6 +155,11 @@ Below things are already inside the software, and **users do not need to do anyt 11. A carefully designed system for balancing multiple styles as well as prompt expansion. 12. Using automatic1111's method to normalize prompt emphasizing. This significantly improve results when users directly copy prompts from civitai. 13. The joint swap system of refiner now also support img2img and upscale in a seamless way. +14. CFG Scale and TSNR correction (tuned for SDXL) when CFG is bigger than 10. + +## Changing Model Path + +After the first time you run Fooocus, a config file will be generated at `Fooocus\user_path_config.txt`. This file can be edited for changing the model path. ## Advanced Features diff --git a/update_log.md b/update_log.md index e6d2cf9..6fcb3f8 100644 --- a/update_log.md +++ b/update_log.md @@ -1,3 +1,13 @@ +# 2.0.80 + +* Rework many patches and some UI details. +* Speed up processing. +* Move Colab to independent branch. +* Implemented CFG Scale and TSNR correction when CFG is bigger than 10. +* Implemented Developer Mode with more options to debug. + +### 2.0.72 + (2023 sep 21) The feature updating of Fooocus will be paused for about two or three weeks because we have some events and travelling - we will come back in early or mid October. 
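The "CFG Scale and TSNR correction" entry above corresponds to `compute_cfg` and `get_adaptive_weight_k` in `modules/patch.py` from this patch. A minimal standalone sketch of that rescale, assuming NCHW latent tensors (names and shapes here are illustrative only):

```python
import torch

def rescaled_cfg(uncond, cond, cfg_scale, adaptive_cfg=0.7):
    # Blend weight ramps from ~0 at CFG 7 up to ~adaptive_cfg at CFG 10 and above.
    k = adaptive_cfg * min(max((cfg_scale - 7.0) / 3.0, 0.01), 0.99)
    x_cfg = uncond + cfg_scale * (cond - uncond)            # classic CFG combination
    ro_pos = torch.std(cond, dim=(1, 2, 3), keepdim=True)   # std of the conditional branch
    ro_cfg = torch.std(x_cfg, dim=(1, 2, 3), keepdim=True)  # std after CFG amplification
    x_rescaled = x_cfg * (ro_pos / ro_cfg)                  # pull the std back toward the conditional branch
    return k * x_rescaled + (1.0 - k) * x_cfg               # interpolate rescaled and plain CFG

# Illustrative call with dummy latents:
out = rescaled_cfg(torch.randn(1, 4, 128, 128), torch.randn(1, 4, 128, 128), cfg_scale=12.0)
```

Because the blend weight only reaches full strength near CFG 10, lower guidance values are left nearly untouched, which is why the changelog describes the correction as applying when CFG is bigger than 10.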
### 2.0.72 diff --git a/webui.py b/webui.py index 7519784..2f7f1d5 100644 --- a/webui.py +++ b/webui.py @@ -137,7 +137,7 @@ with shared.gradio_root: choices=[fooocus_expansion] + style_keys, value=[fooocus_expansion] + default_styles, label='Image Style') - with gr.Tab(label='Advanced'): + with gr.Tab(label='Model'): with gr.Row(): base_model = gr.Dropdown(label='SDXL Base Model', choices=modules.path.model_filenames, value=modules.path.default_base_model_name, show_label=True) refiner_model = gr.Dropdown(label='SDXL Refiner', choices=['None'] + modules.path.model_filenames, value=modules.path.default_refiner_model_name, show_label=True) @@ -150,9 +150,30 @@ with shared.gradio_root: lora_ctrls += [lora_model, lora_weight] with gr.Row(): model_refresh = gr.Button(label='Refresh', value='\U0001f504 Refresh All Files', variant='secondary', elem_classes='refresh_button') - with gr.Accordion(label='Advanced', open=False): - sharpness = gr.Slider(label='Sampling Sharpness', minimum=0.0, maximum=30.0, step=0.01, value=2.0) - gr.HTML('\U0001F4D4 Document') + with gr.Tab(label='Advanced'): + sharpness = gr.Slider(label='Sampling Sharpness', minimum=0.0, maximum=30.0, step=0.001, value=2.0, + info='Higher value means image and texture are sharper.') + guidance_scale = gr.Slider(label='Guidance Scale', minimum=1.0, maximum=30.0, step=0.01, value=7.0, + info='Higher value means style is cleaner, vivider, and more artistic.') + + gr.HTML('\U0001F4D4 Document') + dev_mode = gr.Checkbox(label='Developer Debug Mode', value=False, container=False) + + with gr.Column(visible=False) as dev_tools: + with gr.Tab(label='Developer Control and Debug Tools'): + adm_scaler_positive = gr.Slider(label='Positive ADM Guidance Scaler', minimum=0.1, maximum=3.0, + step=0.001, value=1.5, info='The scaler multiplied to positive ADM (use 1.0 to disable). ') + adm_scaler_negative = gr.Slider(label='Negative ADM Guidance Scaler', minimum=0.1, maximum=3.0, + step=0.001, value=0.8, info='The scaler multiplied to negative ADM (use 1.0 to disable). ') + adaptive_cfg = gr.Slider(label='CFG Rescale from TSNR', minimum=0.0, maximum=1.0, + step=0.001, value=0.3, info='Enabling Fooocus\'s implementation of CFG re-weighting for TSNR (use 0 to disable, more effective when CFG > 7).') + sampler_name = gr.Dropdown(label='Sampler', choices=flags.sampler_list, value=flags.default_sampler, info='Only effective in non-inpaint mode.') + + def dev_mode_checked(r): + return gr.update(visible=r) + + + dev_mode.change(dev_mode_checked, inputs=[dev_mode], outputs=[dev_tools], queue=False) def model_refresh_clicked(): modules.path.update_all_model_names() @@ -167,7 +188,7 @@ with shared.gradio_root: advanced_checkbox.change(lambda x: gr.update(visible=x), advanced_checkbox, right_col, queue=False) ctrls = [ prompt, negative_prompt, style_selections, - performance_selction, aspect_ratios_selction, image_number, image_seed, sharpness + performance_selction, aspect_ratios_selction, image_number, image_seed, sharpness, adm_scaler_positive, adm_scaler_negative, guidance_scale, adaptive_cfg, sampler_name ] ctrls += [base_model, refiner_model] + lora_ctrls ctrls += [input_image_checkbox, current_tab]
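A related non-obvious change in this patch: the refiner no longer re-encodes the prompts with its own CLIP. Instead, `clip_separate` in `modules/default_pipeline.py` derives the refiner conditioning directly from the base conditioning. A minimal sketch mirroring that function, with assumed tensor shapes for illustration:

```python
import torch

def clip_separate(cond):
    # SDXL base text conditioning concatenates CLIP-L (768) and CLIP-G (1280) features.
    # The refiner only consumes the CLIP-G part, so slicing the last 1280 channels
    # (plus the pooled output, which is already CLIP-G pooled) is enough to drive it.
    c, p = cond[0]
    c = c[..., -1280:].clone()
    p = p["pooled_output"].clone()
    return [[c, {"pooled_output": p}]]

# Assumed shapes for this sketch: batch 2, 77 tokens, 2048 = 768 + 1280 channels.
base_cond = [[torch.zeros(2, 77, 2048), {"pooled_output": torch.zeros(2, 1280)}]]
refiner_cond = clip_separate(base_cond)
assert refiner_cond[0][0].shape[-1] == 1280
```

This also explains why `load_unet_only` is now sufficient for the refiner: with text encoding handled by the base model's CLIP, only the refiner UNet is ever needed.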