2.1.790
lllyasviel 2023-11-11 22:13:13 -08:00 committed by GitHub
parent fd4a5b2eaf
commit 2bef62c545
10 changed files with 184 additions and 88 deletions

face_experiments.py Normal file

@@ -0,0 +1,7 @@
import cv2
import fooocus_extras.face_crop as cropper

# quick manual test: crop the face from lena.png and write the result next to it
img = cv2.imread('lena.png')
result = cropper.crop_image(img)
cv2.imwrite('lena_result.png', result)

fooocus_extras/face_crop.py Normal file

@@ -0,0 +1,50 @@
import cv2
import numpy as np

import modules.config
import facexlib.utils.face_restoration_helper as face_restoration_helper


faceRestoreHelper = None


def align_warp_face(self, landmark, border_mode='reflect'):
    affine_matrix = cv2.estimateAffinePartial2D(landmark, self.face_template, method=cv2.LMEDS)[0]
    self.affine_matrices.append(affine_matrix)
    if border_mode == 'constant':
        border_mode = cv2.BORDER_CONSTANT
    elif border_mode == 'reflect101':
        border_mode = cv2.BORDER_REFLECT101
    elif border_mode == 'reflect':
        border_mode = cv2.BORDER_REFLECT
    input_img = self.input_img
    cropped_face = cv2.warpAffine(input_img, affine_matrix, self.face_size,
                                  borderMode=border_mode, borderValue=(135, 133, 132))
    return cropped_face


def crop_image(img_rgb):
    global faceRestoreHelper

    if faceRestoreHelper is None:
        faceRestoreHelper = face_restoration_helper.FaceRestoreHelper(
            upscale_factor=1,
            model_rootpath=modules.config.path_controlnet,
            device='cpu'  # using cpu is safer since this runs outside fcbh memory management
        )

    faceRestoreHelper.clean_all()
    # facexlib works on BGR images, so flip the channel order before detection
    faceRestoreHelper.read_image(np.ascontiguousarray(img_rgb[:, :, ::-1].copy()))
    faceRestoreHelper.get_face_landmarks_5()

    landmarks = faceRestoreHelper.all_landmarks_5
    # landmarks are already sorted by detection confidence

    if len(landmarks) == 0:
        print('No face detected')
        return img_rgb
    else:
        print(f'Detected {len(landmarks)} faces')
        result = align_warp_face(faceRestoreHelper, landmarks[0])
        # flip back to RGB before returning
        return np.ascontiguousarray(result[:, :, ::-1].copy())

fooocus_extras/ip_adapter.py

@@ -84,26 +84,20 @@ class IPAdapterModel(torch.nn.Module):
clip_vision: fcbh.clip_vision.ClipVisionModel = None
ip_negative: torch.Tensor = None
image_proj_model: ModelPatcher = None
ip_layers: ModelPatcher = None
ip_adapter: IPAdapterModel = None
ip_unconds = None
ip_adapters: dict = {}
def load_ip_adapter(clip_vision_path, ip_negative_path, ip_adapter_path):
global clip_vision, image_proj_model, ip_layers, ip_negative, ip_adapter, ip_unconds
global clip_vision, ip_negative, ip_adapters
if clip_vision_path is None:
return
if ip_negative_path is None:
return
if ip_adapter_path is None:
return
if clip_vision is not None and image_proj_model is not None and ip_layers is not None and ip_negative is not None:
return
if clip_vision is None and isinstance(clip_vision_path, str):
clip_vision = fcbh.clip_vision.load(clip_vision_path)
ip_negative = sf.load_file(ip_negative_path)['data']
clip_vision = fcbh.clip_vision.load(clip_vision_path)
if ip_negative is None and isinstance(ip_negative_path, str):
ip_negative = sf.load_file(ip_negative_path)['data']
if not isinstance(ip_adapter_path, str) or ip_adapter_path in ip_adapters:
return
load_device = model_management.get_torch_device()
offload_device = torch.device('cpu')
@@ -141,7 +135,13 @@ def load_ip_adapter(clip_vision_path, ip_negative_path, ip_adapter_path):
ip_layers = ModelPatcher(model=ip_adapter.ip_layers, load_device=load_device,
offload_device=offload_device)
ip_unconds = None
ip_adapters[ip_adapter_path] = dict(
ip_adapter=ip_adapter,
image_proj_model=image_proj_model,
ip_layers=ip_layers,
ip_unconds=None
)
return
@@ -161,8 +161,9 @@ def clip_preprocess(image):
@torch.no_grad()
@torch.inference_mode()
def preprocess(img):
global ip_unconds
def preprocess(img, ip_adapter_path):
global ip_adapters
entry = ip_adapters[ip_adapter_path]
fcbh.model_management.load_model_gpu(clip_vision.patcher)
pixel_values = clip_preprocess(numpy_to_pytorch(img).to(clip_vision.load_device))
@@ -175,6 +176,11 @@ def preprocess(img):
with precision_scope(fcbh.model_management.get_autocast_device(clip_vision.load_device), torch.float32):
outputs = clip_vision.model(pixel_values=pixel_values, output_hidden_states=True)
ip_adapter = entry['ip_adapter']
ip_layers = entry['ip_layers']
image_proj_model = entry['image_proj_model']
ip_unconds = entry['ip_unconds']
if ip_adapter.plus:
cond = outputs.hidden_states[-2]
else:
@@ -190,9 +196,11 @@ def preprocess(img):
if ip_unconds is None:
uncond = ip_negative.to(device=ip_adapter.load_device, dtype=ip_adapter.dtype)
ip_unconds = [m(uncond).cpu() for m in ip_layers.model.to_kvs]
entry['ip_unconds'] = ip_unconds
ip_conds = [m(cond).cpu() for m in ip_layers.model.to_kvs]
return ip_conds
return ip_conds, ip_unconds
@torch.no_grad()
@@ -206,46 +214,46 @@ def patch_model(model, tasks):
current_step = float(model.model.diffusion_model.current_step.detach().cpu().numpy()[0])
cond_or_uncond = extra_options['cond_or_uncond']
with torch.autocast("cuda", dtype=ip_adapter.dtype):
q = n
k = [context_attn2]
v = [value_attn2]
b, _, _ = q.shape
q = n
k = [context_attn2]
v = [value_attn2]
b, _, _ = q.shape
for ip_conds, cn_stop, cn_weight in tasks:
if current_step < cn_stop:
ip_k_c = ip_conds[ip_index * 2].to(q)
ip_v_c = ip_conds[ip_index * 2 + 1].to(q)
ip_k_uc = ip_unconds[ip_index * 2].to(q)
ip_v_uc = ip_unconds[ip_index * 2 + 1].to(q)
for (cs, ucs), cn_stop, cn_weight in tasks:
if current_step < cn_stop:
ip_k_c = cs[ip_index * 2].to(q)
ip_v_c = cs[ip_index * 2 + 1].to(q)
ip_k_uc = ucs[ip_index * 2].to(q)
ip_v_uc = ucs[ip_index * 2 + 1].to(q)
ip_k = torch.cat([(ip_k_c, ip_k_uc)[i] for i in cond_or_uncond], dim=0)
ip_v = torch.cat([(ip_v_c, ip_v_uc)[i] for i in cond_or_uncond], dim=0)
ip_k = torch.cat([(ip_k_c, ip_k_uc)[i] for i in cond_or_uncond], dim=0)
ip_v = torch.cat([(ip_v_c, ip_v_uc)[i] for i in cond_or_uncond], dim=0)
# Midjourney's attention formulation of image prompt (non-official reimplementation)
# Written by Lvmin Zhang at Stanford University, 2023 Dec
# For non-commercial use only - if you use this in a commercial project then
# it probably has some intellectual property issues.
# Contact lvminzhang@acm.org if you are not sure.
# Midjourney's attention formulation of image prompt (non-official reimplementation)
# Written by Lvmin Zhang at Stanford University, 2023 Dec
# For non-commercial use only - if you use this in a commercial project then
# it probably has some intellectual property issues.
# Contact lvminzhang@acm.org if you are not sure.
# Below is the sensitive part with potential intellectual property issues.
# Below is the sensitive part with potential intellectual property issues.
ip_v_mean = torch.mean(ip_v, dim=1, keepdim=True)
ip_v_offset = ip_v - ip_v_mean
ip_v_mean = torch.mean(ip_v, dim=1, keepdim=True)
ip_v_offset = ip_v - ip_v_mean
B, F, C = ip_k.shape
channel_penalty = float(C) / 1280.0
weight = cn_weight * channel_penalty
B, F, C = ip_k.shape
channel_penalty = float(C) / 1280.0
weight = cn_weight * channel_penalty
ip_k = ip_k * weight
ip_v = ip_v_offset + ip_v_mean * weight
ip_k = ip_k * weight
ip_v = ip_v_offset + ip_v_mean * weight
k.append(ip_k)
v.append(ip_v)
k.append(ip_k)
v.append(ip_v)
k = torch.cat(k, dim=1)
v = torch.cat(v, dim=1)
out = sdp(q, k, v, extra_options)
k = torch.cat(k, dim=1)
v = torch.cat(v, dim=1)
out = sdp(q, k, v, extra_options)
return out.to(dtype=org_dtype)
return patcher
@@ -260,27 +268,21 @@ def patch_model(model, tasks):
to["patches_replace"]["attn2"][key] = make_attn_patcher(number)
number = 0
if not ip_adapter.sdxl:
for id in [1, 2, 4, 5, 7, 8]: # id of input_blocks that have cross attention
set_model_patch_replace(new_model, number, ("input", id))
number += 1
for id in [3, 4, 5, 6, 7, 8, 9, 10, 11]: # id of output_blocks that have cross attention
set_model_patch_replace(new_model, number, ("output", id))
number += 1
set_model_patch_replace(new_model, number, ("middle", 0))
else:
for id in [4, 5, 7, 8]: # id of input_blocks that have cross attention
block_indices = range(2) if id in [4, 5] else range(10) # transformer_depth
for index in block_indices:
set_model_patch_replace(new_model, number, ("input", id, index))
number += 1
for id in range(6): # id of output_blocks that have cross attention
block_indices = range(2) if id in [3, 4, 5] else range(10) # transformer_depth
for index in block_indices:
set_model_patch_replace(new_model, number, ("output", id, index))
number += 1
for index in range(10):
set_model_patch_replace(new_model, number, ("middle", 0, index))
for id in [4, 5, 7, 8]:
block_indices = range(2) if id in [4, 5] else range(10)
for index in block_indices:
set_model_patch_replace(new_model, number, ("input", id, index))
number += 1
for id in range(6):
block_indices = range(2) if id in [3, 4, 5] else range(10)
for index in block_indices:
set_model_patch_replace(new_model, number, ("output", id, index))
number += 1
for index in range(10):
set_model_patch_replace(new_model, number, ("middle", 0, index))
number += 1
return new_model
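
The key structural change in this file is that the module-level ip_adapter, image_proj_model, ip_layers and ip_unconds singletons become entries in an ip_adapters dict keyed by adapter path, so the regular ImagePrompt adapter and the FaceSwap adapter can stay loaded side by side. A minimal standalone sketch of that caching pattern (every name below is a placeholder, not the real loader):

# toy illustration of a per-path adapter cache; placeholders stand in for the real objects
_adapters = {}

def load_adapter_cached(path):
    if path in _adapters:
        return _adapters[path]            # already loaded: reuse the cached entry
    entry = dict(
        ip_adapter=object(),              # would hold the loaded IPAdapterModel
        image_proj_model=object(),        # would hold the ModelPatcher wrappers
        ip_layers=object(),
        ip_unconds=None,                  # negative projections are filled in lazily by preprocess()
    )
    _adapters[path] = entry
    return entry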

fooocus_version.py

@@ -1 +1 @@
version = '2.1.789'
version = '2.1.790'

modules/async_worker.py

@@ -28,6 +28,7 @@ def worker():
import modules.constants as constants
import modules.advanced_parameters as advanced_parameters
import fooocus_extras.ip_adapter as ip_adapter
import fooocus_extras.face_crop
from modules.sdxl_styles import apply_style, apply_wildcards, fooocus_expansion
from modules.private_logger import log
@@ -133,7 +134,7 @@ def worker():
outpaint_selections = args.pop()
inpaint_input_image = args.pop()
cn_tasks = {flags.cn_ip: [], flags.cn_canny: [], flags.cn_cpds: []}
cn_tasks = {x: [] for x in flags.ip_list}
for _ in range(4):
cn_img = args.pop()
cn_stop = args.pop()
@@ -189,7 +190,7 @@ def worker():
inpaint_head_model_path = None
controlnet_canny_path = None
controlnet_cpds_path = None
clip_vision_path, ip_negative_path, ip_adapter_path = None, None, None
clip_vision_path, ip_negative_path, ip_adapter_path, ip_adapter_face_path = None, None, None, None
seed = int(image_seed)
print(f'[Parameters] Seed = {seed}')
@@ -244,12 +245,15 @@ def worker():
if len(cn_tasks[flags.cn_cpds]) > 0:
controlnet_cpds_path = modules.config.downloading_controlnet_cpds()
if len(cn_tasks[flags.cn_ip]) > 0:
clip_vision_path, ip_negative_path, ip_adapter_path = modules.config.downloading_ip_adapters()
clip_vision_path, ip_negative_path, ip_adapter_path = modules.config.downloading_ip_adapters('ip')
if len(cn_tasks[flags.cn_ip_face]) > 0:
clip_vision_path, ip_negative_path, ip_adapter_face_path = modules.config.downloading_ip_adapters('face')
progressbar(1, 'Loading control models ...')
# Load or unload CNs
pipeline.refresh_controlnets([controlnet_canny_path, controlnet_cpds_path])
ip_adapter.load_ip_adapter(clip_vision_path, ip_negative_path, ip_adapter_path)
ip_adapter.load_ip_adapter(clip_vision_path, ip_negative_path, ip_adapter_face_path)
switch = int(round(steps * refiner_switch))
@@ -535,13 +539,26 @@ def worker():
# https://github.com/tencent-ailab/IP-Adapter/blob/d580c50a291566bbf9fc7ac0f760506607297e6d/README.md?plain=1#L75
cn_img = resize_image(cn_img, width=224, height=224, resize_mode=0)
task[0] = ip_adapter.preprocess(cn_img)
task[0] = ip_adapter.preprocess(cn_img, ip_adapter_path=ip_adapter_path)
if advanced_parameters.debugging_cn_preprocessor:
yield_result(cn_img, do_not_show_finished_images=True)
return
for task in cn_tasks[flags.cn_ip_face]:
cn_img, cn_stop, cn_weight = task
cn_img = fooocus_extras.face_crop.crop_image(HWC3(cn_img))
# https://github.com/tencent-ailab/IP-Adapter/blob/d580c50a291566bbf9fc7ac0f760506607297e6d/README.md?plain=1#L75
cn_img = resize_image(cn_img, width=224, height=224, resize_mode=0)
task[0] = ip_adapter.preprocess(cn_img, ip_adapter_path=ip_adapter_face_path)
if advanced_parameters.debugging_cn_preprocessor:
yield_result(cn_img, do_not_show_finished_images=True)
return
if len(cn_tasks[flags.cn_ip]) > 0:
pipeline.final_unet = ip_adapter.patch_model(pipeline.final_unet, cn_tasks[flags.cn_ip])
all_ip_tasks = cn_tasks[flags.cn_ip] + cn_tasks[flags.cn_ip_face]
if len(all_ip_tasks) > 0:
pipeline.final_unet = ip_adapter.patch_model(pipeline.final_unet, all_ip_tasks)
if advanced_parameters.freeu_enabled:
print(f'FreeU is enabled!')
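
Taken together, a FaceSwap image prompt now goes through face cropping before the usual IP-Adapter preprocessing, using the face-specific adapter path. A condensed sketch of that path (the import location of HWC3/resize_image is an assumption, not stated in the diff):

import fooocus_extras.face_crop as face_crop
import fooocus_extras.ip_adapter as ip_adapter
from modules.util import HWC3, resize_image  # assumed location of these helpers

def preprocess_face_prompt(cn_img, ip_adapter_face_path):
    cn_img = face_crop.crop_image(HWC3(cn_img))                           # detect, align and crop the face
    cn_img = resize_image(cn_img, width=224, height=224, resize_mode=0)   # CLIP vision input resolution
    return ip_adapter.preprocess(cn_img, ip_adapter_path=ip_adapter_face_path)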

modules/config.py

@@ -335,7 +335,9 @@ def downloading_controlnet_cpds():
return os.path.join(path_controlnet, 'fooocus_xl_cpds_128.safetensors')
def downloading_ip_adapters():
def downloading_ip_adapters(v):
assert v in ['ip', 'face']
results = []
load_file_from_url(
@@ -352,12 +354,21 @@ def downloading_ip_adapters():
)
results += [os.path.join(path_controlnet, 'fooocus_ip_negative.safetensors')]
load_file_from_url(
url='https://huggingface.co/lllyasviel/misc/resolve/main/ip-adapter-plus_sdxl_vit-h.bin',
model_dir=path_controlnet,
file_name='ip-adapter-plus_sdxl_vit-h.bin'
)
results += [os.path.join(path_controlnet, 'ip-adapter-plus_sdxl_vit-h.bin')]
if v == 'ip':
load_file_from_url(
url='https://huggingface.co/lllyasviel/misc/resolve/main/ip-adapter-plus_sdxl_vit-h.bin',
model_dir=path_controlnet,
file_name='ip-adapter-plus_sdxl_vit-h.bin'
)
results += [os.path.join(path_controlnet, 'ip-adapter-plus_sdxl_vit-h.bin')]
if v == 'face':
load_file_from_url(
url='https://huggingface.co/lllyasviel/misc/resolve/main/ip-adapter-plus-face_sdxl_vit-h.bin',
model_dir=path_controlnet,
file_name='ip-adapter-plus-face_sdxl_vit-h.bin'
)
results += [os.path.join(path_controlnet, 'ip-adapter-plus-face_sdxl_vit-h.bin')]
return results
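
The worker selects the variant by name, so the face model is only downloaded when a FaceSwap task is present; mirroring the call site in async_worker.py above (not a new API):

# 'ip' fetches ip-adapter-plus_sdxl_vit-h.bin, 'face' fetches ip-adapter-plus-face_sdxl_vit-h.bin
clip_vision_path, ip_negative_path, ip_adapter_face_path = modules.config.downloading_ip_adapters('face')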

modules/flags.py

@@ -20,13 +20,14 @@ SAMPLER_NAMES = KSAMPLER_NAMES + ["ddim", "uni_pc", "uni_pc_bh2"]
sampler_list = SAMPLER_NAMES
scheduler_list = SCHEDULER_NAMES
cn_ip = "Image Prompt"
cn_ip = "ImagePrompt"
cn_ip_face = "FaceSwap"
cn_canny = "PyraCanny"
cn_cpds = "CPDS"
ip_list = [cn_ip, cn_canny, cn_cpds]
ip_list = [cn_ip, cn_canny, cn_cpds, cn_ip_face]
default_ip = cn_ip
default_parameters = {
cn_ip: (0.5, 0.6), cn_canny: (0.5, 1.0), cn_cpds: (0.5, 1.0)
cn_ip: (0.5, 0.6), cn_ip_face: (0.9, 0.75), cn_canny: (0.5, 1.0), cn_cpds: (0.5, 1.0)
} # stop, weight
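
The new cn_ip_face entry gets its own default (stop, weight) pair and is looked up the same way as the existing control types, e.g.:

import modules.flags as flags

stop, weight = flags.default_parameters[flags.cn_ip_face]  # (0.9, 0.75), per the table above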

readme.md

@@ -50,6 +50,7 @@ Using Fooocus is as easy as (probably easier than) Midjourney but this does
| Prompt Weights | You can use " I am (happy:1.5)". <br> Fooocus uses A1111's reweighting algorithm so that results are better than ComfyUI if users directly copy prompts from Civitai. (Because if prompts are written in ComfyUI's reweighting, users are less likely to copy prompt texts as they prefer dragging files) <br> To use embedding, you can use "(embedding:file_name:1.1)" |
| --no | Advanced -> Negative Prompt |
| --ar | Advanced -> Aspect Ratios |
| InsightFace | Input Image -> Image Prompt -> Advanced -> FaceSwap |
We also have a few things borrowed from the best parts of LeonardoAI:

requirements_versions.txt

@@ -15,3 +15,4 @@ gradio==3.41.2
pygit2==1.12.2
opencv-contrib-python==4.8.0.74
httpx==0.24.1
facexlib==0.3.0

update_log.md

@@ -1,3 +1,9 @@
# 2.1.790
* Face swap (in line with Midjourney InsightFace): Input Image -> Image Prompt -> Advanced -> FaceSwap
* The performance is super high. Use it carefully and never use it for anything illegal!
* This implementation crops faces for you, so you do NOT need to crop faces before feeding images into Fooocus. (If you previously cropped faces manually for other software, you do not need to do that in Fooocus.)
# 2.1.788
* Fixed some math problems in previous versions.