diff --git a/face_experiments.py b/face_experiments.py
new file mode 100644
index 0000000..5c0a3e5
--- /dev/null
+++ b/face_experiments.py
@@ -0,0 +1,7 @@
+import cv2
+import fooocus_extras.face_crop as cropper
+
+
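+# quick manual test: detect the highest-confidence face and save the aligned crop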
+img = cv2.imread('lena.png')
+result = cropper.crop_image(img)
+cv2.imwrite('lena_result.png', result)
diff --git a/fooocus_extras/face_crop.py b/fooocus_extras/face_crop.py
new file mode 100644
index 0000000..561b816
--- /dev/null
+++ b/fooocus_extras/face_crop.py
@@ -0,0 +1,50 @@
+import cv2
+import numpy as np
+import modules.config
+import facexlib.utils.face_restoration_helper as face_restoration_helper
+
+
+faceRestoreHelper = None
+
+
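+# Adapted from facexlib's FaceRestoreHelper.align_warp_face: `self` is the helper
+# instance passed in explicitly, and `landmark` is a single 5-point landmark set.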
+def align_warp_face(self, landmark, border_mode='reflect'):
+ affine_matrix = cv2.estimateAffinePartial2D(landmark, self.face_template, method=cv2.LMEDS)[0]
+ self.affine_matrices.append(affine_matrix)
+ if border_mode == 'constant':
+ border_mode = cv2.BORDER_CONSTANT
+ elif border_mode == 'reflect101':
+ border_mode = cv2.BORDER_REFLECT101
+ elif border_mode == 'reflect':
+ border_mode = cv2.BORDER_REFLECT
+ input_img = self.input_img
+ cropped_face = cv2.warpAffine(input_img, affine_matrix, self.face_size,
+ borderMode=border_mode, borderValue=(135, 133, 132))
+ return cropped_face
+
+
+def crop_image(img_rgb):
+ global faceRestoreHelper
+
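+    # lazily create the facexlib helper on first use; detection weights are expected under path_controlnet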
+ if faceRestoreHelper is None:
+ faceRestoreHelper = face_restoration_helper.FaceRestoreHelper(
+ upscale_factor=1,
+ model_rootpath=modules.config.path_controlnet,
+            device='cpu'  # using CPU is safer since this helper runs outside fcbh model management
+ )
+
+ faceRestoreHelper.clean_all()
+ faceRestoreHelper.read_image(np.ascontiguousarray(img_rgb[:, :, ::-1].copy()))
+ faceRestoreHelper.get_face_landmarks_5()
+
+ landmarks = faceRestoreHelper.all_landmarks_5
+    # landmarks are already sorted by confidence.
+
+ if len(landmarks) == 0:
+ print('No face detected')
+ return img_rgb
+ else:
+        print(f'Detected {len(landmarks)} face(s)')
+
+ result = align_warp_face(faceRestoreHelper, landmarks[0])
+
+ return np.ascontiguousarray(result[:, :, ::-1].copy())
diff --git a/fooocus_extras/ip_adapter.py b/fooocus_extras/ip_adapter.py
index aeb7de2..2961a65 100644
--- a/fooocus_extras/ip_adapter.py
+++ b/fooocus_extras/ip_adapter.py
@@ -84,26 +84,20 @@ class IPAdapterModel(torch.nn.Module):
clip_vision: fcbh.clip_vision.ClipVisionModel = None
ip_negative: torch.Tensor = None
-image_proj_model: ModelPatcher = None
-ip_layers: ModelPatcher = None
-ip_adapter: IPAdapterModel = None
-ip_unconds = None
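+# loaded IP-Adapter models keyed by checkpoint path, so the plain and face variants can coexist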
+ip_adapters: dict = {}
def load_ip_adapter(clip_vision_path, ip_negative_path, ip_adapter_path):
- global clip_vision, image_proj_model, ip_layers, ip_negative, ip_adapter, ip_unconds
+ global clip_vision, ip_negative, ip_adapters
- if clip_vision_path is None:
- return
- if ip_negative_path is None:
- return
- if ip_adapter_path is None:
- return
- if clip_vision is not None and image_proj_model is not None and ip_layers is not None and ip_negative is not None:
- return
+ if clip_vision is None and isinstance(clip_vision_path, str):
+ clip_vision = fcbh.clip_vision.load(clip_vision_path)
- ip_negative = sf.load_file(ip_negative_path)['data']
- clip_vision = fcbh.clip_vision.load(clip_vision_path)
+ if ip_negative is None and isinstance(ip_negative_path, str):
+ ip_negative = sf.load_file(ip_negative_path)['data']
+
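+    # nothing to do when no adapter path is given or this checkpoint is already cached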
+ if not isinstance(ip_adapter_path, str) or ip_adapter_path in ip_adapters:
+ return
load_device = model_management.get_torch_device()
offload_device = torch.device('cpu')
@@ -141,7 +135,13 @@ def load_ip_adapter(clip_vision_path, ip_negative_path, ip_adapter_path):
ip_layers = ModelPatcher(model=ip_adapter.ip_layers, load_device=load_device,
offload_device=offload_device)
- ip_unconds = None
+ ip_adapters[ip_adapter_path] = dict(
+ ip_adapter=ip_adapter,
+ image_proj_model=image_proj_model,
+ ip_layers=ip_layers,
+ ip_unconds=None
+ )
+
return
@@ -161,8 +161,9 @@ def clip_preprocess(image):
@torch.no_grad()
@torch.inference_mode()
-def preprocess(img):
- global ip_unconds
+def preprocess(img, ip_adapter_path):
+ global ip_adapters
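+    # use the entry that load_ip_adapter registered for this checkpoint path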
+ entry = ip_adapters[ip_adapter_path]
fcbh.model_management.load_model_gpu(clip_vision.patcher)
pixel_values = clip_preprocess(numpy_to_pytorch(img).to(clip_vision.load_device))
@@ -175,6 +176,11 @@ def preprocess(img):
with precision_scope(fcbh.model_management.get_autocast_device(clip_vision.load_device), torch.float32):
outputs = clip_vision.model(pixel_values=pixel_values, output_hidden_states=True)
+ ip_adapter = entry['ip_adapter']
+ ip_layers = entry['ip_layers']
+ image_proj_model = entry['image_proj_model']
+ ip_unconds = entry['ip_unconds']
+
if ip_adapter.plus:
cond = outputs.hidden_states[-2]
else:
@@ -190,9 +196,11 @@ def preprocess(img):
if ip_unconds is None:
uncond = ip_negative.to(device=ip_adapter.load_device, dtype=ip_adapter.dtype)
ip_unconds = [m(uncond).cpu() for m in ip_layers.model.to_kvs]
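+        # cache the projected negatives on this adapter's entry so they are only computed once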
+ entry['ip_unconds'] = ip_unconds
ip_conds = [m(cond).cpu() for m in ip_layers.model.to_kvs]
- return ip_conds
+
+ return ip_conds, ip_unconds
@torch.no_grad()
@@ -206,46 +214,46 @@ def patch_model(model, tasks):
current_step = float(model.model.diffusion_model.current_step.detach().cpu().numpy()[0])
cond_or_uncond = extra_options['cond_or_uncond']
- with torch.autocast("cuda", dtype=ip_adapter.dtype):
- q = n
- k = [context_attn2]
- v = [value_attn2]
- b, _, _ = q.shape
+ q = n
+ k = [context_attn2]
+ v = [value_attn2]
+ b, _, _ = q.shape
- for ip_conds, cn_stop, cn_weight in tasks:
- if current_step < cn_stop:
- ip_k_c = ip_conds[ip_index * 2].to(q)
- ip_v_c = ip_conds[ip_index * 2 + 1].to(q)
- ip_k_uc = ip_unconds[ip_index * 2].to(q)
- ip_v_uc = ip_unconds[ip_index * 2 + 1].to(q)
+ for (cs, ucs), cn_stop, cn_weight in tasks:
+ if current_step < cn_stop:
+ ip_k_c = cs[ip_index * 2].to(q)
+ ip_v_c = cs[ip_index * 2 + 1].to(q)
+ ip_k_uc = ucs[ip_index * 2].to(q)
+ ip_v_uc = ucs[ip_index * 2 + 1].to(q)
- ip_k = torch.cat([(ip_k_c, ip_k_uc)[i] for i in cond_or_uncond], dim=0)
- ip_v = torch.cat([(ip_v_c, ip_v_uc)[i] for i in cond_or_uncond], dim=0)
+ ip_k = torch.cat([(ip_k_c, ip_k_uc)[i] for i in cond_or_uncond], dim=0)
+ ip_v = torch.cat([(ip_v_c, ip_v_uc)[i] for i in cond_or_uncond], dim=0)
- # Midjourney's attention formulation of image prompt (non-official reimplementation)
- # Written by Lvmin Zhang at Stanford University, 2023 Dec
- # For non-commercial use only - if you use this in commercial project then
- # probably it has some intellectual property issues.
- # Contact lvminzhang@acm.org if you are not sure.
+ # Midjourney's attention formulation of image prompt (non-official reimplementation)
+ # Written by Lvmin Zhang at Stanford University, 2023 Dec
+ # For non-commercial use only - if you use this in commercial project then
+ # probably it has some intellectual property issues.
+ # Contact lvminzhang@acm.org if you are not sure.
- # Below is the sensitive part with potential intellectual property issues.
+ # Below is the sensitive part with potential intellectual property issues.
- ip_v_mean = torch.mean(ip_v, dim=1, keepdim=True)
- ip_v_offset = ip_v - ip_v_mean
+ ip_v_mean = torch.mean(ip_v, dim=1, keepdim=True)
+ ip_v_offset = ip_v - ip_v_mean
- B, F, C = ip_k.shape
- channel_penalty = float(C) / 1280.0
- weight = cn_weight * channel_penalty
+ B, F, C = ip_k.shape
+ channel_penalty = float(C) / 1280.0
+ weight = cn_weight * channel_penalty
- ip_k = ip_k * weight
- ip_v = ip_v_offset + ip_v_mean * weight
+ ip_k = ip_k * weight
+ ip_v = ip_v_offset + ip_v_mean * weight
- k.append(ip_k)
- v.append(ip_v)
+ k.append(ip_k)
+ v.append(ip_v)
+
+ k = torch.cat(k, dim=1)
+ v = torch.cat(v, dim=1)
+ out = sdp(q, k, v, extra_options)
- k = torch.cat(k, dim=1)
- v = torch.cat(v, dim=1)
- out = sdp(q, k, v, extra_options)
return out.to(dtype=org_dtype)
return patcher
@@ -260,27 +268,21 @@ def patch_model(model, tasks):
to["patches_replace"]["attn2"][key] = make_attn_patcher(number)
number = 0
- if not ip_adapter.sdxl:
- for id in [1, 2, 4, 5, 7, 8]: # id of input_blocks that have cross attention
- set_model_patch_replace(new_model, number, ("input", id))
- number += 1
- for id in [3, 4, 5, 6, 7, 8, 9, 10, 11]: # id of output_blocks that have cross attention
- set_model_patch_replace(new_model, number, ("output", id))
- number += 1
- set_model_patch_replace(new_model, number, ("middle", 0))
- else:
- for id in [4, 5, 7, 8]: # id of input_blocks that have cross attention
- block_indices = range(2) if id in [4, 5] else range(10) # transformer_depth
- for index in block_indices:
- set_model_patch_replace(new_model, number, ("input", id, index))
- number += 1
- for id in range(6): # id of output_blocks that have cross attention
- block_indices = range(2) if id in [3, 4, 5] else range(10) # transformer_depth
- for index in block_indices:
- set_model_patch_replace(new_model, number, ("output", id, index))
- number += 1
- for index in range(10):
- set_model_patch_replace(new_model, number, ("middle", 0, index))
+
+ for id in [4, 5, 7, 8]:
+ block_indices = range(2) if id in [4, 5] else range(10)
+ for index in block_indices:
+ set_model_patch_replace(new_model, number, ("input", id, index))
number += 1
+ for id in range(6):
+ block_indices = range(2) if id in [3, 4, 5] else range(10)
+ for index in block_indices:
+ set_model_patch_replace(new_model, number, ("output", id, index))
+ number += 1
+
+ for index in range(10):
+ set_model_patch_replace(new_model, number, ("middle", 0, index))
+ number += 1
+
return new_model
diff --git a/fooocus_version.py b/fooocus_version.py
index 8331d32..bc88969 100644
--- a/fooocus_version.py
+++ b/fooocus_version.py
@@ -1 +1 @@
-version = '2.1.789'
+version = '2.1.790'
diff --git a/modules/async_worker.py b/modules/async_worker.py
index 61f5138..b4793aa 100644
--- a/modules/async_worker.py
+++ b/modules/async_worker.py
@@ -28,6 +28,7 @@ def worker():
import modules.constants as constants
import modules.advanced_parameters as advanced_parameters
import fooocus_extras.ip_adapter as ip_adapter
+ import fooocus_extras.face_crop
from modules.sdxl_styles import apply_style, apply_wildcards, fooocus_expansion
from modules.private_logger import log
@@ -133,7 +134,7 @@ def worker():
outpaint_selections = args.pop()
inpaint_input_image = args.pop()
- cn_tasks = {flags.cn_ip: [], flags.cn_canny: [], flags.cn_cpds: []}
+ cn_tasks = {x: [] for x in flags.ip_list}
for _ in range(4):
cn_img = args.pop()
cn_stop = args.pop()
@@ -189,7 +190,7 @@ def worker():
inpaint_head_model_path = None
controlnet_canny_path = None
controlnet_cpds_path = None
- clip_vision_path, ip_negative_path, ip_adapter_path = None, None, None
+ clip_vision_path, ip_negative_path, ip_adapter_path, ip_adapter_face_path = None, None, None, None
seed = int(image_seed)
print(f'[Parameters] Seed = {seed}')
@@ -244,12 +245,15 @@ def worker():
if len(cn_tasks[flags.cn_cpds]) > 0:
controlnet_cpds_path = modules.config.downloading_controlnet_cpds()
if len(cn_tasks[flags.cn_ip]) > 0:
- clip_vision_path, ip_negative_path, ip_adapter_path = modules.config.downloading_ip_adapters()
+ clip_vision_path, ip_negative_path, ip_adapter_path = modules.config.downloading_ip_adapters('ip')
+ if len(cn_tasks[flags.cn_ip_face]) > 0:
+ clip_vision_path, ip_negative_path, ip_adapter_face_path = modules.config.downloading_ip_adapters('face')
progressbar(1, 'Loading control models ...')
# Load or unload CNs
pipeline.refresh_controlnets([controlnet_canny_path, controlnet_cpds_path])
ip_adapter.load_ip_adapter(clip_vision_path, ip_negative_path, ip_adapter_path)
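+        # the face adapter load below is a no-op when ip_adapter_face_path is None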
+ ip_adapter.load_ip_adapter(clip_vision_path, ip_negative_path, ip_adapter_face_path)
switch = int(round(steps * refiner_switch))
@@ -535,13 +539,26 @@ def worker():
# https://github.com/tencent-ailab/IP-Adapter/blob/d580c50a291566bbf9fc7ac0f760506607297e6d/README.md?plain=1#L75
cn_img = resize_image(cn_img, width=224, height=224, resize_mode=0)
- task[0] = ip_adapter.preprocess(cn_img)
+ task[0] = ip_adapter.preprocess(cn_img, ip_adapter_path=ip_adapter_path)
+ if advanced_parameters.debugging_cn_preprocessor:
+ yield_result(cn_img, do_not_show_finished_images=True)
+ return
+ for task in cn_tasks[flags.cn_ip_face]:
+ cn_img, cn_stop, cn_weight = task
+ cn_img = fooocus_extras.face_crop.crop_image(HWC3(cn_img))
+
+ # https://github.com/tencent-ailab/IP-Adapter/blob/d580c50a291566bbf9fc7ac0f760506607297e6d/README.md?plain=1#L75
+ cn_img = resize_image(cn_img, width=224, height=224, resize_mode=0)
+
+ task[0] = ip_adapter.preprocess(cn_img, ip_adapter_path=ip_adapter_face_path)
if advanced_parameters.debugging_cn_preprocessor:
yield_result(cn_img, do_not_show_finished_images=True)
return
- if len(cn_tasks[flags.cn_ip]) > 0:
- pipeline.final_unet = ip_adapter.patch_model(pipeline.final_unet, cn_tasks[flags.cn_ip])
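+        # ImagePrompt and FaceSwap tasks share one attention patch; each task carries its own (cond, uncond) pair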
+ all_ip_tasks = cn_tasks[flags.cn_ip] + cn_tasks[flags.cn_ip_face]
+
+ if len(all_ip_tasks) > 0:
+ pipeline.final_unet = ip_adapter.patch_model(pipeline.final_unet, all_ip_tasks)
if advanced_parameters.freeu_enabled:
print(f'FreeU is enabled!')
diff --git a/modules/config.py b/modules/config.py
index 1166a2e..8c8557c 100644
--- a/modules/config.py
+++ b/modules/config.py
@@ -335,7 +335,9 @@ def downloading_controlnet_cpds():
return os.path.join(path_controlnet, 'fooocus_xl_cpds_128.safetensors')
-def downloading_ip_adapters():
+def downloading_ip_adapters(v):
+ assert v in ['ip', 'face']
+
results = []
load_file_from_url(
@@ -352,12 +354,21 @@ def downloading_ip_adapters():
)
results += [os.path.join(path_controlnet, 'fooocus_ip_negative.safetensors')]
- load_file_from_url(
- url='https://huggingface.co/lllyasviel/misc/resolve/main/ip-adapter-plus_sdxl_vit-h.bin',
- model_dir=path_controlnet,
- file_name='ip-adapter-plus_sdxl_vit-h.bin'
- )
- results += [os.path.join(path_controlnet, 'ip-adapter-plus_sdxl_vit-h.bin')]
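+    # download only the requested variant: the plus model for ImagePrompt or the plus-face model for FaceSwap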
+ if v == 'ip':
+ load_file_from_url(
+ url='https://huggingface.co/lllyasviel/misc/resolve/main/ip-adapter-plus_sdxl_vit-h.bin',
+ model_dir=path_controlnet,
+ file_name='ip-adapter-plus_sdxl_vit-h.bin'
+ )
+ results += [os.path.join(path_controlnet, 'ip-adapter-plus_sdxl_vit-h.bin')]
+
+ if v == 'face':
+ load_file_from_url(
+ url='https://huggingface.co/lllyasviel/misc/resolve/main/ip-adapter-plus-face_sdxl_vit-h.bin',
+ model_dir=path_controlnet,
+ file_name='ip-adapter-plus-face_sdxl_vit-h.bin'
+ )
+ results += [os.path.join(path_controlnet, 'ip-adapter-plus-face_sdxl_vit-h.bin')]
return results
diff --git a/modules/flags.py b/modules/flags.py
index a0e033c..1d46f0a 100644
--- a/modules/flags.py
+++ b/modules/flags.py
@@ -20,13 +20,14 @@ SAMPLER_NAMES = KSAMPLER_NAMES + ["ddim", "uni_pc", "uni_pc_bh2"]
sampler_list = SAMPLER_NAMES
scheduler_list = SCHEDULER_NAMES
-cn_ip = "Image Prompt"
+cn_ip = "ImagePrompt"
+cn_ip_face = "FaceSwap"
cn_canny = "PyraCanny"
cn_cpds = "CPDS"
-ip_list = [cn_ip, cn_canny, cn_cpds]
+ip_list = [cn_ip, cn_canny, cn_cpds, cn_ip_face]
default_ip = cn_ip
default_parameters = {
- cn_ip: (0.5, 0.6), cn_canny: (0.5, 1.0), cn_cpds: (0.5, 1.0)
+ cn_ip: (0.5, 0.6), cn_ip_face: (0.9, 0.75), cn_canny: (0.5, 1.0), cn_cpds: (0.5, 1.0)
} # stop, weight
diff --git a/readme.md b/readme.md
index 49b85e9..dddaebd 100644
--- a/readme.md
+++ b/readme.md
@@ -50,6 +50,7 @@ Using Fooocus is as easy as (probably easier than) Midjourney – but this does
| Prompt Weights | You can use " I am (happy:1.5)".
Fooocus uses A1111's reweighting algorithm so that results are better than ComfyUI if users directly copy prompts from Civitai. (Because if prompts are written in ComfyUI's reweighting, users are less likely to copy prompt texts as they prefer dragging files)
To use embedding, you can use "(embedding:file_name:1.1)" |
| --no | Advanced -> Negative Prompt |
| --ar | Advanced -> Aspect Ratios |
+| InsightFace | Input Image -> Image Prompt -> Advanced -> FaceSwap |
We also have a few things borrowed from the best parts of LeonardoAI:
diff --git a/requirements_versions.txt b/requirements_versions.txt
index 5d5af5d..02f2ff7 100644
--- a/requirements_versions.txt
+++ b/requirements_versions.txt
@@ -15,3 +15,4 @@ gradio==3.41.2
pygit2==1.12.2
opencv-contrib-python==4.8.0.74
httpx==0.24.1
+facexlib==0.3.0
diff --git a/update_log.md b/update_log.md
index 6e9d2be..17424de 100644
--- a/update_log.md
+++ b/update_log.md
@@ -1,3 +1,9 @@
+# 2.1.790
+
+* Face swap (in line with Midjourney's InsightFace feature): Input Image -> Image Prompt -> Advanced -> FaceSwap
+* The quality of the results is very high. Use it carefully and never use it for anything illegal!
+* This implementation crops faces for you automatically, so you do NOT need to crop faces before feeding images into Fooocus. (If you previously cropped faces manually for other software, that step is no longer needed in Fooocus.)
+
# 2.1.788
* Fixed some math problems in previous versions.