2.1.790
lllyasviel 2023-11-11 22:13:13 -08:00 committed by GitHub
parent fd4a5b2eaf
commit 2bef62c545
10 changed files with 184 additions and 88 deletions

face_experiments.py Normal file

@@ -0,0 +1,7 @@
import cv2
import fooocus_extras.face_crop as cropper

# quick manual test: crop the face from lena.png and write the result next to it
img = cv2.imread('lena.png')
result = cropper.crop_image(img)
cv2.imwrite('lena_result.png', result)

fooocus_extras/face_crop.py Normal file

@@ -0,0 +1,50 @@
import cv2
import numpy as np

import modules.config
import facexlib.utils.face_restoration_helper as face_restoration_helper


faceRestoreHelper = None


def align_warp_face(self, landmark, border_mode='reflect'):
    affine_matrix = cv2.estimateAffinePartial2D(landmark, self.face_template, method=cv2.LMEDS)[0]
    self.affine_matrices.append(affine_matrix)
    if border_mode == 'constant':
        border_mode = cv2.BORDER_CONSTANT
    elif border_mode == 'reflect101':
        border_mode = cv2.BORDER_REFLECT101
    elif border_mode == 'reflect':
        border_mode = cv2.BORDER_REFLECT
    input_img = self.input_img
    cropped_face = cv2.warpAffine(input_img, affine_matrix, self.face_size,
                                  borderMode=border_mode, borderValue=(135, 133, 132))
    return cropped_face


def crop_image(img_rgb):
    global faceRestoreHelper

    if faceRestoreHelper is None:
        faceRestoreHelper = face_restoration_helper.FaceRestoreHelper(
            upscale_factor=1,
            model_rootpath=modules.config.path_controlnet,
            device='cpu'  # using cpu is safer since this runs outside fcbh memory management
        )

    faceRestoreHelper.clean_all()
    # facexlib works on BGR images, so flip the channel order before detection
    faceRestoreHelper.read_image(np.ascontiguousarray(img_rgb[:, :, ::-1].copy()))
    faceRestoreHelper.get_face_landmarks_5()

    landmarks = faceRestoreHelper.all_landmarks_5
    # landmarks are already sorted by detection confidence

    if len(landmarks) == 0:
        print('No face detected')
        return img_rgb
    else:
        print(f'Detected {len(landmarks)} faces')
        result = align_warp_face(faceRestoreHelper, landmarks[0])
        # flip back to RGB before returning
        return np.ascontiguousarray(result[:, :, ::-1].copy())

fooocus_extras/ip_adapter.py

@@ -84,26 +84,20 @@ class IPAdapterModel(torch.nn.Module):
clip_vision: fcbh.clip_vision.ClipVisionModel = None
ip_negative: torch.Tensor = None
image_proj_model: ModelPatcher = None
ip_layers: ModelPatcher = None
ip_adapter: IPAdapterModel = None
ip_unconds = None
ip_adapters: dict = {}
def load_ip_adapter(clip_vision_path, ip_negative_path, ip_adapter_path):
global clip_vision, image_proj_model, ip_layers, ip_negative, ip_adapter, ip_unconds
global clip_vision, ip_negative, ip_adapters
if clip_vision_path is None:
return
if ip_negative_path is None:
return
if ip_adapter_path is None:
return
if clip_vision is not None and image_proj_model is not None and ip_layers is not None and ip_negative is not None:
return
if clip_vision is None and isinstance(clip_vision_path, str):
clip_vision = fcbh.clip_vision.load(clip_vision_path)
ip_negative = sf.load_file(ip_negative_path)['data']
clip_vision = fcbh.clip_vision.load(clip_vision_path)
if ip_negative is None and isinstance(ip_negative_path, str):
ip_negative = sf.load_file(ip_negative_path)['data']
if not isinstance(ip_adapter_path, str) or ip_adapter_path in ip_adapters:
return
load_device = model_management.get_torch_device()
offload_device = torch.device('cpu')
@@ -141,7 +135,13 @@ def load_ip_adapter(clip_vision_path, ip_negative_path, ip_adapter_path):
ip_layers = ModelPatcher(model=ip_adapter.ip_layers, load_device=load_device,
offload_device=offload_device)
ip_unconds = None
ip_adapters[ip_adapter_path] = dict(
ip_adapter=ip_adapter,
image_proj_model=image_proj_model,
ip_layers=ip_layers,
ip_unconds=None
)
return
@@ -161,8 +161,9 @@ def clip_preprocess(image):
@torch.no_grad()
@torch.inference_mode()
def preprocess(img):
global ip_unconds
def preprocess(img, ip_adapter_path):
global ip_adapters
entry = ip_adapters[ip_adapter_path]
fcbh.model_management.load_model_gpu(clip_vision.patcher)
pixel_values = clip_preprocess(numpy_to_pytorch(img).to(clip_vision.load_device))
@@ -175,6 +176,11 @@ def preprocess(img):
with precision_scope(fcbh.model_management.get_autocast_device(clip_vision.load_device), torch.float32):
outputs = clip_vision.model(pixel_values=pixel_values, output_hidden_states=True)
ip_adapter = entry['ip_adapter']
ip_layers = entry['ip_layers']
image_proj_model = entry['image_proj_model']
ip_unconds = entry['ip_unconds']
if ip_adapter.plus:
cond = outputs.hidden_states[-2]
else:
@@ -190,9 +196,11 @@ def preprocess(img):
if ip_unconds is None:
uncond = ip_negative.to(device=ip_adapter.load_device, dtype=ip_adapter.dtype)
ip_unconds = [m(uncond).cpu() for m in ip_layers.model.to_kvs]
entry['ip_unconds'] = ip_unconds
ip_conds = [m(cond).cpu() for m in ip_layers.model.to_kvs]
return ip_conds
return ip_conds, ip_unconds
@torch.no_grad()
@@ -206,46 +214,46 @@ def patch_model(model, tasks):
current_step = float(model.model.diffusion_model.current_step.detach().cpu().numpy()[0])
cond_or_uncond = extra_options['cond_or_uncond']
with torch.autocast("cuda", dtype=ip_adapter.dtype):
q = n
k = [context_attn2]
v = [value_attn2]
b, _, _ = q.shape
q = n
k = [context_attn2]
v = [value_attn2]
b, _, _ = q.shape
for ip_conds, cn_stop, cn_weight in tasks:
if current_step < cn_stop:
ip_k_c = ip_conds[ip_index * 2].to(q)
ip_v_c = ip_conds[ip_index * 2 + 1].to(q)
ip_k_uc = ip_unconds[ip_index * 2].to(q)
ip_v_uc = ip_unconds[ip_index * 2 + 1].to(q)
for (cs, ucs), cn_stop, cn_weight in tasks:
if current_step < cn_stop:
ip_k_c = cs[ip_index * 2].to(q)
ip_v_c = cs[ip_index * 2 + 1].to(q)
ip_k_uc = ucs[ip_index * 2].to(q)
ip_v_uc = ucs[ip_index * 2 + 1].to(q)
ip_k = torch.cat([(ip_k_c, ip_k_uc)[i] for i in cond_or_uncond], dim=0)
ip_v = torch.cat([(ip_v_c, ip_v_uc)[i] for i in cond_or_uncond], dim=0)
ip_k = torch.cat([(ip_k_c, ip_k_uc)[i] for i in cond_or_uncond], dim=0)
ip_v = torch.cat([(ip_v_c, ip_v_uc)[i] for i in cond_or_uncond], dim=0)
# Midjourney's attention formulation of image prompt (non-official reimplementation)
# Written by Lvmin Zhang at Stanford University, 2023 Dec
# For non-commercial use only - if you use this in a commercial project then
# it probably has some intellectual property issues.
# Contact lvminzhang@acm.org if you are not sure.
# Midjourney's attention formulation of image prompt (non-official reimplementation)
# Written by Lvmin Zhang at Stanford University, 2023 Dec
# For non-commercial use only - if you use this in a commercial project then
# it probably has some intellectual property issues.
# Contact lvminzhang@acm.org if you are not sure.
# Below is the sensitive part with potential intellectual property issues.
# Below is the sensitive part with potential intellectual property issues.
ip_v_mean = torch.mean(ip_v, dim=1, keepdim=True)
ip_v_offset = ip_v - ip_v_mean
ip_v_mean = torch.mean(ip_v, dim=1, keepdim=True)
ip_v_offset = ip_v - ip_v_mean
B, F, C = ip_k.shape
channel_penalty = float(C) / 1280.0
weight = cn_weight * channel_penalty
B, F, C = ip_k.shape
channel_penalty = float(C) / 1280.0
weight = cn_weight * channel_penalty
ip_k = ip_k * weight
ip_v = ip_v_offset + ip_v_mean * weight
ip_k = ip_k * weight
ip_v = ip_v_offset + ip_v_mean * weight
k.append(ip_k)
v.append(ip_v)
k.append(ip_k)
v.append(ip_v)
k = torch.cat(k, dim=1)
v = torch.cat(v, dim=1)
out = sdp(q, k, v, extra_options)
k = torch.cat(k, dim=1)
v = torch.cat(v, dim=1)
out = sdp(q, k, v, extra_options)
return out.to(dtype=org_dtype)
return patcher
@@ -260,27 +268,21 @@ def patch_model(model, tasks):
to["patches_replace"]["attn2"][key] = make_attn_patcher(number)
number = 0
if not ip_adapter.sdxl:
for id in [1, 2, 4, 5, 7, 8]: # id of input_blocks that have cross attention
set_model_patch_replace(new_model, number, ("input", id))
number += 1
for id in [3, 4, 5, 6, 7, 8, 9, 10, 11]: # id of output_blocks that have cross attention
set_model_patch_replace(new_model, number, ("output", id))
number += 1
set_model_patch_replace(new_model, number, ("middle", 0))
else:
for id in [4, 5, 7, 8]: # id of input_blocks that have cross attention
block_indices = range(2) if id in [4, 5] else range(10) # transformer_depth
for index in block_indices:
set_model_patch_replace(new_model, number, ("input", id, index))
number += 1
for id in range(6): # id of output_blocks that have cross attention
block_indices = range(2) if id in [3, 4, 5] else range(10) # transformer_depth
for index in block_indices:
set_model_patch_replace(new_model, number, ("output", id, index))
number += 1
for index in range(10):
set_model_patch_replace(new_model, number, ("middle", 0, index))
for id in [4, 5, 7, 8]:
block_indices = range(2) if id in [4, 5] else range(10)
for index in block_indices:
set_model_patch_replace(new_model, number, ("input", id, index))
number += 1
for id in range(6):
block_indices = range(2) if id in [3, 4, 5] else range(10)
for index in block_indices:
set_model_patch_replace(new_model, number, ("output", id, index))
number += 1
for index in range(10):
set_model_patch_replace(new_model, number, ("middle", 0, index))
number += 1
return new_model
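
The key structural change in this file is that the module-level ip_adapter, image_proj_model, ip_layers and ip_unconds singletons become entries in an ip_adapters dict keyed by adapter path, so the regular ImagePrompt adapter and the FaceSwap adapter can stay loaded side by side. A minimal standalone sketch of that caching pattern (every name below is a placeholder, not the real loader):

# toy illustration of a per-path adapter cache; placeholders stand in for the real objects
_adapters = {}

def load_adapter_cached(path):
    if path in _adapters:
        return _adapters[path]            # already loaded: reuse the cached entry
    entry = dict(
        ip_adapter=object(),              # would hold the loaded IPAdapterModel
        image_proj_model=object(),        # would hold the ModelPatcher wrappers
        ip_layers=object(),
        ip_unconds=None,                  # negative projections are filled in lazily by preprocess()
    )
    _adapters[path] = entry
    return entry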

fooocus_version.py

@@ -1 +1 @@
version = '2.1.789'
version = '2.1.790'

modules/async_worker.py

@@ -28,6 +28,7 @@ def worker():
import modules.constants as constants
import modules.advanced_parameters as advanced_parameters
import fooocus_extras.ip_adapter as ip_adapter
import fooocus_extras.face_crop
from modules.sdxl_styles import apply_style, apply_wildcards, fooocus_expansion
from modules.private_logger import log
@@ -133,7 +134,7 @@ def worker():
outpaint_selections = args.pop()
inpaint_input_image = args.pop()
cn_tasks = {flags.cn_ip: [], flags.cn_canny: [], flags.cn_cpds: []}
cn_tasks = {x: [] for x in flags.ip_list}
for _ in range(4):
cn_img = args.pop()
cn_stop = args.pop()
@@ -189,7 +190,7 @@ def worker():
inpaint_head_model_path = None
controlnet_canny_path = None
controlnet_cpds_path = None
clip_vision_path, ip_negative_path, ip_adapter_path = None, None, None
clip_vision_path, ip_negative_path, ip_adapter_path, ip_adapter_face_path = None, None, None, None
seed = int(image_seed)
print(f'[Parameters] Seed = {seed}')
@@ -244,12 +245,15 @@ def worker():
if len(cn_tasks[flags.cn_cpds]) > 0:
controlnet_cpds_path = modules.config.downloading_controlnet_cpds()
if len(cn_tasks[flags.cn_ip]) > 0:
clip_vision_path, ip_negative_path, ip_adapter_path = modules.config.downloading_ip_adapters()
clip_vision_path, ip_negative_path, ip_adapter_path = modules.config.downloading_ip_adapters('ip')
if len(cn_tasks[flags.cn_ip_face]) > 0:
clip_vision_path, ip_negative_path, ip_adapter_face_path = modules.config.downloading_ip_adapters('face')
progressbar(1, 'Loading control models ...')
# Load or unload CNs
pipeline.refresh_controlnets([controlnet_canny_path, controlnet_cpds_path])
ip_adapter.load_ip_adapter(clip_vision_path, ip_negative_path, ip_adapter_path)
ip_adapter.load_ip_adapter(clip_vision_path, ip_negative_path, ip_adapter_face_path)
switch = int(round(steps * refiner_switch))
@@ -535,13 +539,26 @@ def worker():
# https://github.com/tencent-ailab/IP-Adapter/blob/d580c50a291566bbf9fc7ac0f760506607297e6d/README.md?plain=1#L75
cn_img = resize_image(cn_img, width=224, height=224, resize_mode=0)
task[0] = ip_adapter.preprocess(cn_img)
task[0] = ip_adapter.preprocess(cn_img, ip_adapter_path=ip_adapter_path)
if advanced_parameters.debugging_cn_preprocessor:
yield_result(cn_img, do_not_show_finished_images=True)
return
for task in cn_tasks[flags.cn_ip_face]:
cn_img, cn_stop, cn_weight = task
cn_img = fooocus_extras.face_crop.crop_image(HWC3(cn_img))
# https://github.com/tencent-ailab/IP-Adapter/blob/d580c50a291566bbf9fc7ac0f760506607297e6d/README.md?plain=1#L75
cn_img = resize_image(cn_img, width=224, height=224, resize_mode=0)
task[0] = ip_adapter.preprocess(cn_img, ip_adapter_path=ip_adapter_face_path)
if advanced_parameters.debugging_cn_preprocessor:
yield_result(cn_img, do_not_show_finished_images=True)
return
if len(cn_tasks[flags.cn_ip]) > 0:
pipeline.final_unet = ip_adapter.patch_model(pipeline.final_unet, cn_tasks[flags.cn_ip])
all_ip_tasks = cn_tasks[flags.cn_ip] + cn_tasks[flags.cn_ip_face]
if len(all_ip_tasks) > 0:
pipeline.final_unet = ip_adapter.patch_model(pipeline.final_unet, all_ip_tasks)
if advanced_parameters.freeu_enabled:
print(f'FreeU is enabled!')
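
Taken together, a FaceSwap image prompt now goes through face cropping before the usual IP-Adapter preprocessing, using the face-specific adapter path. A condensed sketch of that path (the import location of HWC3/resize_image is an assumption, not stated in the diff):

import fooocus_extras.face_crop as face_crop
import fooocus_extras.ip_adapter as ip_adapter
from modules.util import HWC3, resize_image  # assumed location of these helpers

def preprocess_face_prompt(cn_img, ip_adapter_face_path):
    cn_img = face_crop.crop_image(HWC3(cn_img))                           # detect, align and crop the face
    cn_img = resize_image(cn_img, width=224, height=224, resize_mode=0)   # CLIP vision input resolution
    return ip_adapter.preprocess(cn_img, ip_adapter_path=ip_adapter_face_path)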

modules/config.py

@@ -335,7 +335,9 @@ def downloading_controlnet_cpds():
return os.path.join(path_controlnet, 'fooocus_xl_cpds_128.safetensors')
def downloading_ip_adapters():
def downloading_ip_adapters(v):
assert v in ['ip', 'face']
results = []
load_file_from_url(
@@ -352,12 +354,21 @@ def downloading_ip_adapters():
)
results += [os.path.join(path_controlnet, 'fooocus_ip_negative.safetensors')]
load_file_from_url(
url='https://huggingface.co/lllyasviel/misc/resolve/main/ip-adapter-plus_sdxl_vit-h.bin',
model_dir=path_controlnet,
file_name='ip-adapter-plus_sdxl_vit-h.bin'
)
results += [os.path.join(path_controlnet, 'ip-adapter-plus_sdxl_vit-h.bin')]
if v == 'ip':
load_file_from_url(
url='https://huggingface.co/lllyasviel/misc/resolve/main/ip-adapter-plus_sdxl_vit-h.bin',
model_dir=path_controlnet,
file_name='ip-adapter-plus_sdxl_vit-h.bin'
)
results += [os.path.join(path_controlnet, 'ip-adapter-plus_sdxl_vit-h.bin')]
if v == 'face':
load_file_from_url(
url='https://huggingface.co/lllyasviel/misc/resolve/main/ip-adapter-plus-face_sdxl_vit-h.bin',
model_dir=path_controlnet,
file_name='ip-adapter-plus-face_sdxl_vit-h.bin'
)
results += [os.path.join(path_controlnet, 'ip-adapter-plus-face_sdxl_vit-h.bin')]
return results
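
The worker selects the variant by name, so the face model is only downloaded when a FaceSwap task is present; mirroring the call site in async_worker.py above (not a new API):

# 'ip' fetches ip-adapter-plus_sdxl_vit-h.bin, 'face' fetches ip-adapter-plus-face_sdxl_vit-h.bin
clip_vision_path, ip_negative_path, ip_adapter_face_path = modules.config.downloading_ip_adapters('face')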

modules/flags.py

@@ -20,13 +20,14 @@ SAMPLER_NAMES = KSAMPLER_NAMES + ["ddim", "uni_pc", "uni_pc_bh2"]
sampler_list = SAMPLER_NAMES
scheduler_list = SCHEDULER_NAMES
cn_ip = "Image Prompt"
cn_ip = "ImagePrompt"
cn_ip_face = "FaceSwap"
cn_canny = "PyraCanny"
cn_cpds = "CPDS"
ip_list = [cn_ip, cn_canny, cn_cpds]
ip_list = [cn_ip, cn_canny, cn_cpds, cn_ip_face]
default_ip = cn_ip
default_parameters = {
cn_ip: (0.5, 0.6), cn_canny: (0.5, 1.0), cn_cpds: (0.5, 1.0)
cn_ip: (0.5, 0.6), cn_ip_face: (0.9, 0.75), cn_canny: (0.5, 1.0), cn_cpds: (0.5, 1.0)
} # stop, weight
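
The new cn_ip_face entry gets its own default (stop, weight) pair and is looked up the same way as the existing control types, e.g.:

import modules.flags as flags

stop, weight = flags.default_parameters[flags.cn_ip_face]  # (0.9, 0.75), per the table above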

readme.md

@@ -50,6 +50,7 @@ Using Fooocus is as easy as (probably easier than) Midjourney but this does
| Prompt Weights | You can use " I am (happy:1.5)". <br> Fooocus uses A1111's reweighting algorithm so that results are better than ComfyUI if users directly copy prompts from Civitai. (Because if prompts are written in ComfyUI's reweighting, users are less likely to copy prompt texts as they prefer dragging files) <br> To use embedding, you can use "(embedding:file_name:1.1)" |
| --no | Advanced -> Negative Prompt |
| --ar | Advanced -> Aspect Ratios |
| InsightFace | Input Image -> Image Prompt -> Advanced -> FaceSwap |
We also have a few things borrowed from the best parts of LeonardoAI:

requirements_versions.txt

@@ -15,3 +15,4 @@ gradio==3.41.2
pygit2==1.12.2
opencv-contrib-python==4.8.0.74
httpx==0.24.1
facexlib==0.3.0

update_log.md

@@ -1,3 +1,9 @@
# 2.1.790
* Face swap (in line with Midjourney InsightFace): Input Image -> Image Prompt -> Advanced -> FaceSwap
* The performance is super high. Use it carefully and never use it for anything illegal!
* This implementation crops faces for you, so you do NOT need to crop faces before feeding images into Fooocus. (If you previously cropped faces manually for other software, you do not need to do that in Fooocus.)
# 2.1.788
* Fixed some math problems in previous versions.