Merge bf63506e89 into d057f2fae9
Commit e0543b3c38

extras/GroundingDINO/config/GroundingDINO_SwinT_OGC.py (new file)
@@ -0,0 +1,43 @@
batch_size = 1
modelname = "groundingdino"
backbone = "swin_T_224_1k"
position_embedding = "sine"
pe_temperatureH = 20
pe_temperatureW = 20
return_interm_indices = [1, 2, 3]
backbone_freeze_keywords = None
enc_layers = 6
dec_layers = 6
pre_norm = False
dim_feedforward = 2048
hidden_dim = 256
dropout = 0.0
nheads = 8
num_queries = 900
query_dim = 4
num_patterns = 0
num_feature_levels = 4
enc_n_points = 4
dec_n_points = 4
two_stage_type = "standard"
two_stage_bbox_embed_share = False
two_stage_class_embed_share = False
transformer_activation = "relu"
dec_pred_bbox_embed_share = True
dn_box_noise_scale = 1.0
dn_label_noise_ratio = 0.5
dn_label_coef = 1.0
dn_bbox_coef = 1.0
embed_init_tgt = True
dn_labelbook_size = 2000
max_text_len = 256
text_encoder_type = "bert-base-uncased"
use_text_enhancer = True
use_fusion_layer = True
use_checkpoint = True
use_transformer_ckpt = True
use_text_cross_attention = True
text_dropout = 0.0
fusion_dropout = 0.0
fusion_droppath = 0.1
sub_sentence_present = True
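
For reference, a minimal sketch of how groundingdino-py consumes a config file like this one; to the best of my knowledge, load_model() in groundingdino.util.inference resolves it via SLConfig.fromfile, which turns every module-level assignment into an attribute:

from groundingdino.util.slconfig import SLConfig

# Parse the config module above into an attribute-style namespace.
args = SLConfig.fromfile('extras/GroundingDINO/config/GroundingDINO_SwinT_OGC.py')
print(args.modelname, args.max_text_len)  # expected: groundingdino 256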

extras/GroundingDINO/util/inference.py (new file)
@@ -0,0 +1,98 @@
from typing import Tuple, List

import ldm_patched.modules.model_management as model_management
from ldm_patched.modules.model_patcher import ModelPatcher
from modules.config import path_inpaint
from modules.model_loader import load_file_from_url

import numpy as np
import supervision as sv
import torch
from groundingdino.util.inference import Model
from groundingdino.util.inference import load_model, preprocess_caption, get_phrases_from_posmap


class GroundingDinoModel(Model):
    def __init__(self):
        self.config_file = 'extras/GroundingDINO/config/GroundingDINO_SwinT_OGC.py'
        self.model = None
        self.load_device = torch.device('cpu')
        self.offload_device = torch.device('cpu')

    def predict_with_caption(
            self,
            image: np.ndarray,
            caption: str,
            box_threshold: float = 0.35,
            text_threshold: float = 0.25
    ) -> Tuple[sv.Detections, List[str]]:
        if self.model is None:
            filename = load_file_from_url(
                url="https://github.com/IDEA-Research/GroundingDINO/releases/download/v0.1.0-alpha/groundingdino_swint_ogc.pth",
                file_name='groundingdino_swint_ogc.pth',
                model_dir=path_inpaint)
            model = load_model(model_config_path=self.config_file, model_checkpoint_path=filename)

            self.load_device = model_management.text_encoder_device()
            self.offload_device = model_management.text_encoder_offload_device()

            model.to(self.offload_device)

            self.model = ModelPatcher(model, load_device=self.load_device, offload_device=self.offload_device)

        model_management.load_model_gpu(self.model)

        processed_image = GroundingDinoModel.preprocess_image(image_bgr=image).to(self.load_device)
        boxes, logits, phrases = predict(
            model=self.model,
            image=processed_image,
            caption=caption,
            box_threshold=box_threshold,
            text_threshold=text_threshold,
            device=self.load_device)
        source_h, source_w, _ = image.shape
        detections = GroundingDinoModel.post_process_result(
            source_h=source_h,
            source_w=source_w,
            boxes=boxes,
            logits=logits)
        return detections, phrases


def predict(
        model,
        image: torch.Tensor,
        caption: str,
        box_threshold: float,
        text_threshold: float,
        device: str = "cuda"
) -> Tuple[torch.Tensor, torch.Tensor, List[str]]:
    caption = preprocess_caption(caption=caption)

    # override to use model wrapped by patcher
    model = model.model.to(device)
    image = image.to(device)

    with torch.no_grad():
        outputs = model(image[None], captions=[caption])

    prediction_logits = outputs["pred_logits"].cpu().sigmoid()[0]  # prediction_logits.shape = (nq, 256)
    prediction_boxes = outputs["pred_boxes"].cpu()[0]  # prediction_boxes.shape = (nq, 4)

    mask = prediction_logits.max(dim=1)[0] > box_threshold
    logits = prediction_logits[mask]  # logits.shape = (n, 256)
    boxes = prediction_boxes[mask]  # boxes.shape = (n, 4)

    tokenizer = model.tokenizer
    tokenized = tokenizer(caption)

    phrases = [
        get_phrases_from_posmap(logit > text_threshold, tokenized, tokenizer).replace('.', '')
        for logit
        in logits
    ]

    return boxes, logits.max(dim=1)[0], phrases


default_groundingdino = GroundingDinoModel().predict_with_caption
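
For reference, a usage sketch assuming the Fooocus runtime is importable and an RGB numpy image is at hand; the first call downloads groundingdino_swint_ogc.pth into path_inpaint, and boxes come back in pixel space on the supervision Detections object:

import numpy as np
from extras.GroundingDINO.util.inference import default_groundingdino

image = np.zeros((512, 512, 3), dtype=np.uint8)  # placeholder image
detections, phrases = default_groundingdino(
    image=image,
    caption='a cat',
    box_threshold=0.3,
    text_threshold=0.25)
print(detections.xyxy.shape, phrases)  # (n, 4) xyxy boxes, one phrase per box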

extras/inpaint_mask.py (new file)
@@ -0,0 +1,42 @@
from PIL import Image
import numpy as np
import torch
from rembg import remove, new_session
from extras.GroundingDINO.util.inference import default_groundingdino

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')


def run_grounded_sam(input_image, text_prompt, box_threshold, text_threshold):

    # run grounding dino model
    boxes, _ = default_groundingdino(
        image=np.array(input_image),
        caption=text_prompt,
        box_threshold=box_threshold,
        text_threshold=text_threshold
    )

    return boxes.xyxy


def generate_mask_from_image(image, mask_model, extras):
    if image is None:
        return

    if 'image' in image:
        image = image['image']

    if mask_model == 'sam':
        boxes = run_grounded_sam(Image.fromarray(image), extras['sam_prompt_text'], box_threshold=extras['box_threshold'], text_threshold=extras['text_threshold'])
        boxes = np.array([[0, 0, image.shape[1], image.shape[0]]]) if len(boxes) == 0 else boxes
        extras['sam_prompt'] = []
        for idx, box in enumerate(boxes):
            extras['sam_prompt'] += [{"type": "rectangle", "data": box.tolist()}]

    return remove(
        image,
        session=new_session(mask_model, **extras),
        only_mask=True,
        **extras
    )
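
For reference, a sketch of the 'sam' path as webui.py drives it; the extras keys mirror the advanced-options UI fields, and the GroundingDINO boxes end up in extras['sam_prompt'] as rectangle prompts for rembg's SAM session:

import numpy as np
from extras.inpaint_mask import generate_mask_from_image

image = np.zeros((512, 512, 3), dtype=np.uint8)  # placeholder RGB image
extras = {
    'sam_prompt_text': 'hair',        # phrase handed to GroundingDINO
    'sam_model': 'sam_vit_b_01ec64',
    'sam_quant': False,
    'box_threshold': 0.3,
    'text_threshold': 0.25,
}
mask = generate_mask_from_image(image, 'sam', extras)  # single-channel mask (only_mask=True)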

@@ -36,6 +36,15 @@
     "Top": "Top",
     "Bottom": "Bottom",
     "* \"Inpaint or Outpaint\" is powered by the sampler \"DPMPP Fooocus Seamless 2M SDE Karras Inpaint Sampler\" (beta)": "* \"Inpaint or Outpaint\" is powered by the sampler \"DPMPP Fooocus Seamless 2M SDE Karras Inpaint Sampler\" (beta)",
+    "Mask generation model": "Mask generation model",
+    "Cloth category": "Cloth category",
+    "Segmentation prompt": "Segmentation prompt",
+    "Advanced options": "Advanced options",
+    "SAM model": "SAM model",
+    "Quantization": "Quantization",
+    "Box Threshold": "Box Threshold",
+    "Text Threshold": "Text Threshold",
+    "Generate mask from image": "Generate mask from image",
     "Setting": "Setting",
     "Style": "Style",
     "Performance": "Performance",

@@ -22,7 +22,9 @@ import fooocus_version
 from build_launcher import build_launcher
 from modules.launch_util import is_installed, run, python, run_pip, requirements_met
 from modules.model_loader import load_file_from_url
+from modules import config
+
+os.environ["U2NET_HOME"] = config.path_inpaint

 REINSTALL_ALL = False
 TRY_INSTALL_XFORMERS = False
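
Setting U2NET_HOME before rembg is first used redirects its model downloads into Fooocus's inpaint model folder; rembg otherwise falls back to ~/.u2net. A minimal sketch of the effect, with a hypothetical path:

import os
os.environ["U2NET_HOME"] = "/opt/fooocus/models/inpaint"  # hypothetical location

from rembg import new_session
session = new_session("isnet-general-use")  # weights download under U2NET_HOME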

@@ -320,13 +320,17 @@ def worker():
             inpaint_mask = inpaint_input_image['mask'][:, :, 0]

             if inpaint_mask_upload_checkbox:
-                if isinstance(inpaint_mask_image_upload, np.ndarray):
-                    if inpaint_mask_image_upload.ndim == 3:
-                        H, W, C = inpaint_image.shape
-                        inpaint_mask_image_upload = resample_image(inpaint_mask_image_upload, width=W, height=H)
-                        inpaint_mask_image_upload = np.mean(inpaint_mask_image_upload, axis=2)
-                        inpaint_mask_image_upload = (inpaint_mask_image_upload > 127).astype(np.uint8) * 255
-                        inpaint_mask = np.maximum(inpaint_mask, inpaint_mask_image_upload)
+                if isinstance(inpaint_mask_image_upload, dict):
+                    if (isinstance(inpaint_mask_image_upload['image'], np.ndarray)
+                            and isinstance(inpaint_mask_image_upload['mask'], np.ndarray)
+                            and inpaint_mask_image_upload['image'].ndim == 3):
+                        inpaint_mask_image_upload = np.maximum(inpaint_mask_image_upload['image'], inpaint_mask_image_upload['mask'])
+                if isinstance(inpaint_mask_image_upload, np.ndarray) and inpaint_mask_image_upload.ndim == 3:
+                    H, W, C = inpaint_image.shape
+                    inpaint_mask_image_upload = resample_image(inpaint_mask_image_upload, width=W, height=H)
+                    inpaint_mask_image_upload = np.mean(inpaint_mask_image_upload, axis=2)
+                    inpaint_mask_image_upload = (inpaint_mask_image_upload > 127).astype(np.uint8) * 255
+                    inpaint_mask = np.maximum(inpaint_mask, inpaint_mask_image_upload)

             if int(inpaint_erode_or_dilate) != 0:
                 inpaint_mask = erode_or_dilate(inpaint_mask, inpaint_erode_or_dilate)
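
Both branches funnel the upload into the same normalization before the union with the painted canvas mask: average the channels, binarize at 127, then take the element-wise maximum. A standalone sketch with toy shapes:

import numpy as np

canvas_mask = np.zeros((4, 4), dtype=np.uint8)                   # mask painted in the UI
uploaded = np.random.randint(0, 256, (4, 4, 3), dtype=np.uint8)  # uploaded RGB mask

gray = np.mean(uploaded, axis=2)               # collapse channels to one
binary = (gray > 127).astype(np.uint8) * 255   # hard threshold to {0, 255}
merged = np.maximum(canvas_mask, binary)       # union of both masks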

@@ -409,6 +409,24 @@ metadata_created_by = get_config_item_or_set_default(
 example_inpaint_prompts = [[x] for x in example_inpaint_prompts]

+default_inpaint_mask_model = get_config_item_or_set_default(
+    key='default_inpaint_mask_model',
+    default_value='isnet-general-use',
+    validator=lambda x: x in modules.flags.inpaint_mask_models
+)
+
+default_inpaint_mask_cloth_category = get_config_item_or_set_default(
+    key='default_inpaint_mask_cloth_category',
+    default_value='full',
+    validator=lambda x: x in modules.flags.inpaint_mask_cloth_category
+)
+
+default_inpaint_mask_sam_model = get_config_item_or_set_default(
+    key='default_inpaint_mask_sam_model',
+    default_value='sam_vit_b_01ec64',
+    validator=lambda x: x in modules.flags.inpaint_mask_sam_model
+)
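
Presumably these keys can be overridden from the user's config.txt like any other get_config_item_or_set_default entry, with values that fail the validator falling back to the defaults shown above. A hypothetical override fragment (JSON-compatible dict):

{
    "default_inpaint_mask_model": "sam",
    "default_inpaint_mask_sam_model": "sam_vit_h_4b8939"
}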
+
 config_dict["default_loras"] = default_loras = default_loras[:default_max_lora_number] + [['None', 1.0] for _ in range(default_max_lora_number - len(default_loras))]

 possible_preset_keys = [

@@ -69,6 +69,10 @@ default_parameters = {
 output_formats = ['png', 'jpg', 'webp']

+inpaint_mask_models = ['u2net', 'u2netp', 'u2net_human_seg', 'u2net_cloth_seg', 'silueta', 'isnet-general-use', 'isnet-anime', 'sam']
+inpaint_mask_cloth_category = ['full', 'upper', 'lower']
+inpaint_mask_sam_model = ['sam_vit_b_01ec64', 'sam_vit_h_4b8939', 'sam_vit_l_0b3195']
+
 inpaint_engine_versions = ['None', 'v1', 'v2.5', 'v2.6']
 inpaint_option_default = 'Inpaint or Outpaint (default)'
 inpaint_option_detail = 'Improve Detail (face, hand, eyes, etc.)'

@@ -16,3 +16,5 @@ opencv-contrib-python==4.8.0.74
 httpx==0.24.1
 onnxruntime==1.16.3
 timm==0.9.2
+rembg==2.0.53
+groundingdino-py==0.4.0

webui.py
@@ -197,16 +197,67 @@ with shared.gradio_root:
                                  queue=False, show_progress=False)
 with gr.TabItem(label='Inpaint or Outpaint') as inpaint_tab:
     with gr.Row():
-        inpaint_input_image = grh.Image(label='Drag inpaint or outpaint image to here', source='upload', type='numpy', tool='sketch', height=500, brush_color="#FFFFFF", elem_id='inpaint_canvas')
-        inpaint_mask_image = grh.Image(label='Mask Upload', source='upload', type='numpy', height=500, visible=False)
+        with gr.Column():
+            inpaint_input_image = grh.Image(label='Drag inpaint or outpaint image to here', source='upload', type='numpy', tool='sketch', height=500, brush_color="#FFFFFF", elem_id='inpaint_canvas')
+            inpaint_mode = gr.Dropdown(choices=modules.flags.inpaint_options, value=modules.flags.inpaint_option_default, label='Method')
+            inpaint_additional_prompt = gr.Textbox(placeholder="Describe what you want to inpaint.", elem_id='inpaint_additional_prompt', label='Inpaint Additional Prompt', visible=False)
+            outpaint_selections = gr.CheckboxGroup(choices=['Left', 'Right', 'Top', 'Bottom'], value=[], label='Outpaint Direction')
+            example_inpaint_prompts = gr.Dataset(samples=modules.config.example_inpaint_prompts,
+                                                 label='Additional Prompt Quick List',
+                                                 components=[inpaint_additional_prompt],
+                                                 visible=False)
+            gr.HTML('* Powered by Fooocus Inpaint Engine <a href="https://github.com/lllyasviel/Fooocus/discussions/414" target="_blank">\U0001F4D4 Document</a>')
+            example_inpaint_prompts.click(lambda x: x[0], inputs=example_inpaint_prompts, outputs=inpaint_additional_prompt, show_progress=False, queue=False)
-    with gr.Row():
-        inpaint_additional_prompt = gr.Textbox(placeholder="Describe what you want to inpaint.", elem_id='inpaint_additional_prompt', label='Inpaint Additional Prompt', visible=False)
-        outpaint_selections = gr.CheckboxGroup(choices=['Left', 'Right', 'Top', 'Bottom'], value=[], label='Outpaint Direction')
-        inpaint_mode = gr.Dropdown(choices=modules.flags.inpaint_options, value=modules.flags.inpaint_option_default, label='Method')
-    example_inpaint_prompts = gr.Dataset(samples=modules.config.example_inpaint_prompts, label='Additional Prompt Quick List', components=[inpaint_additional_prompt], visible=False)
-    gr.HTML('* Powered by Fooocus Inpaint Engine <a href="https://github.com/lllyasviel/Fooocus/discussions/414" target="_blank">\U0001F4D4 Document</a>')
-    example_inpaint_prompts.click(lambda x: x[0], inputs=example_inpaint_prompts, outputs=inpaint_additional_prompt, show_progress=False, queue=False)
+        with gr.Column(visible=False) as inpaint_mask_generation_col:
+            inpaint_mask_image = grh.Image(label='Mask Upload', source='upload', type='numpy', tool='sketch', height=500, brush_color="#FFFFFF", mask_opacity=1)
+            inpaint_mask_model = gr.Dropdown(label='Mask generation model',
+                                             choices=flags.inpaint_mask_models,
+                                             value=modules.config.default_inpaint_mask_model)
+            inpaint_mask_cloth_category = gr.Dropdown(label='Cloth category',
+                                                      choices=flags.inpaint_mask_cloth_category,
+                                                      value=modules.config.default_inpaint_mask_cloth_category,
+                                                      visible=False)
+            inpaint_mask_sam_prompt_text = gr.Textbox(label='Segmentation prompt', value='', visible=False)
+            with gr.Accordion("Advanced options", visible=False, open=False) as inpaint_mask_advanced_options:
+                inpaint_mask_sam_model = gr.Dropdown(label='SAM model', choices=flags.inpaint_mask_sam_model, value=modules.config.default_inpaint_mask_sam_model)
+                inpaint_mask_sam_quant = gr.Checkbox(label='Quantization', value=False)
+                inpaint_mask_box_threshold = gr.Slider(label="Box Threshold", minimum=0.0, maximum=1.0, value=0.3, step=0.05)
+                inpaint_mask_text_threshold = gr.Slider(label="Text Threshold", minimum=0.0, maximum=1.0, value=0.25, step=0.05)
+            generate_mask_button = gr.Button(value='Generate mask from image')
+
+            def generate_mask(image, mask_model, cloth_category, sam_prompt_text, sam_model, sam_quant, box_threshold, text_threshold):
+                from extras.inpaint_mask import generate_mask_from_image
+
+                extras = {}
+                if mask_model == 'u2net_cloth_seg':
+                    extras['cloth_category'] = cloth_category
+                elif mask_model == 'sam':
+                    extras['sam_prompt_text'] = sam_prompt_text
+                    extras['sam_model'] = sam_model
+                    extras['sam_quant'] = sam_quant
+                    extras['box_threshold'] = box_threshold
+                    extras['text_threshold'] = text_threshold
+
+                return generate_mask_from_image(image, mask_model, extras)
+
+            generate_mask_button.click(fn=generate_mask,
+                                       inputs=[
+                                           inpaint_input_image, inpaint_mask_model,
+                                           inpaint_mask_cloth_category,
+                                           inpaint_mask_sam_prompt_text,
+                                           inpaint_mask_sam_model,
+                                           inpaint_mask_sam_quant,
+                                           inpaint_mask_box_threshold,
+                                           inpaint_mask_text_threshold
+                                       ],
+                                       outputs=inpaint_mask_image, show_progress=True, queue=True)
+
+            inpaint_mask_model.change(lambda x: [gr.update(visible=x == 'u2net_cloth_seg'), gr.update(visible=x == 'sam'), gr.update(visible=x == 'sam')],
+                                      inputs=inpaint_mask_model,
+                                      outputs=[inpaint_mask_cloth_category, inpaint_mask_sam_prompt_text, inpaint_mask_advanced_options],
+                                      queue=False, show_progress=False)
 with gr.TabItem(label='Describe') as desc_tab:
     with gr.Row():
         with gr.Column():

@@ -497,9 +548,10 @@ with shared.gradio_root:
                  inpaint_strength, inpaint_respective_field,
                  inpaint_mask_upload_checkbox, invert_mask_checkbox, inpaint_erode_or_dilate]

-    inpaint_mask_upload_checkbox.change(lambda x: gr.update(visible=x),
-                                        inputs=inpaint_mask_upload_checkbox,
-                                        outputs=inpaint_mask_image, queue=False, show_progress=False)
+    inpaint_mask_upload_checkbox.change(lambda x: [gr.update(visible=x)] * 2,
+                                        inputs=inpaint_mask_upload_checkbox,
+                                        outputs=[inpaint_mask_image, inpaint_mask_generation_col],
+                                        queue=False, show_progress=False)

 with gr.Tab(label='FreeU'):
     freeu_enabled = gr.Checkbox(label='Enabled', value=False)
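
The reworked checkbox handler returns one gr.update per output component, matched positionally, so the mask upload image and the generation column now toggle together. A minimal sketch of the pattern, assuming Gradio 3.x semantics:

import gradio as gr

def toggle_pair(visible):
    # one update per entry in `outputs`, in order
    return [gr.update(visible=visible)] * 2

# wiring analogous to the diff above:
# checkbox.change(toggle_pair, inputs=checkbox, outputs=[mask_image, generation_col])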