update backend + revise styles

2023-10-26 08:24:45 -07:00 · 2023-10-26 08:24:45 -07:00 · 175f3e3040
commit 175f3e3040
parent 214c2ba04c
8 changed files with 51 additions and 32 deletions
--- a/backend/headless/fcbh/clip_vision.py
+++ b/backend/headless/fcbh/clip_vision.py
@@ -1,5 +1,5 @@
-from transformers import CLIPVisionModelWithProjection, CLIPVisionConfig, CLIPImageProcessor, modeling_utils
+from transformers import CLIPVisionModelWithProjection, CLIPVisionConfig, modeling_utils
-from .utils import load_torch_file, transformers_convert
+from .utils import load_torch_file, transformers_convert, common_upscale
 import os
 import torch
 import contextlib
@@ -7,6 +7,18 @@ import contextlib
 import fcbh.ops
 import fcbh.model_patcher
 import fcbh.model_management
 import fcbh.utils
 def clip_preprocess(image, size=224):
    """Resize, center-crop and normalize an image batch with torch ops.

    The shorter of dimensions 1 and 2 of ``image`` is scaled to ``size``
    (bicubic, antialiased), a ``size`` x ``size`` window is cut from the
    center, values are snapped to 8-bit levels, and the result is
    normalized per channel with the fixed mean / std below.

    Args:
        image: 4-D tensor whose last dimension is the channel axis.
            Assumes layout (batch, height, width, 3) with values in
            [0, 1] -- TODO confirm against callers.
        size: Target edge length of the square output crop (default 224).

    Returns:
        Tensor with the channel axis moved to position 1 and both spatial
        dimensions equal to ``size``, on the same device and dtype as
        ``image``.
    """
    # Per-channel normalization constants, created on the input's device/dtype.
    mean = torch.tensor([ 0.48145466,0.4578275,0.40821073], device=image.device, dtype=image.dtype)
    std = torch.tensor([0.26862954,0.26130258,0.27577711], device=image.device, dtype=image.dtype)
    # Scale factor that brings the shorter of dims 1 and 2 to `size`.
    scale = (size / min(image.shape[1], image.shape[2]))
    # Move the last dim to position 1 for interpolate; the target size is
    # computed from the pre-move shape (dims 1 and 2 of the original tensor).
    image = torch.nn.functional.interpolate(image.movedim(-1, 1), size=(round(scale * image.shape[1]), round(scale * image.shape[2])), mode="bicubic", antialias=True)
    # Offsets of a centered size x size window in the resized dims 2 and 3.
    h = (image.shape[2] - size)//2
    w = (image.shape[3] - size)//2
    image = image[:,:,h:h+size,w:w+size]
    # Clamp to [0, 255] and round, then rescale, so values land on 8-bit levels.
    image = torch.clip((255. * image), 0, 255).round() / 255.0
    # Reshape mean/std to (3, 1, 1) so they broadcast over the channel axis.
    return (image - mean.view([3,1,1])) / std.view([3,1,1])
 class ClipVisionModel():
    def __init__(self, json_config):
@@ -23,25 +35,12 @@ class ClipVisionModel():
        self.model.to(self.dtype)
        self.patcher = fcbh.model_patcher.ModelPatcher(self.model, load_device=self.load_device, offload_device=offload_device)
        self.processor = CLIPImageProcessor(crop_size=224,
                                            do_center_crop=True,
                                            do_convert_rgb=True,
                                            do_normalize=True,
                                            do_resize=True,
                                            image_mean=[ 0.48145466,0.4578275,0.40821073],
                                            image_std=[0.26862954,0.26130258,0.27577711],
                                            resample=3, #bicubic
                                            size=224)
    def load_sd(self, sd):
        return self.model.load_state_dict(sd, strict=False)
    def encode_image(self, image):
        img = torch.clip((255. * image), 0, 255).round().int()
        img = list(map(lambda a: a, img))
        inputs = self.processor(images=img, return_tensors="pt")
        fcbh.model_management.load_model_gpu(self.patcher)
-        pixel_values = inputs['pixel_values'].to(self.load_device)
+        pixel_values = clip_preprocess(image.to(self.load_device))
        if self.dtype != torch.float32:
            precision_scope = torch.autocast
--- a/backend/headless/fcbh/ldm/modules/attention.py
+++ b/backend/headless/fcbh/ldm/modules/attention.py
@@ -222,9 +222,14 @@ def attention_split(q, k, v, heads, mask=None):
    mem_free_total = model_management.get_free_memory(q.device)
    if _ATTN_PRECISION =="fp32":
        element_size = 4
    else:
        element_size = q.element_size()
    gb = 1024 ** 3
-    tensor_size = q.shape[0] * q.shape[1] * k.shape[1] * q.element_size()
+    tensor_size = q.shape[0] * q.shape[1] * k.shape[1] * element_size
-    modifier = 3 if q.element_size() == 2 else 2.5
+    modifier = 3 if element_size == 2 else 2.5
    mem_required = tensor_size * modifier
    steps = 1
--- a/backend/headless/fcbh/ldm/modules/sub_quadratic_attention.py
+++ b/backend/headless/fcbh/ldm/modules/sub_quadratic_attention.py
@@ -83,7 +83,8 @@ def _summarize_chunk(
        )
    max_score, _ = torch.max(attn_weights, -1, keepdim=True)
    max_score = max_score.detach()
-    torch.exp(attn_weights - max_score, out=attn_weights)
+    attn_weights -= max_score
    torch.exp(attn_weights, out=attn_weights)
    exp_weights = attn_weights.to(value.dtype)
    exp_values = torch.bmm(exp_weights, value)
    max_score = max_score.squeeze(-1)
--- a/fooocus_extras/ip_adapter.py
+++ b/fooocus_extras/ip_adapter.py
@@ -7,6 +7,7 @@ import fcbh.ldm.modules.attention as attention
 from fooocus_extras.resampler import Resampler
 from fcbh.model_patcher import ModelPatcher
 from modules.core import numpy_to_pytorch
 SD_V12_CHANNELS = [320] * 4 + [640] * 4 + [1280] * 4 + [1280] * 6 + [640] * 6 + [320] * 6 + [1280] * 2
@@ -144,14 +145,27 @@ def load_ip_adapter(clip_vision_path, ip_negative_path, ip_adapter_path):
    return
@torch.no_grad()
@torch.inference_mode()
 def clip_preprocess(image):
    """Normalize an already 224x224 image batch per channel.

    No resizing or cropping is done here; the spatial size is only checked.

    Args:
        image: 4-D tensor whose last dimension is the channel axis.
            Assumes layout (batch, 224, 224, 3) -- TODO confirm against
            callers.

    Returns:
        ``(image - mean) / std`` with the channel axis moved to position 1,
        on the same device and dtype as ``image``.

    Raises:
        AssertionError: if either spatial dimension is not 224.
    """
    # Per-channel constants shaped (1, 3, 1, 1) so they broadcast over a
    # channels-first batch; created on the input's device/dtype.
    mean = torch.tensor([0.48145466, 0.4578275, 0.40821073], device=image.device, dtype=image.dtype).view([1, 3, 1, 1])
    std = torch.tensor([0.26862954, 0.26130258, 0.27577711], device=image.device, dtype=image.dtype).view([1, 3, 1, 1])
    # Move the last (channel) dim to position 1.
    image = image.movedim(-1, 1)
    # https://github.com/tencent-ailab/IP-Adapter/blob/d580c50a291566bbf9fc7ac0f760506607297e6d/README.md?plain=1#L75
    B, C, H, W = image.shape
    # Only the spatial size is validated; B and C are unpacked but unused.
    # NOTE(review): this check disappears when Python runs with -O.
    assert H == 224 and W == 224
    return (image - mean) / std
@torch.no_grad()
@torch.inference_mode()
 def preprocess(img):
    global ip_unconds
    inputs = clip_vision.processor(images=img, return_tensors="pt")
    fcbh.model_management.load_model_gpu(clip_vision.patcher)
-    pixel_values = inputs['pixel_values'].to(clip_vision.load_device)
+    pixel_values = clip_preprocess(numpy_to_pytorch(img).to(clip_vision.load_device))
    if clip_vision.dtype != torch.float32:
        precision_scope = torch.autocast
--- a/fooocus_version.py
+++ b/fooocus_version.py
@ -1 +1 @@
-version = '2.1.744'
+version = '2.1.745'
--- a/modules/async_worker.py
+++ b/modules/async_worker.py
@@ -210,8 +210,8 @@ def worker():
        if not skip_prompt_processing:
-            prompts = remove_empty_str([safe_str(p) for p in prompt.split('\n')], default='')
+            prompts = remove_empty_str([safe_str(p) for p in prompt.splitlines()], default='')
-            negative_prompts = remove_empty_str([safe_str(p) for p in negative_prompt.split('\n')], default='')
+            negative_prompts = remove_empty_str([safe_str(p) for p in negative_prompt.splitlines()], default='')
            prompt = prompts[0]
            negative_prompt = negative_prompts[0]
@@ -239,8 +239,8 @@ def worker():
                if use_style:
                    for s in style_selections:
                        p, n = apply_style(s, positive=task_prompt)
-                        positive_basic_workloads.append(p)
+                        positive_basic_workloads += p
-                        negative_basic_workloads.append(n)
+                        negative_basic_workloads += n
                else:
                    positive_basic_workloads.append(task_prompt)
--- a/modules/sdxl_styles.py
+++ b/modules/sdxl_styles.py
@@ -40,7 +40,9 @@ for styles_file in styles_files:
    try:
        with open(os.path.join(styles_path, styles_file), encoding='utf-8') as f:
            for entry in json.load(f):
-                name, prompt, negative_prompt = normalize_key(entry['name']), entry['prompt'], entry['negative_prompt']
+                name = normalize_key(entry['name'])
                prompt = entry['prompt'] if 'prompt' in entry else ''
                negative_prompt = entry['negative_prompt'] if 'negative_prompt' in entry else ''
                styles[name] = (prompt, negative_prompt)
    except Exception as e:
        print(str(e))
@@ -53,7 +55,7 @@ legal_style_names = [fooocus_expansion] + style_keys
 def apply_style(style, positive):
    p, n = styles[style]
-    return p.replace('{prompt}', positive), n
+    return p.replace('{prompt}', positive).splitlines(), n.splitlines()
 def apply_wildcards(wildcard_text, rng, directory=wildcards_path):
--- a/sdxl_styles/sdxl_styles_fooocus.json
+++ b/sdxl_styles/sdxl_styles_fooocus.json
@@ -1,13 +1,12 @@
 [
    {
        "name": "Fooocus Enhance",
        "prompt": "{prompt} . (perfect real extremely details), award-winning, breathtaking, amazing fine detail, dramatic lighting, best quality",
        "negative_prompt": "(worst quality, low quality, normal quality, lowres, low details, oversaturated, undersaturated, overexposed, underexposed, grayscale, bw, bad photo, bad photography, bad art:1.4), (watermark, signature, text font, username, error, logo, words, letters, digits, autograph, trademark, name:1.2), (blur, blurry, grainy), morbid, ugly, asymmetrical, mutated malformed, mutilated, poorly lit, bad shadow, draft, cropped, out of frame, cut off, censored, jpeg artifacts, out of focus, glitch, duplicate, (airbrushed, cartoon, anime, semi-realistic, cgi, render, blender, digital art, manga, amateur:1.3), (3D ,3D Game, 3D Game Scene, 3D Character:1.1), (bad hands, bad anatomy, bad body, bad face, bad teeth, bad arms, bad legs, deformities:1.3)"
    },
    {
        "name": "Fooocus Sharp",
-        "prompt": "cinematic still {prompt} . sharp focus, emotional, harmonious, vignette, 4k epic detailed photograph shot on kodak detailed cinematic hbo dark moody, 35mm photo, high budget, cinemascope, moody, epic, gorgeous, film grain, grainy",
+        "prompt": "cinematic still {prompt} . emotional, harmonious, vignette, 4k epic detailed, shot on kodak, 35mm photo, sharp focus, cinemascope, moody, epic, gorgeous, film grain, grainy",
-        "negative_prompt": "blurry, anime, cartoon, blured background, graphic, bokeh, background blur, crayon, graphite, abstract, glitch, deformed, mutated, ugly, disfigured"
+        "negative_prompt": "anime, cartoon, graphic, (blur, blurry, bokeh), text, painting, crayon, graphite, abstract, glitch, deformed, mutated, ugly, disfigured"
    },
    {
        "name": "Fooocus Masterpiece",
@@ -21,7 +20,6 @@
    },
    {
        "name": "Fooocus Negative",
        "prompt": "",
        "negative_prompt": "deformed, bad anatomy, disfigured, poorly drawn face, mutated, extra limb, ugly, poorly drawn hands, missing limb, floating limbs, disconnected limbs, disconnected head, malformed hands, long neck, mutated hands and fingers, bad hands, missing fingers, cropped, worst quality, low quality, mutation, poorly drawn, huge calf, bad hands, fused hand, missing hand, disappearing arms, disappearing thigh, disappearing calf, disappearing legs, missing fingers, fused fingers, abnormal eye proportion, Abnormal hands, abnormal legs, abnormal feet, abnormal fingers, drawing, painting, crayon, sketch, graphite, impressionist, noisy, blurry, soft, deformed, ugly, anime, cartoon, graphic, text, painting, crayon, graphite, abstract, glitch"
    },
    {