Fooocus Prompt Expansion (#329)

* add vae approx download
lllyasviel 2023-09-09 17:22:32 -07:00 committed by GitHub
parent 09e0d1cb3a
commit 496766edd7
19 changed files with 150532 additions and 81 deletions

.gitignore

@@ -2,6 +2,7 @@ __pycache__
 *.ckpt
 *.safetensors
 *.pth
+*.bin
 lena.png
 lena_result.png
 lena_test.py

fooocus_version.py

@@ -1 +1 @@
-version = '1.0.45'
+version = '1.0.50'

launch.py

@@ -6,7 +6,7 @@ import fooocus_version
 from modules.launch_util import is_installed, run, python, \
     run_pip, repo_dir, git_clone, requirements_met, script_path, dir_repos
 from modules.model_loader import load_file_from_url
-from modules.path import modelfile_path, lorafile_path
+from modules.path import modelfile_path, lorafile_path, vae_approx_path, fooocus_expansion_path
 REINSTALL_ALL = False
@@ -63,12 +63,26 @@ lora_filenames = [
     'https://huggingface.co/stabilityai/stable-diffusion-xl-base-1.0/resolve/main/sd_xl_offset_example-lora_1.0.safetensors')
 ]
+vae_approx_filenames = [
+    ('taesdxl_decoder.pth',
+     'https://huggingface.co/lllyasviel/misc/resolve/main/taesdxl_decoder.pth')
+]
 def download_models():
     for file_name, url in model_filenames:
         load_file_from_url(url=url, model_dir=modelfile_path, file_name=file_name)
     for file_name, url in lora_filenames:
         load_file_from_url(url=url, model_dir=lorafile_path, file_name=file_name)
+    for file_name, url in vae_approx_filenames:
+        load_file_from_url(url=url, model_dir=vae_approx_path, file_name=file_name)
+    load_file_from_url(
+        url='https://huggingface.co/lllyasviel/misc/resolve/main/fooocus_expansion.bin',
+        model_dir=fooocus_expansion_path,
+        file_name='pytorch_model.bin'
+    )
     return
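Note on this hunk: load_file_from_url is defined in modules/model_loader.py, which this diff does not touch. As a rough sketch (an assumption, not the actual Fooocus implementation), such a helper typically downloads only when the target file is missing and returns the cached path:

import os
from urllib.request import urlretrieve

def load_file_from_url(url, model_dir, file_name):
    # assumed behavior; the real helper lives in modules/model_loader.py
    os.makedirs(model_dir, exist_ok=True)
    cached_file = os.path.join(model_dir, file_name)
    if not os.path.exists(cached_file):  # skip the download when already cached
        print(f'Downloading: "{url}" to {cached_file}')
        urlretrieve(url, cached_file)
    return cached_file

Under that assumption, re-running launch.py is cheap: every model listed above is fetched at most once.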

models/prompt_expansion/fooocus_expansion/config.json (new file)

@@ -0,0 +1,39 @@
{
  "_name_or_path": "gpt2",
  "activation_function": "gelu_new",
  "architectures": [
    "GPT2LMHeadModel"
  ],
  "attn_pdrop": 0.1,
  "bos_token_id": 50256,
  "embd_pdrop": 0.1,
  "eos_token_id": 50256,
  "initializer_range": 0.02,
  "layer_norm_epsilon": 1e-05,
  "model_type": "gpt2",
  "n_ctx": 1024,
  "n_embd": 768,
  "n_head": 12,
  "n_inner": null,
  "n_layer": 12,
  "n_positions": 1024,
  "reorder_and_upcast_attn": false,
  "resid_pdrop": 0.1,
  "scale_attn_by_inverse_layer_idx": false,
  "scale_attn_weights": true,
  "summary_activation": null,
  "summary_first_dropout": 0.1,
  "summary_proj_to_labels": true,
  "summary_type": "cls_index",
  "summary_use_proj": true,
  "task_specific_params": {
    "text-generation": {
      "do_sample": true,
      "max_length": 50
    }
  },
  "torch_dtype": "float32",
  "transformers_version": "4.23.0.dev0",
  "use_cache": true,
  "vocab_size": 50257
}
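This is a stock GPT-2 (124M) configuration: 12 layers, 12 heads, 768-dim embeddings, 50257-token vocabulary; only the fine-tuned weights (pytorch_model.bin, downloaded by launch.py above) differ from vanilla gpt2. A quick sanity check, assuming the directory has been populated:

from transformers import GPT2Config

# path assumes the fooocus_expansion_path default from modules/path.py below
cfg = GPT2Config.from_pretrained('models/prompt_expansion/fooocus_expansion')
assert cfg.n_layer == 12 and cfg.n_head == 12 and cfg.n_embd == 768
print(cfg.task_specific_params)  # {'text-generation': {'do_sample': True, 'max_length': 50}}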

File diff suppressed because it is too large.

models/prompt_expansion/fooocus_expansion/special_tokens_map.json (new file)

@@ -0,0 +1,5 @@
{
  "bos_token": "<|endoftext|>",
  "eos_token": "<|endoftext|>",
  "unk_token": "<|endoftext|>"
}

File diff suppressed because it is too large.

models/prompt_expansion/fooocus_expansion/tokenizer_config.json (new file)

@@ -0,0 +1,10 @@
{
  "add_prefix_space": false,
  "bos_token": "<|endoftext|>",
  "eos_token": "<|endoftext|>",
  "model_max_length": 1024,
  "name_or_path": "gpt2",
  "special_tokens_map_file": null,
  "tokenizer_class": "GPT2Tokenizer",
  "unk_token": "<|endoftext|>"
}

File diff suppressed because one or more lines are too long
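The files above are the standard GPT-2 BPE tokenizer assets; the two suppressed diffs are presumably the large vocab.json and merges.txt. Note that <|endoftext|> (id 50256) serves as BOS, EOS, and UNK token alike. A small check, again assuming the files are in place:

from transformers import GPT2Tokenizer

tok = GPT2Tokenizer.from_pretrained('models/prompt_expansion/fooocus_expansion')
print(tok.eos_token, tok.eos_token_id)  # <|endoftext|> 50256
print(tok.encode('a cat on a sofa'))    # BPE token ids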

modules/async_worker.py

@@ -15,7 +15,7 @@ def worker():
     import modules.path
     import modules.patch
-    from modules.sdxl_styles import apply_style, aspect_ratios
+    from modules.sdxl_styles import apply_style_negative, apply_style_positive, aspect_ratios
     from modules.private_logger import log
     try:
@@ -29,19 +29,69 @@ def worker():
     def handler(task):
         prompt, negative_prompt, style_selction, performance_selction, \
-            aspect_ratios_selction, image_number, image_seed, sharpness, base_model_name, refiner_model_name, \
+            aspect_ratios_selction, image_number, image_seed, sharpness, raw_mode, \
+            base_model_name, refiner_model_name, \
             l1, w1, l2, w2, l3, w3, l4, w4, l5, w5 = task
         loras = [(l1, w1), (l2, w2), (l3, w3), (l4, w4), (l5, w5)]
         modules.patch.sharpness = sharpness
+        outputs.append(['preview', (1, 'Initializing ...', None)])
+        seed = image_seed
+        max_seed = int(1024 * 1024 * 1024)
+        if not isinstance(seed, int):
+            seed = random.randint(1, max_seed)
+        if seed < 0:
+            seed = - seed
+        seed = seed % max_seed
+        outputs.append(['preview', (3, 'Load models ...', None)])
         pipeline.refresh_base_model(base_model_name)
         pipeline.refresh_refiner_model(refiner_model_name)
         pipeline.refresh_loras(loras)
-        pipeline.clean_prompt_cond_caches()
-        p_txt, n_txt = apply_style(style_selction, prompt, negative_prompt)
+        outputs.append(['preview', (5, 'Encoding negative text ...', None)])
+        n_txt = apply_style_negative(style_selction, negative_prompt)
+        n_cond = pipeline.process_prompt(n_txt)
+        tasks = []
+        if raw_mode:
+            outputs.append(['preview', (9, 'Encoding positive text ...', None)])
+            p_txt = apply_style_positive(style_selction, prompt)
+            p_cond = pipeline.process_prompt(p_txt)
+            for i in range(image_number):
+                tasks.append(dict(
+                    prompt=prompt,
+                    negative_prompt=negative_prompt,
+                    seed=seed + i,
+                    n_cond=n_cond,
+                    p_cond=p_cond,
+                    real_positive_prompt=p_txt,
+                    real_negative_prompt=n_txt
+                ))
+        else:
+            for i in range(image_number):
+                outputs.append(['preview', (9, f'Preparing positive text #{i + 1} ...', None)])
+                current_seed = seed + i
+                p_txt = pipeline.expand_txt(prompt, current_seed)
+                print(f'Expanded positive prompt: {p_txt}')
+                p_txt = apply_style_positive(style_selction, p_txt)
+                tasks.append(dict(
+                    prompt=prompt,
+                    negative_prompt=negative_prompt,
+                    seed=current_seed,
+                    n_cond=n_cond,
+                    real_positive_prompt=p_txt,
+                    real_negative_prompt=n_txt
+                ))
+        for i, t in enumerate(tasks):
+            outputs.append(['preview', (12, f'Encoding positive text #{i + 1} ...', None)])
+            t['p_cond'] = pipeline.process_prompt(t['real_positive_prompt'])
         if performance_selction == 'Speed':
             steps = 30
@@ -53,45 +103,47 @@ def worker():
         width, height = aspect_ratios[aspect_ratios_selction]
         results = []
-        seed = image_seed
-        max_seed = int(1024*1024*1024)
-        if not isinstance(seed, int):
-            seed = random.randint(1, max_seed)
-        if seed < 0:
-            seed = - seed
-        seed = seed % max_seed
         all_steps = steps * image_number
         def callback(step, x0, x, total_steps, y):
-            done_steps = i * steps + step
+            done_steps = current_task_id * steps + step
             outputs.append(['preview', (
-                int(100.0 * float(done_steps) / float(all_steps)),
+                int(15.0 + 85.0 * float(done_steps) / float(all_steps)),
                 f'Step {step}/{total_steps} in the {i}-th Sampling',
                 y)])
-        for i in range(image_number):
-            imgs = pipeline.process(p_txt, n_txt, steps, switch, width, height, seed, callback=callback)
+        outputs.append(['preview', (13, 'Starting tasks ...', None)])
+        for current_task_id, task in enumerate(tasks):
+            imgs = pipeline.process_diffusion(
+                positive_cond=task['p_cond'],
+                negative_cond=task['n_cond'],
+                steps=steps,
+                switch=switch,
+                width=width,
+                height=height,
+                image_seed=task['seed'],
+                callback=callback)
             for x in imgs:
                 d = [
-                    ('Prompt', prompt),
-                    ('Negative Prompt', negative_prompt),
+                    ('Prompt', task['prompt']),
+                    ('Negative Prompt', task['negative_prompt']),
+                    ('Real Positive Prompt', task['real_positive_prompt']),
+                    ('Real Negative Prompt', task['real_negative_prompt']),
+                    ('Raw Mode', str(raw_mode)),
                     ('Style', style_selction),
                    ('Performance', performance_selction),
                     ('Resolution', str((width, height))),
                     ('Sharpness', sharpness),
                     ('Base Model', base_model_name),
                     ('Refiner Model', refiner_model_name),
-                    ('Seed', seed)
+                    ('Seed', task['seed'])
                 ]
                 for n, w in loras:
                     if n != 'None':
                         d.append((f'LoRA [{n}] weight', w))
                 log(x, d)
-            seed += 1
             results += imgs
         outputs.append(['results', results])
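For clarity, here is the seed handling introduced above, extracted as a standalone sketch (same logic as the diff; the function name is ours):

import random

MAX_SEED = 1024 * 1024 * 1024  # same bound as max_seed in the worker

def normalize_seed(seed):
    # non-integers (e.g. a placeholder value from the UI) get a fresh random seed
    if not isinstance(seed, int):
        seed = random.randint(1, MAX_SEED)
    # negative seeds are mirrored, then everything wraps into [0, MAX_SEED)
    if seed < 0:
        seed = -seed
    return seed % MAX_SEED

Each queued image i then uses normalize_seed(image_seed) + i, so a whole batch is reproducible from one seed while every image (and its prompt expansion) still differs.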

modules/default_pipeline.py

@@ -5,6 +5,7 @@ import modules.path
 from comfy.model_base import SDXL, SDXLRefiner
 from modules.patch import cfg_patched
+from modules.expansion import FooocusExpansion
 xl_base: core.StableDiffusionModel = None
@@ -43,7 +44,6 @@ def refresh_base_model(name):
     xl_base_patched = xl_base
     xl_base_patched_hash = ''
     print(f'Base model loaded: {xl_base_hash}')
-
     return
@@ -103,27 +103,24 @@ refresh_base_model(modules.path.default_base_model_name)
 refresh_refiner_model(modules.path.default_refiner_model_name)
 refresh_loras([(modules.path.default_lora_name, 0.5), ('None', 0.5), ('None', 0.5), ('None', 0.5), ('None', 0.5)])
-positive_conditions_cache = None
-negative_conditions_cache = None
-positive_conditions_refiner_cache = None
-negative_conditions_refiner_cache = None
+expansion_model = FooocusExpansion()
-def clean_prompt_cond_caches():
-    global positive_conditions_cache, negative_conditions_cache, \
-        positive_conditions_refiner_cache, negative_conditions_refiner_cache
-    positive_conditions_cache = None
-    negative_conditions_cache = None
-    positive_conditions_refiner_cache = None
-    negative_conditions_refiner_cache = None
-    return
+def expand_txt(*args, **kwargs):
+    return expansion_model(*args, **kwargs)
+def process_prompt(text):
+    base_cond = core.encode_prompt_condition(clip=xl_base_patched.clip, prompt=text)
+    if xl_refiner is not None:
+        refiner_cond = core.encode_prompt_condition(clip=xl_refiner.clip, prompt=text)
+    else:
+        refiner_cond = None
+    return base_cond, refiner_cond
 @torch.no_grad()
-def process(positive_prompt, negative_prompt, steps, switch, width, height, image_seed, callback):
-    global positive_conditions_cache, negative_conditions_cache, \
-        positive_conditions_refiner_cache, negative_conditions_refiner_cache
+def process_diffusion(positive_cond, negative_cond, steps, switch, width, height, image_seed, callback):
     if xl_base is not None:
         xl_base.unet.model_options['sampler_cfg_function'] = cfg_patched
@@ -133,40 +130,27 @@ def process(positive_prompt, negative_prompt, steps, switch, width, height, imag
     if xl_refiner is not None:
         xl_refiner.unet.model_options['sampler_cfg_function'] = cfg_patched
-    positive_conditions = core.encode_prompt_condition(clip=xl_base_patched.clip, prompt=positive_prompt) if positive_conditions_cache is None else positive_conditions_cache
-    negative_conditions = core.encode_prompt_condition(clip=xl_base_patched.clip, prompt=negative_prompt) if negative_conditions_cache is None else negative_conditions_cache
-    positive_conditions_cache = positive_conditions
-    negative_conditions_cache = negative_conditions
     empty_latent = core.generate_empty_latent(width=width, height=height, batch_size=1)
     if xl_refiner is not None:
-        positive_conditions_refiner = core.encode_prompt_condition(clip=xl_refiner.clip, prompt=positive_prompt) if positive_conditions_refiner_cache is None else positive_conditions_refiner_cache
-        negative_conditions_refiner = core.encode_prompt_condition(clip=xl_refiner.clip, prompt=negative_prompt) if negative_conditions_refiner_cache is None else negative_conditions_refiner_cache
-        positive_conditions_refiner_cache = positive_conditions_refiner
-        negative_conditions_refiner_cache = negative_conditions_refiner
         sampled_latent = core.ksampler_with_refiner(
             model=xl_base_patched.unet,
-            positive=positive_conditions,
-            negative=negative_conditions,
+            positive=positive_cond[0],
+            negative=negative_cond[0],
             refiner=xl_refiner.unet,
-            refiner_positive=positive_conditions_refiner,
-            refiner_negative=negative_conditions_refiner,
+            refiner_positive=positive_cond[1],
+            refiner_negative=negative_cond[1],
             refiner_switch_step=switch,
             latent=empty_latent,
             steps=steps, start_step=0, last_step=steps, disable_noise=False, force_full_denoise=True,
             seed=image_seed,
             callback_function=callback
         )
     else:
         sampled_latent = core.ksampler(
             model=xl_base_patched.unet,
-            positive=positive_conditions,
-            negative=negative_conditions,
+            positive=positive_cond[0],
+            negative=negative_cond[0],
             latent=empty_latent,
             steps=steps, start_step=0, last_step=steps, disable_noise=False, force_full_denoise=True,
             seed=image_seed,
@@ -174,7 +158,5 @@ def process(positive_prompt, negative_prompt, steps, switch, width, height, imag
     )
     decoded_latent = core.decode_vae(vae=xl_base_patched.vae, latent_image=sampled_latent)
     images = core.image_to_numpy(decoded_latent)
-
-
     return images
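The split into process_prompt and process_diffusion replaces the old global condition caches: text encoding now happens once per unique prompt in the worker, and sampling consumes the precomputed (base, refiner) condition pairs. An illustrative call sequence (the prompt strings and parameter values here are hypothetical, not defaults from this commit):

import modules.default_pipeline as pipeline

p_cond = pipeline.process_prompt('cinematic photo of a cat')  # (base_cond, refiner_cond or None)
n_cond = pipeline.process_prompt('low quality, blurry')
images = pipeline.process_diffusion(
    positive_cond=p_cond, negative_cond=n_cond,
    steps=30, switch=20, width=1152, height=896,
    image_seed=42, callback=None)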

modules/expansion.py (new file)

@@ -0,0 +1,23 @@
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline, set_seed
from modules.path import fooocus_expansion_path


class FooocusExpansion:
    def __init__(self):
        self.tokenizer = AutoTokenizer.from_pretrained(fooocus_expansion_path)
        self.model = AutoModelForCausalLM.from_pretrained(fooocus_expansion_path)
        self.pipe = pipeline('text-generation',
                             model=self.model,
                             tokenizer=self.tokenizer,
                             device='cpu',
                             torch_dtype=torch.float32)
        print('Fooocus Expansion engine loaded.')

    def __call__(self, prompt, seed):
        prompt = str(prompt).rstrip('\n')
        seed = int(seed)
        set_seed(seed)
        response = self.pipe(prompt, max_length=len(prompt) + 256)
        result = response[0]['generated_text'].rstrip('\n')
        return result
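A minimal usage sketch of the new class; because set_seed reseeds the RNG before each generation, the expansion is deterministic for a fixed (prompt, seed) pair:

from modules.expansion import FooocusExpansion

expansion = FooocusExpansion()  # loads the GPT-2 weights from fooocus_expansion_path
print(expansion('a cat on a sofa', seed=42))
# -> the original prompt continued with model-generated style/quality keywords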

modules/path.py

@@ -2,8 +2,12 @@ import os
 modelfile_path = os.path.abspath(os.path.join(os.path.dirname(__file__), '../models/checkpoints/'))
 lorafile_path = os.path.abspath(os.path.join(os.path.dirname(__file__), '../models/loras/'))
+vae_approx_path = os.path.abspath(os.path.join(os.path.dirname(__file__), '../models/vae_approx/'))
 temp_outputs_path = os.path.abspath(os.path.join(os.path.dirname(__file__), '../outputs/'))
+fooocus_expansion_path = os.path.abspath(os.path.join(os.path.dirname(__file__),
+                                                      '../models/prompt_expansion/fooocus_expansion'))
 os.makedirs(temp_outputs_path, exist_ok=True)
 default_base_model_name = 'sd_xl_base_1.0_0.9vae.safetensors'

modules/private_logger.py

@@ -22,7 +22,7 @@ def log(img, dic):
         f.write(f"<p>{only_name}</p>\n")
         i = 0
         for k, v in dic:
-            if i < 2:
+            if i < 4:
                 f.write(f"<p>{k}: <b>{v}</b> </p>\n")
             else:
                 if i % 2 == 0:

modules/sdxl_styles.py

@@ -959,6 +959,14 @@ SD_XL_BASE_RATIOS = {
 aspect_ratios = {str(v[0]) + '×' + str(v[1]): v for k, v in SD_XL_BASE_RATIOS.items()}
-def apply_style(style, positive, negative):
+def apply_style_positive(style, txt):
     p, n = styles.get(style, default_style)
-    return p.replace('{prompt}', positive), n + ', ' + negative
+    return p.replace('{prompt}', txt)
+
+
+def apply_style_negative(style, txt):
+    p, n = styles.get(style, default_style)
+    if n == '':
+        return txt
+    else:
+        return n + ', ' + txt
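The old apply_style returned a (positive, negative) pair in one call; the split lets the worker style the negative prompt once while styling each expanded positive prompt separately. A self-contained sketch with a hypothetical style template (real templates live in this module's styles dict):

# hypothetical style entry for illustration only
styles = {'demo': ('cinematic photo of {prompt}, dramatic lighting', 'ugly, deformed')}
default_style = styles['demo']

def apply_style_positive(style, txt):
    p, n = styles.get(style, default_style)
    return p.replace('{prompt}', txt)

def apply_style_negative(style, txt):
    p, n = styles.get(style, default_style)
    return txt if n == '' else n + ', ' + txt

print(apply_style_positive('demo', 'a red car'))    # cinematic photo of a red car, dramatic lighting
print(apply_style_negative('demo', 'low quality'))  # ugly, deformed, low quality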

readme.md

@@ -97,17 +97,16 @@ Coming soon ...
 Below things are already inside the software, and **users do not need to do anything about these**.
-~Note that some of these tricks are currently (2023 Aug 11) impossible to reproduce in Automatic1111's interface or ComfyUI's node system.~ (Update Aug 21: We are working on implementing some of these as webui extensions/features.)
+1. Prompt expansion and the "Raw mode" to turn it off (similar to Midjourney's hidden pre-processing and "raw" mode, or LeonardoAI's Prompt Magic).
 (the remaining items are the previous items 1-9, renumbered)
 2. Native refiner swap inside one single k-sampler (see the sketch after this list). The advantage is that the refiner model can now reuse the base model's momentum (or the ODE's history parameters) collected from k-sampling to achieve more coherent sampling. In Automatic1111's high-res fix and ComfyUI's node system, the base model and refiner use two independent k-samplers, which means the momentum is largely wasted and the sampling continuity is broken. Fooocus uses its own advanced k-diffusion sampling that ensures a seamless, native, and continuous swap in a refiner setup. (Update Aug 13: I actually discussed this with Automatic1111 several days ago, and it seems that the "native refiner swap inside one single k-sampler" is [merged](https://github.com/AUTOMATIC1111/stable-diffusion-webui/pull/12371) into the dev branch of webui. Great!)
 3. Negative ADM guidance. Because the highest resolution level of XL Base has no cross attentions, the positive and negative signals for XL's highest resolution level cannot receive enough contrast during CFG sampling, causing the results to look a bit plastic or overly smooth in certain cases. Fortunately, since XL's highest resolution level is still conditioned on image aspect ratios (ADM), we can modify the ADM on the positive/negative side to compensate for the lack of CFG contrast at the highest resolution level. (Update Aug 16: the iOS app [Drawing Things](https://apps.apple.com/us/app/draw-things-ai-generation/id6444050820) will support Negative ADM Guidance. Great!)
 4. We implemented a carefully tuned variation of Section 5.1 of ["Improving Sample Quality of Diffusion Models Using Self-Attention Guidance"](https://arxiv.org/pdf/2210.00939.pdf). The weight is set very low, but this is Fooocus's final guarantee that XL will never yield an overly smooth or plastic appearance (examples [here](https://github.com/lllyasviel/Fooocus/discussions/117)). This almost entirely eliminates the cases where XL still occasionally produces overly smooth results, even with negative ADM guidance. (Update 2023 Aug 18: the Gaussian kernel of SAG was changed to an anisotropic kernel for better structure preservation and fewer artifacts.)
 5. We modified the style templates a bit and added "cinematic-default".
 6. We tested "sd_xl_offset_example-lora_1.0.safetensors", and it seems that when the lora weight is below 0.5, the results are always better than XL without the lora.
 7. The parameters of the samplers are carefully tuned.
 8. Because XL uses positional encoding for the generation resolution, images generated at several fixed resolutions look a bit better than those from arbitrary resolutions (the positional encoding does not handle integer resolutions unseen during training very well). This suggests that the resolutions in the UI may be hard-coded for best results.
 9. Separate prompts for the two text encoders seem unnecessary. Separate prompts for the base model and refiner may work, but the effects are random, so we refrain from implementing this.
 10. The DPM family seems well-suited for XL: XL sometimes generates overly smooth textures, while the DPM family sometimes generates overly dense texture detail, and their joint effect looks neutral and appealing to human perception.
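As a conceptual sketch of item 2's refiner swap (hypothetical names and a toy step function; the real implementation is Fooocus's patched k-diffusion sampling, not this loop), the key point is that a single trajectory is shared across the swap:

def sample_with_refiner_swap(base_unet, refiner_unet, x, sigmas, switch_step, step_fn):
    # one continuous sampling loop: only the denoiser is swapped at switch_step,
    # so the ODE history/momentum carries over from the base model to the refiner
    for i in range(len(sigmas) - 1):
        model = base_unet if i < switch_step else refiner_unet
        x = step_fn(model, x, sigmas[i], sigmas[i + 1])
    return x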
 ## Advanced Features

update_log.md

@@ -1,3 +1,7 @@
+### 1.0.50
+
+* Prompt expansion and a "Raw mode" to turn it off (similar to Midjourney's "raw").
+
 ### 1.0.45
 * Reworked SAG, removed unnecessary patch

webui.py

@@ -56,10 +56,11 @@ with shared.gradio_root:
         with gr.Column(scale=0.5, visible=False) as right_col:
             with gr.Tab(label='Setting'):
                 performance_selction = gr.Radio(label='Performance', choices=['Speed', 'Quality'], value='Speed')
-                aspect_ratios_selction = gr.Radio(label='Aspect Ratios (width × height)', choices=list(aspect_ratios.keys()),
-                                                  value='1152×896')
+                aspect_ratios_selction = gr.Radio(label='Aspect Ratios', choices=list(aspect_ratios.keys()),
+                                                  value='1152×896', info='width × height')
                 image_number = gr.Slider(label='Image Number', minimum=1, maximum=32, step=1, value=2)
-                negative_prompt = gr.Textbox(label='Negative Prompt', show_label=True, placeholder="Type prompt here.")
+                negative_prompt = gr.Textbox(label='Negative Prompt', show_label=True, placeholder="Type prompt here.",
+                                             info='Describing objects that you do not want to see.')
                 seed_random = gr.Checkbox(label='Random', value=True)
                 image_seed = gr.Number(label='Seed', value=0, precision=0, visible=False)
@@ -75,8 +76,11 @@ with shared.gradio_root:
                 seed_random.change(random_checked, inputs=[seed_random], outputs=[image_seed])
             with gr.Tab(label='Style'):
-                style_selction = gr.Radio(show_label=False, container=True,
-                                          choices=style_keys, value='cinematic-default')
+                raw_mode_check = gr.Checkbox(label='Raw Mode', value=False,
+                                             info='Similar to Midjourney\'s \"raw\" mode.')
+                style_selction = gr.Radio(show_label=True, container=True,
+                                          choices=style_keys, value='cinematic-default', label='Image Style',
+                                          info='Similar to Midjourney\'s \"--style\".')
             with gr.Tab(label='Advanced'):
                 with gr.Row():
                     base_model = gr.Dropdown(label='SDXL Base Model', choices=modules.path.model_filenames, value=modules.path.default_base_model_name, show_label=True)
@@ -107,7 +111,7 @@ with shared.gradio_root:
     advanced_checkbox.change(lambda x: gr.update(visible=x), advanced_checkbox, right_col)
     ctrls = [
         prompt, negative_prompt, style_selction,
-        performance_selction, aspect_ratios_selction, image_number, image_seed, sharpness
+        performance_selction, aspect_ratios_selction, image_number, image_seed, sharpness, raw_mode_check
     ]
     ctrls += [base_model, refiner_model] + lora_ctrls
     run_button.click(fn=refresh_seed, inputs=[seed_random, image_seed], outputs=image_seed)\