(requested) support AMD 8GB GPUs via Windows DirectML

This update was requested by users.
This commit is contained in:
lllyasviel 2023-12-30 06:30:59 -08:00 committed by GitHub
parent c0e11c3451
commit 8e62a72a63
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
31 changed files with 402 additions and 148 deletions

View File

@@ -1 +1 @@
-version = '2.1.856'
+version = '2.1.857'

View File

@@ -11,7 +11,7 @@ import math
 import time
 import random
-from PIL import Image, ImageOps
+from PIL import Image, ImageOps, ImageSequence
 from PIL.PngImagePlugin import PngInfo
 import numpy as np
 import safetensors.torch
@@ -1412,17 +1412,30 @@ class LoadImage:
     FUNCTION = "load_image"

     def load_image(self, image):
         image_path = ldm_patched.utils.path_utils.get_annotated_filepath(image)
-        i = Image.open(image_path)
-        i = ImageOps.exif_transpose(i)
-        image = i.convert("RGB")
-        image = np.array(image).astype(np.float32) / 255.0
-        image = torch.from_numpy(image)[None,]
-        if 'A' in i.getbands():
-            mask = np.array(i.getchannel('A')).astype(np.float32) / 255.0
-            mask = 1. - torch.from_numpy(mask)
-        else:
-            mask = torch.zeros((64,64), dtype=torch.float32, device="cpu")
-        return (image, mask.unsqueeze(0))
+        img = Image.open(image_path)
+        output_images = []
+        output_masks = []
+        for i in ImageSequence.Iterator(img):
+            i = ImageOps.exif_transpose(i)
+            image = i.convert("RGB")
+            image = np.array(image).astype(np.float32) / 255.0
+            image = torch.from_numpy(image)[None,]
+            if 'A' in i.getbands():
+                mask = np.array(i.getchannel('A')).astype(np.float32) / 255.0
+                mask = 1. - torch.from_numpy(mask)
+            else:
+                mask = torch.zeros((64,64), dtype=torch.float32, device="cpu")
+            output_images.append(image)
+            output_masks.append(mask.unsqueeze(0))
+
+        if len(output_images) > 1:
+            output_image = torch.cat(output_images, dim=0)
+            output_mask = torch.cat(output_masks, dim=0)
+        else:
+            output_image = output_images[0]
+            output_mask = output_masks[0]
+
+        return (output_image, output_mask)

     @classmethod
     def IS_CHANGED(s, image):
@@ -1480,13 +1493,10 @@ class LoadImageMask:
         return m.digest().hex()

     @classmethod
-    def VALIDATE_INPUTS(s, image, channel):
+    def VALIDATE_INPUTS(s, image):
         if not ldm_patched.utils.path_utils.exists_annotated_filepath(image):
             return "Invalid image file: {}".format(image)

-        if channel not in s._color_channels:
-            return "Invalid color channel: {}".format(channel)
-
         return True

 class ImageScale:
@@ -1871,6 +1881,7 @@ def init_custom_nodes():
         "nodes_video_model.py",
         "nodes_sag.py",
         "nodes_perpneg.py",
+        "nodes_stable3d.py",
     ]

     for node_file in extras_files:

View File

@@ -89,6 +89,7 @@ class SDTurboScheduler:
         return {"required":
                     {"model": ("MODEL",),
                      "steps": ("INT", {"default": 1, "min": 1, "max": 10}),
+                     "denoise": ("FLOAT", {"default": 1.0, "min": 0, "max": 1.0, "step": 0.01}),
                     }
                }
     RETURN_TYPES = ("SIGMAS",)
@@ -96,8 +97,9 @@ class SDTurboScheduler:
     FUNCTION = "get_sigmas"

-    def get_sigmas(self, model, steps):
-        timesteps = torch.flip(torch.arange(1, 11) * 100 - 1, (0,))[:steps]
+    def get_sigmas(self, model, steps, denoise):
+        start_step = 10 - int(10 * denoise)
+        timesteps = torch.flip(torch.arange(1, 11) * 100 - 1, (0,))[start_step:start_step + steps]
         sigmas = model.model.model_sampling.sigma(timesteps)
         sigmas = torch.cat([sigmas, sigmas.new_zeros([1])])
         return (sigmas, )
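For reference, a quick standalone sketch of how the new `denoise` input selects the turbo timesteps; it mirrors the two lines added above, and the helper name is only illustrative:

```python
import torch

def turbo_timesteps(steps: int, denoise: float) -> torch.Tensor:
    start_step = 10 - int(10 * denoise)                          # denoise=1.0 -> start from the noisiest step
    all_steps = torch.flip(torch.arange(1, 11) * 100 - 1, (0,))  # tensor([999, 899, ..., 99])
    return all_steps[start_step:start_step + steps]

print(turbo_timesteps(steps=1, denoise=1.0))  # tensor([999]): full text-to-image pass
print(turbo_timesteps(steps=2, denoise=0.5))  # tensor([499, 399]): img2img-style partial denoise
```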

View File

@@ -8,6 +8,7 @@ import ldm_patched.modules.utils
 from ldm_patched.contrib.external import MAX_RESOLUTION

 def composite(destination, source, x, y, mask = None, multiplier = 8, resize_source = False):
+    source = source.to(destination.device)
     if resize_source:
         source = torch.nn.functional.interpolate(source, size=(destination.shape[2], destination.shape[3]), mode="bilinear")
@@ -22,7 +23,7 @@ def composite(destination, source, x, y, mask = None, multiplier = 8, resize_source = False):
     if mask is None:
         mask = torch.ones_like(source)
     else:
-        mask = mask.clone()
+        mask = mask.to(destination.device, copy=True)
         mask = torch.nn.functional.interpolate(mask.reshape((-1, 1, mask.shape[-2], mask.shape[-1])), size=(source.shape[2], source.shape[3]), mode="bilinear")
         mask = ldm_patched.modules.utils.repeat_to_batch_size(mask, source.shape[0])

View File

@@ -101,10 +101,40 @@ class LatentRebatch:

         return (output_list,)

+class ImageRebatch:
+    @classmethod
+    def INPUT_TYPES(s):
+        return {"required": { "images": ("IMAGE",),
+                              "batch_size": ("INT", {"default": 1, "min": 1, "max": 4096}),
+                              }}
+    RETURN_TYPES = ("IMAGE",)
+    INPUT_IS_LIST = True
+    OUTPUT_IS_LIST = (True, )
+
+    FUNCTION = "rebatch"
+
+    CATEGORY = "image/batch"
+
+    def rebatch(self, images, batch_size):
+        batch_size = batch_size[0]
+
+        output_list = []
+        all_images = []
+        for img in images:
+            for i in range(img.shape[0]):
+                all_images.append(img[i:i+1])
+
+        for i in range(0, len(all_images), batch_size):
+            output_list.append(torch.cat(all_images[i:i+batch_size], dim=0))
+
+        return (output_list,)
+
 NODE_CLASS_MAPPINGS = {
     "RebatchLatents": LatentRebatch,
+    "RebatchImages": ImageRebatch,
 }

 NODE_DISPLAY_NAME_MAPPINGS = {
     "RebatchLatents": "Rebatch Latents",
+    "RebatchImages": "Rebatch Images",
 }
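The rebatch logic is easiest to see on a toy input; the snippet below is a standalone sketch of the same flatten-then-regroup behaviour (the variable names are illustrative, not part of the node):

```python
import torch

# Two incoming image batches of 3 and 2 frames (NHWC, ComfyUI-style IMAGE tensors).
images = [torch.zeros(3, 64, 64, 3), torch.zeros(2, 64, 64, 3)]
batch_size = 4

# Flatten every batch into single frames, then regroup into batches of `batch_size`.
frames = [img[i:i + 1] for img in images for i in range(img.shape[0])]
rebatched = [torch.cat(frames[i:i + batch_size], dim=0)
             for i in range(0, len(frames), batch_size)]

print([t.shape[0] for t in rebatched])  # [4, 1]
```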

View File

@@ -153,7 +153,7 @@ class SelfAttentionGuidance:
             (sag, _) = ldm_patched.modules.samplers.calc_cond_uncond_batch(model, uncond, None, degraded_noised, sigma, model_options)
             return cfg_result + (degraded - sag) * sag_scale

-        m.set_model_sampler_post_cfg_function(post_cfg_function)
+        m.set_model_sampler_post_cfg_function(post_cfg_function, disable_cfg1_optimization=True)

         # from diffusers:
         # unet.mid_block.attentions[0].transformer_blocks[0].attn1.patch

View File

@@ -0,0 +1,60 @@ (new file)
# https://github.com/comfyanonymous/ComfyUI/blob/master/nodes.py

import torch
import ldm_patched.contrib.external
import ldm_patched.modules.utils

def camera_embeddings(elevation, azimuth):
    elevation = torch.as_tensor([elevation])
    azimuth = torch.as_tensor([azimuth])
    embeddings = torch.stack(
        [
            torch.deg2rad(
                (90 - elevation) - (90)
            ),  # Zero123 polar is 90-elevation
            torch.sin(torch.deg2rad(azimuth)),
            torch.cos(torch.deg2rad(azimuth)),
            torch.deg2rad(
                90 - torch.full_like(elevation, 0)
            ),
        ], dim=-1).unsqueeze(1)

    return embeddings


class StableZero123_Conditioning:
    @classmethod
    def INPUT_TYPES(s):
        return {"required": { "clip_vision": ("CLIP_VISION",),
                              "init_image": ("IMAGE",),
                              "vae": ("VAE",),
                              "width": ("INT", {"default": 256, "min": 16, "max": ldm_patched.contrib.external.MAX_RESOLUTION, "step": 8}),
                              "height": ("INT", {"default": 256, "min": 16, "max": ldm_patched.contrib.external.MAX_RESOLUTION, "step": 8}),
                              "batch_size": ("INT", {"default": 1, "min": 1, "max": 4096}),
                              "elevation": ("FLOAT", {"default": 0.0, "min": -180.0, "max": 180.0}),
                              "azimuth": ("FLOAT", {"default": 0.0, "min": -180.0, "max": 180.0}),
                             }}
    RETURN_TYPES = ("CONDITIONING", "CONDITIONING", "LATENT")
    RETURN_NAMES = ("positive", "negative", "latent")

    FUNCTION = "encode"

    CATEGORY = "conditioning/3d_models"

    def encode(self, clip_vision, init_image, vae, width, height, batch_size, elevation, azimuth):
        output = clip_vision.encode_image(init_image)
        pooled = output.image_embeds.unsqueeze(0)
        pixels = ldm_patched.modules.utils.common_upscale(init_image.movedim(-1,1), width, height, "bilinear", "center").movedim(1,-1)
        encode_pixels = pixels[:,:,:,:3]
        t = vae.encode(encode_pixels)
        cam_embeds = camera_embeddings(elevation, azimuth)
        cond = torch.cat([pooled, cam_embeds.repeat((pooled.shape[0], 1, 1))], dim=-1)

        positive = [[cond, {"concat_latent_image": t}]]
        negative = [[torch.zeros_like(pooled), {"concat_latent_image": torch.zeros_like(t)}]]
        latent = torch.zeros([batch_size, 4, height // 8, width // 8])
        return (positive, negative, {"samples":latent})

NODE_CLASS_MAPPINGS = {
    "StableZero123_Conditioning": StableZero123_Conditioning,
}
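As a sanity check on `camera_embeddings()` above: it packs four scalars per view into a `(1, 1, 4)` tensor, the Zero123 polar offset, sin/cos of the azimuth, and a constant pi/2. A worked example:

```python
import torch

elevation, azimuth = 10.0, 30.0
e = torch.as_tensor([elevation])
a = torch.as_tensor([azimuth])
emb = torch.stack([
    torch.deg2rad((90 - e) - 90),            # Zero123 polar is 90 - elevation -> -10 deg
    torch.sin(torch.deg2rad(a)),             # sin(30 deg) = 0.5
    torch.cos(torch.deg2rad(a)),             # cos(30 deg) ~= 0.866
    torch.deg2rad(90 - torch.zeros_like(e)), # constant 90 deg = pi/2
], dim=-1).unsqueeze(1)

print(emb.shape)  # torch.Size([1, 1, 4])
print(emb)        # approximately [[[-0.1745, 0.5000, 0.8660, 1.5708]]]
```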

View File

@@ -8,6 +8,7 @@ from ldm_patched.ldm.modules.distributions.distributions import DiagonalGaussian
 from ldm_patched.ldm.util import instantiate_from_config
 from ldm_patched.ldm.modules.ema import LitEma
+import ldm_patched.modules.ops

 class DiagonalGaussianRegularizer(torch.nn.Module):
     def __init__(self, sample: bool = True):
@@ -161,12 +162,12 @@ class AutoencodingEngineLegacy(AutoencodingEngine):
             },
             **kwargs,
         )
-        self.quant_conv = torch.nn.Conv2d(
+        self.quant_conv = ldm_patched.modules.ops.disable_weight_init.Conv2d(
             (1 + ddconfig["double_z"]) * ddconfig["z_channels"],
             (1 + ddconfig["double_z"]) * embed_dim,
             1,
         )
-        self.post_quant_conv = torch.nn.Conv2d(embed_dim, ddconfig["z_channels"], 1)
+        self.post_quant_conv = ldm_patched.modules.ops.disable_weight_init.Conv2d(embed_dim, ddconfig["z_channels"], 1)
         self.embed_dim = embed_dim

     def get_autoencoder_params(self) -> list:

View File

@@ -41,7 +41,7 @@ def nonlinearity(x):

 def Normalize(in_channels, num_groups=32):
-    return torch.nn.GroupNorm(num_groups=num_groups, num_channels=in_channels, eps=1e-6, affine=True)
+    return ops.GroupNorm(num_groups=num_groups, num_channels=in_channels, eps=1e-6, affine=True)

 class Upsample(nn.Module):

View File

@@ -43,8 +43,8 @@ class AbstractLowScaleModel(nn.Module):
     def q_sample(self, x_start, t, noise=None):
         noise = default(noise, lambda: torch.randn_like(x_start))
-        return (extract_into_tensor(self.sqrt_alphas_cumprod, t, x_start.shape) * x_start +
-                extract_into_tensor(self.sqrt_one_minus_alphas_cumprod, t, x_start.shape) * noise)
+        return (extract_into_tensor(self.sqrt_alphas_cumprod.to(x_start.device), t, x_start.shape) * x_start +
+                extract_into_tensor(self.sqrt_one_minus_alphas_cumprod.to(x_start.device), t, x_start.shape) * noise)

     def forward(self, x):
         return x, None

View File

@@ -51,9 +51,9 @@ class AlphaBlender(nn.Module):
         if self.merge_strategy == "fixed":
             # make shape compatible
             # alpha = repeat(self.mix_factor, '1 -> b () t () ()', t=t, b=bs)
-            alpha = self.mix_factor
+            alpha = self.mix_factor.to(image_only_indicator.device)
         elif self.merge_strategy == "learned":
-            alpha = torch.sigmoid(self.mix_factor)
+            alpha = torch.sigmoid(self.mix_factor.to(image_only_indicator.device))
             # make shape compatible
             # alpha = repeat(alpha, '1 -> s () ()', s = t * bs)
         elif self.merge_strategy == "learned_with_images":
@@ -61,7 +61,7 @@ class AlphaBlender(nn.Module):
             alpha = torch.where(
                 image_only_indicator.bool(),
                 torch.ones(1, 1, device=image_only_indicator.device),
-                rearrange(torch.sigmoid(self.mix_factor), "... -> ... 1"),
+                rearrange(torch.sigmoid(self.mix_factor.to(image_only_indicator.device)), "... -> ... 1"),
             )
             alpha = rearrange(alpha, self.rearrange_pattern)
         # make shape compatible

View File

@@ -15,12 +15,12 @@ class CLIPEmbeddingNoiseAugmentation(ImageConcatWithNoiseAugmentation):

     def scale(self, x):
         # re-normalize to centered mean and unit variance
-        x = (x - self.data_mean) * 1. / self.data_std
+        x = (x - self.data_mean.to(x.device)) * 1. / self.data_std.to(x.device)
         return x

     def unscale(self, x):
         # back to original data stats
-        x = (x * self.data_std) + self.data_mean
+        x = (x * self.data_std.to(x.device)) + self.data_mean.to(x.device)
         return x

     def forward(self, x, noise_level=None):

View File

@@ -82,14 +82,14 @@ class VideoResBlock(ResnetBlock):
         x = self.time_stack(x, temb)

-        alpha = self.get_alpha(bs=b // timesteps)
+        alpha = self.get_alpha(bs=b // timesteps).to(x.device)
         x = alpha * x + (1.0 - alpha) * x_mix

         x = rearrange(x, "b c t h w -> (b t) c h w")
         return x

-class AE3DConv(torch.nn.Conv2d):
+class AE3DConv(ops.Conv2d):
     def __init__(self, in_channels, out_channels, video_kernel_size=3, *args, **kwargs):
         super().__init__(in_channels, out_channels, *args, **kwargs)
         if isinstance(video_kernel_size, Iterable):
@@ -97,7 +97,7 @@ class AE3DConv(torch.nn.Conv2d):
         else:
             padding = int(video_kernel_size // 2)

-        self.time_mix_conv = torch.nn.Conv3d(
+        self.time_mix_conv = ops.Conv3d(
             in_channels=out_channels,
             out_channels=out_channels,
             kernel_size=video_kernel_size,
@@ -167,7 +167,7 @@ class AttnVideoBlock(AttnBlock):
             emb = emb[:, None, :]
             x_mix = x_mix + emb

-        alpha = self.get_alpha()
+        alpha = self.get_alpha().to(x.device)
         x_mix = self.time_mix_block(x_mix, timesteps=timesteps)
         x = alpha * x + (1.0 - alpha) * x_mix  # alpha merge

View File

@@ -66,6 +66,8 @@ fpvae_group.add_argument("--vae-in-fp16", action="store_true")
 fpvae_group.add_argument("--vae-in-fp32", action="store_true")
 fpvae_group.add_argument("--vae-in-bf16", action="store_true")

+parser.add_argument("--vae-in-cpu", action="store_true")
+
 fpte_group = parser.add_mutually_exclusive_group()
 fpte_group.add_argument("--clip-in-fp8-e4m3fn", action="store_true")
 fpte_group.add_argument("--clip-in-fp8-e5m2", action="store_true")

View File

@@ -151,7 +151,7 @@ class CLIPVisionEmbeddings(torch.nn.Module):

     def forward(self, pixel_values):
         embeds = self.patch_embedding(pixel_values).flatten(2).transpose(1, 2)
-        return torch.cat([self.class_embedding.expand(pixel_values.shape[0], 1, -1), embeds], dim=1) + self.position_embedding.weight
+        return torch.cat([self.class_embedding.to(embeds.device).expand(pixel_values.shape[0], 1, -1), embeds], dim=1) + self.position_embedding.weight.to(embeds.device)

 class CLIPVision(torch.nn.Module):

View File

@@ -283,7 +283,7 @@ class ControlLora(ControlNet):
         cm = self.control_model.state_dict()

         for k in sd:
-            weight = ldm_patched.modules.model_management.resolve_lowvram_weight(sd[k], diffusion_model, k)
+            weight = sd[k]
             try:
                 ldm_patched.modules.utils.set_attr(self.control_model, k, weight)
             except:

View File

@@ -126,9 +126,15 @@ class BaseModel(torch.nn.Module):
                 cond_concat.append(blank_inpaint_image_like(noise))
             data = torch.cat(cond_concat, dim=1)
             out['c_concat'] = ldm_patched.modules.conds.CONDNoiseShape(data)
+
         adm = self.encode_adm(**kwargs)
         if adm is not None:
             out['y'] = ldm_patched.modules.conds.CONDRegular(adm)
+
+        cross_attn = kwargs.get("cross_attn", None)
+        if cross_attn is not None:
+            out['c_crossattn'] = ldm_patched.modules.conds.CONDCrossAttn(cross_attn)
+
         return out

     def load_model_weights(self, sd, unet_prefix=""):
@@ -156,11 +162,7 @@ class BaseModel(torch.nn.Module):
     def state_dict_for_saving(self, clip_state_dict, vae_state_dict):
         clip_state_dict = self.model_config.process_clip_state_dict_for_saving(clip_state_dict)
-        unet_sd = self.diffusion_model.state_dict()
-        unet_state_dict = {}
-        for k in unet_sd:
-            unet_state_dict[k] = ldm_patched.modules.model_management.resolve_lowvram_weight(unet_sd[k], self.diffusion_model, k)
+        unet_state_dict = self.diffusion_model.state_dict()
         unet_state_dict = self.model_config.process_unet_state_dict_for_saving(unet_state_dict)
         vae_state_dict = self.model_config.process_vae_state_dict_for_saving(vae_state_dict)
         if self.get_dtype() == torch.float16:
@@ -322,9 +324,43 @@ class SVD_img2vid(BaseModel):
         out['c_concat'] = ldm_patched.modules.conds.CONDNoiseShape(latent_image)

+        cross_attn = kwargs.get("cross_attn", None)
+        if cross_attn is not None:
+            out['c_crossattn'] = ldm_patched.modules.conds.CONDCrossAttn(cross_attn)
+
         if "time_conditioning" in kwargs:
             out["time_context"] = ldm_patched.modules.conds.CONDCrossAttn(kwargs["time_conditioning"])

         out['image_only_indicator'] = ldm_patched.modules.conds.CONDConstant(torch.zeros((1,), device=device))
         out['num_video_frames'] = ldm_patched.modules.conds.CONDConstant(noise.shape[0])
         return out
+
+class Stable_Zero123(BaseModel):
+    def __init__(self, model_config, model_type=ModelType.EPS, device=None, cc_projection_weight=None, cc_projection_bias=None):
+        super().__init__(model_config, model_type, device=device)
+        self.cc_projection = ldm_patched.modules.ops.manual_cast.Linear(cc_projection_weight.shape[1], cc_projection_weight.shape[0], dtype=self.get_dtype(), device=device)
+        self.cc_projection.weight.copy_(cc_projection_weight)
+        self.cc_projection.bias.copy_(cc_projection_bias)
+
+    def extra_conds(self, **kwargs):
+        out = {}
+
+        latent_image = kwargs.get("concat_latent_image", None)
+        noise = kwargs.get("noise", None)
+
+        if latent_image is None:
+            latent_image = torch.zeros_like(noise)
+
+        if latent_image.shape[1:] != noise.shape[1:]:
+            latent_image = utils.common_upscale(latent_image, noise.shape[-1], noise.shape[-2], "bilinear", "center")
+
+        latent_image = utils.resize_to_batch_size(latent_image, noise.shape[0])
+
+        out['c_concat'] = ldm_patched.modules.conds.CONDNoiseShape(latent_image)
+
+        cross_attn = kwargs.get("cross_attn", None)
+        if cross_attn is not None:
+            if cross_attn.shape[-1] != 768:
+                cross_attn = self.cc_projection(cross_attn)
+            out['c_crossattn'] = ldm_patched.modules.conds.CONDCrossAttn(cross_attn)
+
+        return out

View File

@@ -186,6 +186,9 @@ except:
 if is_intel_xpu():
     VAE_DTYPE = torch.bfloat16

+if args.vae_in_cpu:
+    VAE_DTYPE = torch.float32
+
 if args.vae_in_fp16:
     VAE_DTYPE = torch.float16
 elif args.vae_in_bf16:
@@ -218,15 +221,8 @@ if args.all_in_fp16:
     FORCE_FP16 = True

 if lowvram_available:
-    try:
-        import accelerate
-        if set_vram_to in (VRAMState.LOW_VRAM, VRAMState.NO_VRAM):
-            vram_state = set_vram_to
-    except Exception as e:
-        import traceback
-        print(traceback.format_exc())
-        print("ERROR: LOW VRAM MODE NEEDS accelerate.")
-        lowvram_available = False
+    if set_vram_to in (VRAMState.LOW_VRAM, VRAMState.NO_VRAM):
+        vram_state = set_vram_to

 if cpu_state != CPUState.GPU:
@@ -266,6 +262,14 @@ print("VAE dtype:", VAE_DTYPE)

 current_loaded_models = []

+def module_size(module):
+    module_mem = 0
+    sd = module.state_dict()
+    for k in sd:
+        t = sd[k]
+        module_mem += t.nelement() * t.element_size()
+    return module_mem
+
 class LoadedModel:
     def __init__(self, model):
         self.model = model
@@ -298,8 +302,20 @@ class LoadedModel:
         if lowvram_model_memory > 0:
             print("loading in lowvram mode", lowvram_model_memory/(1024 * 1024))
-            device_map = accelerate.infer_auto_device_map(self.real_model, max_memory={0: "{}MiB".format(lowvram_model_memory // (1024 * 1024)), "cpu": "16GiB"})
-            accelerate.dispatch_model(self.real_model, device_map=device_map, main_device=self.device)
+            mem_counter = 0
+            for m in self.real_model.modules():
+                if hasattr(m, "ldm_patched_cast_weights"):
+                    m.prev_ldm_patched_cast_weights = m.ldm_patched_cast_weights
+                    m.ldm_patched_cast_weights = True
+                    module_mem = module_size(m)
+                    if mem_counter + module_mem < lowvram_model_memory:
+                        m.to(self.device)
+                        mem_counter += module_mem
+                elif hasattr(m, "weight"): #only modules with ldm_patched_cast_weights can be set to lowvram mode
+                    m.to(self.device)
+                    mem_counter += module_size(m)
+                    print("lowvram: loaded module regularly", m)
+
             self.model_accelerated = True

         if is_intel_xpu() and not args.disable_ipex_hijack:
@@ -309,7 +325,11 @@ class LoadedModel:

     def model_unload(self):
         if self.model_accelerated:
-            accelerate.hooks.remove_hook_from_submodules(self.real_model)
+            for m in self.real_model.modules():
+                if hasattr(m, "prev_ldm_patched_cast_weights"):
+                    m.ldm_patched_cast_weights = m.prev_ldm_patched_cast_weights
+                    del m.prev_ldm_patched_cast_weights
+
             self.model_accelerated = False

         self.model.unpatch_model(self.model.offload_device)
@@ -402,14 +422,14 @@ def load_models_gpu(models, memory_required=0):
         if lowvram_available and (vram_set_state == VRAMState.LOW_VRAM or vram_set_state == VRAMState.NORMAL_VRAM):
             model_size = loaded_model.model_memory_required(torch_dev)
             current_free_mem = get_free_memory(torch_dev)
-            lowvram_model_memory = int(max(256 * (1024 * 1024), (current_free_mem - 1024 * (1024 * 1024)) / 1.3 ))
+            lowvram_model_memory = int(max(64 * (1024 * 1024), (current_free_mem - 1024 * (1024 * 1024)) / 1.3 ))
             if model_size > (current_free_mem - inference_memory): #only switch to lowvram if really necessary
                 vram_set_state = VRAMState.LOW_VRAM
             else:
                 lowvram_model_memory = 0

         if vram_set_state == VRAMState.NO_VRAM:
-            lowvram_model_memory = 256 * 1024 * 1024
+            lowvram_model_memory = 64 * 1024 * 1024

         cur_loaded_model = loaded_model.model_load(lowvram_model_memory)
         current_loaded_models.insert(0, loaded_model)
@@ -538,6 +558,8 @@ def intermediate_device():
         return torch.device("cpu")

 def vae_device():
+    if args.vae_in_cpu:
+        return torch.device("cpu")
     return get_torch_device()

 def vae_offload_device():
@@ -566,6 +588,11 @@ def supports_dtype(device, dtype): #TODO
         return True
     return False

+def device_supports_non_blocking(device):
+    if is_device_mps(device):
+        return False #pytorch bug? mps doesn't support non blocking
+    return True
+
 def cast_to_device(tensor, device, dtype, copy=False):
     device_supports_cast = False
     if tensor.dtype == torch.float32 or tensor.dtype == torch.float16:
@@ -576,9 +603,7 @@ def cast_to_device(tensor, device, dtype, copy=False):
     elif is_intel_xpu():
         device_supports_cast = True

-    non_blocking = True
-    if is_device_mps(device):
-        non_blocking = False #pytorch bug? mps doesn't support non blocking
+    non_blocking = device_supports_non_blocking(device)

     if device_supports_cast:
         if copy:
@@ -742,11 +767,11 @@ def soft_empty_cache(force=False):
         torch.cuda.empty_cache()
         torch.cuda.ipc_collect()

-def resolve_lowvram_weight(weight, model, key):
-    if weight.device == torch.device("meta"): #lowvram NOTE: this depends on the inner working of the accelerate library so it might break.
-        key_split = key.split('.')              # I have no idea why they don't just leave the weight there instead of using the meta device.
-        op = ldm_patched.modules.utils.get_attr(model, '.'.join(key_split[:-1]))
-        weight = op._hf_hook.weights_map[key_split[-1]]
+def unload_all_models():
+    free_memory(1e30, get_torch_device())
+
+def resolve_lowvram_weight(weight, model, key): #TODO: remove
     return weight

 #TODO: might be cleaner to put this somewhere else
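The replacement for accelerate's device map is a simple greedy budget over modules; the snippet below is a simplified illustration of that strategy, not the exact loader above:

```python
import torch

def partial_load(model: torch.nn.Module, device: torch.device, budget_bytes: int) -> int:
    """Greedy sketch of the new lowvram path: move modules to the GPU until the
    byte budget is spent; the rest stay offloaded and rely on the
    ldm_patched_cast_weights forward path to cast weights per call."""
    used = 0
    for m in model.modules():
        if hasattr(m, "ldm_patched_cast_weights"):
            m.ldm_patched_cast_weights = True  # enable on-the-fly casting for this layer
            size = sum(t.nelement() * t.element_size() for t in m.state_dict().values())
            if used + size < budget_bytes:
                m.to(device)
                used += size
    return used
```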

View File

@@ -28,13 +28,9 @@ class ModelPatcher:
         if self.size > 0:
             return self.size
         model_sd = self.model.state_dict()
-        size = 0
-        for k in model_sd:
-            t = model_sd[k]
-            size += t.nelement() * t.element_size()
-        self.size = size
+        self.size = ldm_patched.modules.model_management.module_size(self.model)
         self.model_keys = set(model_sd.keys())
-        return size
+        return self.size

     def clone(self):
         n = ModelPatcher(self.model, self.load_device, self.offload_device, self.size, self.current_device, weight_inplace_update=self.weight_inplace_update)
@@ -55,14 +51,18 @@ class ModelPatcher:
     def memory_required(self, input_shape):
         return self.model.memory_required(input_shape=input_shape)

-    def set_model_sampler_cfg_function(self, sampler_cfg_function):
+    def set_model_sampler_cfg_function(self, sampler_cfg_function, disable_cfg1_optimization=False):
         if len(inspect.signature(sampler_cfg_function).parameters) == 3:
             self.model_options["sampler_cfg_function"] = lambda args: sampler_cfg_function(args["cond"], args["uncond"], args["cond_scale"]) #Old way
         else:
             self.model_options["sampler_cfg_function"] = sampler_cfg_function
+        if disable_cfg1_optimization:
+            self.model_options["disable_cfg1_optimization"] = True

-    def set_model_sampler_post_cfg_function(self, post_cfg_function):
+    def set_model_sampler_post_cfg_function(self, post_cfg_function, disable_cfg1_optimization=False):
         self.model_options["sampler_post_cfg_function"] = self.model_options.get("sampler_post_cfg_function", []) + [post_cfg_function]
+        if disable_cfg1_optimization:
+            self.model_options["disable_cfg1_optimization"] = True

     def set_model_unet_function_wrapper(self, unet_wrapper_function):
         self.model_options["model_function_wrapper"] = unet_wrapper_function

View File

@@ -1,27 +1,93 @@
 import torch
 from contextlib import contextmanager
+import ldm_patched.modules.model_management
+
+def cast_bias_weight(s, input):
+    bias = None
+    non_blocking = ldm_patched.modules.model_management.device_supports_non_blocking(input.device)
+    if s.bias is not None:
+        bias = s.bias.to(device=input.device, dtype=input.dtype, non_blocking=non_blocking)
+    weight = s.weight.to(device=input.device, dtype=input.dtype, non_blocking=non_blocking)
+    return weight, bias

 class disable_weight_init:
     class Linear(torch.nn.Linear):
+        ldm_patched_cast_weights = False
         def reset_parameters(self):
             return None

+        def forward_ldm_patched_cast_weights(self, input):
+            weight, bias = cast_bias_weight(self, input)
+            return torch.nn.functional.linear(input, weight, bias)
+
+        def forward(self, *args, **kwargs):
+            if self.ldm_patched_cast_weights:
+                return self.forward_ldm_patched_cast_weights(*args, **kwargs)
+            else:
+                return super().forward(*args, **kwargs)
+
     class Conv2d(torch.nn.Conv2d):
+        ldm_patched_cast_weights = False
         def reset_parameters(self):
             return None

+        def forward_ldm_patched_cast_weights(self, input):
+            weight, bias = cast_bias_weight(self, input)
+            return self._conv_forward(input, weight, bias)
+
+        def forward(self, *args, **kwargs):
+            if self.ldm_patched_cast_weights:
+                return self.forward_ldm_patched_cast_weights(*args, **kwargs)
+            else:
+                return super().forward(*args, **kwargs)
+
     class Conv3d(torch.nn.Conv3d):
+        ldm_patched_cast_weights = False
         def reset_parameters(self):
             return None

+        def forward_ldm_patched_cast_weights(self, input):
+            weight, bias = cast_bias_weight(self, input)
+            return self._conv_forward(input, weight, bias)
+
+        def forward(self, *args, **kwargs):
+            if self.ldm_patched_cast_weights:
+                return self.forward_ldm_patched_cast_weights(*args, **kwargs)
+            else:
+                return super().forward(*args, **kwargs)
+
     class GroupNorm(torch.nn.GroupNorm):
+        ldm_patched_cast_weights = False
         def reset_parameters(self):
             return None

+        def forward_ldm_patched_cast_weights(self, input):
+            weight, bias = cast_bias_weight(self, input)
+            return torch.nn.functional.group_norm(input, self.num_groups, weight, bias, self.eps)
+
+        def forward(self, *args, **kwargs):
+            if self.ldm_patched_cast_weights:
+                return self.forward_ldm_patched_cast_weights(*args, **kwargs)
+            else:
+                return super().forward(*args, **kwargs)
+
     class LayerNorm(torch.nn.LayerNorm):
+        ldm_patched_cast_weights = False
         def reset_parameters(self):
             return None

+        def forward_ldm_patched_cast_weights(self, input):
+            weight, bias = cast_bias_weight(self, input)
+            return torch.nn.functional.layer_norm(input, self.normalized_shape, weight, bias, self.eps)
+
+        def forward(self, *args, **kwargs):
+            if self.ldm_patched_cast_weights:
+                return self.forward_ldm_patched_cast_weights(*args, **kwargs)
+            else:
+                return super().forward(*args, **kwargs)
+
     @classmethod
     def conv_nd(s, dims, *args, **kwargs):
         if dims == 2:
@@ -31,35 +97,19 @@ class disable_weight_init:
         else:
             raise ValueError(f"unsupported dimensions: {dims}")

-def cast_bias_weight(s, input):
-    bias = None
-    if s.bias is not None:
-        bias = s.bias.to(device=input.device, dtype=input.dtype)
-    weight = s.weight.to(device=input.device, dtype=input.dtype)
-    return weight, bias

 class manual_cast(disable_weight_init):
     class Linear(disable_weight_init.Linear):
-        def forward(self, input):
-            weight, bias = cast_bias_weight(self, input)
-            return torch.nn.functional.linear(input, weight, bias)
+        ldm_patched_cast_weights = True

     class Conv2d(disable_weight_init.Conv2d):
-        def forward(self, input):
-            weight, bias = cast_bias_weight(self, input)
-            return self._conv_forward(input, weight, bias)
+        ldm_patched_cast_weights = True

     class Conv3d(disable_weight_init.Conv3d):
-        def forward(self, input):
-            weight, bias = cast_bias_weight(self, input)
-            return self._conv_forward(input, weight, bias)
+        ldm_patched_cast_weights = True

     class GroupNorm(disable_weight_init.GroupNorm):
-        def forward(self, input):
-            weight, bias = cast_bias_weight(self, input)
-            return torch.nn.functional.group_norm(input, self.num_groups, weight, bias, self.eps)
+        ldm_patched_cast_weights = True

     class LayerNorm(disable_weight_init.LayerNorm):
-        def forward(self, input):
-            weight, bias = cast_bias_weight(self, input)
-            return torch.nn.functional.layer_norm(input, self.normalized_shape, weight, bias, self.eps)
+        ldm_patched_cast_weights = True
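In short, `disable_weight_init` layers behave like their `torch.nn` counterparts until `ldm_patched_cast_weights` is flipped on (as the lowvram loader now does), while `manual_cast` layers always cast. A minimal illustration, assuming the `ldm_patched` package is importable in your environment:

```python
import torch
import ldm_patched.modules.ops as ops

layer = ops.manual_cast.Linear(4, 4)                        # ldm_patched_cast_weights = True
layer.weight.data = torch.randn(4, 4, dtype=torch.float16)  # parameters may live in another dtype
layer.bias.data = torch.zeros(4, dtype=torch.float16)

x = torch.randn(1, 4)               # float32 input
y = layer(x)                        # weight/bias are cast to float32 only for this call
print(y.dtype, layer.weight.dtype)  # torch.float32 torch.float16
```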

View File

@@ -47,7 +47,8 @@ def convert_cond(cond):
         temp = c[1].copy()
         model_conds = temp.get("model_conds", {})
         if c[0] is not None:
-            model_conds["c_crossattn"] = ldm_patched.modules.conds.CONDCrossAttn(c[0])
+            model_conds["c_crossattn"] = ldm_patched.modules.conds.CONDCrossAttn(c[0]) #TODO: remove
+            temp["cross_attn"] = c[0]
         temp["model_conds"] = model_conds
         out.append(temp)
     return out

View File

@@ -244,7 +244,7 @@ def calc_cond_uncond_batch(model, cond, uncond, x_in, timestep, model_options):
 #The main sampling function shared by all the samplers
 #Returns denoised
 def sampling_function(model, x, timestep, uncond, cond, cond_scale, model_options={}, seed=None):
-        if math.isclose(cond_scale, 1.0):
+        if math.isclose(cond_scale, 1.0) and model_options.get("disable_cfg1_optimization", False) == False:
             uncond_ = None
         else:
             uncond_ = uncond
@@ -599,6 +599,13 @@ def sample(model, noise, positive, negative, cfg, device, sampler, sigmas, model
     calculate_start_end_timesteps(model, negative)
     calculate_start_end_timesteps(model, positive)

+    if latent_image is not None:
+        latent_image = model.process_latent_in(latent_image)
+
+    if hasattr(model, 'extra_conds'):
+        positive = encode_model_conds(model.extra_conds, positive, noise, device, "positive", latent_image=latent_image, denoise_mask=denoise_mask)
+        negative = encode_model_conds(model.extra_conds, negative, noise, device, "negative", latent_image=latent_image, denoise_mask=denoise_mask)
+
     #make sure each cond area has an opposite one with the same area
     for c in positive:
         create_cond_with_same_area_if_none(negative, c)
@@ -610,13 +617,6 @@ def sample(model, noise, positive, negative, cfg, device, sampler, sigmas, model
     apply_empty_x_to_equal_area(list(filter(lambda c: c.get('control_apply_to_uncond', False) == True, positive)), negative, 'control', lambda cond_cnets, x: cond_cnets[x])
     apply_empty_x_to_equal_area(positive, negative, 'gligen', lambda cond_cnets, x: cond_cnets[x])

-    if latent_image is not None:
-        latent_image = model.process_latent_in(latent_image)
-
-    if hasattr(model, 'extra_conds'):
-        positive = encode_model_conds(model.extra_conds, positive, noise, device, "positive", latent_image=latent_image, denoise_mask=denoise_mask)
-        negative = encode_model_conds(model.extra_conds, negative, noise, device, "negative", latent_image=latent_image, denoise_mask=denoise_mask)
-
     extra_args = {"cond":positive, "uncond":negative, "cond_scale": cfg, "model_options": model_options, "seed":seed}

     samples = sampler.sample(model_wrap, sigmas, extra_args, callback, noise, latent_image, denoise_mask, disable_pbar)
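The `disable_cfg1_optimization` flag threads through ModelPatcher, the SAG node, and this function: at cond_scale 1.0 the uncond pass is normally skipped, unless a patch asked to keep it. A small truth-table sketch (the helper name is illustrative):

```python
import math

def needs_uncond(cond_scale: float, model_options: dict) -> bool:
    # Mirrors the condition above: skip the uncond batch only at CFG ~1.0
    # and only if no patch (e.g. SelfAttentionGuidance) disabled the shortcut.
    return not (math.isclose(cond_scale, 1.0)
                and model_options.get("disable_cfg1_optimization", False) == False)

print(needs_uncond(1.0, {}))                                   # False: uncond skipped
print(needs_uncond(1.0, {"disable_cfg1_optimization": True}))  # True: SAG still needs it
print(needs_uncond(7.0, {}))                                   # True: normal CFG
```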

View File

@@ -252,5 +252,32 @@ class SVD_img2vid(supported_models_base.BASE):
     def clip_target(self):
         return None

-models = [SD15, SD20, SD21UnclipL, SD21UnclipH, SDXLRefiner, SDXL, SSD1B, Segmind_Vega]
+class Stable_Zero123(supported_models_base.BASE):
+    unet_config = {
+        "context_dim": 768,
+        "model_channels": 320,
+        "use_linear_in_transformer": False,
+        "adm_in_channels": None,
+        "use_temporal_attention": False,
+        "in_channels": 8,
+    }
+
+    unet_extra_config = {
+        "num_heads": 8,
+        "num_head_channels": -1,
+    }
+
+    clip_vision_prefix = "cond_stage_model.model.visual."
+
+    latent_format = latent_formats.SD15
+
+    def get_model(self, state_dict, prefix="", device=None):
+        out = model_base.Stable_Zero123(self, device=device, cc_projection_weight=state_dict["cc_projection.weight"], cc_projection_bias=state_dict["cc_projection.bias"])
+        return out
+
+    def clip_target(self):
+        return None
+
+models = [Stable_Zero123, SD15, SD20, SD21UnclipL, SD21UnclipH, SDXLRefiner, SDXL, SSD1B, Segmind_Vega]
 models += [SVD_img2vid]

View File

@@ -7,9 +7,10 @@ import torch
 import torch.nn as nn

 import ldm_patched.modules.utils
+import ldm_patched.modules.ops

 def conv(n_in, n_out, **kwargs):
-    return nn.Conv2d(n_in, n_out, 3, padding=1, **kwargs)
+    return ldm_patched.modules.ops.disable_weight_init.Conv2d(n_in, n_out, 3, padding=1, **kwargs)

 class Clamp(nn.Module):
     def forward(self, x):
@@ -19,7 +20,7 @@ class Block(nn.Module):
     def __init__(self, n_in, n_out):
         super().__init__()
         self.conv = nn.Sequential(conv(n_in, n_out), nn.ReLU(), conv(n_out, n_out), nn.ReLU(), conv(n_out, n_out))
-        self.skip = nn.Conv2d(n_in, n_out, 1, bias=False) if n_in != n_out else nn.Identity()
+        self.skip = ldm_patched.modules.ops.disable_weight_init.Conv2d(n_in, n_out, 1, bias=False) if n_in != n_out else nn.Identity()
         self.fuse = nn.ReLU()
     def forward(self, x):
         return self.fuse(self.conv(x) + self.skip(x))

View File

@@ -184,8 +184,7 @@ def cached_filename_list_(folder_name):
     if folder_name not in filename_list_cache:
         return None
     out = filename_list_cache[folder_name]
-    if time.perf_counter() < (out[2] + 0.5):
-        return out
     for x in out[1]:
         time_modified = out[1][x]
         folder = x

modules/ops.py (new file, 19 lines)
View File

@@ -0,0 +1,19 @@ (new file)
import torch
import contextlib


@contextlib.contextmanager
def use_patched_ops(operations):
    op_names = ['Linear', 'Conv2d', 'Conv3d', 'GroupNorm', 'LayerNorm']
    backups = {op_name: getattr(torch.nn, op_name) for op_name in op_names}

    try:
        for op_name in op_names:
            setattr(torch.nn, op_name, getattr(operations, op_name))
        yield
    finally:
        for op_name in op_names:
            setattr(torch.nn, op_name, backups[op_name])
        return
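Usage sketch for the new helper, assuming the Fooocus environment is importable: any `torch.nn` layer constructed inside the context is actually built from the patched operation set, and the originals are restored on exit.

```python
import torch
from modules.ops import use_patched_ops
import ldm_patched.modules.ops as ops

with use_patched_ops(ops.manual_cast):
    # Layers created here are ops.manual_cast.* subclasses: no weight init at
    # construction, and weights are cast to the input's device/dtype at forward time.
    block = torch.nn.Sequential(torch.nn.Linear(8, 8), torch.nn.LayerNorm(8))

print(isinstance(block[0], ops.manual_cast.Linear))  # True
print(torch.nn.Linear is ops.manual_cast.Linear)     # False: originals restored
```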

View File

@@ -218,7 +218,7 @@ def compute_cfg(uncond, cond, cfg_scale, t):
 def patched_sampling_function(model, x, timestep, uncond, cond, cond_scale, model_options=None, seed=None):
     global eps_record

-    if math.isclose(cond_scale, 1.0):
+    if math.isclose(cond_scale, 1.0) and not model_options.get("disable_cfg1_optimization", False):
         final_x0 = calc_cond_uncond_batch(model, cond, None, x, timestep, model_options)[0]

         if eps_record is not None:
@@ -480,6 +480,10 @@ def build_loaded(module, loader_name):

 def patch_all():
+    if ldm_patched.modules.model_management.directml_enabled:
+        ldm_patched.modules.model_management.lowvram_available = True
+        ldm_patched.modules.model_management.OOM_EXCEPTION = Exception
+
     patch_all_precision()
     patch_all_clip()
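The effect of the DirectML branch above, restated as a small sketch with explanatory comments (names taken from the diff):

```python
import ldm_patched.modules.model_management as mm

# What patch_all() now does on DirectML systems:
if mm.directml_enabled:
    mm.lowvram_available = True   # allow the module-by-module partial load path
    mm.OOM_EXCEPTION = Exception  # DirectML OOMs don't surface as torch.cuda.OutOfMemoryError
```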

View File

@@ -16,30 +16,12 @@ import ldm_patched.modules.samplers
 import ldm_patched.modules.sd
 import ldm_patched.modules.sd1_clip
 import ldm_patched.modules.clip_vision
+import ldm_patched.modules.model_management as model_management
 import ldm_patched.modules.ops as ops
-import contextlib
+from modules.ops import use_patched_ops

 from transformers import CLIPTextModel, CLIPTextConfig, modeling_utils, CLIPVisionConfig, CLIPVisionModelWithProjection

-@contextlib.contextmanager
-def use_patched_ops(operations):
-    op_names = ['Linear', 'Conv2d', 'Conv3d', 'GroupNorm', 'LayerNorm']
-    backups = {op_name: getattr(torch.nn, op_name) for op_name in op_names}
-
-    try:
-        for op_name in op_names:
-            setattr(torch.nn, op_name, getattr(operations, op_name))
-        yield
-    finally:
-        for op_name in op_names:
-            setattr(torch.nn, op_name, backups[op_name])
-        return
-

 def patched_encode_token_weights(self, token_weight_pairs):
     to_encode = list()
     max_token_len = 0

View File

@@ -99,6 +99,13 @@ def sample_hacked(model, noise, positive, negative, cfg, device, sampler, sigmas
     calculate_start_end_timesteps(model, negative)
     calculate_start_end_timesteps(model, positive)

+    if latent_image is not None:
+        latent_image = model.process_latent_in(latent_image)
+
+    if hasattr(model, 'extra_conds'):
+        positive = encode_model_conds(model.extra_conds, positive, noise, device, "positive", latent_image=latent_image, denoise_mask=denoise_mask)
+        negative = encode_model_conds(model.extra_conds, negative, noise, device, "negative", latent_image=latent_image, denoise_mask=denoise_mask)
+
     #make sure each cond area has an opposite one with the same area
     for c in positive:
         create_cond_with_same_area_if_none(negative, c)
@@ -111,13 +118,6 @@ def sample_hacked(model, noise, positive, negative, cfg, device, sampler, sigmas
     apply_empty_x_to_equal_area(list(filter(lambda c: c.get('control_apply_to_uncond', False) == True, positive)), negative, 'control', lambda cond_cnets, x: cond_cnets[x])
     apply_empty_x_to_equal_area(positive, negative, 'gligen', lambda cond_cnets, x: cond_cnets[x])

-    if latent_image is not None:
-        latent_image = model.process_latent_in(latent_image)
-
-    if hasattr(model, 'extra_conds'):
-        positive = encode_model_conds(model.extra_conds, positive, noise, device, "positive", latent_image=latent_image, denoise_mask=denoise_mask)
-        negative = encode_model_conds(model.extra_conds, negative, noise, device, "negative", latent_image=latent_image, denoise_mask=denoise_mask)
-
     extra_args = {"cond":positive, "uncond":negative, "cond_scale": cfg, "model_options": model_options, "seed":seed}

     if current_refiner is not None and hasattr(current_refiner.model, 'extra_conds'):
@@ -174,7 +174,7 @@ def calculate_sigmas_scheduler_hacked(model, scheduler_name, steps):
     elif scheduler_name == "sgm_uniform":
         sigmas = normal_scheduler(model, steps, sgm=True)
     elif scheduler_name == "turbo":
-        sigmas = SDTurboScheduler().get_sigmas(namedtuple('Patcher', ['model'])(model=model), steps)[0]
+        sigmas = SDTurboScheduler().get_sigmas(namedtuple('Patcher', ['model'])(model=model), steps=steps, denoise=1.0)[0]
     else:
         raise TypeError("error invalid scheduler")
     return sigmas

View File

@@ -245,19 +245,18 @@ See the guidelines [here](https://github.com/lllyasviel/Fooocus/discussions/1405
 Below is the minimal requirement for running Fooocus locally. If your device capability is lower than this spec, you may not be able to use Fooocus locally. (Please let us know, in any case, if your device capability is lower but Fooocus still works.)

 | Operating System  | GPU                          | Minimal GPU Memory           | Minimal System Memory     | [System Swap](troubleshoot.md) | Note                                                                        |
 |-------------------|------------------------------|------------------------------|---------------------------|--------------------------------|-----------------------------------------------------------------------------|
 | Windows/Linux     | Nvidia RTX 4XXX              | 4GB                          | 8GB                       | Required                       | fastest                                                                     |
 | Windows/Linux     | Nvidia RTX 3XXX              | 4GB                          | 8GB                       | Required                       | usually faster than RTX 2XXX                                                |
 | Windows/Linux     | Nvidia RTX 2XXX              | 4GB                          | 8GB                       | Required                       | usually faster than GTX 1XXX                                                |
 | Windows/Linux     | Nvidia GTX 1XXX              | 8GB (&ast; 6GB uncertain)    | 8GB                       | Required                       | only marginally faster than CPU                                             |
 | Windows/Linux     | Nvidia GTX 9XX               | 8GB                          | 8GB                       | Required                       | faster or slower than CPU                                                   |
 | Windows/Linux     | Nvidia GTX < 9XX             | Not supported                | /                         | /                              | /                                                                           |
-| Windows           | AMD GPU                      | 16GB                         | 8GB                       | Required                       | via DirectML                                                                |
-| Linux             | AMD GPU                      | 8GB                          | 8GB                       | Required                       | via ROCm                                                                    |
-| Windows           | &ast; AMD GPU ROCm (on hold) | 8GB (on hold)                | 8GB (on hold)             | Required (on hold)             | via ROCm (on hold)                                                          |
+| Windows           | AMD GPU                      | 8GB (updated 2023 Dec 30)    | 8GB                       | Required                       | via DirectML (&ast; ROCm is on hold), about 3x slower than Nvidia RTX 3XXX  |
+| Linux             | AMD GPU                      | 8GB                          | 8GB                       | Required                       | via ROCm, about 1.5x slower than Nvidia RTX 3XXX                            |
 | Mac               | M1/M2 MPS                    | Shared                       | Shared                    | Shared                         | about 9x slower than Nvidia RTX 3XXX                                        |
 | Windows/Linux/Mac | only use CPU                 | 0GB                          | 32GB                      | Required                       | about 17x slower than Nvidia RTX 3XXX                                       |

 &ast; AMD GPU ROCm (on hold): The AMD is still working on supporting ROCm on Windows.

View File

@@ -1,5 +1,9 @@
 **(2023 Dec 21) Hi all, the feature updating of Fooocus will be paused for about two or three weeks because we have some other workloads. See you soon and we will come back in mid or late Jan. However, you may still see updates if other collaborators are fixing bugs or solving problems.**

+# 2.1.857 (requested update)
+
+* Begin to support 8GB AMD GPU on Windows.
+
 # 2.1.854

 * Add a button to copy parameters to clipboard in log.