lllyasviel authored 2023-10-12 04:23:10 -07:00 (committed via GitHub)
parent 4c867c1b8b
commit e61aac34ca
147 changed files with 523 additions and 642 deletions

View File

@ -1,15 +1,15 @@
from comfy.options import enable_args_parsing
from fcbh.options import enable_args_parsing
enable_args_parsing(False)
import comfy.cli_args as comfy_cli
import fcbh.cli_args as fcbh_cli
comfy_cli.parser.add_argument("--share", action='store_true', help="Set whether to share on Gradio.")
fcbh_cli.parser.add_argument("--share", action='store_true', help="Set whether to share on Gradio.")
comfy_cli.args = comfy_cli.parser.parse_args()
comfy_cli.args.disable_cuda_malloc = True
comfy_cli.args.auto_launch = True
fcbh_cli.args = fcbh_cli.parser.parse_args()
fcbh_cli.args.disable_cuda_malloc = True
fcbh_cli.args.auto_launch = True
if getattr(comfy_cli.args, 'port', 8188) == 8188:
comfy_cli.args.port = None
if getattr(fcbh_cli.args, 'port', 8188) == 8188:
fcbh_cli.args.port = None
args = comfy_cli.args
args = fcbh_cli.args

View File

@ -1,102 +0,0 @@
class Example:
"""
An example node
Class methods
-------------
INPUT_TYPES (dict):
Tells the main program the input parameters of the node.
Attributes
----------
RETURN_TYPES (`tuple`):
The type of each element in the output tuple.
RETURN_NAMES (`tuple`):
Optional: The name of each output in the output tuple.
FUNCTION (`str`):
The name of the entry-point method. For example, if `FUNCTION = "execute"` then it will run Example().execute()
OUTPUT_NODE ([`bool`]):
If this node is an output node that outputs a result/image from the graph. The SaveImage node is an example.
The backend iterates on these output nodes and tries to execute all their parents if their parent graph is properly connected.
Assumed to be False if not present.
CATEGORY (`str`):
The category the node should appear in the UI.
execute(s) -> tuple || None:
The entry point method. The name of this method must be the same as the value of property `FUNCTION`.
For example, if `FUNCTION = "execute"` then this method's name must be `execute`, if `FUNCTION = "foo"` then it must be `foo`.
"""
def __init__(self):
pass
@classmethod
def INPUT_TYPES(s):
"""
Return a dictionary which contains config for all input fields.
Some types (string): "MODEL", "VAE", "CLIP", "CONDITIONING", "LATENT", "IMAGE", "INT", "STRING", "FLOAT".
Input types "INT", "STRING" or "FLOAT" are special values for fields on the node.
The type can be a list for selection.
Returns: `dict`:
- Key input_fields_group (`string`): Can be either required, hidden or optional. A node class must have property `required`
- Value input_fields (`dict`): Contains input fields config:
* Key field_name (`string`): Name of an entry-point method's argument
* Value field_config (`tuple`):
+ First value is a string indicating the type of the field, or a list for a selection.
+ Second value is a config for types "INT", "STRING" or "FLOAT".
"""
return {
"required": {
"image": ("IMAGE",),
"int_field": ("INT", {
"default": 0,
"min": 0, #Minimum value
"max": 4096, #Maximum value
"step": 64, #Slider's step
"display": "number" # Cosmetic only: display as "number" or "slider"
}),
"float_field": ("FLOAT", {
"default": 1.0,
"min": 0.0,
"max": 10.0,
"step": 0.01,
"round": 0.001, #The value represeting the precision to round to, will be set to the step value by default. Can be set to False to disable rounding.
"display": "number"}),
"print_to_screen": (["enable", "disable"],),
"string_field": ("STRING", {
"multiline": False, #True if you want the field to look like the one on the ClipTextEncode node
"default": "Hello World!"
}),
},
}
RETURN_TYPES = ("IMAGE",)
#RETURN_NAMES = ("image_output_name",)
FUNCTION = "test"
#OUTPUT_NODE = False
CATEGORY = "Example"
def test(self, image, string_field, int_field, float_field, print_to_screen):
if print_to_screen == "enable":
print(f"""Your input contains:
string_field aka input text: {string_field}
int_field: {int_field}
float_field: {float_field}
""")
#do some processing on the image, in this example I just invert it
image = 1.0 - image
return (image,)
# A dictionary that contains all nodes you want to export with their names
# NOTE: names should be globally unique
NODE_CLASS_MAPPINGS = {
"Example": Example
}
# A dictionary that contains the friendly/human-readable titles for the nodes
NODE_DISPLAY_NAME_MAPPINGS = {
"Example": "Example Node"
}

View File

@ -13,7 +13,7 @@ from ..ldm.modules.diffusionmodules.util import (
from ..ldm.modules.attention import SpatialTransformer
from ..ldm.modules.diffusionmodules.openaimodel import UNetModel, TimestepEmbedSequential, ResBlock, Downsample
from ..ldm.util import exists
import comfy.ops
import fcbh.ops
class ControlledUnetModel(UNetModel):
#implemented in the ldm unet
@ -54,7 +54,7 @@ class ControlNet(nn.Module):
adm_in_channels=None,
transformer_depth_middle=None,
device=None,
operations=comfy.ops,
operations=fcbh.ops,
):
super().__init__()
assert use_spatial_transformer == True, "use_spatial_transformer has to be true"

View File

@ -1,6 +1,6 @@
import argparse
import enum
import comfy.options
import fcbh.options
class EnumAction(argparse.Action):
"""
@ -37,10 +37,10 @@ parser.add_argument("--listen", type=str, default="127.0.0.1", metavar="IP", nar
parser.add_argument("--port", type=int, default=8188, help="Set the listen port.")
parser.add_argument("--enable-cors-header", type=str, default=None, metavar="ORIGIN", nargs="?", const="*", help="Enable CORS (Cross-Origin Resource Sharing) with optional origin or allow all with default '*'.")
parser.add_argument("--extra-model-paths-config", type=str, default=None, metavar="PATH", nargs='+', action='append', help="Load one or more extra_model_paths.yaml files.")
parser.add_argument("--output-directory", type=str, default=None, help="Set the ComfyUI output directory.")
parser.add_argument("--temp-directory", type=str, default=None, help="Set the ComfyUI temp directory (default is in the ComfyUI directory).")
parser.add_argument("--input-directory", type=str, default=None, help="Set the ComfyUI input directory.")
parser.add_argument("--auto-launch", action="store_true", help="Automatically launch ComfyUI in the default browser.")
parser.add_argument("--output-directory", type=str, default=None, help="Set the fcbh_backend output directory.")
parser.add_argument("--temp-directory", type=str, default=None, help="Set the fcbh_backend temp directory (default is in the fcbh_backend directory).")
parser.add_argument("--input-directory", type=str, default=None, help="Set the fcbh_backend input directory.")
parser.add_argument("--auto-launch", action="store_true", help="Automatically launch fcbh_backend in the default browser.")
parser.add_argument("--disable-auto-launch", action="store_true", help="Disable auto launching the browser.")
parser.add_argument("--cuda-device", type=int, default=None, metavar="DEVICE_ID", help="Set the id of the cuda device this instance will use.")
cm_group = parser.add_mutually_exclusive_group()
@ -86,7 +86,7 @@ vram_group.add_argument("--novram", action="store_true", help="When lowvram isn'
vram_group.add_argument("--cpu", action="store_true", help="To use the CPU for everything (slow).")
parser.add_argument("--disable-smart-memory", action="store_true", help="Force ComfyUI to agressively offload to regular ram instead of keeping models in vram when it can.")
parser.add_argument("--disable-smart-memory", action="store_true", help="Force fcbh_backend to agressively offload to regular ram instead of keeping models in vram when it can.")
parser.add_argument("--dont-print-server", action="store_true", help="Don't print server output.")
@ -95,7 +95,7 @@ parser.add_argument("--windows-standalone-build", action="store_true", help="Win
parser.add_argument("--disable-metadata", action="store_true", help="Disable saving prompt metadata in files.")
if comfy.options.args_parsing:
if fcbh.options.args_parsing:
args = parser.parse_args()
else:
args = parser.parse_args([])

View File

@ -4,25 +4,25 @@ import os
import torch
import contextlib
import comfy.ops
import comfy.model_patcher
import comfy.model_management
import fcbh.ops
import fcbh.model_patcher
import fcbh.model_management
class ClipVisionModel():
def __init__(self, json_config):
config = CLIPVisionConfig.from_json_file(json_config)
self.load_device = comfy.model_management.text_encoder_device()
offload_device = comfy.model_management.text_encoder_offload_device()
self.load_device = fcbh.model_management.text_encoder_device()
offload_device = fcbh.model_management.text_encoder_offload_device()
self.dtype = torch.float32
if comfy.model_management.should_use_fp16(self.load_device, prioritize_performance=False):
if fcbh.model_management.should_use_fp16(self.load_device, prioritize_performance=False):
self.dtype = torch.float16
with comfy.ops.use_comfy_ops(offload_device, self.dtype):
with fcbh.ops.use_fcbh_ops(offload_device, self.dtype):
with modeling_utils.no_init_weights():
self.model = CLIPVisionModelWithProjection(config)
self.model.to(self.dtype)
self.patcher = comfy.model_patcher.ModelPatcher(self.model, load_device=self.load_device, offload_device=offload_device)
self.patcher = fcbh.model_patcher.ModelPatcher(self.model, load_device=self.load_device, offload_device=offload_device)
self.processor = CLIPImageProcessor(crop_size=224,
do_center_crop=True,
do_convert_rgb=True,
@ -40,7 +40,7 @@ class ClipVisionModel():
img = torch.clip((255. * image), 0, 255).round().int()
img = list(map(lambda a: a, img))
inputs = self.processor(images=img, return_tensors="pt")
comfy.model_management.load_model_gpu(self.patcher)
fcbh.model_management.load_model_gpu(self.patcher)
pixel_values = inputs['pixel_values'].to(self.load_device)
if self.dtype != torch.float32:
@ -48,7 +48,7 @@ class ClipVisionModel():
else:
precision_scope = lambda a, b: contextlib.nullcontext(a)
with precision_scope(comfy.model_management.get_autocast_device(self.load_device), torch.float32):
with precision_scope(fcbh.model_management.get_autocast_device(self.load_device), torch.float32):
outputs = self.model(pixel_values=pixel_values, output_hidden_states=True)
for k in outputs:

View File

@ -1,13 +1,13 @@
import torch
import math
import os
import comfy.utils
import comfy.model_management
import comfy.model_detection
import comfy.model_patcher
import fcbh.utils
import fcbh.model_management
import fcbh.model_detection
import fcbh.model_patcher
import comfy.cldm.cldm
import comfy.t2i_adapter.adapter
import fcbh.cldm.cldm
import fcbh.t2i_adapter.adapter
def broadcast_image_to(tensor, target_batch_size, batched_number):
@ -37,7 +37,7 @@ class ControlBase:
self.timestep_range = None
if device is None:
device = comfy.model_management.get_torch_device()
device = fcbh.model_management.get_torch_device()
self.device = device
self.previous_controlnet = None
self.global_average_pooling = False
@ -130,7 +130,7 @@ class ControlNet(ControlBase):
def __init__(self, control_model, global_average_pooling=False, device=None):
super().__init__(device)
self.control_model = control_model
self.control_model_wrapped = comfy.model_patcher.ModelPatcher(self.control_model, load_device=comfy.model_management.get_torch_device(), offload_device=comfy.model_management.unet_offload_device())
self.control_model_wrapped = fcbh.model_patcher.ModelPatcher(self.control_model, load_device=fcbh.model_management.get_torch_device(), offload_device=fcbh.model_management.unet_offload_device())
self.global_average_pooling = global_average_pooling
def get_control(self, x_noisy, t, cond, batched_number):
@ -150,7 +150,7 @@ class ControlNet(ControlBase):
if self.cond_hint is not None:
del self.cond_hint
self.cond_hint = None
self.cond_hint = comfy.utils.common_upscale(self.cond_hint_original, x_noisy.shape[3] * 8, x_noisy.shape[2] * 8, 'nearest-exact', "center").to(self.control_model.dtype).to(self.device)
self.cond_hint = fcbh.utils.common_upscale(self.cond_hint_original, x_noisy.shape[3] * 8, x_noisy.shape[2] * 8, 'nearest-exact', "center").to(self.control_model.dtype).to(self.device)
if x_noisy.shape[0] != self.cond_hint.shape[0]:
self.cond_hint = broadcast_image_to(self.cond_hint, x_noisy.shape[0], batched_number)
@ -249,24 +249,24 @@ class ControlLora(ControlNet):
controlnet_config.pop("out_channels")
controlnet_config["hint_channels"] = self.control_weights["input_hint_block.0.weight"].shape[1]
controlnet_config["operations"] = ControlLoraOps()
self.control_model = comfy.cldm.cldm.ControlNet(**controlnet_config)
self.control_model = fcbh.cldm.cldm.ControlNet(**controlnet_config)
dtype = model.get_dtype()
self.control_model.to(dtype)
self.control_model.to(comfy.model_management.get_torch_device())
self.control_model.to(fcbh.model_management.get_torch_device())
diffusion_model = model.diffusion_model
sd = diffusion_model.state_dict()
cm = self.control_model.state_dict()
for k in sd:
weight = comfy.model_management.resolve_lowvram_weight(sd[k], diffusion_model, k)
weight = fcbh.model_management.resolve_lowvram_weight(sd[k], diffusion_model, k)
try:
comfy.utils.set_attr(self.control_model, k, weight)
fcbh.utils.set_attr(self.control_model, k, weight)
except:
pass
for k in self.control_weights:
if k not in {"lora_controlnet"}:
comfy.utils.set_attr(self.control_model, k, self.control_weights[k].to(dtype).to(comfy.model_management.get_torch_device()))
fcbh.utils.set_attr(self.control_model, k, self.control_weights[k].to(dtype).to(fcbh.model_management.get_torch_device()))
def copy(self):
c = ControlLora(self.control_weights, global_average_pooling=self.global_average_pooling)
@ -283,18 +283,18 @@ class ControlLora(ControlNet):
return out
def inference_memory_requirements(self, dtype):
return comfy.utils.calculate_parameters(self.control_weights) * comfy.model_management.dtype_size(dtype) + ControlBase.inference_memory_requirements(self, dtype)
return fcbh.utils.calculate_parameters(self.control_weights) * fcbh.model_management.dtype_size(dtype) + ControlBase.inference_memory_requirements(self, dtype)
def load_controlnet(ckpt_path, model=None):
controlnet_data = comfy.utils.load_torch_file(ckpt_path, safe_load=True)
controlnet_data = fcbh.utils.load_torch_file(ckpt_path, safe_load=True)
if "lora_controlnet" in controlnet_data:
return ControlLora(controlnet_data)
controlnet_config = None
if "controlnet_cond_embedding.conv_in.weight" in controlnet_data: #diffusers format
use_fp16 = comfy.model_management.should_use_fp16()
controlnet_config = comfy.model_detection.unet_config_from_diffusers_unet(controlnet_data, use_fp16)
diffusers_keys = comfy.utils.unet_to_diffusers(controlnet_config)
use_fp16 = fcbh.model_management.should_use_fp16()
controlnet_config = fcbh.model_detection.unet_config_from_diffusers_unet(controlnet_data, use_fp16)
diffusers_keys = fcbh.utils.unet_to_diffusers(controlnet_config)
diffusers_keys["controlnet_mid_block.weight"] = "middle_block_out.0.weight"
diffusers_keys["controlnet_mid_block.bias"] = "middle_block_out.0.bias"
@ -353,16 +353,16 @@ def load_controlnet(ckpt_path, model=None):
return net
if controlnet_config is None:
use_fp16 = comfy.model_management.should_use_fp16()
controlnet_config = comfy.model_detection.model_config_from_unet(controlnet_data, prefix, use_fp16, True).unet_config
use_fp16 = fcbh.model_management.should_use_fp16()
controlnet_config = fcbh.model_detection.model_config_from_unet(controlnet_data, prefix, use_fp16, True).unet_config
controlnet_config.pop("out_channels")
controlnet_config["hint_channels"] = controlnet_data["{}input_hint_block.0.weight".format(prefix)].shape[1]
control_model = comfy.cldm.cldm.ControlNet(**controlnet_config)
control_model = fcbh.cldm.cldm.ControlNet(**controlnet_config)
if pth:
if 'difference' in controlnet_data:
if model is not None:
comfy.model_management.load_models_gpu([model])
fcbh.model_management.load_models_gpu([model])
model_sd = model.model_state_dict()
for x in controlnet_data:
c_m = "control_model."
@ -425,7 +425,7 @@ class T2IAdapter(ControlBase):
self.control_input = None
self.cond_hint = None
width, height = self.scale_image_to(x_noisy.shape[3] * 8, x_noisy.shape[2] * 8)
self.cond_hint = comfy.utils.common_upscale(self.cond_hint_original, width, height, 'nearest-exact', "center").float().to(self.device)
self.cond_hint = fcbh.utils.common_upscale(self.cond_hint_original, width, height, 'nearest-exact', "center").float().to(self.device)
if self.channels_in == 1 and self.cond_hint.shape[1] > 1:
self.cond_hint = torch.mean(self.cond_hint, 1, keepdim=True)
if x_noisy.shape[0] != self.cond_hint.shape[0]:
@ -458,12 +458,12 @@ def load_t2i_adapter(t2i_data):
prefix_replace["adapter.body.{}.resnets.{}.".format(i, j)] = "body.{}.".format(i * 2 + j)
prefix_replace["adapter.body.{}.".format(i, j)] = "body.{}.".format(i * 2)
prefix_replace["adapter."] = ""
t2i_data = comfy.utils.state_dict_prefix_replace(t2i_data, prefix_replace)
t2i_data = fcbh.utils.state_dict_prefix_replace(t2i_data, prefix_replace)
keys = t2i_data.keys()
if "body.0.in_conv.weight" in keys:
cin = t2i_data['body.0.in_conv.weight'].shape[1]
model_ad = comfy.t2i_adapter.adapter.Adapter_light(cin=cin, channels=[320, 640, 1280, 1280], nums_rb=4)
model_ad = fcbh.t2i_adapter.adapter.Adapter_light(cin=cin, channels=[320, 640, 1280, 1280], nums_rb=4)
elif 'conv_in.weight' in keys:
cin = t2i_data['conv_in.weight'].shape[1]
channel = t2i_data['conv_in.weight'].shape[0]
@ -475,7 +475,7 @@ def load_t2i_adapter(t2i_data):
xl = False
if cin == 256 or cin == 768:
xl = True
model_ad = comfy.t2i_adapter.adapter.Adapter(cin=cin, channels=[channel, channel*2, channel*4, channel*4][:4], nums_rb=2, ksize=ksize, sk=True, use_conv=use_conv, xl=xl)
model_ad = fcbh.t2i_adapter.adapter.Adapter(cin=cin, channels=[channel, channel*2, channel*4, channel*4][:4], nums_rb=2, ksize=ksize, sk=True, use_conv=use_conv, xl=xl)
else:
return None
missing, unexpected = model_ad.load_state_dict(t2i_data)

View File

@ -1,7 +1,7 @@
import json
import os
import comfy.sd
import fcbh.sd
def first_file(path, filenames):
for f in filenames:
@ -23,14 +23,14 @@ def load_diffusers(model_path, output_vae=True, output_clip=True, embedding_dire
if text_encoder2_path is not None:
text_encoder_paths.append(text_encoder2_path)
unet = comfy.sd.load_unet(unet_path)
unet = fcbh.sd.load_unet(unet_path)
clip = None
if output_clip:
clip = comfy.sd.load_clip(text_encoder_paths, embedding_directory=embedding_directory)
clip = fcbh.sd.load_clip(text_encoder_paths, embedding_directory=embedding_directory)
vae = None
if output_vae:
vae = comfy.sd.VAE(ckpt_path=vae_path)
vae = fcbh.sd.VAE(ckpt_path=vae_path)
return (unet, clip, vae)

View File

@ -3,11 +3,11 @@ import torch
import torch.nn.functional as F
from contextlib import contextmanager
from comfy.ldm.modules.diffusionmodules.model import Encoder, Decoder
from comfy.ldm.modules.distributions.distributions import DiagonalGaussianDistribution
from fcbh.ldm.modules.diffusionmodules.model import Encoder, Decoder
from fcbh.ldm.modules.distributions.distributions import DiagonalGaussianDistribution
from comfy.ldm.util import instantiate_from_config
from comfy.ldm.modules.ema import LitEma
from fcbh.ldm.util import instantiate_from_config
from fcbh.ldm.modules.ema import LitEma
# class AutoencoderKL(pl.LightningModule):
class AutoencoderKL(torch.nn.Module):

View File

@ -4,7 +4,7 @@ import torch
import numpy as np
from tqdm import tqdm
from comfy.ldm.modules.diffusionmodules.util import make_ddim_sampling_parameters, make_ddim_timesteps, noise_like, extract_into_tensor
from fcbh.ldm.modules.diffusionmodules.util import make_ddim_sampling_parameters, make_ddim_timesteps, noise_like, extract_into_tensor
class DDIMSampler(object):

View File

@ -9,14 +9,14 @@ from typing import Optional, Any
from .diffusionmodules.util import checkpoint
from .sub_quadratic_attention import efficient_dot_product_attention
from comfy import model_management
from fcbh import model_management
if model_management.xformers_enabled():
import xformers
import xformers.ops
from comfy.cli_args import args
import comfy.ops
from fcbh.cli_args import args
import fcbh.ops
# CrossAttn precision handling
if args.dont_upcast_attention:
@ -53,7 +53,7 @@ def init_(tensor):
# feedforward
class GEGLU(nn.Module):
def __init__(self, dim_in, dim_out, dtype=None, device=None, operations=comfy.ops):
def __init__(self, dim_in, dim_out, dtype=None, device=None, operations=fcbh.ops):
super().__init__()
self.proj = operations.Linear(dim_in, dim_out * 2, dtype=dtype, device=device)
@ -63,7 +63,7 @@ class GEGLU(nn.Module):
class FeedForward(nn.Module):
def __init__(self, dim, dim_out=None, mult=4, glu=False, dropout=0., dtype=None, device=None, operations=comfy.ops):
def __init__(self, dim, dim_out=None, mult=4, glu=False, dropout=0., dtype=None, device=None, operations=fcbh.ops):
super().__init__()
inner_dim = int(dim * mult)
dim_out = default(dim_out, dim)
@ -310,7 +310,7 @@ else:
optimized_attention = attention_sub_quad
class CrossAttention(nn.Module):
def __init__(self, query_dim, context_dim=None, heads=8, dim_head=64, dropout=0., dtype=None, device=None, operations=comfy.ops):
def __init__(self, query_dim, context_dim=None, heads=8, dim_head=64, dropout=0., dtype=None, device=None, operations=fcbh.ops):
super().__init__()
inner_dim = dim_head * heads
context_dim = default(context_dim, query_dim)
@ -340,7 +340,7 @@ class CrossAttention(nn.Module):
class BasicTransformerBlock(nn.Module):
def __init__(self, dim, n_heads, d_head, dropout=0., context_dim=None, gated_ff=True, checkpoint=True,
disable_self_attn=False, dtype=None, device=None, operations=comfy.ops):
disable_self_attn=False, dtype=None, device=None, operations=fcbh.ops):
super().__init__()
self.disable_self_attn = disable_self_attn
self.attn1 = CrossAttention(query_dim=dim, heads=n_heads, dim_head=d_head, dropout=dropout,
@ -482,7 +482,7 @@ class SpatialTransformer(nn.Module):
def __init__(self, in_channels, n_heads, d_head,
depth=1, dropout=0., context_dim=None,
disable_self_attn=False, use_linear=False,
use_checkpoint=True, dtype=None, device=None, operations=comfy.ops):
use_checkpoint=True, dtype=None, device=None, operations=fcbh.ops):
super().__init__()
if exists(context_dim) and not isinstance(context_dim, list):
context_dim = [context_dim] * depth

View File

@ -6,8 +6,8 @@ import numpy as np
from einops import rearrange
from typing import Optional, Any
from comfy import model_management
import comfy.ops
from fcbh import model_management
import fcbh.ops
if model_management.xformers_enabled_vae():
import xformers
@ -48,7 +48,7 @@ class Upsample(nn.Module):
super().__init__()
self.with_conv = with_conv
if self.with_conv:
self.conv = comfy.ops.Conv2d(in_channels,
self.conv = fcbh.ops.Conv2d(in_channels,
in_channels,
kernel_size=3,
stride=1,
@ -78,7 +78,7 @@ class Downsample(nn.Module):
self.with_conv = with_conv
if self.with_conv:
# no asymmetric padding in torch conv, must do it ourselves
self.conv = comfy.ops.Conv2d(in_channels,
self.conv = fcbh.ops.Conv2d(in_channels,
in_channels,
kernel_size=3,
stride=2,
@ -105,30 +105,30 @@ class ResnetBlock(nn.Module):
self.swish = torch.nn.SiLU(inplace=True)
self.norm1 = Normalize(in_channels)
self.conv1 = comfy.ops.Conv2d(in_channels,
self.conv1 = fcbh.ops.Conv2d(in_channels,
out_channels,
kernel_size=3,
stride=1,
padding=1)
if temb_channels > 0:
self.temb_proj = comfy.ops.Linear(temb_channels,
self.temb_proj = fcbh.ops.Linear(temb_channels,
out_channels)
self.norm2 = Normalize(out_channels)
self.dropout = torch.nn.Dropout(dropout, inplace=True)
self.conv2 = comfy.ops.Conv2d(out_channels,
self.conv2 = fcbh.ops.Conv2d(out_channels,
out_channels,
kernel_size=3,
stride=1,
padding=1)
if self.in_channels != self.out_channels:
if self.use_conv_shortcut:
self.conv_shortcut = comfy.ops.Conv2d(in_channels,
self.conv_shortcut = fcbh.ops.Conv2d(in_channels,
out_channels,
kernel_size=3,
stride=1,
padding=1)
else:
self.nin_shortcut = comfy.ops.Conv2d(in_channels,
self.nin_shortcut = fcbh.ops.Conv2d(in_channels,
out_channels,
kernel_size=1,
stride=1,
@ -199,22 +199,22 @@ class AttnBlock(nn.Module):
self.in_channels = in_channels
self.norm = Normalize(in_channels)
self.q = comfy.ops.Conv2d(in_channels,
self.q = fcbh.ops.Conv2d(in_channels,
in_channels,
kernel_size=1,
stride=1,
padding=0)
self.k = comfy.ops.Conv2d(in_channels,
self.k = fcbh.ops.Conv2d(in_channels,
in_channels,
kernel_size=1,
stride=1,
padding=0)
self.v = comfy.ops.Conv2d(in_channels,
self.v = fcbh.ops.Conv2d(in_channels,
in_channels,
kernel_size=1,
stride=1,
padding=0)
self.proj_out = comfy.ops.Conv2d(in_channels,
self.proj_out = fcbh.ops.Conv2d(in_channels,
in_channels,
kernel_size=1,
stride=1,
@ -254,22 +254,22 @@ class MemoryEfficientAttnBlock(nn.Module):
self.in_channels = in_channels
self.norm = Normalize(in_channels)
self.q = comfy.ops.Conv2d(in_channels,
self.q = fcbh.ops.Conv2d(in_channels,
in_channels,
kernel_size=1,
stride=1,
padding=0)
self.k = comfy.ops.Conv2d(in_channels,
self.k = fcbh.ops.Conv2d(in_channels,
in_channels,
kernel_size=1,
stride=1,
padding=0)
self.v = comfy.ops.Conv2d(in_channels,
self.v = fcbh.ops.Conv2d(in_channels,
in_channels,
kernel_size=1,
stride=1,
padding=0)
self.proj_out = comfy.ops.Conv2d(in_channels,
self.proj_out = fcbh.ops.Conv2d(in_channels,
in_channels,
kernel_size=1,
stride=1,
@ -305,22 +305,22 @@ class MemoryEfficientAttnBlockPytorch(nn.Module):
self.in_channels = in_channels
self.norm = Normalize(in_channels)
self.q = comfy.ops.Conv2d(in_channels,
self.q = fcbh.ops.Conv2d(in_channels,
in_channels,
kernel_size=1,
stride=1,
padding=0)
self.k = comfy.ops.Conv2d(in_channels,
self.k = fcbh.ops.Conv2d(in_channels,
in_channels,
kernel_size=1,
stride=1,
padding=0)
self.v = comfy.ops.Conv2d(in_channels,
self.v = fcbh.ops.Conv2d(in_channels,
in_channels,
kernel_size=1,
stride=1,
padding=0)
self.proj_out = comfy.ops.Conv2d(in_channels,
self.proj_out = fcbh.ops.Conv2d(in_channels,
in_channels,
kernel_size=1,
stride=1,
@ -390,14 +390,14 @@ class Model(nn.Module):
# timestep embedding
self.temb = nn.Module()
self.temb.dense = nn.ModuleList([
comfy.ops.Linear(self.ch,
fcbh.ops.Linear(self.ch,
self.temb_ch),
comfy.ops.Linear(self.temb_ch,
fcbh.ops.Linear(self.temb_ch,
self.temb_ch),
])
# downsampling
self.conv_in = comfy.ops.Conv2d(in_channels,
self.conv_in = fcbh.ops.Conv2d(in_channels,
self.ch,
kernel_size=3,
stride=1,
@ -466,7 +466,7 @@ class Model(nn.Module):
# end
self.norm_out = Normalize(block_in)
self.conv_out = comfy.ops.Conv2d(block_in,
self.conv_out = fcbh.ops.Conv2d(block_in,
out_ch,
kernel_size=3,
stride=1,
@ -539,7 +539,7 @@ class Encoder(nn.Module):
self.in_channels = in_channels
# downsampling
self.conv_in = comfy.ops.Conv2d(in_channels,
self.conv_in = fcbh.ops.Conv2d(in_channels,
self.ch,
kernel_size=3,
stride=1,
@ -584,7 +584,7 @@ class Encoder(nn.Module):
# end
self.norm_out = Normalize(block_in)
self.conv_out = comfy.ops.Conv2d(block_in,
self.conv_out = fcbh.ops.Conv2d(block_in,
2*z_channels if double_z else z_channels,
kernel_size=3,
stride=1,
@ -640,7 +640,7 @@ class Decoder(nn.Module):
self.z_shape, np.prod(self.z_shape)))
# z to block_in
self.conv_in = comfy.ops.Conv2d(z_channels,
self.conv_in = fcbh.ops.Conv2d(z_channels,
block_in,
kernel_size=3,
stride=1,
@ -682,7 +682,7 @@ class Decoder(nn.Module):
# end
self.norm_out = Normalize(block_in)
self.conv_out = comfy.ops.Conv2d(block_in,
self.conv_out = fcbh.ops.Conv2d(block_in,
out_ch,
kernel_size=3,
stride=1,

View File

@ -14,8 +14,8 @@ from .util import (
timestep_embedding,
)
from ..attention import SpatialTransformer
from comfy.ldm.util import exists
import comfy.ops
from fcbh.ldm.util import exists
import fcbh.ops
class TimestepBlock(nn.Module):
"""
@ -70,7 +70,7 @@ class Upsample(nn.Module):
upsampling occurs in the inner-two dimensions.
"""
def __init__(self, channels, use_conv, dims=2, out_channels=None, padding=1, dtype=None, device=None, operations=comfy.ops):
def __init__(self, channels, use_conv, dims=2, out_channels=None, padding=1, dtype=None, device=None, operations=fcbh.ops):
super().__init__()
self.channels = channels
self.out_channels = out_channels or channels
@ -106,7 +106,7 @@ class Downsample(nn.Module):
downsampling occurs in the inner-two dimensions.
"""
def __init__(self, channels, use_conv, dims=2, out_channels=None, padding=1, dtype=None, device=None, operations=comfy.ops):
def __init__(self, channels, use_conv, dims=2, out_channels=None, padding=1, dtype=None, device=None, operations=fcbh.ops):
super().__init__()
self.channels = channels
self.out_channels = out_channels or channels
@ -156,7 +156,7 @@ class ResBlock(TimestepBlock):
down=False,
dtype=None,
device=None,
operations=comfy.ops
operations=fcbh.ops
):
super().__init__()
self.channels = channels
@ -316,7 +316,7 @@ class UNetModel(nn.Module):
adm_in_channels=None,
transformer_depth_middle=None,
device=None,
operations=comfy.ops,
operations=fcbh.ops,
):
super().__init__()
assert use_spatial_transformer == True, "use_spatial_transformer has to be true"

View File

@ -4,7 +4,7 @@ import numpy as np
from functools import partial
from .util import extract_into_tensor, make_beta_schedule
from comfy.ldm.util import default
from fcbh.ldm.util import default
class AbstractLowScaleModel(nn.Module):

View File

@ -15,8 +15,8 @@ import torch.nn as nn
import numpy as np
from einops import repeat
from comfy.ldm.util import instantiate_from_config
import comfy.ops
from fcbh.ldm.util import instantiate_from_config
import fcbh.ops
def make_beta_schedule(schedule, n_timestep, linear_start=1e-4, linear_end=2e-2, cosine_s=8e-3):
if schedule == "linear":
@ -233,7 +233,7 @@ def conv_nd(dims, *args, **kwargs):
if dims == 1:
return nn.Conv1d(*args, **kwargs)
elif dims == 2:
return comfy.ops.Conv2d(*args, **kwargs)
return fcbh.ops.Conv2d(*args, **kwargs)
elif dims == 3:
return nn.Conv3d(*args, **kwargs)
raise ValueError(f"unsupported dimensions: {dims}")
@ -243,7 +243,7 @@ def linear(*args, **kwargs):
"""
Create a linear module.
"""
return comfy.ops.Linear(*args, **kwargs)
return fcbh.ops.Linear(*args, **kwargs)
def avg_pool_nd(dims, *args, **kwargs):

View File

@ -24,7 +24,7 @@ except ImportError:
from torch import Tensor
from typing import List
from comfy import model_management
from fcbh import model_management
def dynamic_slice(
x: Tensor,

View File

@ -1,4 +1,4 @@
import comfy.utils
import fcbh.utils
LORA_CLIP_MAP = {
"mlp.fc1": "mlp_fc1",
@ -183,7 +183,7 @@ def model_lora_keys_unet(model, key_map={}):
key_lora = k[len("diffusion_model."):-len(".weight")].replace(".", "_")
key_map["lora_unet_{}".format(key_lora)] = k
diffusers_keys = comfy.utils.unet_to_diffusers(model.model_config.unet_config)
diffusers_keys = fcbh.utils.unet_to_diffusers(model.model_config.unet_config)
for k in diffusers_keys:
if k.endswith(".weight"):
unet_key = "diffusion_model.{}".format(diffusers_keys[k])

View File

@ -1,9 +1,9 @@
import torch
from comfy.ldm.modules.diffusionmodules.openaimodel import UNetModel
from comfy.ldm.modules.encoders.noise_aug_modules import CLIPEmbeddingNoiseAugmentation
from comfy.ldm.modules.diffusionmodules.util import make_beta_schedule
from comfy.ldm.modules.diffusionmodules.openaimodel import Timestep
import comfy.model_management
from fcbh.ldm.modules.diffusionmodules.openaimodel import UNetModel
from fcbh.ldm.modules.encoders.noise_aug_modules import CLIPEmbeddingNoiseAugmentation
from fcbh.ldm.modules.diffusionmodules.util import make_beta_schedule
from fcbh.ldm.modules.diffusionmodules.openaimodel import Timestep
import fcbh.model_management
import numpy as np
from enum import Enum
from . import utils
@ -98,7 +98,7 @@ class BaseModel(torch.nn.Module):
unet_sd = self.diffusion_model.state_dict()
unet_state_dict = {}
for k in unet_sd:
unet_state_dict[k] = comfy.model_management.resolve_lowvram_weight(unet_sd[k], self.diffusion_model, k)
unet_state_dict[k] = fcbh.model_management.resolve_lowvram_weight(unet_sd[k], self.diffusion_model, k)
unet_state_dict = self.model_config.process_unet_state_dict_for_saving(unet_state_dict)
vae_state_dict = self.model_config.process_vae_state_dict_for_saving(vae_state_dict)

View File

@ -1,5 +1,5 @@
import comfy.supported_models
import comfy.supported_models_base
import fcbh.supported_models
import fcbh.supported_models_base
def count_blocks(state_dict_keys, prefix_string):
count = 0
@ -109,7 +109,7 @@ def detect_unet_config(state_dict, key_prefix, use_fp16):
return unet_config
def model_config_from_unet_config(unet_config):
for model_config in comfy.supported_models.models:
for model_config in fcbh.supported_models.models:
if model_config.matches(unet_config):
return model_config(unet_config)
@ -120,7 +120,7 @@ def model_config_from_unet(state_dict, unet_key_prefix, use_fp16, use_base_if_no
unet_config = detect_unet_config(state_dict, unet_key_prefix, use_fp16)
model_config = model_config_from_unet_config(unet_config)
if model_config is None and use_base_if_no_match:
return comfy.supported_models_base.BASE(unet_config)
return fcbh.supported_models_base.BASE(unet_config)
else:
return model_config

View File

@ -1,7 +1,7 @@
import psutil
from enum import Enum
from comfy.cli_args import args
import comfy.utils
from fcbh.cli_args import args
import fcbh.utils
import torch
import sys
@ -681,7 +681,7 @@ def soft_empty_cache(force=False):
def resolve_lowvram_weight(weight, model, key):
if weight.device == torch.device("meta"): #lowvram NOTE: this depends on the inner working of the accelerate library so it might break.
key_split = key.split('.') # I have no idea why they don't just leave the weight there instead of using the meta device.
op = comfy.utils.get_attr(model, '.'.join(key_split[:-1]))
op = fcbh.utils.get_attr(model, '.'.join(key_split[:-1]))
weight = op._hf_hook.weights_map[key_split[-1]]
return weight

View File

@ -2,8 +2,8 @@ import torch
import copy
import inspect
import comfy.utils
import comfy.model_management
import fcbh.utils
import fcbh.model_management
class ModelPatcher:
def __init__(self, model, load_device, offload_device, size=0, current_device=None):
@ -162,11 +162,11 @@ class ModelPatcher:
self.backup[key] = weight.to(self.offload_device)
if device_to is not None:
temp_weight = comfy.model_management.cast_to_device(weight, device_to, torch.float32, copy=True)
temp_weight = fcbh.model_management.cast_to_device(weight, device_to, torch.float32, copy=True)
else:
temp_weight = weight.to(torch.float32, copy=True)
out_weight = self.calculate_weight(self.patches[key], temp_weight, key).to(weight.dtype)
comfy.utils.set_attr(self.model, key, out_weight)
fcbh.utils.set_attr(self.model, key, out_weight)
del temp_weight
if device_to is not None:
@ -193,15 +193,15 @@ class ModelPatcher:
if w1.shape != weight.shape:
print("WARNING SHAPE MISMATCH {} WEIGHT NOT MERGED {} != {}".format(key, w1.shape, weight.shape))
else:
weight += alpha * comfy.model_management.cast_to_device(w1, weight.device, weight.dtype)
weight += alpha * fcbh.model_management.cast_to_device(w1, weight.device, weight.dtype)
elif len(v) == 4: #lora/locon
mat1 = comfy.model_management.cast_to_device(v[0], weight.device, torch.float32)
mat2 = comfy.model_management.cast_to_device(v[1], weight.device, torch.float32)
mat1 = fcbh.model_management.cast_to_device(v[0], weight.device, torch.float32)
mat2 = fcbh.model_management.cast_to_device(v[1], weight.device, torch.float32)
if v[2] is not None:
alpha *= v[2] / mat2.shape[0]
if v[3] is not None:
#locon mid weights, hopefully the math is fine because I didn't properly test it
mat3 = comfy.model_management.cast_to_device(v[3], weight.device, torch.float32)
mat3 = fcbh.model_management.cast_to_device(v[3], weight.device, torch.float32)
final_shape = [mat2.shape[1], mat2.shape[0], mat3.shape[2], mat3.shape[3]]
mat2 = torch.mm(mat2.transpose(0, 1).flatten(start_dim=1), mat3.transpose(0, 1).flatten(start_dim=1)).reshape(final_shape).transpose(0, 1)
try:
@ -220,23 +220,23 @@ class ModelPatcher:
if w1 is None:
dim = w1_b.shape[0]
w1 = torch.mm(comfy.model_management.cast_to_device(w1_a, weight.device, torch.float32),
comfy.model_management.cast_to_device(w1_b, weight.device, torch.float32))
w1 = torch.mm(fcbh.model_management.cast_to_device(w1_a, weight.device, torch.float32),
fcbh.model_management.cast_to_device(w1_b, weight.device, torch.float32))
else:
w1 = comfy.model_management.cast_to_device(w1, weight.device, torch.float32)
w1 = fcbh.model_management.cast_to_device(w1, weight.device, torch.float32)
if w2 is None:
dim = w2_b.shape[0]
if t2 is None:
w2 = torch.mm(comfy.model_management.cast_to_device(w2_a, weight.device, torch.float32),
comfy.model_management.cast_to_device(w2_b, weight.device, torch.float32))
w2 = torch.mm(fcbh.model_management.cast_to_device(w2_a, weight.device, torch.float32),
fcbh.model_management.cast_to_device(w2_b, weight.device, torch.float32))
else:
w2 = torch.einsum('i j k l, j r, i p -> p r k l',
comfy.model_management.cast_to_device(t2, weight.device, torch.float32),
comfy.model_management.cast_to_device(w2_b, weight.device, torch.float32),
comfy.model_management.cast_to_device(w2_a, weight.device, torch.float32))
fcbh.model_management.cast_to_device(t2, weight.device, torch.float32),
fcbh.model_management.cast_to_device(w2_b, weight.device, torch.float32),
fcbh.model_management.cast_to_device(w2_a, weight.device, torch.float32))
else:
w2 = comfy.model_management.cast_to_device(w2, weight.device, torch.float32)
w2 = fcbh.model_management.cast_to_device(w2, weight.device, torch.float32)
if len(w2.shape) == 4:
w1 = w1.unsqueeze(2).unsqueeze(2)
@ -258,19 +258,19 @@ class ModelPatcher:
t1 = v[5]
t2 = v[6]
m1 = torch.einsum('i j k l, j r, i p -> p r k l',
comfy.model_management.cast_to_device(t1, weight.device, torch.float32),
comfy.model_management.cast_to_device(w1b, weight.device, torch.float32),
comfy.model_management.cast_to_device(w1a, weight.device, torch.float32))
fcbh.model_management.cast_to_device(t1, weight.device, torch.float32),
fcbh.model_management.cast_to_device(w1b, weight.device, torch.float32),
fcbh.model_management.cast_to_device(w1a, weight.device, torch.float32))
m2 = torch.einsum('i j k l, j r, i p -> p r k l',
comfy.model_management.cast_to_device(t2, weight.device, torch.float32),
comfy.model_management.cast_to_device(w2b, weight.device, torch.float32),
comfy.model_management.cast_to_device(w2a, weight.device, torch.float32))
fcbh.model_management.cast_to_device(t2, weight.device, torch.float32),
fcbh.model_management.cast_to_device(w2b, weight.device, torch.float32),
fcbh.model_management.cast_to_device(w2a, weight.device, torch.float32))
else:
m1 = torch.mm(comfy.model_management.cast_to_device(w1a, weight.device, torch.float32),
comfy.model_management.cast_to_device(w1b, weight.device, torch.float32))
m2 = torch.mm(comfy.model_management.cast_to_device(w2a, weight.device, torch.float32),
comfy.model_management.cast_to_device(w2b, weight.device, torch.float32))
m1 = torch.mm(fcbh.model_management.cast_to_device(w1a, weight.device, torch.float32),
fcbh.model_management.cast_to_device(w1b, weight.device, torch.float32))
m2 = torch.mm(fcbh.model_management.cast_to_device(w2a, weight.device, torch.float32),
fcbh.model_management.cast_to_device(w2b, weight.device, torch.float32))
try:
weight += (alpha * m1 * m2).reshape(weight.shape).type(weight.dtype)
@ -283,7 +283,7 @@ class ModelPatcher:
keys = list(self.backup.keys())
for k in keys:
comfy.utils.set_attr(self.model, k, self.backup[k])
fcbh.utils.set_attr(self.model, k, self.backup[k])
self.backup = {}

View File

@ -28,7 +28,7 @@ def conv_nd(dims, *args, **kwargs):
raise ValueError(f"unsupported dimensions: {dims}")
@contextmanager
def use_comfy_ops(device=None, dtype=None): # Kind of an ugly hack but I can't think of a better way
def use_fcbh_ops(device=None, dtype=None): # Kind of an ugly hack but I can't think of a better way
old_torch_nn_linear = torch.nn.Linear
force_device = device
force_dtype = dtype

View File

@ -1,7 +1,7 @@
import torch
import comfy.model_management
import comfy.samplers
import comfy.utils
import fcbh.model_management
import fcbh.samplers
import fcbh.utils
import math
import numpy as np
@ -29,7 +29,7 @@ def prepare_mask(noise_mask, shape, device):
noise_mask = torch.nn.functional.interpolate(noise_mask.reshape((-1, 1, noise_mask.shape[-2], noise_mask.shape[-1])), size=(shape[2], shape[3]), mode="bilinear")
noise_mask = noise_mask.round()
noise_mask = torch.cat([noise_mask] * shape[1], dim=1)
noise_mask = comfy.utils.repeat_to_batch_size(noise_mask, shape[0])
noise_mask = fcbh.utils.repeat_to_batch_size(noise_mask, shape[0])
noise_mask = noise_mask.to(device)
return noise_mask
@ -37,7 +37,7 @@ def broadcast_cond(cond, batch, device):
"""broadcasts conditioning to the batch size"""
copy = []
for p in cond:
t = comfy.utils.repeat_to_batch_size(p[0], batch)
t = fcbh.utils.repeat_to_batch_size(p[0], batch)
t = t.to(device)
copy += [[t] + p[1:]]
return copy
@ -78,7 +78,7 @@ def prepare_sampling(model, noise_shape, positive, negative, noise_mask):
real_model = None
models, inference_memory = get_additional_models(positive, negative, model.model_dtype())
comfy.model_management.load_models_gpu([model] + models, comfy.model_management.batch_area_memory(noise_shape[0] * noise_shape[2] * noise_shape[3]) + inference_memory)
fcbh.model_management.load_models_gpu([model] + models, fcbh.model_management.batch_area_memory(noise_shape[0] * noise_shape[2] * noise_shape[3]) + inference_memory)
real_model = model.model
positive_copy = broadcast_cond(positive, noise_shape[0], device)
@ -92,7 +92,7 @@ def sample(model, noise, steps, cfg, sampler_name, scheduler, positive, negative
noise = noise.to(model.load_device)
latent_image = latent_image.to(model.load_device)
sampler = comfy.samplers.KSampler(real_model, steps=steps, device=model.load_device, sampler=sampler_name, scheduler=scheduler, denoise=denoise, model_options=model.model_options)
sampler = fcbh.samplers.KSampler(real_model, steps=steps, device=model.load_device, sampler=sampler_name, scheduler=scheduler, denoise=denoise, model_options=model.model_options)
samples = sampler.sample(noise, positive_copy, negative_copy, cfg=cfg, latent_image=latent_image, start_step=start_step, last_step=last_step, force_full_denoise=force_full_denoise, denoise_mask=noise_mask, sigmas=sigmas, callback=callback, disable_pbar=disable_pbar, seed=seed)
samples = samples.cpu()
@ -106,7 +106,7 @@ def sample_custom(model, noise, cfg, sampler, sigmas, positive, negative, latent
latent_image = latent_image.to(model.load_device)
sigmas = sigmas.to(model.load_device)
samples = comfy.samplers.sample(real_model, noise, positive_copy, negative_copy, cfg, model.load_device, sampler, sigmas, model_options=model.model_options, latent_image=latent_image, denoise_mask=noise_mask, callback=callback, disable_pbar=disable_pbar, seed=seed)
samples = fcbh.samplers.sample(real_model, noise, positive_copy, negative_copy, cfg, model.load_device, sampler, sigmas, model_options=model.model_options, latent_image=latent_image, denoise_mask=noise_mask, callback=callback, disable_pbar=disable_pbar, seed=seed)
samples = samples.cpu()
cleanup_additional_models(models)
return samples

View File

@ -2,12 +2,12 @@ from .k_diffusion import sampling as k_diffusion_sampling
from .k_diffusion import external as k_diffusion_external
from .extra_samplers import uni_pc
import torch
from comfy import model_management
from fcbh import model_management
from .ldm.models.diffusion.ddim import DDIMSampler
from .ldm.modules.diffusionmodules.util import make_ddim_timesteps
import math
from comfy import model_base
import comfy.utils
from fcbh import model_base
import fcbh.utils
def lcm(a, b): #TODO: eventually replace by math.lcm (added in python3.9)
return abs(a*b) // math.gcd(a, b)
@ -539,7 +539,7 @@ def encode_adm(model, conds, batch_size, width, height, device, prompt_type):
if adm_out is not None:
x[1] = x[1].copy()
x[1]["adm_encoded"] = comfy.utils.repeat_to_batch_size(adm_out, batch_size).to(device)
x[1]["adm_encoded"] = fcbh.utils.repeat_to_batch_size(adm_out, batch_size).to(device)
return conds

View File

@ -2,12 +2,12 @@ import torch
import contextlib
import math
from comfy import model_management
from fcbh import model_management
from .ldm.util import instantiate_from_config
from .ldm.models.autoencoder import AutoencoderKL
import yaml
import comfy.utils
import fcbh.utils
from . import clip_vision
from . import gligen
@ -19,10 +19,10 @@ from . import sd1_clip
from . import sd2_clip
from . import sdxl_clip
import comfy.model_patcher
import comfy.lora
import comfy.t2i_adapter.adapter
import comfy.supported_models_base
import fcbh.model_patcher
import fcbh.lora
import fcbh.t2i_adapter.adapter
import fcbh.supported_models_base
def load_model_weights(model, sd):
m, u = model.load_state_dict(sd, strict=False)
@ -50,14 +50,14 @@ def load_clip_weights(model, sd):
if ids.dtype == torch.float32:
sd['cond_stage_model.transformer.text_model.embeddings.position_ids'] = ids.round()
sd = comfy.utils.transformers_convert(sd, "cond_stage_model.model.", "cond_stage_model.transformer.text_model.", 24)
sd = fcbh.utils.transformers_convert(sd, "cond_stage_model.model.", "cond_stage_model.transformer.text_model.", 24)
return load_model_weights(model, sd)
def load_lora_for_models(model, clip, lora, strength_model, strength_clip):
key_map = comfy.lora.model_lora_keys_unet(model.model)
key_map = comfy.lora.model_lora_keys_clip(clip.cond_stage_model, key_map)
loaded = comfy.lora.load_lora(lora, key_map)
key_map = fcbh.lora.model_lora_keys_unet(model.model)
key_map = fcbh.lora.model_lora_keys_clip(clip.cond_stage_model, key_map)
loaded = fcbh.lora.load_lora(lora, key_map)
new_modelpatcher = model.clone()
k = new_modelpatcher.add_patches(loaded, strength_model)
new_clip = clip.clone()
@ -90,7 +90,7 @@ class CLIP:
self.cond_stage_model = clip(**(params))
self.tokenizer = tokenizer(embedding_directory=embedding_directory)
self.patcher = comfy.model_patcher.ModelPatcher(self.cond_stage_model, load_device=load_device, offload_device=offload_device)
self.patcher = fcbh.model_patcher.ModelPatcher(self.cond_stage_model, load_device=load_device, offload_device=offload_device)
self.layer_idx = None
def clone(self):
@ -149,7 +149,7 @@ class VAE:
self.first_stage_model = AutoencoderKL(**(config['params']))
self.first_stage_model = self.first_stage_model.eval()
if ckpt_path is not None:
sd = comfy.utils.load_torch_file(ckpt_path)
sd = fcbh.utils.load_torch_file(ckpt_path)
if 'decoder.up_blocks.0.resnets.0.norm1.weight' in sd.keys(): #diffusers format
sd = diffusers_convert.convert_vae_state_dict(sd)
m, u = self.first_stage_model.load_state_dict(sd, strict=False)
@ -164,29 +164,29 @@ class VAE:
self.first_stage_model.to(self.vae_dtype)
def decode_tiled_(self, samples, tile_x=64, tile_y=64, overlap = 16):
steps = samples.shape[0] * comfy.utils.get_tiled_scale_steps(samples.shape[3], samples.shape[2], tile_x, tile_y, overlap)
steps += samples.shape[0] * comfy.utils.get_tiled_scale_steps(samples.shape[3], samples.shape[2], tile_x // 2, tile_y * 2, overlap)
steps += samples.shape[0] * comfy.utils.get_tiled_scale_steps(samples.shape[3], samples.shape[2], tile_x * 2, tile_y // 2, overlap)
pbar = comfy.utils.ProgressBar(steps)
steps = samples.shape[0] * fcbh.utils.get_tiled_scale_steps(samples.shape[3], samples.shape[2], tile_x, tile_y, overlap)
steps += samples.shape[0] * fcbh.utils.get_tiled_scale_steps(samples.shape[3], samples.shape[2], tile_x // 2, tile_y * 2, overlap)
steps += samples.shape[0] * fcbh.utils.get_tiled_scale_steps(samples.shape[3], samples.shape[2], tile_x * 2, tile_y // 2, overlap)
pbar = fcbh.utils.ProgressBar(steps)
decode_fn = lambda a: (self.first_stage_model.decode(a.to(self.vae_dtype).to(self.device)) + 1.0).float()
output = torch.clamp((
(comfy.utils.tiled_scale(samples, decode_fn, tile_x // 2, tile_y * 2, overlap, upscale_amount = 8, pbar = pbar) +
comfy.utils.tiled_scale(samples, decode_fn, tile_x * 2, tile_y // 2, overlap, upscale_amount = 8, pbar = pbar) +
comfy.utils.tiled_scale(samples, decode_fn, tile_x, tile_y, overlap, upscale_amount = 8, pbar = pbar))
(fcbh.utils.tiled_scale(samples, decode_fn, tile_x // 2, tile_y * 2, overlap, upscale_amount = 8, pbar = pbar) +
fcbh.utils.tiled_scale(samples, decode_fn, tile_x * 2, tile_y // 2, overlap, upscale_amount = 8, pbar = pbar) +
fcbh.utils.tiled_scale(samples, decode_fn, tile_x, tile_y, overlap, upscale_amount = 8, pbar = pbar))
/ 3.0) / 2.0, min=0.0, max=1.0)
return output
def encode_tiled_(self, pixel_samples, tile_x=512, tile_y=512, overlap = 64):
steps = pixel_samples.shape[0] * comfy.utils.get_tiled_scale_steps(pixel_samples.shape[3], pixel_samples.shape[2], tile_x, tile_y, overlap)
steps += pixel_samples.shape[0] * comfy.utils.get_tiled_scale_steps(pixel_samples.shape[3], pixel_samples.shape[2], tile_x // 2, tile_y * 2, overlap)
steps += pixel_samples.shape[0] * comfy.utils.get_tiled_scale_steps(pixel_samples.shape[3], pixel_samples.shape[2], tile_x * 2, tile_y // 2, overlap)
pbar = comfy.utils.ProgressBar(steps)
steps = pixel_samples.shape[0] * fcbh.utils.get_tiled_scale_steps(pixel_samples.shape[3], pixel_samples.shape[2], tile_x, tile_y, overlap)
steps += pixel_samples.shape[0] * fcbh.utils.get_tiled_scale_steps(pixel_samples.shape[3], pixel_samples.shape[2], tile_x // 2, tile_y * 2, overlap)
steps += pixel_samples.shape[0] * fcbh.utils.get_tiled_scale_steps(pixel_samples.shape[3], pixel_samples.shape[2], tile_x * 2, tile_y // 2, overlap)
pbar = fcbh.utils.ProgressBar(steps)
encode_fn = lambda a: self.first_stage_model.encode((2. * a - 1.).to(self.vae_dtype).to(self.device)).sample().float()
samples = comfy.utils.tiled_scale(pixel_samples, encode_fn, tile_x, tile_y, overlap, upscale_amount = (1/8), out_channels=4, pbar=pbar)
samples += comfy.utils.tiled_scale(pixel_samples, encode_fn, tile_x * 2, tile_y // 2, overlap, upscale_amount = (1/8), out_channels=4, pbar=pbar)
samples += comfy.utils.tiled_scale(pixel_samples, encode_fn, tile_x // 2, tile_y * 2, overlap, upscale_amount = (1/8), out_channels=4, pbar=pbar)
samples = fcbh.utils.tiled_scale(pixel_samples, encode_fn, tile_x, tile_y, overlap, upscale_amount = (1/8), out_channels=4, pbar=pbar)
samples += fcbh.utils.tiled_scale(pixel_samples, encode_fn, tile_x * 2, tile_y // 2, overlap, upscale_amount = (1/8), out_channels=4, pbar=pbar)
samples += fcbh.utils.tiled_scale(pixel_samples, encode_fn, tile_x // 2, tile_y * 2, overlap, upscale_amount = (1/8), out_channels=4, pbar=pbar)
samples /= 3.0
return samples
@ -257,10 +257,10 @@ class StyleModel:
def load_style_model(ckpt_path):
model_data = comfy.utils.load_torch_file(ckpt_path, safe_load=True)
model_data = fcbh.utils.load_torch_file(ckpt_path, safe_load=True)
keys = model_data.keys()
if "style_embedding" in keys:
model = comfy.t2i_adapter.adapter.StyleAdapter(width=1024, context_dim=768, num_head=8, n_layes=3, num_token=8)
model = fcbh.t2i_adapter.adapter.StyleAdapter(width=1024, context_dim=768, num_head=8, n_layes=3, num_token=8)
else:
raise Exception("invalid style model {}".format(ckpt_path))
model.load_state_dict(model_data)
@ -270,14 +270,14 @@ def load_style_model(ckpt_path):
def load_clip(ckpt_paths, embedding_directory=None):
clip_data = []
for p in ckpt_paths:
clip_data.append(comfy.utils.load_torch_file(p, safe_load=True))
clip_data.append(fcbh.utils.load_torch_file(p, safe_load=True))
class EmptyClass:
pass
for i in range(len(clip_data)):
if "transformer.resblocks.0.ln_1.weight" in clip_data[i]:
clip_data[i] = comfy.utils.transformers_convert(clip_data[i], "", "text_model.", 32)
clip_data[i] = fcbh.utils.transformers_convert(clip_data[i], "", "text_model.", 32)
clip_target = EmptyClass()
clip_target.params = {}
@ -306,11 +306,11 @@ def load_clip(ckpt_paths, embedding_directory=None):
return clip
def load_gligen(ckpt_path):
data = comfy.utils.load_torch_file(ckpt_path, safe_load=True)
data = fcbh.utils.load_torch_file(ckpt_path, safe_load=True)
model = gligen.load_gligen(data)
if model_management.should_use_fp16():
model = model.half()
return comfy.model_patcher.ModelPatcher(model, load_device=model_management.get_torch_device(), offload_device=model_management.unet_offload_device())
return fcbh.model_patcher.ModelPatcher(model, load_device=model_management.get_torch_device(), offload_device=model_management.unet_offload_device())
def load_checkpoint(config_path=None, ckpt_path=None, output_vae=True, output_clip=True, embedding_directory=None, state_dict=None, config=None):
#TODO: this function is a mess and should be removed eventually
@ -346,12 +346,12 @@ def load_checkpoint(config_path=None, ckpt_path=None, output_vae=True, output_cl
pass
if state_dict is None:
state_dict = comfy.utils.load_torch_file(ckpt_path)
state_dict = fcbh.utils.load_torch_file(ckpt_path)
class EmptyClass:
pass
model_config = comfy.supported_models_base.BASE({})
model_config = fcbh.supported_models_base.BASE({})
from . import latent_formats
model_config.latent_format = latent_formats.SD15(scale_factor=scale_factor)
@ -392,10 +392,10 @@ def load_checkpoint(config_path=None, ckpt_path=None, output_vae=True, output_cl
w.cond_stage_model = clip.cond_stage_model
load_clip_weights(w, state_dict)
return (comfy.model_patcher.ModelPatcher(model, load_device=model_management.get_torch_device(), offload_device=offload_device), clip, vae)
return (fcbh.model_patcher.ModelPatcher(model, load_device=model_management.get_torch_device(), offload_device=offload_device), clip, vae)
def load_checkpoint_guess_config(ckpt_path, output_vae=True, output_clip=True, output_clipvision=False, embedding_directory=None, output_model=True):
sd = comfy.utils.load_torch_file(ckpt_path)
sd = fcbh.utils.load_torch_file(ckpt_path)
sd_keys = sd.keys()
clip = None
clipvision = None
@ -404,7 +404,7 @@ def load_checkpoint_guess_config(ckpt_path, output_vae=True, output_clip=True, o
model_patcher = None
clip_target = None
parameters = comfy.utils.calculate_parameters(sd, "model.diffusion_model.")
parameters = fcbh.utils.calculate_parameters(sd, "model.diffusion_model.")
fp16 = model_management.should_use_fp16(model_params=parameters)
class WeightsLoader(torch.nn.Module):
@ -447,7 +447,7 @@ def load_checkpoint_guess_config(ckpt_path, output_vae=True, output_clip=True, o
print("left over keys:", left_over)
if output_model:
model_patcher = comfy.model_patcher.ModelPatcher(model, load_device=model_management.get_torch_device(), offload_device=model_management.unet_offload_device(), current_device=inital_load_device)
model_patcher = fcbh.model_patcher.ModelPatcher(model, load_device=model_management.get_torch_device(), offload_device=model_management.unet_offload_device(), current_device=inital_load_device)
if inital_load_device != torch.device("cpu"):
print("loaded straight to GPU")
model_management.load_model_gpu(model_patcher)
@ -456,8 +456,8 @@ def load_checkpoint_guess_config(ckpt_path, output_vae=True, output_clip=True, o
def load_unet(unet_path): #load unet in diffusers format
sd = comfy.utils.load_torch_file(unet_path)
parameters = comfy.utils.calculate_parameters(sd)
sd = fcbh.utils.load_torch_file(unet_path)
parameters = fcbh.utils.calculate_parameters(sd)
fp16 = model_management.should_use_fp16(model_params=parameters)
if "input_blocks.0.0.weight" in sd: #ldm
model_config = model_detection.model_config_from_unet(sd, "", fp16)
@ -471,7 +471,7 @@ def load_unet(unet_path): #load unet in diffusers format
print("ERROR UNSUPPORTED UNET", unet_path)
return None
diffusers_keys = comfy.utils.unet_to_diffusers(model_config.unet_config)
diffusers_keys = fcbh.utils.unet_to_diffusers(model_config.unet_config)
new_sd = {}
for k in diffusers_keys:
@ -483,9 +483,9 @@ def load_unet(unet_path): #load unet in diffusers format
model = model_config.get_model(new_sd, "")
model = model.to(offload_device)
model.load_model_weights(new_sd, "")
return comfy.model_patcher.ModelPatcher(model, load_device=model_management.get_torch_device(), offload_device=offload_device)
return fcbh.model_patcher.ModelPatcher(model, load_device=model_management.get_torch_device(), offload_device=offload_device)
def save_checkpoint(output_path, model, clip, vae, metadata=None):
model_management.load_models_gpu([model, clip.load_model()])
sd = model.model.state_dict_for_saving(clip.get_sd(), vae.get_sd())
comfy.utils.save_torch_file(sd, output_path, metadata=metadata)
fcbh.utils.save_torch_file(sd, output_path, metadata=metadata)

View File

@ -1,7 +1,7 @@
import os
from transformers import CLIPTokenizer, CLIPTextModel, CLIPTextConfig, modeling_utils
import comfy.ops
import fcbh.ops
import torch
import traceback
import zipfile
@ -54,7 +54,7 @@ class SD1ClipModel(torch.nn.Module, ClipTokenWeightEncoder):
textmodel_json_config = os.path.join(os.path.dirname(os.path.realpath(__file__)), "sd1_clip_config.json")
config = CLIPTextConfig.from_json_file(textmodel_json_config)
self.num_layers = config.num_hidden_layers
with comfy.ops.use_comfy_ops(device, dtype):
with fcbh.ops.use_fcbh_ops(device, dtype):
with modeling_utils.no_init_weights():
self.transformer = CLIPTextModel(config)

View File

@ -1,4 +1,4 @@
from comfy import sd1_clip
from fcbh import sd1_clip
import torch
import os

View File

@ -1,4 +1,4 @@
from comfy import sd1_clip
from fcbh import sd1_clip
import torch
import os

View File

@ -6,7 +6,7 @@ Tiny AutoEncoder for Stable Diffusion
import torch
import torch.nn as nn
import comfy.utils
import fcbh.utils
def conv(n_in, n_out, **kwargs):
return nn.Conv2d(n_in, n_out, 3, padding=1, **kwargs)
@ -52,9 +52,9 @@ class TAESD(nn.Module):
self.encoder = Encoder()
self.decoder = Decoder()
if encoder_path is not None:
self.encoder.load_state_dict(comfy.utils.load_torch_file(encoder_path, safe_load=True))
self.encoder.load_state_dict(fcbh.utils.load_torch_file(encoder_path, safe_load=True))
if decoder_path is not None:
self.decoder.load_state_dict(comfy.utils.load_torch_file(decoder_path, safe_load=True))
self.decoder.load_state_dict(fcbh.utils.load_torch_file(decoder_path, safe_load=True))
@staticmethod
def scale_latents(x):

View File

@ -1,7 +1,7 @@
import torch
import math
import struct
import comfy.checkpoint_pickle
import fcbh.checkpoint_pickle
import safetensors.torch
import numpy as np
from PIL import Image
@ -19,7 +19,7 @@ def load_torch_file(ckpt, safe_load=False, device=None):
if safe_load:
pl_sd = torch.load(ckpt, map_location=device, weights_only=True)
else:
pl_sd = torch.load(ckpt, map_location=device, pickle_module=comfy.checkpoint_pickle)
pl_sd = torch.load(ckpt, map_location=device, pickle_module=fcbh.checkpoint_pickle)
if "global_step" in pl_sd:
print(f"Global Step: {pl_sd['global_step']}")
if "state_dict" in pl_sd:

Some files were not shown because too many files have changed in this diff.