From 43a1263b609b923b2f69a0510bcf7ac95097e41b Mon Sep 17 00:00:00 2001 From: AustinMroz Date: Tue, 21 Apr 2026 17:58:59 -0700 Subject: [PATCH 01/81] Add gpt-image-2 as version option (#13501) --- comfy_api_nodes/nodes_openai.py | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/comfy_api_nodes/nodes_openai.py b/comfy_api_nodes/nodes_openai.py index 4ee896fa8..90a29c2f2 100644 --- a/comfy_api_nodes/nodes_openai.py +++ b/comfy_api_nodes/nodes_openai.py @@ -363,7 +363,7 @@ class OpenAIGPTImage1(IO.ComfyNode): def define_schema(cls): return IO.Schema( node_id="OpenAIGPTImage1", - display_name="OpenAI GPT Image 1.5", + display_name="OpenAI GPT Image 2", category="api node/image/OpenAI", description="Generates images synchronously via OpenAI's GPT Image endpoint.", inputs=[ @@ -427,8 +427,8 @@ class OpenAIGPTImage1(IO.ComfyNode): ), IO.Combo.Input( "model", - options=["gpt-image-1", "gpt-image-1.5"], - default="gpt-image-1.5", + options=["gpt-image-1", "gpt-image-1.5", 'gpt-image-2'], + default="gpt-image-2", optional=True, ), ], @@ -487,6 +487,8 @@ class OpenAIGPTImage1(IO.ComfyNode): price_extractor = calculate_tokens_price_image_1 elif model == "gpt-image-1.5": price_extractor = calculate_tokens_price_image_1_5 + elif model == "gpt-image-2": + price_extractor = calculate_tokens_price_image_1_5 else: raise ValueError(f"Unknown model: {model}") From 529c80255f3f2370c39780c62a9454d95344014d Mon Sep 17 00:00:00 2001 From: comfyanonymous <121283862+comfyanonymous@users.noreply.github.com> Date: Tue, 21 Apr 2026 19:59:31 -0700 Subject: [PATCH 02/81] Allow logging in comfy app files. (#13505) --- main.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/main.py b/main.py index 12b04719d..dbaf2745c 100644 --- a/main.py +++ b/main.py @@ -9,6 +9,8 @@ import folder_paths import time from comfy.cli_args import args, enables_dynamic_vram from app.logger import setup_logger +setup_logger(log_level=args.verbose, use_stdout=args.log_stdout) + from app.assets.seeder import asset_seeder from app.assets.services import register_output_files import itertools @@ -27,8 +29,6 @@ if __name__ == "__main__": os.environ['HF_HUB_DISABLE_TELEMETRY'] = '1' os.environ['DO_NOT_TRACK'] = '1' -setup_logger(log_level=args.verbose, use_stdout=args.log_stdout) - faulthandler.enable(file=sys.stderr, all_threads=False) import comfy_aimdo.control From 6045c11d8b32d5f761c555d6ca026e4d731ac8d5 Mon Sep 17 00:00:00 2001 From: "Daxiong (Lin)" Date: Wed, 22 Apr 2026 11:45:25 +0800 Subject: [PATCH 03/81] chore: update workflow templates to v0.9.59 (#13507) --- requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/requirements.txt b/requirements.txt index ccdd47674..a25bc0667 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,5 +1,5 @@ comfyui-frontend-package==1.42.14 -comfyui-workflow-templates==0.9.57 +comfyui-workflow-templates==0.9.59 comfyui-embedded-docs==0.4.3 torch torchsde From 91e1f45d80fba14d992269b0b98de7a4a14c81b9 Mon Sep 17 00:00:00 2001 From: Matt Miller Date: Tue, 21 Apr 2026 22:31:36 -0700 Subject: [PATCH 04/81] fix(veo): reject 4K resolution for veo-3.0 models in Veo3VideoGenerationNode (#13504) The tooltip on the resolution input states that 4K is not available for veo-3.1-lite or veo-3.0 models, but the execute guard only rejected the lite combination. Selecting 4K with veo-3.0-generate-001 or veo-3.0-fast-generate-001 would fall through and hit the upstream API with an invalid request. 
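As a minimal sketch (mirroring, but not copied from, the committed guard; the
assert lines are illustrative only):

    def rejects_4k(model: str, resolution: str) -> bool:
        # veo-3.1-lite and all veo-3.0 variants lack 4K support
        return resolution == "4k" and ("lite" in model or "3.0" in model)

    assert rejects_4k("veo-3.1-lite", "4k")
    assert rejects_4k("veo-3.0-generate-001", "4k")       # previously fell through
    assert rejects_4k("veo-3.0-fast-generate-001", "4k")  # previously fell through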
Broaden the guard to match the documented behavior and update the error message accordingly. Co-authored-by: Jedrzej Kosinski --- comfy_api_nodes/nodes_veo2.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/comfy_api_nodes/nodes_veo2.py b/comfy_api_nodes/nodes_veo2.py index 084b086a8..2ff75d9b2 100644 --- a/comfy_api_nodes/nodes_veo2.py +++ b/comfy_api_nodes/nodes_veo2.py @@ -393,8 +393,8 @@ class Veo3VideoGenerationNode(IO.ComfyNode): model="veo-3.0-generate-001", generate_audio=False, ): - if "lite" in model and resolution == "4k": - raise Exception("4K resolution is not supported by the veo-3.1-lite model.") + if resolution == "4k" and ("lite" in model or "3.0" in model): + raise Exception("4K resolution is not supported by the veo-3.1-lite or veo-3.0 models.") model = MODELS_MAP[model] From db85cf03ff33f5be09d02f2a52334971209d25d5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jukka=20Sepp=C3=A4nen?= <40791699+kijai@users.noreply.github.com> Date: Wed, 22 Apr 2026 14:16:02 +0300 Subject: [PATCH 05/81] feat: RIFE and FILM frame interpolation model support (CORE-29) (#13258) * initial RIFE support * Also support FILM * Better RAM usage, reduce FILM VRAM peak * Add model folder placeholder * Fix oom fallback frame loss * Remove torch.compile for now * Rename model input * Shorter input type name --------- --- .../frame_interpolation_models/film_net.py | 258 ++++++++++++++++++ .../frame_interpolation_models/ifnet.py | 128 +++++++++ comfy_extras/nodes_frame_interpolation.py | 211 ++++++++++++++ folder_paths.py | 2 + .../put_frame_interpolation_models_here | 0 nodes.py | 3 +- 6 files changed, 601 insertions(+), 1 deletion(-) create mode 100644 comfy_extras/frame_interpolation_models/film_net.py create mode 100644 comfy_extras/frame_interpolation_models/ifnet.py create mode 100644 comfy_extras/nodes_frame_interpolation.py create mode 100644 models/frame_interpolation/put_frame_interpolation_models_here diff --git a/comfy_extras/frame_interpolation_models/film_net.py b/comfy_extras/frame_interpolation_models/film_net.py new file mode 100644 index 000000000..cf4f6e1e1 --- /dev/null +++ b/comfy_extras/frame_interpolation_models/film_net.py @@ -0,0 +1,258 @@ +"""FILM: Frame Interpolation for Large Motion (ECCV 2022).""" + +import torch +import torch.nn as nn +import torch.nn.functional as F + +import comfy.ops + +ops = comfy.ops.disable_weight_init + + +class FilmConv2d(nn.Module): + """Conv2d with optional LeakyReLU and FILM-style padding.""" + + def __init__(self, in_channels, out_channels, size, activation=True, device=None, dtype=None, operations=ops): + super().__init__() + self.even_pad = not size % 2 + self.conv = operations.Conv2d(in_channels, out_channels, kernel_size=size, padding=size // 2 if size % 2 else 0, device=device, dtype=dtype) + self.activation = nn.LeakyReLU(0.2) if activation else None + + def forward(self, x): + if self.even_pad: + x = F.pad(x, (0, 1, 0, 1)) + x = self.conv(x) + if self.activation is not None: + x = self.activation(x) + return x + + +def _warp_core(image, flow, grid_x, grid_y): + dtype = image.dtype + H, W = flow.shape[2], flow.shape[3] + dx = flow[:, 0].float() / (W * 0.5) + dy = flow[:, 1].float() / (H * 0.5) + grid = torch.stack([grid_x[None, None, :] + dx, grid_y[None, :, None] + dy], dim=3) + return F.grid_sample(image.float(), grid, mode="bilinear", padding_mode="border", align_corners=False).to(dtype) + + +def build_image_pyramid(image, pyramid_levels): + pyramid = [image] + for _ in range(1, pyramid_levels): + image = 
F.avg_pool2d(image, 2, 2) + pyramid.append(image) + return pyramid + + +def flow_pyramid_synthesis(residual_pyramid): + flow = residual_pyramid[-1] + flow_pyramid = [flow] + for residual_flow in residual_pyramid[:-1][::-1]: + flow = F.interpolate(flow, size=residual_flow.shape[2:4], mode="bilinear", scale_factor=None).mul_(2).add_(residual_flow) + flow_pyramid.append(flow) + flow_pyramid.reverse() + return flow_pyramid + + +def multiply_pyramid(pyramid, scalar): + return [image * scalar[:, None, None, None] for image in pyramid] + + +def pyramid_warp(feature_pyramid, flow_pyramid, warp_fn): + return [warp_fn(features, flow) for features, flow in zip(feature_pyramid, flow_pyramid)] + + +def concatenate_pyramids(pyramid1, pyramid2): + return [torch.cat([f1, f2], dim=1) for f1, f2 in zip(pyramid1, pyramid2)] + + +class SubTreeExtractor(nn.Module): + def __init__(self, in_channels=3, channels=64, n_layers=4, device=None, dtype=None, operations=ops): + super().__init__() + convs = [] + for i in range(n_layers): + out_ch = channels << i + convs.append(nn.Sequential( + FilmConv2d(in_channels, out_ch, 3, device=device, dtype=dtype, operations=operations), + FilmConv2d(out_ch, out_ch, 3, device=device, dtype=dtype, operations=operations))) + in_channels = out_ch + self.convs = nn.ModuleList(convs) + + def forward(self, image, n): + head = image + pyramid = [] + for i, layer in enumerate(self.convs): + head = layer(head) + pyramid.append(head) + if i < n - 1: + head = F.avg_pool2d(head, 2, 2) + return pyramid + + +class FeatureExtractor(nn.Module): + def __init__(self, in_channels=3, channels=64, sub_levels=4, device=None, dtype=None, operations=ops): + super().__init__() + self.extract_sublevels = SubTreeExtractor(in_channels, channels, sub_levels, device=device, dtype=dtype, operations=operations) + self.sub_levels = sub_levels + + def forward(self, image_pyramid): + sub_pyramids = [self.extract_sublevels(image_pyramid[i], min(len(image_pyramid) - i, self.sub_levels)) + for i in range(len(image_pyramid))] + feature_pyramid = [] + for i in range(len(image_pyramid)): + features = sub_pyramids[i][0] + for j in range(1, self.sub_levels): + if j <= i: + features = torch.cat([features, sub_pyramids[i - j][j]], dim=1) + feature_pyramid.append(features) + # Free sub-pyramids no longer needed by future levels + if i >= self.sub_levels - 1: + sub_pyramids[i - self.sub_levels + 1] = None + return feature_pyramid + + +class FlowEstimator(nn.Module): + def __init__(self, in_channels, num_convs, num_filters, device=None, dtype=None, operations=ops): + super().__init__() + self._convs = nn.ModuleList() + for _ in range(num_convs): + self._convs.append(FilmConv2d(in_channels, num_filters, 3, device=device, dtype=dtype, operations=operations)) + in_channels = num_filters + self._convs.append(FilmConv2d(in_channels, num_filters // 2, 1, device=device, dtype=dtype, operations=operations)) + self._convs.append(FilmConv2d(num_filters // 2, 2, 1, activation=False, device=device, dtype=dtype, operations=operations)) + + def forward(self, features_a, features_b): + net = torch.cat([features_a, features_b], dim=1) + for conv in self._convs: + net = conv(net) + return net + + +class PyramidFlowEstimator(nn.Module): + def __init__(self, filters=64, flow_convs=(3, 3, 3, 3), flow_filters=(32, 64, 128, 256), device=None, dtype=None, operations=ops): + super().__init__() + in_channels = filters << 1 + predictors = [] + for i in range(len(flow_convs)): + predictors.append(FlowEstimator(in_channels, flow_convs[i], 
flow_filters[i], device=device, dtype=dtype, operations=operations)) + in_channels += filters << (i + 2) + self._predictor = predictors[-1] + self._predictors = nn.ModuleList(predictors[:-1][::-1]) + + def forward(self, feature_pyramid_a, feature_pyramid_b, warp_fn): + levels = len(feature_pyramid_a) + v = self._predictor(feature_pyramid_a[-1], feature_pyramid_b[-1]) + residuals = [v] + # Coarse-to-fine: shared predictor for deep levels, then specialized predictors for fine levels + steps = [(i, self._predictor) for i in range(levels - 2, len(self._predictors) - 1, -1)] + steps += [(len(self._predictors) - 1 - k, p) for k, p in enumerate(self._predictors)] + for i, predictor in steps: + v = F.interpolate(v, size=feature_pyramid_a[i].shape[2:4], mode="bilinear").mul_(2) + v_residual = predictor(feature_pyramid_a[i], warp_fn(feature_pyramid_b[i], v)) + residuals.append(v_residual) + v = v.add_(v_residual) + residuals.reverse() + return residuals + + +def _get_fusion_channels(level, filters): + # Per direction: multi-scale features + RGB image (3ch) + flow (2ch), doubled for both directions + return (sum(filters << i for i in range(level)) + 3 + 2) * 2 + + +class Fusion(nn.Module): + def __init__(self, n_layers=4, specialized_layers=3, filters=64, device=None, dtype=None, operations=ops): + super().__init__() + self.output_conv = operations.Conv2d(filters, 3, kernel_size=1, device=device, dtype=dtype) + self.convs = nn.ModuleList() + in_channels = _get_fusion_channels(n_layers, filters) + increase = 0 + for i in range(n_layers)[::-1]: + num_filters = (filters << i) if i < specialized_layers else (filters << specialized_layers) + self.convs.append(nn.ModuleList([ + FilmConv2d(in_channels, num_filters, 2, activation=False, device=device, dtype=dtype, operations=operations), + FilmConv2d(in_channels + (increase or num_filters), num_filters, 3, device=device, dtype=dtype, operations=operations), + FilmConv2d(num_filters, num_filters, 3, device=device, dtype=dtype, operations=operations)])) + in_channels = num_filters + increase = _get_fusion_channels(i, filters) - num_filters // 2 + + def forward(self, pyramid): + net = pyramid[-1] + for k, layers in enumerate(self.convs): + i = len(self.convs) - 1 - k + net = layers[0](F.interpolate(net, size=pyramid[i].shape[2:4], mode="nearest")) + net = layers[2](layers[1](torch.cat([pyramid[i], net], dim=1))) + return self.output_conv(net) + + +class FILMNet(nn.Module): + def __init__(self, pyramid_levels=7, fusion_pyramid_levels=5, specialized_levels=3, sub_levels=4, + filters=64, flow_convs=(3, 3, 3, 3), flow_filters=(32, 64, 128, 256), device=None, dtype=None, operations=ops): + super().__init__() + self.pyramid_levels = pyramid_levels + self.fusion_pyramid_levels = fusion_pyramid_levels + self.extract = FeatureExtractor(3, filters, sub_levels, device=device, dtype=dtype, operations=operations) + self.predict_flow = PyramidFlowEstimator(filters, flow_convs, flow_filters, device=device, dtype=dtype, operations=operations) + self.fuse = Fusion(sub_levels, specialized_levels, filters, device=device, dtype=dtype, operations=operations) + self._warp_grids = {} + + def get_dtype(self): + return self.extract.extract_sublevels.convs[0][0].conv.weight.dtype + + def _build_warp_grids(self, H, W, device): + """Pre-compute warp grids for all pyramid levels.""" + if (H, W) in self._warp_grids: + return + self._warp_grids = {} # clear old resolution grids to prevent memory leaks + for _ in range(self.pyramid_levels): + self._warp_grids[(H, W)] = ( + torch.linspace(-(1 
- 1 / W), 1 - 1 / W, W, dtype=torch.float32, device=device), + torch.linspace(-(1 - 1 / H), 1 - 1 / H, H, dtype=torch.float32, device=device), + ) + H, W = H // 2, W // 2 + + def warp(self, image, flow): + grid_x, grid_y = self._warp_grids[(flow.shape[2], flow.shape[3])] + return _warp_core(image, flow, grid_x, grid_y) + + def extract_features(self, img): + """Extract image and feature pyramids for a single frame. Can be cached across pairs.""" + image_pyramid = build_image_pyramid(img, self.pyramid_levels) + feature_pyramid = self.extract(image_pyramid) + return image_pyramid, feature_pyramid + + def forward(self, img0, img1, timestep=0.5, cache=None): + # FILM uses a scalar timestep per batch element (spatially-varying timesteps not supported) + t = timestep.mean(dim=(1, 2, 3)).item() if isinstance(timestep, torch.Tensor) else timestep + return self.forward_multi_timestep(img0, img1, [t], cache=cache) + + def forward_multi_timestep(self, img0, img1, timesteps, cache=None): + """Compute flow once, synthesize at multiple timesteps. Expects batch=1 inputs.""" + self._build_warp_grids(img0.shape[2], img0.shape[3], img0.device) + + image_pyr0, feat_pyr0 = cache["img0"] if cache and "img0" in cache else self.extract_features(img0) + image_pyr1, feat_pyr1 = cache["img1"] if cache and "img1" in cache else self.extract_features(img1) + + fwd_flow = flow_pyramid_synthesis(self.predict_flow(feat_pyr0, feat_pyr1, self.warp))[:self.fusion_pyramid_levels] + bwd_flow = flow_pyramid_synthesis(self.predict_flow(feat_pyr1, feat_pyr0, self.warp))[:self.fusion_pyramid_levels] + + # Build warp targets and free full pyramids (only first fpl levels needed from here) + fpl = self.fusion_pyramid_levels + p2w = [concatenate_pyramids(image_pyr0[:fpl], feat_pyr0[:fpl]), + concatenate_pyramids(image_pyr1[:fpl], feat_pyr1[:fpl])] + del image_pyr0, image_pyr1, feat_pyr0, feat_pyr1 + + results = [] + dt_tensors = torch.tensor(timesteps, device=img0.device, dtype=img0.dtype) + for idx in range(len(timesteps)): + batch_dt = dt_tensors[idx:idx + 1] + bwd_scaled = multiply_pyramid(bwd_flow, batch_dt) + fwd_scaled = multiply_pyramid(fwd_flow, 1 - batch_dt) + fwd_warped = pyramid_warp(p2w[0], bwd_scaled, self.warp) + bwd_warped = pyramid_warp(p2w[1], fwd_scaled, self.warp) + aligned = [torch.cat([fw, bw, bf, ff], dim=1) + for fw, bw, bf, ff in zip(fwd_warped, bwd_warped, bwd_scaled, fwd_scaled)] + del fwd_warped, bwd_warped, bwd_scaled, fwd_scaled + results.append(self.fuse(aligned)) + del aligned + return torch.cat(results, dim=0) diff --git a/comfy_extras/frame_interpolation_models/ifnet.py b/comfy_extras/frame_interpolation_models/ifnet.py new file mode 100644 index 000000000..03cb34c50 --- /dev/null +++ b/comfy_extras/frame_interpolation_models/ifnet.py @@ -0,0 +1,128 @@ +import torch +import torch.nn as nn +import torch.nn.functional as F + +import comfy.ops + +ops = comfy.ops.disable_weight_init + + +def _warp(img, flow, warp_grids): + B, _, H, W = img.shape + base_grid, flow_div = warp_grids[(H, W)] + flow_norm = torch.cat([flow[:, 0:1] / flow_div[0], flow[:, 1:2] / flow_div[1]], 1).float() + grid = (base_grid.expand(B, -1, -1, -1) + flow_norm).permute(0, 2, 3, 1) + return F.grid_sample(img.float(), grid, mode="bilinear", padding_mode="border", align_corners=True).to(img.dtype) + + +class Head(nn.Module): + def __init__(self, out_ch=4, device=None, dtype=None, operations=ops): + super().__init__() + self.cnn0 = operations.Conv2d(3, 16, 3, 2, 1, device=device, dtype=dtype) + self.cnn1 = operations.Conv2d(16, 16, 3, 1, 
1, device=device, dtype=dtype) + self.cnn2 = operations.Conv2d(16, 16, 3, 1, 1, device=device, dtype=dtype) + self.cnn3 = operations.ConvTranspose2d(16, out_ch, 4, 2, 1, device=device, dtype=dtype) + self.relu = nn.LeakyReLU(0.2, True) + + def forward(self, x): + x = self.relu(self.cnn0(x)) + x = self.relu(self.cnn1(x)) + x = self.relu(self.cnn2(x)) + return self.cnn3(x) + + +class ResConv(nn.Module): + def __init__(self, c, device=None, dtype=None, operations=ops): + super().__init__() + self.conv = operations.Conv2d(c, c, 3, 1, 1, device=device, dtype=dtype) + self.beta = nn.Parameter(torch.ones((1, c, 1, 1), device=device, dtype=dtype)) + self.relu = nn.LeakyReLU(0.2, True) + + def forward(self, x): + return self.relu(torch.addcmul(x, self.conv(x), self.beta)) + + +class IFBlock(nn.Module): + def __init__(self, in_planes, c=64, device=None, dtype=None, operations=ops): + super().__init__() + self.conv0 = nn.Sequential( + nn.Sequential(operations.Conv2d(in_planes, c // 2, 3, 2, 1, device=device, dtype=dtype), nn.LeakyReLU(0.2, True)), + nn.Sequential(operations.Conv2d(c // 2, c, 3, 2, 1, device=device, dtype=dtype), nn.LeakyReLU(0.2, True))) + self.convblock = nn.Sequential(*(ResConv(c, device=device, dtype=dtype, operations=operations) for _ in range(8))) + self.lastconv = nn.Sequential(operations.ConvTranspose2d(c, 4 * 13, 4, 2, 1, device=device, dtype=dtype), nn.PixelShuffle(2)) + + def forward(self, x, flow=None, scale=1): + x = F.interpolate(x, scale_factor=1.0 / scale, mode="bilinear") + if flow is not None: + flow = F.interpolate(flow, scale_factor=1.0 / scale, mode="bilinear").div_(scale) + x = torch.cat((x, flow), 1) + feat = self.convblock(self.conv0(x)) + tmp = F.interpolate(self.lastconv(feat), scale_factor=scale, mode="bilinear") + return tmp[:, :4] * scale, tmp[:, 4:5], tmp[:, 5:] + + +class IFNet(nn.Module): + def __init__(self, head_ch=4, channels=(192, 128, 96, 64, 32), device=None, dtype=None, operations=ops): + super().__init__() + self.encode = Head(out_ch=head_ch, device=device, dtype=dtype, operations=operations) + block_in = [7 + 2 * head_ch] + [8 + 4 + 8 + 2 * head_ch] * 4 + self.blocks = nn.ModuleList([IFBlock(block_in[i], channels[i], device=device, dtype=dtype, operations=operations) for i in range(5)]) + self.scale_list = [16, 8, 4, 2, 1] + self.pad_align = 64 + self._warp_grids = {} + + def get_dtype(self): + return self.encode.cnn0.weight.dtype + + def _build_warp_grids(self, H, W, device): + if (H, W) in self._warp_grids: + return + self._warp_grids = {} # clear old resolution grids to prevent memory leaks + grid_y, grid_x = torch.meshgrid( + torch.linspace(-1.0, 1.0, H, device=device, dtype=torch.float32), + torch.linspace(-1.0, 1.0, W, device=device, dtype=torch.float32), indexing="ij") + self._warp_grids[(H, W)] = ( + torch.stack((grid_x, grid_y), dim=0).unsqueeze(0), + torch.tensor([(W - 1.0) / 2.0, (H - 1.0) / 2.0], dtype=torch.float32, device=device)) + + def warp(self, img, flow): + return _warp(img, flow, self._warp_grids) + + def extract_features(self, img): + """Extract head features for a single frame. 
Can be cached across pairs.""" + return self.encode(img) + + def forward(self, img0, img1, timestep=0.5, cache=None): + if not isinstance(timestep, torch.Tensor): + timestep = torch.full((img0.shape[0], 1, img0.shape[2], img0.shape[3]), timestep, device=img0.device, dtype=img0.dtype) + + self._build_warp_grids(img0.shape[2], img0.shape[3], img0.device) + + B = img0.shape[0] + f0 = cache["img0"].expand(B, -1, -1, -1) if cache and "img0" in cache else self.encode(img0) + f1 = cache["img1"].expand(B, -1, -1, -1) if cache and "img1" in cache else self.encode(img1) + flow = mask = feat = None + warped_img0, warped_img1 = img0, img1 + for i, block in enumerate(self.blocks): + if flow is None: + flow, mask, feat = block(torch.cat((img0, img1, f0, f1, timestep), 1), None, scale=self.scale_list[i]) + else: + fd, mask, feat = block( + torch.cat((warped_img0, warped_img1, self.warp(f0, flow[:, :2]), self.warp(f1, flow[:, 2:4]), timestep, mask, feat), 1), + flow, scale=self.scale_list[i]) + flow = flow.add_(fd) + warped_img0 = self.warp(img0, flow[:, :2]) + warped_img1 = self.warp(img1, flow[:, 2:4]) + return torch.lerp(warped_img1, warped_img0, torch.sigmoid(mask)) + + +def detect_rife_config(state_dict): + head_ch = state_dict["encode.cnn3.weight"].shape[1] # ConvTranspose2d: (in_ch, out_ch, kH, kW) + channels = [] + for i in range(5): + key = f"blocks.{i}.conv0.1.0.weight" + if key in state_dict: + channels.append(state_dict[key].shape[0]) + if len(channels) != 5: + raise ValueError(f"Unsupported RIFE model: expected 5 blocks, found {len(channels)}") + return head_ch, channels diff --git a/comfy_extras/nodes_frame_interpolation.py b/comfy_extras/nodes_frame_interpolation.py new file mode 100644 index 000000000..a3b00d36e --- /dev/null +++ b/comfy_extras/nodes_frame_interpolation.py @@ -0,0 +1,211 @@ +import torch +from tqdm import tqdm +from typing_extensions import override + +import comfy.model_patcher +import comfy.utils +import folder_paths +from comfy import model_management +from comfy_extras.frame_interpolation_models.ifnet import IFNet, detect_rife_config +from comfy_extras.frame_interpolation_models.film_net import FILMNet +from comfy_api.latest import ComfyExtension, io + +FrameInterpolationModel = io.Custom("INTERP_MODEL") + + +class FrameInterpolationModelLoader(io.ComfyNode): + @classmethod + def define_schema(cls): + return io.Schema( + node_id="FrameInterpolationModelLoader", + display_name="Load Frame Interpolation Model", + category="loaders", + inputs=[ + io.Combo.Input("model_name", options=folder_paths.get_filename_list("frame_interpolation"), + tooltip="Select a frame interpolation model to load. 
Models must be placed in the 'frame_interpolation' folder."), + ], + outputs=[ + FrameInterpolationModel.Output(), + ], + ) + + @classmethod + def execute(cls, model_name) -> io.NodeOutput: + model_path = folder_paths.get_full_path_or_raise("frame_interpolation", model_name) + sd = comfy.utils.load_torch_file(model_path, safe_load=True) + + model = cls._detect_and_load(sd) + dtype = torch.float16 if model_management.should_use_fp16(model_management.get_torch_device()) else torch.float32 + model.eval().to(dtype) + patcher = comfy.model_patcher.ModelPatcher( + model, + load_device=model_management.get_torch_device(), + offload_device=model_management.unet_offload_device(), + ) + return io.NodeOutput(patcher) + + @classmethod + def _detect_and_load(cls, sd): + # Try FILM + if "extract.extract_sublevels.convs.0.0.conv.weight" in sd: + model = FILMNet() + model.load_state_dict(sd) + return model + + # Try RIFE (needs key remapping for raw checkpoints) + sd = comfy.utils.state_dict_prefix_replace(sd, {"module.": "", "flownet.": ""}) + key_map = {} + for k in sd: + for i in range(5): + if k.startswith(f"block{i}."): + key_map[k] = f"blocks.{i}.{k[len(f'block{i}.'):]}" + if key_map: + sd = {key_map.get(k, k): v for k, v in sd.items()} + sd = {k: v for k, v in sd.items() if not k.startswith(("teacher.", "caltime."))} + + try: + head_ch, channels = detect_rife_config(sd) + except (KeyError, ValueError): + raise ValueError("Unrecognized frame interpolation model format") + model = IFNet(head_ch=head_ch, channels=channels) + model.load_state_dict(sd) + return model + + +class FrameInterpolate(io.ComfyNode): + @classmethod + def define_schema(cls): + return io.Schema( + node_id="FrameInterpolate", + display_name="Frame Interpolate", + category="image/video", + search_aliases=["rife", "film", "frame interpolation", "slow motion", "interpolate frames", "vfi"], + inputs=[ + FrameInterpolationModel.Input("interp_model"), + io.Image.Input("images"), + io.Int.Input("multiplier", default=2, min=2, max=16), + ], + outputs=[ + io.Image.Output(), + ], + ) + + @classmethod + def execute(cls, interp_model, images, multiplier) -> io.NodeOutput: + offload_device = model_management.intermediate_device() + + num_frames = images.shape[0] + if num_frames < 2 or multiplier < 2: + return io.NodeOutput(images) + + model_management.load_model_gpu(interp_model) + device = interp_model.load_device + dtype = interp_model.model_dtype() + inference_model = interp_model.model + + # Free VRAM for inference activations (model weights + ~20x a single frame's worth) + H, W = images.shape[1], images.shape[2] + activation_mem = H * W * 3 * images.element_size() * 20 + model_management.free_memory(activation_mem, device) + align = getattr(inference_model, "pad_align", 1) + + # Prepare a single padded frame on device for determining output dimensions + def prepare_frame(idx): + frame = images[idx:idx + 1].movedim(-1, 1).to(dtype=dtype, device=device) + if align > 1: + from comfy.ldm.common_dit import pad_to_patch_size + frame = pad_to_patch_size(frame, (align, align), padding_mode="reflect") + return frame + + # Count total interpolation passes for progress bar + total_pairs = num_frames - 1 + num_interp = multiplier - 1 + total_steps = total_pairs * num_interp + pbar = comfy.utils.ProgressBar(total_steps) + tqdm_bar = tqdm(total=total_steps, desc="Frame interpolation") + + batch = num_interp # reduced on OOM and persists across pairs (same resolution = same limit) + t_values = [t / multiplier for t in range(1, multiplier)] + + out_dtype 
= model_management.intermediate_dtype() + total_out_frames = total_pairs * multiplier + 1 + result = torch.empty((total_out_frames, 3, H, W), dtype=out_dtype, device=offload_device) + result[0] = images[0].movedim(-1, 0).to(out_dtype) + out_idx = 1 + + # Pre-compute timestep tensor on device (padded dimensions needed) + sample = prepare_frame(0) + pH, pW = sample.shape[2], sample.shape[3] + ts_full = torch.tensor(t_values, device=device, dtype=dtype).reshape(num_interp, 1, 1, 1) + ts_full = ts_full.expand(-1, 1, pH, pW) + del sample + + multi_fn = getattr(inference_model, "forward_multi_timestep", None) + feat_cache = {} + prev_frame = None + + try: + for i in range(total_pairs): + img0_single = prev_frame if prev_frame is not None else prepare_frame(i) + img1_single = prepare_frame(i + 1) + prev_frame = img1_single + + # Cache features: img1 of pair N becomes img0 of pair N+1 + feat_cache["img0"] = feat_cache.pop("next") if "next" in feat_cache else inference_model.extract_features(img0_single) + feat_cache["img1"] = inference_model.extract_features(img1_single) + feat_cache["next"] = feat_cache["img1"] + + used_multi = False + if multi_fn is not None: + # Models with timestep-independent flow can compute it once for all timesteps + try: + mids = multi_fn(img0_single, img1_single, t_values, cache=feat_cache) + result[out_idx:out_idx + num_interp] = mids[:, :, :H, :W].to(out_dtype) + out_idx += num_interp + pbar.update(num_interp) + tqdm_bar.update(num_interp) + used_multi = True + except model_management.OOM_EXCEPTION: + model_management.soft_empty_cache() + multi_fn = None # fall through to single-timestep path + + if not used_multi: + j = 0 + while j < num_interp: + b = min(batch, num_interp - j) + try: + img0 = img0_single.expand(b, -1, -1, -1) + img1 = img1_single.expand(b, -1, -1, -1) + mids = inference_model(img0, img1, timestep=ts_full[j:j + b], cache=feat_cache) + result[out_idx:out_idx + b] = mids[:, :, :H, :W].to(out_dtype) + out_idx += b + pbar.update(b) + tqdm_bar.update(b) + j += b + except model_management.OOM_EXCEPTION: + if batch <= 1: + raise + batch = max(1, batch // 2) + model_management.soft_empty_cache() + + result[out_idx] = images[i + 1].movedim(-1, 0).to(out_dtype) + out_idx += 1 + finally: + tqdm_bar.close() + + # BCHW -> BHWC + result = result.movedim(1, -1).clamp_(0.0, 1.0) + return io.NodeOutput(result) + + +class FrameInterpolationExtension(ComfyExtension): + @override + async def get_node_list(self) -> list[type[io.ComfyNode]]: + return [ + FrameInterpolationModelLoader, + FrameInterpolate, + ] + + +async def comfy_entrypoint() -> FrameInterpolationExtension: + return FrameInterpolationExtension() diff --git a/folder_paths.py b/folder_paths.py index 9c96540e3..80f4b291a 100644 --- a/folder_paths.py +++ b/folder_paths.py @@ -52,6 +52,8 @@ folder_names_and_paths["model_patches"] = ([os.path.join(models_dir, "model_patc folder_names_and_paths["audio_encoders"] = ([os.path.join(models_dir, "audio_encoders")], supported_pt_extensions) +folder_names_and_paths["frame_interpolation"] = ([os.path.join(models_dir, "frame_interpolation")], supported_pt_extensions) + output_directory = os.path.join(base_path, "output") temp_directory = os.path.join(base_path, "temp") input_directory = os.path.join(base_path, "input") diff --git a/models/frame_interpolation/put_frame_interpolation_models_here b/models/frame_interpolation/put_frame_interpolation_models_here new file mode 100644 index 000000000..e69de29bb diff --git a/nodes.py b/nodes.py index 299b3d758..bb38e07b8 100644 
--- a/nodes.py
+++ b/nodes.py
@@ -2457,7 +2457,8 @@ async def init_builtin_extra_nodes():
         "nodes_number_convert.py",
         "nodes_painter.py",
         "nodes_curve.py",
-        "nodes_rtdetr.py"
+        "nodes_rtdetr.py",
+        "nodes_frame_interpolation.py",
     ]

     import_failed = []

From cc6f9500a1b972e9dca14e769f4b70a8927ffa43 Mon Sep 17 00:00:00 2001
From: Octopus
Date: Thu, 23 Apr 2026 06:05:43 +0800
Subject: [PATCH 06/81] fix: use Parameter assignment for Stable_Zero123
 cc_projection weights (fixes #13492) (#13518)

On Windows with aimdo enabled, disable_weight_init.Linear uses lazy
initialization that sets weight and bias to None to avoid unnecessary
memory allocation. This caused a crash when copy_() was called on the
None weight attribute in Stable_Zero123.__init__.

Replace copy_() with direct torch.nn.Parameter assignment, which works
correctly on both Windows (aimdo enabled) and other platforms.
---
 comfy/model_base.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/comfy/model_base.py b/comfy/model_base.py
index 5c2668ba9..1c7695761 100644
--- a/comfy/model_base.py
+++ b/comfy/model_base.py
@@ -578,8 +578,8 @@ class Stable_Zero123(BaseModel):
     def __init__(self, model_config, model_type=ModelType.EPS, device=None, cc_projection_weight=None, cc_projection_bias=None):
         super().__init__(model_config, model_type, device=device)
         self.cc_projection = comfy.ops.manual_cast.Linear(cc_projection_weight.shape[1], cc_projection_weight.shape[0], dtype=self.get_dtype(), device=device)
-        self.cc_projection.weight.copy_(cc_projection_weight)
-        self.cc_projection.bias.copy_(cc_projection_bias)
+        self.cc_projection.weight = torch.nn.Parameter(cc_projection_weight.clone())
+        self.cc_projection.bias = torch.nn.Parameter(cc_projection_bias.clone())

     def extra_conds(self, **kwargs):
         out = {}

From 9949c19c632eb6cad50024e02816df86e7d41b27 Mon Sep 17 00:00:00 2001
From: blepping <157360029+blepping@users.noreply.github.com>
Date: Wed, 22 Apr 2026 16:08:19 -0600
Subject: [PATCH 07/81] Derive InterruptProcessingException from BaseException (#13523)

---
 comfy/model_management.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/comfy/model_management.py b/comfy/model_management.py
index bcf1399c4..3b39d6080 100644
--- a/comfy/model_management.py
+++ b/comfy/model_management.py
@@ -1801,7 +1801,7 @@ def debug_memory_summary():
         return torch.cuda.memory.memory_summary()
     return ""

-class InterruptProcessingException(Exception):
+class InterruptProcessingException(BaseException):
     pass

 interrupt_processing_mutex = threading.RLock()

From cb388e2912f9d3adf50e3510ed1c470ad5c9bc79 Mon Sep 17 00:00:00 2001
From: "Dr.Lt.Data" <128333288+ltdrdata@users.noreply.github.com>
Date: Thu, 23 Apr 2026 07:12:06 +0900
Subject: [PATCH 08/81] bump manager version to 4.2.1 (#13516)

---
 manager_requirements.txt | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/manager_requirements.txt b/manager_requirements.txt
index f770ec933..a079d3492 100644
--- a/manager_requirements.txt
+++ b/manager_requirements.txt
@@ -1 +1 @@
-comfyui_manager==4.1
+comfyui_manager==4.2.1

From ec4b1659ab751b7da07bfff8fa28660c7e82c00b Mon Sep 17 00:00:00 2001
From: rattus <46076784+rattus128@users.noreply.github.com>
Date: Thu, 23 Apr 2026 08:13:38 +1000
Subject: [PATCH 09/81] ModelPatcherDynamic: force cast stray weights on comfy
 layers (#13487)

The mixed_precision ops can have input_scale parameters that are used in
tensor math but aren't a weight or bias, so they don't get proper VRAM
management.
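As a rough illustration (hypothetical layer, not the committed code), such a
stray parameter shows up in named_parameters() but is skipped by logic that
only handles .weight and .bias:

    import torch

    class ScaledLinear(torch.nn.Linear):
        # mixed-precision style layer with an extra scale parameter
        def __init__(self, *args, **kwargs):
            super().__init__(*args, **kwargs)
            self.input_scale = torch.nn.Parameter(torch.ones(()), requires_grad=False)

    layer = ScaledLinear(4, 4)
    stray = [n for n, _ in layer.named_parameters() if n not in ("weight", "bias")]
    assert stray == ["input_scale"]  # these now get the force-cast treatment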
Treat these as force-castable parameters; stray non-Comfy weights and
random params are already buffers and get this handling.
---
 comfy/model_patcher.py | 17 +++++++++++------
 1 file changed, 11 insertions(+), 6 deletions(-)

diff --git a/comfy/model_patcher.py b/comfy/model_patcher.py
index 93d19d6fe..ee56f8523 100644
--- a/comfy/model_patcher.py
+++ b/comfy/model_patcher.py
@@ -685,9 +685,9 @@ class ModelPatcher:
                 sd.pop(k)
         return sd

-    def patch_weight_to_device(self, key, device_to=None, inplace_update=False, return_weight=False):
+    def patch_weight_to_device(self, key, device_to=None, inplace_update=False, return_weight=False, force_cast=False):
         weight, set_func, convert_func = get_key_weight(self.model, key)
-        if key not in self.patches:
+        if key not in self.patches and not force_cast:
             return weight

         inplace_update = self.weight_inplace_update or inplace_update
@@ -695,7 +695,7 @@ class ModelPatcher:
         if key not in self.backup and not return_weight:
             self.backup[key] = collections.namedtuple('Dimension', ['weight', 'inplace_update'])(weight.to(device=self.offload_device, copy=inplace_update), inplace_update)

-        temp_dtype = comfy.model_management.lora_compute_dtype(device_to)
+        temp_dtype = comfy.model_management.lora_compute_dtype(device_to) if key in self.patches else None
         if device_to is not None:
             temp_weight = comfy.model_management.cast_to_device(weight, device_to, temp_dtype, copy=True)
         else:
@@ -703,9 +703,10 @@ class ModelPatcher:
         if convert_func is not None:
             temp_weight = convert_func(temp_weight, inplace=True)

-        out_weight = comfy.lora.calculate_weight(self.patches[key], temp_weight, key)
+        out_weight = comfy.lora.calculate_weight(self.patches[key], temp_weight, key) if key in self.patches else temp_weight
         if set_func is None:
-            out_weight = comfy.float.stochastic_rounding(out_weight, weight.dtype, seed=comfy.utils.string_to_seed(key))
+            if key in self.patches:
+                out_weight = comfy.float.stochastic_rounding(out_weight, weight.dtype, seed=comfy.utils.string_to_seed(key))
             if return_weight:
                 return out_weight
             elif inplace_update:
@@ -1584,7 +1585,7 @@ class ModelPatcherDynamic(ModelPatcher):
             key = key_param_name_to_key(n, param_key)
             if key in self.backup:
                 comfy.utils.set_attr_param(self.model, key, self.backup[key].weight)
-            self.patch_weight_to_device(key, device_to=device_to)
+            self.patch_weight_to_device(key, device_to=device_to, force_cast=True)
             weight, _, _ = get_key_weight(self.model, key)
             if weight is not None:
                 self.model.model_loaded_weight_memory += weight.numel() * weight.element_size()
@@ -1609,6 +1610,10 @@ class ModelPatcherDynamic(ModelPatcher):
                         m._v = vbar.alloc(v_weight_size)
                         allocated_size += v_weight_size

+                    for param in params:
+                        if param not in ("weight", "bias"):
+                            force_load_param(self, param, device_to)
+
             else:
                 for param in params:
                     key = key_param_name_to_key(n, param)

From 0be87b082a68bca19ea25a9208120ba5090bea8d Mon Sep 17 00:00:00 2001
From: comfyanonymous <121283862+comfyanonymous@users.noreply.github.com>
Date: Wed, 22 Apr 2026 17:21:43 -0700
Subject: [PATCH 10/81] Update logging level for invalid version format (#13526)

---
 utils/install_util.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/utils/install_util.py b/utils/install_util.py
index 34489aec5..fdba23a8f 100644
--- a/utils/install_util.py
+++ b/utils/install_util.py
@@ -39,7 +39,7 @@ def get_required_packages_versions():
         if len(s) == 2:
             version_str = s[-1]
             if not is_valid_version(version_str):
-                logging.error(f"Invalid version format in requirements.txt: {version_str}")
+                logging.debug(f"Invalid 
version format for {s[0]} in requirements.txt: {version_str}") continue out[s[0]] = version_str return out.copy() From e988df72f8828085c1671d49f96ec50382f11c80 Mon Sep 17 00:00:00 2001 From: Alexander Piskun <13381981+bigcat88@users.noreply.github.com> Date: Thu, 23 Apr 2026 03:59:55 +0300 Subject: [PATCH 11/81] [Partner Nodes] add SD2 real human support (#13509) * feat(api-nodes): add SD2 real human support Signed-off-by: bigcat88 * fix: add validation before uploading Assets Signed-off-by: bigcat88 * Add asset_id and group_id displaying on the node Signed-off-by: bigcat88 * extend poll_op to use instead of custom async cycle Signed-off-by: bigcat88 * added the polling for the "Active" status after asset creation Signed-off-by: bigcat88 * updated tooltip for group_id * allow usage of real human in the ByteDance2FirstLastFrame node * add reference count limits * corrected price in status when input assets contain video Signed-off-by: bigcat88 --------- Signed-off-by: bigcat88 --- comfy_api_nodes/apis/bytedance.py | 35 +++ comfy_api_nodes/nodes_bytedance.py | 468 +++++++++++++++++++++++++++-- comfy_api_nodes/util/client.py | 9 +- 3 files changed, 494 insertions(+), 18 deletions(-) diff --git a/comfy_api_nodes/apis/bytedance.py b/comfy_api_nodes/apis/bytedance.py index dc3bc3213..eafabbefe 100644 --- a/comfy_api_nodes/apis/bytedance.py +++ b/comfy_api_nodes/apis/bytedance.py @@ -122,6 +122,41 @@ class TaskStatusResponse(BaseModel): usage: TaskStatusUsage | None = Field(None) +class GetAssetResponse(BaseModel): + id: str = Field(...) + name: str | None = Field(None) + url: str | None = Field(None) + asset_type: str = Field(...) + group_id: str = Field(...) + status: str = Field(...) + error: TaskStatusError | None = Field(None) + + +class SeedanceCreateVisualValidateSessionResponse(BaseModel): + session_id: str = Field(...) + h5_link: str = Field(...) + + +class SeedanceGetVisualValidateSessionResponse(BaseModel): + session_id: str = Field(...) + status: str = Field(...) + group_id: str | None = Field(None) + error_code: str | None = Field(None) + error_message: str | None = Field(None) + + +class SeedanceCreateAssetRequest(BaseModel): + group_id: str = Field(...) + url: str = Field(...) + asset_type: str = Field(...) + name: str | None = Field(None, max_length=64) + project_name: str | None = Field(None) + + +class SeedanceCreateAssetResponse(BaseModel): + asset_id: str = Field(...) + + # Dollars per 1K tokens, keyed by (model_id, has_video_input). 
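+# Example (hypothetical numbers): a task on dreamina-seedance-2-0-260128 with
+# no video input whose usage reports 40,000 total tokens would be priced at
+# 40 * 0.007 = $0.28 using the table below.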
SEEDANCE2_PRICE_PER_1K_TOKENS = { ("dreamina-seedance-2-0-260128", False): 0.007, diff --git a/comfy_api_nodes/nodes_bytedance.py b/comfy_api_nodes/nodes_bytedance.py index bc564782d..de192c5ac 100644 --- a/comfy_api_nodes/nodes_bytedance.py +++ b/comfy_api_nodes/nodes_bytedance.py @@ -1,5 +1,6 @@ import logging import math +import re import torch from typing_extensions import override @@ -11,9 +12,14 @@ from comfy_api_nodes.apis.bytedance import ( SEEDANCE2_PRICE_PER_1K_TOKENS, SEEDANCE2_REF_VIDEO_PIXEL_LIMITS, VIDEO_TASKS_EXECUTION_TIME, + GetAssetResponse, Image2VideoTaskCreationRequest, ImageTaskCreationResponse, Seedance2TaskCreationRequest, + SeedanceCreateAssetRequest, + SeedanceCreateAssetResponse, + SeedanceCreateVisualValidateSessionResponse, + SeedanceGetVisualValidateSessionResponse, Seedream4Options, Seedream4TaskCreationRequest, TaskAudioContent, @@ -44,10 +50,16 @@ from comfy_api_nodes.util import ( validate_image_aspect_ratio, validate_image_dimensions, validate_string, + validate_video_dimensions, + validate_video_duration, ) +from server import PromptServer BYTEPLUS_IMAGE_ENDPOINT = "/proxy/byteplus/api/v3/images/generations" +_VERIFICATION_POLL_TIMEOUT_SEC = 120 +_VERIFICATION_POLL_INTERVAL_SEC = 3 + SEEDREAM_MODELS = { "seedream 5.0 lite": "seedream-5-0-260128", "seedream-4-5-251128": "seedream-4-5-251128", @@ -96,6 +108,169 @@ def _validate_ref_video_pixels(video: Input.Video, model_id: str, resolution: st ) +async def _resolve_reference_assets( + cls: type[IO.ComfyNode], + asset_ids: list[str], +) -> tuple[dict[str, str], dict[str, str], dict[str, str]]: + """Look up each asset, validate Active status, group by asset_type. + + Returns (image_assets, video_assets, audio_assets), each mapping asset_id -> "asset://". + """ + image_assets: dict[str, str] = {} + video_assets: dict[str, str] = {} + audio_assets: dict[str, str] = {} + for i, raw_id in enumerate(asset_ids, 1): + asset_id = (raw_id or "").strip() + if not asset_id: + continue + result = await sync_op( + cls, + ApiEndpoint(path=f"/proxy/seedance/assets/{asset_id}"), + response_model=GetAssetResponse, + ) + if result.status != "Active": + extra = f" {result.error.code}: {result.error.message}" if result.error else "" + raise ValueError(f"Reference asset {i} (Id={asset_id}) is not Active (Status={result.status}).{extra}") + asset_uri = f"asset://{asset_id}" + if result.asset_type == "Image": + image_assets[asset_id] = asset_uri + elif result.asset_type == "Video": + video_assets[asset_id] = asset_uri + elif result.asset_type == "Audio": + audio_assets[asset_id] = asset_uri + return image_assets, video_assets, audio_assets + + +_ASSET_REF_RE = re.compile(r"\basset ?(\d{1,2})\b", re.IGNORECASE) + + +def _build_asset_labels( + reference_assets: dict[str, str], + image_asset_uris: dict[str, str], + video_asset_uris: dict[str, str], + audio_asset_uris: dict[str, str], + n_reference_images: int, + n_reference_videos: int, + n_reference_audios: int, +) -> dict[int, str]: + """Map asset slot number (from 'asset_N' keys) to its positional label. + + Asset entries are appended to `content` after the reference_images/videos/audios, + so their 1-indexed labels continue from the count of existing same-type refs: + one reference_images entry + one Image-type asset -> asset labelled "Image 2". 
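+
+    Example (hypothetical slots): with one reference_images entry and a single
+    Image-type asset under reference_assets key "asset_1", this returns
+    {1: "Image 2"}, and _rewrite_asset_refs then turns the prompt token
+    "asset1" into "Image 2".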
+ """ + image_n = n_reference_images + video_n = n_reference_videos + audio_n = n_reference_audios + labels: dict[int, str] = {} + for slot_key, raw_id in reference_assets.items(): + asset_id = (raw_id or "").strip() + if not asset_id: + continue + try: + slot_num = int(slot_key.rsplit("_", 1)[-1]) + except ValueError: + continue + if asset_id in image_asset_uris: + image_n += 1 + labels[slot_num] = f"Image {image_n}" + elif asset_id in video_asset_uris: + video_n += 1 + labels[slot_num] = f"Video {video_n}" + elif asset_id in audio_asset_uris: + audio_n += 1 + labels[slot_num] = f"Audio {audio_n}" + return labels + + +def _rewrite_asset_refs(prompt: str, labels: dict[int, str]) -> str: + """Case-insensitively replace 'assetNN' (1-2 digit) tokens with their labels.""" + if not labels: + return prompt + + def _sub(m: "re.Match[str]") -> str: + return labels.get(int(m.group(1)), m.group(0)) + + return _ASSET_REF_RE.sub(_sub, prompt) + + +async def _obtain_group_id_via_h5_auth(cls: type[IO.ComfyNode]) -> str: + session = await sync_op( + cls, + ApiEndpoint(path="/proxy/seedance/visual-validate/sessions", method="POST"), + response_model=SeedanceCreateVisualValidateSessionResponse, + ) + logger.warning("Seedance authentication required. Open link: %s", session.h5_link) + + h5_text = f"Open this link in your browser and complete face verification:\n\n{session.h5_link}" + + result = await poll_op( + cls, + ApiEndpoint(path=f"/proxy/seedance/visual-validate/sessions/{session.session_id}"), + response_model=SeedanceGetVisualValidateSessionResponse, + status_extractor=lambda r: r.status, + completed_statuses=["completed"], + failed_statuses=["failed"], + poll_interval=_VERIFICATION_POLL_INTERVAL_SEC, + max_poll_attempts=(_VERIFICATION_POLL_TIMEOUT_SEC // _VERIFICATION_POLL_INTERVAL_SEC) - 1, + estimated_duration=_VERIFICATION_POLL_TIMEOUT_SEC - 1, + extra_text=h5_text, + ) + + if not result.group_id: + raise RuntimeError(f"Seedance session {session.session_id} completed without a group_id") + + logger.warning("Seedance authentication complete. New GroupId: %s", result.group_id) + PromptServer.instance.send_progress_text( + f"Authentication complete. 
New GroupId: {result.group_id}", cls.hidden.unique_id + ) + return result.group_id + + +async def _resolve_group_id(cls: type[IO.ComfyNode], group_id: str) -> str: + if group_id and group_id.strip(): + return group_id.strip() + return await _obtain_group_id_via_h5_auth(cls) + + +async def _create_seedance_asset( + cls: type[IO.ComfyNode], + *, + group_id: str, + url: str, + name: str, + asset_type: str, +) -> str: + req = SeedanceCreateAssetRequest( + group_id=group_id, + url=url, + asset_type=asset_type, + name=name or None, + ) + result = await sync_op( + cls, + ApiEndpoint(path="/proxy/seedance/assets", method="POST"), + response_model=SeedanceCreateAssetResponse, + data=req, + ) + return result.asset_id + + +async def _wait_for_asset_active(cls: type[IO.ComfyNode], asset_id: str, group_id: str) -> GetAssetResponse: + """Poll the newly created asset until its status becomes Active.""" + return await poll_op( + cls, + ApiEndpoint(path=f"/proxy/seedance/assets/{asset_id}"), + response_model=GetAssetResponse, + status_extractor=lambda r: r.status, + completed_statuses=["Active"], + failed_statuses=["Failed"], + poll_interval=5, + max_poll_attempts=1200, + extra_text=f"Waiting for asset pre-processing...\n\nasset_id: {asset_id}\n\ngroup_id: {group_id}", + ) + + def _seedance2_price_extractor(model_id: str, has_video_input: bool): """Returns a price_extractor closure for Seedance 2.0 poll_op.""" rate = SEEDANCE2_PRICE_PER_1K_TOKENS.get((model_id, has_video_input)) @@ -1228,12 +1403,27 @@ class ByteDance2FirstLastFrameNode(IO.ComfyNode): IO.Image.Input( "first_frame", tooltip="First frame image for the video.", + optional=True, ), IO.Image.Input( "last_frame", tooltip="Last frame image for the video.", optional=True, ), + IO.String.Input( + "first_frame_asset_id", + default="", + tooltip="Seedance asset_id to use as the first frame. " + "Mutually exclusive with the first_frame image input.", + optional=True, + ), + IO.String.Input( + "last_frame_asset_id", + default="", + tooltip="Seedance asset_id to use as the last frame. 
" + "Mutually exclusive with the last_frame image input.", + optional=True, + ), IO.Int.Input( "seed", default=0, @@ -1286,24 +1476,54 @@ class ByteDance2FirstLastFrameNode(IO.ComfyNode): async def execute( cls, model: dict, - first_frame: Input.Image, seed: int, watermark: bool, + first_frame: Input.Image | None = None, last_frame: Input.Image | None = None, + first_frame_asset_id: str = "", + last_frame_asset_id: str = "", ) -> IO.NodeOutput: validate_string(model["prompt"], strip_whitespace=True, min_length=1) model_id = SEEDANCE_MODELS[model["model"]] + first_frame_asset_id = first_frame_asset_id.strip() + last_frame_asset_id = last_frame_asset_id.strip() + + if first_frame is not None and first_frame_asset_id: + raise ValueError("Provide only one of first_frame or first_frame_asset_id, not both.") + if first_frame is None and not first_frame_asset_id: + raise ValueError("Either first_frame or first_frame_asset_id is required.") + if last_frame is not None and last_frame_asset_id: + raise ValueError("Provide only one of last_frame or last_frame_asset_id, not both.") + + asset_ids_to_resolve = [a for a in (first_frame_asset_id, last_frame_asset_id) if a] + image_assets: dict[str, str] = {} + if asset_ids_to_resolve: + image_assets, _, _ = await _resolve_reference_assets(cls, asset_ids_to_resolve) + for aid in asset_ids_to_resolve: + if aid not in image_assets: + raise ValueError(f"Asset {aid} is not an Image asset.") + + if first_frame_asset_id: + first_frame_url = image_assets[first_frame_asset_id] + else: + first_frame_url = await upload_image_to_comfyapi(cls, first_frame, wait_label="Uploading first frame.") + content: list[TaskTextContent | TaskImageContent] = [ TaskTextContent(text=model["prompt"]), TaskImageContent( - image_url=TaskImageContentUrl( - url=await upload_image_to_comfyapi(cls, first_frame, wait_label="Uploading first frame.") - ), + image_url=TaskImageContentUrl(url=first_frame_url), role="first_frame", ), ] - if last_frame is not None: + if last_frame_asset_id: + content.append( + TaskImageContent( + image_url=TaskImageContentUrl(url=image_assets[last_frame_asset_id]), + role="last_frame", + ), + ) + elif last_frame is not None: content.append( TaskImageContent( image_url=TaskImageContentUrl( @@ -1385,6 +1605,24 @@ def _seedance2_reference_inputs(resolutions: list[str]): tooltip="Automatically downscale reference videos that exceed the model's pixel budget " "for the selected resolution. 
Aspect ratio is preserved; videos already within limits are untouched.", ), + IO.Autogrow.Input( + "reference_assets", + template=IO.Autogrow.TemplateNames( + IO.String.Input("reference_asset"), + names=[ + "asset_1", + "asset_2", + "asset_3", + "asset_4", + "asset_5", + "asset_6", + "asset_7", + "asset_8", + "asset_9", + ], + min=0, + ), + ), ] @@ -1486,24 +1724,42 @@ class ByteDance2ReferenceNode(IO.ComfyNode): reference_images = model.get("reference_images", {}) reference_videos = model.get("reference_videos", {}) reference_audios = model.get("reference_audios", {}) + reference_assets = model.get("reference_assets", {}) - if not reference_images and not reference_videos: - raise ValueError("At least one reference image or video is required.") + reference_image_assets, reference_video_assets, reference_audio_assets = await _resolve_reference_assets( + cls, list(reference_assets.values()) + ) + + if not reference_images and not reference_videos and not reference_image_assets and not reference_video_assets: + raise ValueError("At least one reference image or video or asset is required.") + + total_images = len(reference_images) + len(reference_image_assets) + if total_images > 9: + raise ValueError( + f"Too many reference images: {total_images} " + f"(images={len(reference_images)}, image assets={len(reference_image_assets)}). Maximum is 9." + ) + total_videos = len(reference_videos) + len(reference_video_assets) + if total_videos > 3: + raise ValueError( + f"Too many reference videos: {total_videos} " + f"(videos={len(reference_videos)}, video assets={len(reference_video_assets)}). Maximum is 3." + ) + total_audios = len(reference_audios) + len(reference_audio_assets) + if total_audios > 3: + raise ValueError( + f"Too many reference audios: {total_audios} " + f"(audios={len(reference_audios)}, audio assets={len(reference_audio_assets)}). Maximum is 3." + ) model_id = SEEDANCE_MODELS[model["model"]] - has_video_input = len(reference_videos) > 0 + has_video_input = total_videos > 0 if model.get("auto_downscale") and reference_videos: - max_px = ( - SEEDANCE2_REF_VIDEO_PIXEL_LIMITS.get(model_id, {}) - .get(model["resolution"], {}) - .get("max") - ) + max_px = SEEDANCE2_REF_VIDEO_PIXEL_LIMITS.get(model_id, {}).get(model["resolution"], {}).get("max") if max_px: for key in reference_videos: - reference_videos[key] = resize_video_to_pixel_budget( - reference_videos[key], max_px - ) + reference_videos[key] = resize_video_to_pixel_budget(reference_videos[key], max_px) total_video_duration = 0.0 for i, key in enumerate(reference_videos, 1): @@ -1531,8 +1787,19 @@ class ByteDance2ReferenceNode(IO.ComfyNode): if total_audio_duration > 15.1: raise ValueError(f"Total reference audio duration is {total_audio_duration:.1f}s. 
Maximum is 15.1 seconds.") + asset_labels = _build_asset_labels( + reference_assets, + reference_image_assets, + reference_video_assets, + reference_audio_assets, + len(reference_images), + len(reference_videos), + len(reference_audios), + ) + prompt_text = _rewrite_asset_refs(model["prompt"], asset_labels) + content: list[TaskTextContent | TaskImageContent | TaskVideoContent | TaskAudioContent] = [ - TaskTextContent(text=model["prompt"]), + TaskTextContent(text=prompt_text), ] for i, key in enumerate(reference_images, 1): content.append( @@ -1573,6 +1840,21 @@ class ByteDance2ReferenceNode(IO.ComfyNode): ), ), ) + for url in reference_image_assets.values(): + content.append( + TaskImageContent( + image_url=TaskImageContentUrl(url=url), + role="reference_image", + ), + ) + for url in reference_video_assets.values(): + content.append( + TaskVideoContent(video_url=TaskVideoContentUrl(url=url)), + ) + for url in reference_audio_assets.values(): + content.append( + TaskAudioContent(audio_url=TaskAudioContentUrl(url=url)), + ) initial_response = await sync_op( cls, ApiEndpoint(path=BYTEPLUS_TASK_ENDPOINT, method="POST"), @@ -1627,6 +1909,156 @@ async def process_video_task( return IO.NodeOutput(await download_url_to_video_output(response.content.video_url)) +class ByteDanceCreateImageAsset(IO.ComfyNode): + + @classmethod + def define_schema(cls) -> IO.Schema: + return IO.Schema( + node_id="ByteDanceCreateImageAsset", + display_name="ByteDance Create Image Asset", + category="api node/image/ByteDance", + description=( + "Create a Seedance 2.0 personal image asset. Uploads the input image and " + "registers it in the given asset group. If group_id is empty, runs a real-person " + "H5 authentication flow to create a new group before adding the asset." + ), + inputs=[ + IO.Image.Input("image", tooltip="Image to register as a personal asset."), + IO.String.Input( + "group_id", + default="", + tooltip="Reuse an existing Seedance asset group ID to skip repeated human verification for the " + "same person. 
Leave empty to run real-person authentication in the browser and create a new group.", + ), + # IO.String.Input( + # "name", + # default="", + # tooltip="Asset name (up to 64 characters).", + # ), + ], + outputs=[ + IO.String.Output(display_name="asset_id"), + IO.String.Output(display_name="group_id"), + ], + hidden=[ + IO.Hidden.auth_token_comfy_org, + IO.Hidden.api_key_comfy_org, + IO.Hidden.unique_id, + ], + # is_api_node=True, + ) + + @classmethod + async def execute( + cls, + image: Input.Image, + group_id: str = "", + # name: str = "", + ) -> IO.NodeOutput: + # if len(name) > 64: + # raise ValueError("Name of asset can not be greater then 64 symbols") + validate_image_dimensions(image, min_width=300, max_width=6000, min_height=300, max_height=6000) + validate_image_aspect_ratio(image, min_ratio=(0.4, 1), max_ratio=(2.5, 1)) + resolved_group = await _resolve_group_id(cls, group_id) + asset_id = await _create_seedance_asset( + cls, + group_id=resolved_group, + url=await upload_image_to_comfyapi(cls, image), + name="", + asset_type="Image", + ) + await _wait_for_asset_active(cls, asset_id, resolved_group) + PromptServer.instance.send_progress_text( + f"Please save the asset_id and group_id for reuse.\n\nasset_id: {asset_id}\n\n" + f"group_id: {resolved_group}", + cls.hidden.unique_id, + ) + return IO.NodeOutput(asset_id, resolved_group) + + +class ByteDanceCreateVideoAsset(IO.ComfyNode): + + @classmethod + def define_schema(cls) -> IO.Schema: + return IO.Schema( + node_id="ByteDanceCreateVideoAsset", + display_name="ByteDance Create Video Asset", + category="api node/video/ByteDance", + description=( + "Create a Seedance 2.0 personal video asset. Uploads the input video and " + "registers it in the given asset group. If group_id is empty, runs a real-person " + "H5 authentication flow to create a new group before adding the asset." + ), + inputs=[ + IO.Video.Input("video", tooltip="Video to register as a personal asset."), + IO.String.Input( + "group_id", + default="", + tooltip="Reuse an existing Seedance asset group ID to skip repeated human verification for the " + "same person. Leave empty to run real-person authentication in the browser and create a new group.", + ), + # IO.String.Input( + # "name", + # default="", + # tooltip="Asset name (up to 64 characters).", + # ), + ], + outputs=[ + IO.String.Output(display_name="asset_id"), + IO.String.Output(display_name="group_id"), + ], + hidden=[ + IO.Hidden.auth_token_comfy_org, + IO.Hidden.api_key_comfy_org, + IO.Hidden.unique_id, + ], + # is_api_node=True, + ) + + @classmethod + async def execute( + cls, + video: Input.Video, + group_id: str = "", + # name: str = "", + ) -> IO.NodeOutput: + # if len(name) > 64: + # raise ValueError("Name of asset can not be greater then 64 symbols") + validate_video_duration(video, min_duration=2, max_duration=15) + validate_video_dimensions(video, min_width=300, max_width=6000, min_height=300, max_height=6000) + + w, h = video.get_dimensions() + if h > 0: + ratio = w / h + if not (0.4 <= ratio <= 2.5): + raise ValueError(f"Asset video aspect ratio (W/H) must be in [0.4, 2.5], got {ratio:.3f} ({w}x{h}).") + pixels = w * h + if not (409_600 <= pixels <= 927_408): + raise ValueError( + f"Asset video total pixels (W×H) must be in [409600, 927408], " f"got {pixels:,} ({w}x{h})." 
+ ) + + fps = float(video.get_frame_rate()) + if not (24 <= fps <= 60): + raise ValueError(f"Asset video FPS must be in [24, 60], got {fps:.2f}.") + + resolved_group = await _resolve_group_id(cls, group_id) + asset_id = await _create_seedance_asset( + cls, + group_id=resolved_group, + url=await upload_video_to_comfyapi(cls, video), + name="", + asset_type="Video", + ) + await _wait_for_asset_active(cls, asset_id, resolved_group) + PromptServer.instance.send_progress_text( + f"Please save the asset_id and group_id for reuse.\n\nasset_id: {asset_id}\n\n" + f"group_id: {resolved_group}", + cls.hidden.unique_id, + ) + return IO.NodeOutput(asset_id, resolved_group) + + class ByteDanceExtension(ComfyExtension): @override async def get_node_list(self) -> list[type[IO.ComfyNode]]: @@ -1640,6 +2072,8 @@ class ByteDanceExtension(ComfyExtension): ByteDance2TextToVideoNode, ByteDance2FirstLastFrameNode, ByteDance2ReferenceNode, + ByteDanceCreateImageAsset, + ByteDanceCreateVideoAsset, ] diff --git a/comfy_api_nodes/util/client.py b/comfy_api_nodes/util/client.py index 9d730b81a..b0cf97ae4 100644 --- a/comfy_api_nodes/util/client.py +++ b/comfy_api_nodes/util/client.py @@ -156,6 +156,7 @@ async def poll_op( estimated_duration: int | None = None, cancel_endpoint: ApiEndpoint | None = None, cancel_timeout: float = 10.0, + extra_text: str | None = None, ) -> M: raw = await poll_op_raw( cls, @@ -176,6 +177,7 @@ async def poll_op( estimated_duration=estimated_duration, cancel_endpoint=cancel_endpoint, cancel_timeout=cancel_timeout, + extra_text=extra_text, ) if not isinstance(raw, dict): raise Exception("Expected JSON response to validate into a Pydantic model, got non-JSON (binary or text).") @@ -260,6 +262,7 @@ async def poll_op_raw( estimated_duration: int | None = None, cancel_endpoint: ApiEndpoint | None = None, cancel_timeout: float = 10.0, + extra_text: str | None = None, ) -> dict[str, Any]: """ Polls an endpoint until the task reaches a terminal state. 
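Polls roughly once per second between status checks.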
Displays time while queued/processing, @@ -299,6 +302,7 @@ async def poll_op_raw( price=state.price, is_queued=state.is_queued, processing_elapsed_seconds=int(proc_elapsed), + extra_text=extra_text, ) await asyncio.sleep(1.0) except Exception as exc: @@ -389,6 +393,7 @@ async def poll_op_raw( price=state.price, is_queued=False, processing_elapsed_seconds=int(state.base_processing_elapsed), + extra_text=extra_text, ) return resp_json @@ -462,6 +467,7 @@ def _display_time_progress( price: float | None = None, is_queued: bool | None = None, processing_elapsed_seconds: int | None = None, + extra_text: str | None = None, ) -> None: if estimated_total is not None and estimated_total > 0 and is_queued is False: pe = processing_elapsed_seconds if processing_elapsed_seconds is not None else elapsed_seconds @@ -469,7 +475,8 @@ def _display_time_progress( time_line = f"Time elapsed: {int(elapsed_seconds)}s (~{remaining}s remaining)" else: time_line = f"Time elapsed: {int(elapsed_seconds)}s" - _display_text(node_cls, time_line, status=status, price=price) + text = f"{time_line}\n\n{extra_text}" if extra_text else time_line + _display_text(node_cls, text, status=status, price=price) async def _diagnose_connectivity() -> dict[str, bool]: From 749d5b4e8d4308c67fee6faa4ef4dfbde23087f7 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jukka=20Sepp=C3=A4nen?= <40791699+kijai@users.noreply.github.com> Date: Thu, 23 Apr 2026 07:07:43 +0300 Subject: [PATCH 12/81] feat: SAM (segment anything) 3.1 support (CORE-34) (#13408) --- comfy/ldm/sam3/detector.py | 596 ++++++++++ comfy/ldm/sam3/sam.py | 425 +++++++ comfy/ldm/sam3/tracker.py | 1785 ++++++++++++++++++++++++++++++ comfy/model_base.py | 5 + comfy/model_detection.py | 12 + comfy/supported_models.py | 53 +- comfy/text_encoders/sam3_clip.py | 97 ++ comfy_extras/nodes_sam3.py | 529 +++++++++ nodes.py | 1 + 9 files changed, 3502 insertions(+), 1 deletion(-) create mode 100644 comfy/ldm/sam3/detector.py create mode 100644 comfy/ldm/sam3/sam.py create mode 100644 comfy/ldm/sam3/tracker.py create mode 100644 comfy/text_encoders/sam3_clip.py create mode 100644 comfy_extras/nodes_sam3.py diff --git a/comfy/ldm/sam3/detector.py b/comfy/ldm/sam3/detector.py new file mode 100644 index 000000000..6ae919a79 --- /dev/null +++ b/comfy/ldm/sam3/detector.py @@ -0,0 +1,596 @@ +# SAM3 detector: transformer encoder-decoder, segmentation head, geometry encoder, scoring. 
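+# Rough data flow (illustrative): a 1008x1008 input is patchified by the ViTDet trunk into a 72x72 grid; the FPN neck yields multi-scale features; text and geometry prompts are fused into the encoder memory; the decoder refines 200 learned query boxes plus a presence token; dot-product scoring ranks queries against the pooled prompt, and the segmentation head predicts one mask per query.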
+ +import math + +import torch +import torch.nn as nn +import torch.nn.functional as F +from torchvision.ops import roi_align + +from comfy.ldm.modules.attention import optimized_attention +from comfy.ldm.sam3.tracker import SAM3Tracker, SAM31Tracker +from comfy.ldm.sam3.sam import SAM3VisionBackbone # noqa: used in __init__ +from comfy.ldm.sam3.sam import MLP, PositionEmbeddingSine + +TRACKER_CLASSES = {"SAM3": SAM3Tracker, "SAM31": SAM31Tracker} +from comfy.ops import cast_to_input + + +def box_cxcywh_to_xyxy(x): + cx, cy, w, h = x.unbind(-1) + return torch.stack([cx - 0.5 * w, cy - 0.5 * h, cx + 0.5 * w, cy + 0.5 * h], dim=-1) + + +def gen_sineembed_for_position(pos_tensor, num_feats=256): + """Per-coordinate sinusoidal embedding: (..., N) -> (..., N * num_feats).""" + assert num_feats % 2 == 0 + hdim = num_feats // 2 + freqs = 10000.0 ** (2 * (torch.arange(hdim, dtype=torch.float32, device=pos_tensor.device) // 2) / hdim) + embeds = [] + for c in range(pos_tensor.shape[-1]): + raw = (pos_tensor[..., c].float() * 2 * math.pi).unsqueeze(-1) / freqs + embeds.append(torch.stack([raw[..., 0::2].sin(), raw[..., 1::2].cos()], dim=-1).flatten(-2)) + return torch.cat(embeds, dim=-1).to(pos_tensor.dtype) + + +class SplitMHA(nn.Module): + """Multi-head attention with separate Q/K/V projections (split from fused in_proj_weight).""" + def __init__(self, d_model, num_heads=8, device=None, dtype=None, operations=None): + super().__init__() + self.num_heads = num_heads + self.q_proj = operations.Linear(d_model, d_model, device=device, dtype=dtype) + self.k_proj = operations.Linear(d_model, d_model, device=device, dtype=dtype) + self.v_proj = operations.Linear(d_model, d_model, device=device, dtype=dtype) + self.out_proj = operations.Linear(d_model, d_model, device=device, dtype=dtype) + + def forward(self, q_input, k_input=None, v_input=None, mask=None): + q = self.q_proj(q_input) + if k_input is None: + k = self.k_proj(q_input) + v = self.v_proj(q_input) + else: + k = self.k_proj(k_input) + v = self.v_proj(v_input if v_input is not None else k_input) + if mask is not None and mask.ndim == 2: + mask = mask[:, None, None, :] # [B, T] -> [B, 1, 1, T] for SDPA broadcast + dtype = q.dtype # manual_cast may produce mixed dtypes + out = optimized_attention(q, k.to(dtype), v.to(dtype), self.num_heads, mask=mask) + return self.out_proj(out) + + +class MLPWithNorm(nn.Module): + """MLP with residual connection and output LayerNorm.""" + def __init__(self, input_dim, hidden_dim, output_dim, num_layers, residual=True, device=None, dtype=None, operations=None): + super().__init__() + dims = [input_dim] + [hidden_dim] * (num_layers - 1) + [output_dim] + self.layers = nn.ModuleList([ + operations.Linear(dims[i], dims[i + 1], device=device, dtype=dtype) + for i in range(num_layers) + ]) + self.out_norm = operations.LayerNorm(output_dim, device=device, dtype=dtype) + self.residual = residual and (input_dim == output_dim) + + def forward(self, x): + orig = x + for i, layer in enumerate(self.layers): + x = layer(x) + if i < len(self.layers) - 1: + x = F.relu(x) + if self.residual: + x = x + orig + return self.out_norm(x) + + +class EncoderLayer(nn.Module): + def __init__(self, d_model=256, num_heads=8, dim_ff=2048, device=None, dtype=None, operations=None): + super().__init__() + self.self_attn = SplitMHA(d_model, num_heads, device=device, dtype=dtype, operations=operations) + self.cross_attn_image = SplitMHA(d_model, num_heads, device=device, dtype=dtype, operations=operations) + self.linear1 = 
operations.Linear(d_model, dim_ff, device=device, dtype=dtype) + self.linear2 = operations.Linear(dim_ff, d_model, device=device, dtype=dtype) + self.norm1 = operations.LayerNorm(d_model, device=device, dtype=dtype) + self.norm2 = operations.LayerNorm(d_model, device=device, dtype=dtype) + self.norm3 = operations.LayerNorm(d_model, device=device, dtype=dtype) + + def forward(self, x, pos, text_memory=None, text_mask=None): + normed = self.norm1(x) + q_k = normed + pos + x = x + self.self_attn(q_k, q_k, normed) + if text_memory is not None: + normed = self.norm2(x) + x = x + self.cross_attn_image(normed, text_memory, text_memory, mask=text_mask) + normed = self.norm3(x) + x = x + self.linear2(F.relu(self.linear1(normed))) + return x + + +class TransformerEncoder(nn.Module): + """Checkpoint: transformer.encoder.layers.N.*""" + def __init__(self, d_model=256, num_heads=8, dim_ff=2048, num_layers=6, device=None, dtype=None, operations=None): + super().__init__() + self.layers = nn.ModuleList([ + EncoderLayer(d_model, num_heads, dim_ff, device=device, dtype=dtype, operations=operations) + for _ in range(num_layers) + ]) + + def forward(self, x, pos, text_memory=None, text_mask=None): + for layer in self.layers: + x = layer(x, pos, text_memory, text_mask) + return x + + +class DecoderLayer(nn.Module): + def __init__(self, d_model=256, num_heads=8, dim_ff=2048, device=None, dtype=None, operations=None): + super().__init__() + self.self_attn = SplitMHA(d_model, num_heads, device=device, dtype=dtype, operations=operations) + self.cross_attn = SplitMHA(d_model, num_heads, device=device, dtype=dtype, operations=operations) + self.ca_text = SplitMHA(d_model, num_heads, device=device, dtype=dtype, operations=operations) + self.norm1 = operations.LayerNorm(d_model, device=device, dtype=dtype) + self.norm2 = operations.LayerNorm(d_model, device=device, dtype=dtype) + self.norm3 = operations.LayerNorm(d_model, device=device, dtype=dtype) + self.catext_norm = operations.LayerNorm(d_model, device=device, dtype=dtype) + self.linear1 = operations.Linear(d_model, dim_ff, device=device, dtype=dtype) + self.linear2 = operations.Linear(dim_ff, d_model, device=device, dtype=dtype) + + def forward(self, x, memory, x_pos, memory_pos, text_memory=None, text_mask=None, cross_attn_bias=None): + q_k = x + x_pos + x = self.norm2(x + self.self_attn(q_k, q_k, x)) + if text_memory is not None: + x = self.catext_norm(x + self.ca_text(x + x_pos, text_memory, text_memory, mask=text_mask)) + x = self.norm1(x + self.cross_attn(x + x_pos, memory + memory_pos, memory, mask=cross_attn_bias)) + x = self.norm3(x + self.linear2(F.relu(self.linear1(x)))) + return x + + +class TransformerDecoder(nn.Module): + def __init__(self, d_model=256, num_heads=8, dim_ff=2048, num_layers=6, + num_queries=200, device=None, dtype=None, operations=None): + super().__init__() + self.d_model = d_model + self.num_queries = num_queries + + self.layers = nn.ModuleList([ + DecoderLayer(d_model, num_heads, dim_ff, device=device, dtype=dtype, operations=operations) + for _ in range(num_layers) + ]) + self.norm = operations.LayerNorm(d_model, device=device, dtype=dtype) + self.query_embed = operations.Embedding(num_queries, d_model, device=device, dtype=dtype) + self.reference_points = operations.Embedding(num_queries, 4, device=device, dtype=dtype) # Reference points: Embedding(num_queries, 4) — learned anchor boxes + self.ref_point_head = MLP(d_model * 2, d_model, d_model, 2, device=device, dtype=dtype, operations=operations) # ref_point_head input: 512 (4 
coords * 128 sine features each) + self.bbox_embed = MLP(d_model, d_model, 4, 3, device=device, dtype=dtype, operations=operations) + + self.boxRPB_embed_x = MLP(2, d_model, num_heads, 2, device=device, dtype=dtype, operations=operations) + self.boxRPB_embed_y = MLP(2, d_model, num_heads, 2, device=device, dtype=dtype, operations=operations) + + self.presence_token = operations.Embedding(1, d_model, device=device, dtype=dtype) + self.presence_token_head = MLP(d_model, d_model, 1, 3, device=device, dtype=dtype, operations=operations) + self.presence_token_out_norm = operations.LayerNorm(d_model, device=device, dtype=dtype) + + @staticmethod + def _inverse_sigmoid(x): + return torch.log(x / (1 - x + 1e-6) + 1e-6) + + def _compute_box_rpb(self, ref_points, H, W): + """Box relative position bias: (B, Q, 4) cxcywh -> (B, n_heads, Q+1, H*W) bias.""" + boxes_xyxy = box_cxcywh_to_xyxy(ref_points) + B, Q, _ = boxes_xyxy.shape + coords_h = torch.arange(H, device=ref_points.device, dtype=torch.float32) / H + coords_w = torch.arange(W, device=ref_points.device, dtype=torch.float32) / W + deltas_x = coords_w.view(1, 1, -1, 1) - boxes_xyxy[:, :, None, 0:3:2] + deltas_y = coords_h.view(1, 1, -1, 1) - boxes_xyxy[:, :, None, 1:4:2] + + log2_8 = float(math.log2(8)) + def log_scale(d): + return torch.sign(d * 8) * torch.log2(torch.abs(d * 8) + 1.0) / log2_8 + + rpb_x = self.boxRPB_embed_x(log_scale(deltas_x).to(ref_points.dtype)) + rpb_y = self.boxRPB_embed_y(log_scale(deltas_y).to(ref_points.dtype)) + + bias = (rpb_y.unsqueeze(3) + rpb_x.unsqueeze(2)).flatten(2, 3).permute(0, 3, 1, 2) + pres_bias = torch.zeros(B, bias.shape[1], 1, bias.shape[3], device=bias.device, dtype=bias.dtype) + return torch.cat([pres_bias, bias], dim=2) + + def forward(self, memory, memory_pos, text_memory=None, text_mask=None, H=72, W=72): + B = memory.shape[0] + tgt = cast_to_input(self.query_embed.weight, memory).unsqueeze(0).expand(B, -1, -1) + presence_out = cast_to_input(self.presence_token.weight, memory)[None].expand(B, -1, -1) + ref_points = cast_to_input(self.reference_points.weight, memory).unsqueeze(0).expand(B, -1, -1).sigmoid() + + for layer_idx, layer in enumerate(self.layers): + query_pos = self.ref_point_head(gen_sineembed_for_position(ref_points, self.d_model)) + tgt_with_pres = torch.cat([presence_out, tgt], dim=1) + pos_with_pres = torch.cat([torch.zeros_like(presence_out), query_pos], dim=1) + tgt_with_pres = layer(tgt_with_pres, memory, pos_with_pres, memory_pos, + text_memory, text_mask, self._compute_box_rpb(ref_points, H, W)) + presence_out, tgt = tgt_with_pres[:, :1], tgt_with_pres[:, 1:] + if layer_idx < len(self.layers) - 1: + ref_inv = self._inverse_sigmoid(ref_points) + ref_points = (ref_inv + self.bbox_embed(self.norm(tgt))).sigmoid().detach() + + query_out = self.norm(tgt) + ref_inv = self._inverse_sigmoid(ref_points) + boxes = (ref_inv + self.bbox_embed(query_out)).sigmoid() + presence = self.presence_token_head(self.presence_token_out_norm(presence_out)).squeeze(-1) + return {"decoder_output": query_out, "pred_boxes": boxes, "presence": presence} + + +class Transformer(nn.Module): + def __init__(self, d_model=256, num_heads=8, dim_ff=2048, enc_layers=6, dec_layers=6, + num_queries=200, device=None, dtype=None, operations=None): + super().__init__() + self.encoder = TransformerEncoder(d_model, num_heads, dim_ff, enc_layers, device=device, dtype=dtype, operations=operations) + self.decoder = TransformerDecoder(d_model, num_heads, dim_ff, dec_layers, num_queries, device=device, dtype=dtype,
operations=operations) + + +class GeometryEncoder(nn.Module): + def __init__(self, d_model=256, num_heads=8, num_layers=3, roi_size=7, device=None, dtype=None, operations=None): + super().__init__() + self.d_model = d_model + self.roi_size = roi_size + self.pos_enc = PositionEmbeddingSine(num_pos_feats=d_model, normalize=True) + self.points_direct_project = operations.Linear(2, d_model, device=device, dtype=dtype) + self.points_pool_project = operations.Linear(d_model, d_model, device=device, dtype=dtype) + self.points_pos_enc_project = operations.Linear(d_model, d_model, device=device, dtype=dtype) + self.boxes_direct_project = operations.Linear(4, d_model, device=device, dtype=dtype) + self.boxes_pool_project = operations.Conv2d(d_model, d_model, kernel_size=roi_size, device=device, dtype=dtype) + self.boxes_pos_enc_project = operations.Linear(d_model + 2, d_model, device=device, dtype=dtype) + self.label_embed = operations.Embedding(2, d_model, device=device, dtype=dtype) + self.cls_embed = operations.Embedding(1, d_model, device=device, dtype=dtype) + self.norm = operations.LayerNorm(d_model, device=device, dtype=dtype) + self.img_pre_norm = operations.LayerNorm(d_model, device=device, dtype=dtype) + self.encode = nn.ModuleList([ + EncoderLayer(d_model, num_heads, 2048, device=device, dtype=dtype, operations=operations) + for _ in range(num_layers) + ]) + self.encode_norm = operations.LayerNorm(d_model, device=device, dtype=dtype) + self.final_proj = operations.Linear(d_model, d_model, device=device, dtype=dtype) + + def _encode_points(self, coords, labels, img_feat_2d): + """Encode point prompts: direct + pool + pos_enc + label. coords: [B, N, 2] normalized.""" + B, N, _ = coords.shape + embed = self.points_direct_project(coords) + # Pool features from backbone at point locations via grid_sample + grid = (coords * 2 - 1).unsqueeze(2) # [B, N, 1, 2] in [-1, 1] + sampled = F.grid_sample(img_feat_2d, grid, align_corners=False) # [B, C, N, 1] + embed = embed + self.points_pool_project(sampled.squeeze(-1).permute(0, 2, 1)) # [B, N, C] + # Positional encoding of coordinates + x, y = coords[:, :, 0], coords[:, :, 1] # [B, N] + pos_x, pos_y = self.pos_enc._encode_xy(x.flatten(), y.flatten()) + enc = torch.cat([pos_x, pos_y], dim=-1).view(B, N, -1) + embed = embed + self.points_pos_enc_project(cast_to_input(enc, embed)) + embed = embed + cast_to_input(self.label_embed(labels.long()), embed) + return embed + + def _encode_boxes(self, boxes, labels, img_feat_2d): + """Encode box prompts: direct + pool + pos_enc + label. 
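The four components are summed into a single [B, N, C] embedding.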
boxes: [B, N, 4] normalized cxcywh.""" + B, N, _ = boxes.shape + embed = self.boxes_direct_project(boxes) + # ROI align from backbone at box regions + H, W = img_feat_2d.shape[-2:] + boxes_xyxy = box_cxcywh_to_xyxy(boxes) + scale = torch.tensor([W, H, W, H], dtype=boxes_xyxy.dtype, device=boxes_xyxy.device) + boxes_scaled = boxes_xyxy * scale + sampled = roi_align(img_feat_2d, boxes_scaled.view(-1, 4).split(N), self.roi_size) + proj = self.boxes_pool_project(sampled).view(B, N, -1) # Conv2d(roi_size) -> [B*N, C, 1, 1] -> [B, N, C] + embed = embed + proj + # Positional encoding of box center + size + cx, cy, w, h = boxes[:, :, 0], boxes[:, :, 1], boxes[:, :, 2], boxes[:, :, 3] + enc = self.pos_enc.encode_boxes(cx.flatten(), cy.flatten(), w.flatten(), h.flatten()) + enc = enc.view(B, N, -1) + embed = embed + self.boxes_pos_enc_project(cast_to_input(enc, embed)) + embed = embed + cast_to_input(self.label_embed(labels.long()), embed) + return embed + + def forward(self, points=None, boxes=None, image_features=None): + """Encode geometry prompts. image_features: [B, HW, C] flattened backbone features.""" + # Prepare 2D image features for pooling + img_feat_2d = None + if image_features is not None: + B = image_features.shape[0] + HW, C = image_features.shape[1], image_features.shape[2] + hw = int(math.sqrt(HW)) + img_normed = self.img_pre_norm(image_features) + img_feat_2d = img_normed.permute(0, 2, 1).view(B, C, hw, hw) + + embeddings = [] + if points is not None: + coords, labels = points + embeddings.append(self._encode_points(coords, labels, img_feat_2d)) + if boxes is not None: + B = boxes.shape[0] + box_labels = torch.ones(B, boxes.shape[1], dtype=torch.long, device=boxes.device) + embeddings.append(self._encode_boxes(boxes, box_labels, img_feat_2d)) + if not embeddings: + return None + geo = torch.cat(embeddings, dim=1) + geo = self.norm(geo) + if image_features is not None: + for layer in self.encode: + geo = layer(geo, torch.zeros_like(geo), image_features) + geo = self.encode_norm(geo) + return self.final_proj(geo) + + +class PixelDecoder(nn.Module): + """Top-down FPN pixel decoder with GroupNorm + ReLU + nearest interpolation.""" + def __init__(self, d_model=256, num_stages=3, device=None, dtype=None, operations=None): + super().__init__() + self.conv_layers = nn.ModuleList([operations.Conv2d(d_model, d_model, kernel_size=3, padding=1, device=device, dtype=dtype) for _ in range(num_stages)]) + self.norms = nn.ModuleList([operations.GroupNorm(8, d_model, device=device, dtype=dtype) for _ in range(num_stages)]) + + def forward(self, backbone_features): + prev = backbone_features[-1] + for i, feat in enumerate(backbone_features[:-1][::-1]): + prev = F.relu(self.norms[i](self.conv_layers[i](feat + F.interpolate(prev, size=feat.shape[-2:], mode="nearest")))) + return prev + + +class MaskPredictor(nn.Module): + def __init__(self, d_model=256, device=None, dtype=None, operations=None): + super().__init__() + self.mask_embed = MLP(d_model, d_model, d_model, 3, device=device, dtype=dtype, operations=operations) + + def forward(self, query_embeddings, pixel_features): + mask_embed = self.mask_embed(query_embeddings) + return torch.einsum("bqc,bchw->bqhw", mask_embed, pixel_features) + + +class SegmentationHead(nn.Module): + def __init__(self, d_model=256, num_heads=8, device=None, dtype=None, operations=None): + super().__init__() + self.d_model = d_model + self.pixel_decoder = PixelDecoder(d_model, 3, device=device, dtype=dtype, operations=operations) + self.mask_predictor = 
MaskPredictor(d_model, device=device, dtype=dtype, operations=operations) + self.cross_attend_prompt = SplitMHA(d_model, num_heads, device=device, dtype=dtype, operations=operations) + self.cross_attn_norm = operations.LayerNorm(d_model, device=device, dtype=dtype) + self.instance_seg_head = operations.Conv2d(d_model, d_model, kernel_size=1, device=device, dtype=dtype) + self.semantic_seg_head = operations.Conv2d(d_model, 1, kernel_size=1, device=device, dtype=dtype) + + def forward(self, query_embeddings, backbone_features, encoder_hidden_states=None, prompt=None, prompt_mask=None): + if encoder_hidden_states is not None and prompt is not None: + enc_normed = self.cross_attn_norm(encoder_hidden_states) + enc_cross = self.cross_attend_prompt(enc_normed, prompt, prompt, mask=prompt_mask) + encoder_hidden_states = enc_cross + encoder_hidden_states + + if encoder_hidden_states is not None: + B, H, W = encoder_hidden_states.shape[0], backbone_features[-1].shape[-2], backbone_features[-1].shape[-1] + encoder_visual = encoder_hidden_states[:, :H * W].permute(0, 2, 1).view(B, self.d_model, H, W) + backbone_features = list(backbone_features) + backbone_features[-1] = encoder_visual + + pixel_features = self.pixel_decoder(backbone_features) + instance_features = self.instance_seg_head(pixel_features) + masks = self.mask_predictor(query_embeddings, instance_features) + return masks + + +class DotProductScoring(nn.Module): + def __init__(self, d_model=256, device=None, dtype=None, operations=None): + super().__init__() + self.hs_proj = operations.Linear(d_model, d_model, device=device, dtype=dtype) + self.prompt_proj = operations.Linear(d_model, d_model, device=device, dtype=dtype) + self.prompt_mlp = MLPWithNorm(d_model, 2048, d_model, 2, device=device, dtype=dtype, operations=operations) + self.scale = 1.0 / (d_model ** 0.5) + + def forward(self, query_embeddings, prompt_embeddings, prompt_mask=None): + prompt = self.prompt_mlp(prompt_embeddings) + if prompt_mask is not None: + weight = prompt_mask.unsqueeze(-1).to(dtype=prompt.dtype) + pooled = (prompt * weight).sum(dim=1) / weight.sum(dim=1).clamp(min=1) + else: + pooled = prompt.mean(dim=1) + hs = self.hs_proj(query_embeddings) + pp = self.prompt_proj(pooled).unsqueeze(-1).to(hs.dtype) + scores = torch.matmul(hs, pp) + return (scores * self.scale).clamp(-12.0, 12.0).squeeze(-1) + + +class SAM3Detector(nn.Module): + def __init__(self, d_model=256, embed_dim=1024, num_queries=200, device=None, dtype=None, operations=None, **kwargs): + super().__init__() + image_model = kwargs.pop("image_model", "SAM3") + for k in ("num_heads", "num_head_channels"): + kwargs.pop(k, None) + multiplex = image_model == "SAM31" + # SAM3: 4 FPN levels, drop last (scalp=1); SAM3.1: 3 levels, use all (scalp=0) + self.scalp = 0 if multiplex else 1 + self.backbone = nn.ModuleDict({ + "vision_backbone": SAM3VisionBackbone(embed_dim=embed_dim, d_model=d_model, multiplex=multiplex, device=device, dtype=dtype, operations=operations, **kwargs), + "language_backbone": nn.ModuleDict({"resizer": operations.Linear(embed_dim, d_model, device=device, dtype=dtype)}), + }) + self.transformer = Transformer(d_model=d_model, num_queries=num_queries, device=device, dtype=dtype, operations=operations) + self.segmentation_head = SegmentationHead(d_model=d_model, device=device, dtype=dtype, operations=operations) + self.geometry_encoder = GeometryEncoder(d_model=d_model, device=device, dtype=dtype, operations=operations) + self.dot_prod_scoring = DotProductScoring(d_model=d_model, 
device=device, dtype=dtype, operations=operations) + + def _get_backbone_features(self, images): + """Run backbone and return (detector_features, detector_positions, tracker_features, tracker_positions).""" + bb = self.backbone["vision_backbone"] + if bb.multiplex: + all_f, all_p, tf, tp = bb(images, tracker_mode="propagation") + else: + all_f, all_p, tf, tp = bb(images, need_tracker=True) + return all_f, all_p, tf, tp + + @staticmethod + def _run_geo_layer(layer, x, memory, memory_pos): + x = x + layer.self_attn(layer.norm1(x)) + x = x + layer.cross_attn_image(layer.norm2(x), memory + memory_pos, memory) + x = x + layer.linear2(F.relu(layer.linear1(layer.norm3(x)))) + return x + + def _detect(self, features, positions, text_embeddings=None, text_mask=None, + points=None, boxes=None): + """Shared detection: geometry encoding, transformer, scoring, segmentation.""" + B = features[0].shape[0] + # Scalp for encoder (use top-level feature), but keep all levels for segmentation head + seg_features = features + if self.scalp > 0: + features = features[:-self.scalp] + positions = positions[:-self.scalp] + enc_feat, enc_pos = features[-1], positions[-1] + _, _, H, W = enc_feat.shape + img_flat = enc_feat.flatten(2).permute(0, 2, 1) + pos_flat = enc_pos.flatten(2).permute(0, 2, 1) + + has_prompts = text_embeddings is not None or points is not None or boxes is not None + if has_prompts: + geo_enc = self.geometry_encoder + geo_prompts = geo_enc(points=points, boxes=boxes, image_features=img_flat) + geo_cls = geo_enc.norm(geo_enc.final_proj(cast_to_input(geo_enc.cls_embed.weight, img_flat).view(1, 1, -1).expand(B, -1, -1))) + for layer in geo_enc.encode: + geo_cls = self._run_geo_layer(layer, geo_cls, img_flat, pos_flat) + geo_cls = geo_enc.encode_norm(geo_cls) + if text_embeddings is not None and text_embeddings.shape[0] != B: + text_embeddings = text_embeddings.expand(B, -1, -1) + if text_mask is not None and text_mask.shape[0] != B: + text_mask = text_mask.expand(B, -1) + parts = [t for t in [text_embeddings, geo_prompts, geo_cls] if t is not None] + text_embeddings = torch.cat(parts, dim=1) + n_new = text_embeddings.shape[1] - (text_mask.shape[1] if text_mask is not None else 0) + if text_mask is not None: + text_mask = torch.cat([text_mask, torch.ones(B, n_new, dtype=torch.bool, device=text_mask.device)], dim=1) + else: + text_mask = torch.ones(B, text_embeddings.shape[1], dtype=torch.bool, device=text_embeddings.device) + + memory = self.transformer.encoder(img_flat, pos_flat, text_embeddings, text_mask) + dec_out = self.transformer.decoder(memory, pos_flat, text_embeddings, text_mask, H, W) + query_out, pred_boxes = dec_out["decoder_output"], dec_out["pred_boxes"] + + if text_embeddings is not None: + scores = self.dot_prod_scoring(query_out, text_embeddings, text_mask) + else: + scores = torch.zeros(B, query_out.shape[1], device=query_out.device) + + masks = self.segmentation_head(query_out, seg_features, encoder_hidden_states=memory, prompt=text_embeddings, prompt_mask=text_mask) + return box_cxcywh_to_xyxy(pred_boxes), scores, masks, dec_out + + def forward(self, images, text_embeddings=None, text_mask=None, points=None, boxes=None, threshold=0.3, orig_size=None): + features, positions, _, _ = self._get_backbone_features(images) + + if text_embeddings is not None: + text_embeddings = self.backbone["language_backbone"]["resizer"](text_embeddings) + if text_mask is not None: + text_mask = text_mask.bool() + + boxes_xyxy, scores, masks, dec_out = self._detect( + features, positions, 
text_embeddings, text_mask, points, boxes) + + if orig_size is not None: + oh, ow = orig_size + boxes_xyxy = boxes_xyxy * torch.tensor([ow, oh, ow, oh], device=boxes_xyxy.device, dtype=boxes_xyxy.dtype) + masks = F.interpolate(masks, size=orig_size, mode="bilinear", align_corners=False) + + return { + "boxes": boxes_xyxy, + "scores": scores, + "masks": masks, + "presence": dec_out.get("presence"), + } + + def forward_from_trunk(self, trunk_out, text_embeddings, text_mask): + """Run detection using a pre-computed ViTDet trunk output. + + text_embeddings must already be resized through language_backbone.resizer. + Returns dict with boxes (normalized xyxy), scores, masks at detector resolution. + """ + bb = self.backbone["vision_backbone"] + features = [conv(trunk_out) for conv in bb.convs] + positions = [cast_to_input(bb.position_encoding(f), f) for f in features] + + if text_mask is not None: + text_mask = text_mask.bool() + + boxes_xyxy, scores, masks, _ = self._detect(features, positions, text_embeddings, text_mask) + return {"boxes": boxes_xyxy, "scores": scores, "masks": masks} + + +class SAM3Model(nn.Module): + def __init__(self, device=None, dtype=None, operations=None, **kwargs): + super().__init__() + self.dtype = dtype + image_model = kwargs.get("image_model", "SAM3") + tracker_cls = TRACKER_CLASSES[image_model] + self.detector = SAM3Detector(device=device, dtype=dtype, operations=operations, **kwargs) + self.tracker = tracker_cls(device=device, dtype=dtype, operations=operations, **kwargs) + + def forward(self, images, **kwargs): + return self.detector(images, **kwargs) + + def forward_segment(self, images, point_inputs=None, box_inputs=None, mask_inputs=None): + """Interactive segmentation using SAM decoder with point/box/mask prompts. + + Args: + images: [B, 3, 1008, 1008] preprocessed images + point_inputs: {"point_coords": [B, N, 2], "point_labels": [B, N]} in 1008x1008 pixel space + box_inputs: [B, 2, 2] box corners (top-left, bottom-right) in 1008x1008 pixel space + mask_inputs: [B, 1, H, W] coarse mask logits to refine + Returns: + [B, 1, image_size, image_size] high-res mask logits + """ + bb = self.detector.backbone["vision_backbone"] + if bb.multiplex: + _, _, tracker_features, tracker_positions = bb(images, tracker_mode="interactive") + else: + _, _, tracker_features, tracker_positions = bb(images, need_tracker=True) + if self.detector.scalp > 0: + tracker_features = tracker_features[:-self.detector.scalp] + tracker_positions = tracker_positions[:-self.detector.scalp] + + high_res = list(tracker_features[:-1]) + backbone_feat = tracker_features[-1] + B, C, H, W = backbone_feat.shape + # Add no-memory embedding (init frame path) + no_mem = getattr(self.tracker, 'interactivity_no_mem_embed', None) + if no_mem is None: + no_mem = getattr(self.tracker, 'no_mem_embed', None) + if no_mem is not None: + feat_flat = backbone_feat.flatten(2).permute(0, 2, 1) + feat_flat = feat_flat + cast_to_input(no_mem, feat_flat) + backbone_feat = feat_flat.view(B, H, W, C).permute(0, 3, 1, 2) + + num_pts = 0 if point_inputs is None else point_inputs["point_labels"].size(1) + _, high_res_masks, _, _ = self.tracker._forward_sam_heads( + backbone_features=backbone_feat, + point_inputs=point_inputs, + mask_inputs=mask_inputs, + box_inputs=box_inputs, + high_res_features=high_res, + multimask_output=(0 < num_pts <= 1), + ) + return high_res_masks + + def forward_video(self, images, initial_masks, pbar=None, text_prompts=None, + new_det_thresh=0.5, max_objects=0, detect_interval=1): + """Track 
video with optional per-frame text-prompted detection.""" + bb = self.detector.backbone["vision_backbone"] + + def backbone_fn(frame, frame_idx=None): + trunk_out = bb.trunk(frame) + if bb.multiplex: + _, _, tf, tp = bb(frame, tracker_mode="propagation", cached_trunk=trunk_out, tracker_only=True) + else: + _, _, tf, tp = bb(frame, need_tracker=True, cached_trunk=trunk_out, tracker_only=True) + return tf, tp, trunk_out + + detect_fn = None + if text_prompts: + resizer = self.detector.backbone["language_backbone"]["resizer"] + resized = [(resizer(emb), m.bool() if m is not None else None) for emb, m in text_prompts] + def detect_fn(trunk_out): + all_scores, all_masks = [], [] + for emb, mask in resized: + det = self.detector.forward_from_trunk(trunk_out, emb, mask) + all_scores.append(det["scores"]) + all_masks.append(det["masks"]) + return {"scores": torch.cat(all_scores, dim=1), "masks": torch.cat(all_masks, dim=1)} + + if hasattr(self.tracker, 'track_video_with_detection'): + return self.tracker.track_video_with_detection( + backbone_fn, images, initial_masks, detect_fn, + new_det_thresh=new_det_thresh, max_objects=max_objects, + detect_interval=detect_interval, backbone_obj=bb, pbar=pbar) + # SAM3 (non-multiplex) — no detection support, requires initial masks + if initial_masks is None: + raise ValueError("SAM3 (non-multiplex) requires initial_mask for video tracking") + return self.tracker.track_video(backbone_fn, images, initial_masks, pbar=pbar, backbone_obj=bb) diff --git a/comfy/ldm/sam3/sam.py b/comfy/ldm/sam3/sam.py new file mode 100644 index 000000000..272781d45 --- /dev/null +++ b/comfy/ldm/sam3/sam.py @@ -0,0 +1,425 @@ +# SAM3 shared components: primitives, ViTDet backbone, FPN neck, position encodings. + +import math + +import torch +import torch.nn as nn +import torch.nn.functional as F + +from comfy.ldm.modules.attention import optimized_attention +from comfy.ldm.flux.math import apply_rope +from comfy.ldm.flux.layers import EmbedND +from comfy.ops import cast_to_input + + +class MLP(nn.Module): + def __init__(self, input_dim, hidden_dim, output_dim, num_layers, sigmoid_output=False, device=None, dtype=None, operations=None): + super().__init__() + dims = [input_dim] + [hidden_dim] * (num_layers - 1) + [output_dim] + self.layers = nn.ModuleList([operations.Linear(dims[i], dims[i + 1], device=device, dtype=dtype) for i in range(num_layers)]) + self.sigmoid_output = sigmoid_output + + def forward(self, x): + for i, layer in enumerate(self.layers): + x = F.relu(layer(x)) if i < len(self.layers) - 1 else layer(x) + return torch.sigmoid(x) if self.sigmoid_output else x + + +class SAMAttention(nn.Module): + def __init__(self, embedding_dim, num_heads, downsample_rate=1, kv_in_dim=None, device=None, dtype=None, operations=None): + super().__init__() + self.num_heads = num_heads + internal_dim = embedding_dim // downsample_rate + kv_dim = kv_in_dim if kv_in_dim is not None else embedding_dim + self.q_proj = operations.Linear(embedding_dim, internal_dim, device=device, dtype=dtype) + self.k_proj = operations.Linear(kv_dim, internal_dim, device=device, dtype=dtype) + self.v_proj = operations.Linear(kv_dim, internal_dim, device=device, dtype=dtype) + self.out_proj = operations.Linear(internal_dim, embedding_dim, device=device, dtype=dtype) + + def forward(self, q, k, v): + q = self.q_proj(q) + k = self.k_proj(k) + v = self.v_proj(v) + return self.out_proj(optimized_attention(q, k, v, self.num_heads)) + + +class TwoWayAttentionBlock(nn.Module): + def __init__(self, embedding_dim, 
num_heads, mlp_dim=2048, attention_downsample_rate=2, skip_first_layer_pe=False, device=None, dtype=None, operations=None): + super().__init__() + self.skip_first_layer_pe = skip_first_layer_pe + self.self_attn = SAMAttention(embedding_dim, num_heads, device=device, dtype=dtype, operations=operations) + self.cross_attn_token_to_image = SAMAttention(embedding_dim, num_heads, downsample_rate=attention_downsample_rate, device=device, dtype=dtype, operations=operations) + self.cross_attn_image_to_token = SAMAttention(embedding_dim, num_heads, downsample_rate=attention_downsample_rate, device=device, dtype=dtype, operations=operations) + self.mlp = nn.Sequential(operations.Linear(embedding_dim, mlp_dim, device=device, dtype=dtype), nn.ReLU(), operations.Linear(mlp_dim, embedding_dim, device=device, dtype=dtype)) + self.norm1 = operations.LayerNorm(embedding_dim, device=device, dtype=dtype) + self.norm2 = operations.LayerNorm(embedding_dim, device=device, dtype=dtype) + self.norm3 = operations.LayerNorm(embedding_dim, device=device, dtype=dtype) + self.norm4 = operations.LayerNorm(embedding_dim, device=device, dtype=dtype) + + def forward(self, queries, keys, query_pe, key_pe): + if self.skip_first_layer_pe: + queries = self.norm1(self.self_attn(queries, queries, queries)) + else: + q = queries + query_pe + queries = self.norm1(queries + self.self_attn(q, q, queries)) + q, k = queries + query_pe, keys + key_pe + queries = self.norm2(queries + self.cross_attn_token_to_image(q, k, keys)) + queries = self.norm3(queries + self.mlp(queries)) + q, k = queries + query_pe, keys + key_pe + keys = self.norm4(keys + self.cross_attn_image_to_token(k, q, queries)) + return queries, keys + + +class TwoWayTransformer(nn.Module): + def __init__(self, depth=2, embedding_dim=256, num_heads=8, mlp_dim=2048, attention_downsample_rate=2, device=None, dtype=None, operations=None): + super().__init__() + self.layers = nn.ModuleList([ + TwoWayAttentionBlock(embedding_dim, num_heads, mlp_dim, attention_downsample_rate, + skip_first_layer_pe=(i == 0), device=device, dtype=dtype, operations=operations) + for i in range(depth) + ]) + self.final_attn_token_to_image = SAMAttention(embedding_dim, num_heads, downsample_rate=attention_downsample_rate, device=device, dtype=dtype, operations=operations) + self.norm_final = operations.LayerNorm(embedding_dim, device=device, dtype=dtype) + + def forward(self, image_embedding, image_pe, point_embedding): + queries, keys = point_embedding, image_embedding + for layer in self.layers: + queries, keys = layer(queries, keys, point_embedding, image_pe) + q, k = queries + point_embedding, keys + image_pe + queries = self.norm_final(queries + self.final_attn_token_to_image(q, k, keys)) + return queries, keys + + +class PositionEmbeddingRandom(nn.Module): + """Fourier feature positional encoding with random gaussian projection.""" + def __init__(self, num_pos_feats=64, scale=None): + super().__init__() + self.register_buffer("positional_encoding_gaussian_matrix", (scale or 1.0) * torch.randn(2, num_pos_feats)) + + def _encode(self, normalized_coords): + """Map normalized [0,1] coordinates to fourier features via random projection. 
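Inputs in [0, 1] are mapped to [-1, 1], projected through a fixed gaussian matrix scaled by 2*pi, and the sin/cos features are concatenated.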
Computes in fp32.""" + orig_dtype = normalized_coords.dtype + proj_matrix = self.positional_encoding_gaussian_matrix.to(device=normalized_coords.device, dtype=torch.float32) + projected = 2 * math.pi * (2 * normalized_coords.float() - 1) @ proj_matrix + return torch.cat([projected.sin(), projected.cos()], dim=-1).to(orig_dtype) + + def forward(self, size, device=None): + h, w = size + dev = device if device is not None else self.positional_encoding_gaussian_matrix.device + ones = torch.ones((h, w), device=dev, dtype=torch.float32) + norm_xy = torch.stack([(ones.cumsum(1) - 0.5) / w, (ones.cumsum(0) - 0.5) / h], dim=-1) + return self._encode(norm_xy).permute(2, 0, 1).unsqueeze(0) + + def forward_with_coords(self, pixel_coords, image_size): + norm = pixel_coords.clone() + norm[:, :, 0] /= image_size[1] + norm[:, :, 1] /= image_size[0] + return self._encode(norm) + + +# ViTDet backbone + FPN neck + +def window_partition(x: torch.Tensor, window_size: int): + B, H, W, C = x.shape + pad_h = (window_size - H % window_size) % window_size + pad_w = (window_size - W % window_size) % window_size + if pad_h > 0 or pad_w > 0: + x = F.pad(x, (0, 0, 0, pad_w, 0, pad_h)) + Hp, Wp = H + pad_h, W + pad_w + x = x.view(B, Hp // window_size, window_size, Wp // window_size, window_size, C) + windows = x.permute(0, 1, 3, 2, 4, 5).contiguous().view(-1, window_size, window_size, C) + return windows, (Hp, Wp) + + +def window_unpartition(windows: torch.Tensor, window_size: int, pad_hw, hw): + Hp, Wp = pad_hw + H, W = hw + B = windows.shape[0] // (Hp * Wp // window_size // window_size) + x = windows.view(B, Hp // window_size, Wp // window_size, window_size, window_size, -1) + x = x.permute(0, 1, 3, 2, 4, 5).contiguous().view(B, Hp, Wp, -1) + if Hp > H or Wp > W: + x = x[:, :H, :W, :].contiguous() + return x + + +def rope_2d(end_x: int, end_y: int, dim: int, theta: float = 10000.0, scale_pos: float = 1.0): + """Generate 2D axial RoPE using flux EmbedND. 
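Token ids are (x, y) grid positions scaled by scale_pos, with half of dim assigned to each axis.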
Returns [1, 1, HW, dim//2, 2, 2].""" + t = torch.arange(end_x * end_y, dtype=torch.float32) + ids = torch.stack([(t % end_x) * scale_pos, + torch.div(t, end_x, rounding_mode="floor") * scale_pos], dim=-1) + return EmbedND(dim=dim, theta=theta, axes_dim=[dim // 2, dim // 2])(ids.unsqueeze(0)) + + +class _ViTMLP(nn.Module): + def __init__(self, dim, mlp_ratio=4.0, device=None, dtype=None, operations=None): + super().__init__() + hidden = int(dim * mlp_ratio) + self.fc1 = operations.Linear(dim, hidden, device=device, dtype=dtype) + self.act = nn.GELU() + self.fc2 = operations.Linear(hidden, dim, device=device, dtype=dtype) + + def forward(self, x): + return self.fc2(self.act(self.fc1(x))) + + +class Attention(nn.Module): + """ViTDet multi-head attention with fused QKV projection.""" + + def __init__(self, dim, num_heads=8, qkv_bias=True, use_rope=False, device=None, dtype=None, operations=None): + super().__init__() + self.num_heads = num_heads + self.head_dim = dim // num_heads + self.use_rope = use_rope + self.qkv = operations.Linear(dim, dim * 3, bias=qkv_bias, device=device, dtype=dtype) + self.proj = operations.Linear(dim, dim, device=device, dtype=dtype) + + def forward(self, x, freqs_cis=None): + B, N, C = x.shape + qkv = self.qkv(x).reshape(B, N, 3, self.num_heads, self.head_dim) + q, k, v = qkv.permute(2, 0, 3, 1, 4).unbind(dim=0) + if self.use_rope and freqs_cis is not None: + q, k = apply_rope(q, k, freqs_cis) + return self.proj(optimized_attention(q, k, v, self.num_heads, skip_reshape=True)) + + +class Block(nn.Module): + def __init__(self, dim, num_heads, mlp_ratio=4.0, qkv_bias=True, window_size=0, use_rope=False, device=None, dtype=None, operations=None): + super().__init__() + self.window_size = window_size + self.norm1 = operations.LayerNorm(dim, device=device, dtype=dtype) + self.attn = Attention(dim, num_heads, qkv_bias, use_rope, device=device, dtype=dtype, operations=operations) + self.norm2 = operations.LayerNorm(dim, device=device, dtype=dtype) + self.mlp = _ViTMLP(dim, mlp_ratio, device=device, dtype=dtype, operations=operations) + + def forward(self, x, freqs_cis=None): + shortcut = x + x = self.norm1(x) + if self.window_size > 0: + H, W = x.shape[1], x.shape[2] + x, pad_hw = window_partition(x, self.window_size) + x = x.view(x.shape[0], self.window_size * self.window_size, -1) + x = self.attn(x, freqs_cis=freqs_cis) + x = x.view(-1, self.window_size, self.window_size, x.shape[-1]) + x = window_unpartition(x, self.window_size, pad_hw, (H, W)) + else: + B, H, W, C = x.shape + x = x.view(B, H * W, C) + x = self.attn(x, freqs_cis=freqs_cis) + x = x.view(B, H, W, C) + x = shortcut + x + x = x + self.mlp(self.norm2(x)) + return x + + +class PatchEmbed(nn.Module): + def __init__(self, patch_size=14, in_chans=3, embed_dim=1024, device=None, dtype=None, operations=None): + super().__init__() + self.proj = operations.Conv2d(in_chans, embed_dim, kernel_size=patch_size, stride=patch_size, bias=False, device=device, dtype=dtype) + + def forward(self, x): + return self.proj(x) + + +class ViTDet(nn.Module): + def __init__(self, img_size=1008, patch_size=14, embed_dim=1024, depth=32, num_heads=16, mlp_ratio=4.625, qkv_bias=True, window_size=24, + global_att_blocks=(7, 15, 23, 31), use_rope=True, pretrain_img_size=336, device=None, dtype=None, operations=None, **kwargs): + super().__init__() + self.img_size = img_size + self.patch_size = patch_size + self.embed_dim = embed_dim + self.num_heads = num_heads + self.global_att_blocks = set(global_att_blocks) + + self.patch_embed = 
PatchEmbed(patch_size, 3, embed_dim, device=device, dtype=dtype, operations=operations) + + num_patches = (pretrain_img_size // patch_size) ** 2 + 1 # +1 for cls token + self.pos_embed = nn.Parameter(torch.zeros(1, num_patches, embed_dim, device=device, dtype=dtype)) + + self.ln_pre = operations.LayerNorm(embed_dim, device=device, dtype=dtype) + + grid_size = img_size // patch_size + pretrain_grid = pretrain_img_size // patch_size + + self.blocks = nn.ModuleList() + for i in range(depth): + is_global = i in self.global_att_blocks + self.blocks.append(Block( + embed_dim, num_heads, mlp_ratio, qkv_bias, + window_size=0 if is_global else window_size, + use_rope=use_rope, + device=device, dtype=dtype, operations=operations, + )) + + if use_rope: + rope_scale = pretrain_grid / grid_size + self.register_buffer("freqs_cis", rope_2d(grid_size, grid_size, embed_dim // num_heads, scale_pos=rope_scale), persistent=False) + self.register_buffer("freqs_cis_window", rope_2d(window_size, window_size, embed_dim // num_heads), persistent=False) + else: + self.freqs_cis = None + self.freqs_cis_window = None + + def _get_pos_embed(self, num_tokens): + pos = self.pos_embed + if pos.shape[1] == num_tokens: + return pos + cls_pos = pos[:, :1] + spatial_pos = pos[:, 1:] + old_size = int(math.sqrt(spatial_pos.shape[1])) + new_size = int(math.sqrt(num_tokens - 1)) if num_tokens > 1 else old_size + spatial_2d = spatial_pos.reshape(1, old_size, old_size, -1).permute(0, 3, 1, 2) + tiles_h = new_size // old_size + 1 + tiles_w = new_size // old_size + 1 + tiled = spatial_2d.tile([1, 1, tiles_h, tiles_w])[:, :, :new_size, :new_size] + tiled = tiled.permute(0, 2, 3, 1).reshape(1, new_size * new_size, -1) + return torch.cat([cls_pos, tiled], dim=1) + + def forward(self, x): + x = self.patch_embed(x) + B, C, Hp, Wp = x.shape + x = x.permute(0, 2, 3, 1).reshape(B, Hp * Wp, C) + + pos = cast_to_input(self._get_pos_embed(Hp * Wp + 1), x) + x = x + pos[:, 1:Hp * Wp + 1] + + x = x.view(B, Hp, Wp, C) + x = self.ln_pre(x) + + freqs_cis_global = self.freqs_cis + freqs_cis_win = self.freqs_cis_window + if freqs_cis_global is not None: + freqs_cis_global = cast_to_input(freqs_cis_global, x) + if freqs_cis_win is not None: + freqs_cis_win = cast_to_input(freqs_cis_win, x) + + for block in self.blocks: + fc = freqs_cis_win if block.window_size > 0 else freqs_cis_global + x = block(x, freqs_cis=fc) + + return x.permute(0, 3, 1, 2) + + +class FPNScaleConv(nn.Module): + def __init__(self, in_dim, out_dim, scale, device=None, dtype=None, operations=None): + super().__init__() + if scale == 4.0: + self.dconv_2x2_0 = operations.ConvTranspose2d(in_dim, in_dim // 2, kernel_size=2, stride=2, device=device, dtype=dtype) + self.dconv_2x2_1 = operations.ConvTranspose2d(in_dim // 2, in_dim // 4, kernel_size=2, stride=2, device=device, dtype=dtype) + proj_in = in_dim // 4 + elif scale == 2.0: + self.dconv_2x2 = operations.ConvTranspose2d(in_dim, in_dim // 2, kernel_size=2, stride=2, device=device, dtype=dtype) + proj_in = in_dim // 2 + elif scale == 1.0: + proj_in = in_dim + elif scale == 0.5: + self.pool = nn.MaxPool2d(kernel_size=2, stride=2) + proj_in = in_dim + self.scale = scale + self.conv_1x1 = operations.Conv2d(proj_in, out_dim, kernel_size=1, device=device, dtype=dtype) + self.conv_3x3 = operations.Conv2d(out_dim, out_dim, kernel_size=3, padding=1, device=device, dtype=dtype) + + def forward(self, x): + if self.scale == 4.0: + x = F.gelu(self.dconv_2x2_0(x)) + x = self.dconv_2x2_1(x) + elif self.scale == 2.0: + x = self.dconv_2x2(x) + elif 
self.scale == 0.5: + x = self.pool(x) + x = self.conv_1x1(x) + x = self.conv_3x3(x) + return x + + +class PositionEmbeddingSine(nn.Module): + """2D sinusoidal position encoding (DETR-style) with result caching.""" + def __init__(self, num_pos_feats=256, temperature=10000.0, normalize=True, scale=None): + super().__init__() + assert num_pos_feats % 2 == 0 + self.half_dim = num_pos_feats // 2 + self.temperature = temperature + self.normalize = normalize + self.scale = scale if scale is not None else 2 * math.pi + self._cache = {} + + def _sincos(self, vals): + """Encode 1D values to interleaved sin/cos features.""" + freqs = self.temperature ** (2 * (torch.arange(self.half_dim, dtype=torch.float32, device=vals.device) // 2) / self.half_dim) + raw = vals[..., None] * self.scale / freqs + return torch.stack((raw[..., 0::2].sin(), raw[..., 1::2].cos()), dim=-1).flatten(-2) + + def _encode_xy(self, x, y): + """Encode normalized x, y coordinates to sinusoidal features. Returns (pos_x, pos_y) each [N, half_dim].""" + dim_t = self.temperature ** (2 * (torch.arange(self.half_dim, dtype=torch.float32, device=x.device) // 2) / self.half_dim) + pos_x = x[:, None] * self.scale / dim_t + pos_y = y[:, None] * self.scale / dim_t + pos_x = torch.stack((pos_x[:, 0::2].sin(), pos_x[:, 1::2].cos()), dim=2).flatten(1) + pos_y = torch.stack((pos_y[:, 0::2].sin(), pos_y[:, 1::2].cos()), dim=2).flatten(1) + return pos_x, pos_y + + def encode_boxes(self, cx, cy, w, h): + """Encode box center + size to [N, d_model+2] features.""" + pos_x, pos_y = self._encode_xy(cx, cy) + return torch.cat((pos_y, pos_x, h[:, None], w[:, None]), dim=1) + + def forward(self, x): + B, C, H, W = x.shape + key = (H, W, x.device) + if key not in self._cache: + gy = torch.arange(H, dtype=torch.float32, device=x.device) + gx = torch.arange(W, dtype=torch.float32, device=x.device) + if self.normalize: + gy, gx = gy / (H - 1 + 1e-6), gx / (W - 1 + 1e-6) + yy, xx = torch.meshgrid(gy, gx, indexing="ij") + self._cache[key] = torch.cat((self._sincos(yy), self._sincos(xx)), dim=-1).permute(2, 0, 1).unsqueeze(0) + return self._cache[key].expand(B, -1, -1, -1) + + +class SAM3VisionBackbone(nn.Module): + def __init__(self, embed_dim=1024, d_model=256, multiplex=False, device=None, dtype=None, operations=None, **kwargs): + super().__init__() + self.trunk = ViTDet(embed_dim=embed_dim, device=device, dtype=dtype, operations=operations, **kwargs) + self.position_encoding = PositionEmbeddingSine(num_pos_feats=d_model, normalize=True) + self.multiplex = multiplex + + fpn_args = dict(device=device, dtype=dtype, operations=operations) + if multiplex: + scales = [4.0, 2.0, 1.0] + self.convs = nn.ModuleList([FPNScaleConv(embed_dim, d_model, s, **fpn_args) for s in scales]) + self.propagation_convs = nn.ModuleList([FPNScaleConv(embed_dim, d_model, s, **fpn_args) for s in scales]) + self.interactive_convs = nn.ModuleList([FPNScaleConv(embed_dim, d_model, s, **fpn_args) for s in scales]) + else: + scales = [4.0, 2.0, 1.0, 0.5] + self.convs = nn.ModuleList([FPNScaleConv(embed_dim, d_model, s, **fpn_args) for s in scales]) + self.sam2_convs = nn.ModuleList([FPNScaleConv(embed_dim, d_model, s, **fpn_args) for s in scales]) + + def forward(self, images, need_tracker=False, tracker_mode=None, cached_trunk=None, tracker_only=False): + backbone_out = cached_trunk if cached_trunk is not None else self.trunk(images) + + if tracker_only: + # Skip detector FPN when only tracker features are needed (video tracking) + if self.multiplex: + tracker_convs = 
self.propagation_convs if tracker_mode == "propagation" else self.interactive_convs + else: + tracker_convs = self.sam2_convs + tracker_features = [conv(backbone_out) for conv in tracker_convs] + tracker_positions = [cast_to_input(self.position_encoding(f), f) for f in tracker_features] + return None, None, tracker_features, tracker_positions + + features = [conv(backbone_out) for conv in self.convs] + positions = [cast_to_input(self.position_encoding(f), f) for f in features] + + if self.multiplex: + if tracker_mode == "propagation": + tracker_convs = self.propagation_convs + elif tracker_mode == "interactive": + tracker_convs = self.interactive_convs + else: + return features, positions, None, None + elif need_tracker: + tracker_convs = self.sam2_convs + else: + return features, positions, None, None + + tracker_features = [conv(backbone_out) for conv in tracker_convs] + tracker_positions = [cast_to_input(self.position_encoding(f), f) for f in tracker_features] + return features, positions, tracker_features, tracker_positions diff --git a/comfy/ldm/sam3/tracker.py b/comfy/ldm/sam3/tracker.py new file mode 100644 index 000000000..6ff6369d1 --- /dev/null +++ b/comfy/ldm/sam3/tracker.py @@ -0,0 +1,1785 @@ +# SAM3 video tracker: memory encoder, memory attention, SAM mask decoder/prompt encoder. + +import numpy as np +import torch +import torch.nn as nn +import torch.nn.functional as F +from tqdm import tqdm + +try: + import cv2 + _HAS_CV2 = True +except ImportError: + from scipy import ndimage + _HAS_CV2 = False + +import comfy.model_management +from comfy.ldm.modules.attention import optimized_attention +from comfy.ldm.sam3.sam import rope_2d, PositionEmbeddingSine +from comfy.ops import cast_to_input +from comfy.ldm.flux.math import apply_rope1 +from comfy.ldm.cascade.common import LayerNorm2d_op +from comfy.ldm.sam3.sam import MLP, PositionEmbeddingRandom +from comfy.ldm.sam3.sam import TwoWayTransformer as SAMTwoWayTransformer + +NO_OBJ_SCORE = -1024.0 + + +def to_spatial(x, H, W): + """Reshape (B, H*W, C) → (B, C, H, W).""" + return x.view(x.shape[0], H, W, -1).permute(0, 3, 1, 2) + +class MultiplexState: + """Tracks object-to-slot assignments for multiplex tracking. Provides mux/demux operations.""" + + def __init__(self, num_objects, multiplex_count, device, dtype): + self.multiplex_count = multiplex_count + self.device = device + self.dtype = dtype + self._build(num_objects) + + def mux(self, x): + """[N_obj, ...] -> [num_buckets, multiplex_count, ...]""" + out_shape = (self.num_buckets, self.multiplex_count) + x.shape[1:] + return (self.mux_matrix.to(device=x.device, dtype=x.dtype) @ x.reshape(self.total_valid_entries, -1)).view(out_shape) + + def demux(self, x): + """[num_buckets, multiplex_count, ...] 
-> [N_obj, ...]""" + out_shape = (self.total_valid_entries,) + x.shape[2:] + flat = x.reshape(self.num_buckets * self.multiplex_count, -1) + return (self.demux_matrix.to(device=x.device, dtype=x.dtype) @ flat).view(out_shape) + + def get_valid_object_mask(self): + """[num_buckets, multiplex_count] bool tensor, True for valid slots.""" + return (self.mux_matrix.sum(dim=1) > 0).reshape(self.num_buckets, self.multiplex_count) + + def _build(self, num_objects): + M = self.multiplex_count + self.num_buckets = (num_objects + M - 1) // M + self.total_valid_entries = num_objects + total_slots = self.num_buckets * M + self.mux_matrix = torch.zeros(total_slots, num_objects, device=self.device, dtype=self.dtype) + self.demux_matrix = torch.zeros(num_objects, total_slots, device=self.device, dtype=self.dtype) + oids = torch.arange(num_objects, device=self.device) + slots = (oids // M) * M + (oids % M) + self.mux_matrix[slots, oids] = 1.0 + self.demux_matrix[oids, slots] = 1.0 + + def add_objects(self, n_new): + """Grow multiplex state for n_new additional objects.""" + self._build(self.total_valid_entries + n_new) + +def _compute_mask_overlap(masks_a, masks_b): + """Max of IoU and IoM (intersection over minimum area). More robust to size differences.""" + a_flat = (masks_a > 0).float().flatten(1) + b_flat = (masks_b > 0).float().flatten(1) + intersection = a_flat @ b_flat.T + area_a = a_flat.sum(1, keepdim=True) + area_b = b_flat.sum(1, keepdim=True).T + iou = intersection / (area_a + area_b - intersection).clamp(min=1) + iom = intersection / torch.min(area_a.expand_as(iou), area_b.expand_as(iou)).clamp(min=1) + return torch.max(iou, iom) + + +def _nms_masks(masks, scores, thresh=0.5): + """Mask-based NMS using IoU+IoM overlap. Returns (filtered_masks, filtered_scores).""" + order = scores.argsort(descending=True) + masks, scores = masks[order], scores[order] + keep = [] + for i in range(masks.shape[0]): + if keep: + if _compute_mask_overlap(masks[i:i+1], masks[torch.tensor(keep, device=masks.device)]).max() >= thresh: + continue + keep.append(i) + return masks[keep], scores[keep] + + +def _get_connected_components(mask_bin): + """Get connected component labels and areas. 
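Uses cv2.connectedComponentsWithStats when available, otherwise falls back to scipy.ndimage.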
mask_bin: [B, 1, H, W] uint8.""" + labels_list, areas_list = [], [] + for i in range(mask_bin.shape[0]): + m = mask_bin[i, 0].cpu().numpy() + if _HAS_CV2: + _, labeled, stats, _ = cv2.connectedComponentsWithStats(m, connectivity=8) + areas = stats[labeled, cv2.CC_STAT_AREA].astype('int32') + else: + labeled, num_features = ndimage.label(m) + areas = np.zeros_like(m, dtype=np.int32) + for c in range(1, num_features + 1): + component = labeled == c + areas[component] = component.sum() + labels_list.append(torch.from_numpy(labeled).to(mask_bin.device)) + areas_list.append(torch.from_numpy(areas).to(device=mask_bin.device, dtype=torch.int32)) + return torch.stack(labels_list).unsqueeze(1), torch.stack(areas_list).unsqueeze(1) + + +def fill_holes_in_mask_scores(mask, max_area=0): + """Remove small foreground sprinkles and fill small background holes using connected components.""" + if max_area <= 0: + return mask + + # Fill holes: small connected components in background → foreground + mask_bg = (mask <= 0).to(torch.uint8) + _, areas_bg = _get_connected_components(mask_bg) + small_bg = mask_bg.bool() & (areas_bg <= max_area) + mask = torch.where(small_bg, 0.1, mask) + + # Remove sprinkles: small connected components in foreground → background + # Only remove if area < min(max_area, half of total foreground area) + mask_fg = (mask > 0).to(torch.uint8) + fg_area_thresh = mask_fg.sum(dim=(2, 3), keepdim=True, dtype=torch.int32) + fg_area_thresh.floor_divide_(2).clamp_(max=max_area) + _, areas_fg = _get_connected_components(mask_fg) + small_fg = mask_fg.bool() & (areas_fg <= fg_area_thresh) + mask = torch.where(small_fg, -0.1, mask) + + return mask + + +def apply_rope_memory(q, k, freqs, num_heads, num_k_exclude_rope=0): + """Apply 2D axial RoPE to memory attention using flux rope format. + + Args: + q: [B, Nq, C] projected queries (current frame features) + k: [B, Nk, C] projected keys (memory tokens) + freqs: [1, Nq, dim//2, 2, 2] flux-format rotation matrices for one frame + num_heads: number of attention heads + num_k_exclude_rope: number of trailing k tokens to skip RoPE (object pointers) + """ + B, Nq, C = q.shape + head_dim = C // num_heads + + # freqs shape: [1, 1, Nq, dim//2, 2, 2] (heads broadcast dim already included) + q_h = q.view(B, Nq, num_heads, head_dim).transpose(1, 2) + q_h = apply_rope1(q_h, freqs) + q = q_h.transpose(1, 2).reshape(B, Nq, C) + + # Apply RoPE to k (excluding last num_k_exclude_rope tokens) + Nk = k.shape[1] + num_k_rope = Nk - num_k_exclude_rope + if num_k_rope > 0: + # Repeat freqs for multiple frames of spatial memory + Nf = freqs.shape[2] # spatial positions in one frame + if num_k_rope > Nf: + r = (num_k_rope + Nf - 1) // Nf + pe_k = freqs.repeat(1, 1, r, 1, 1, 1)[:, :, :num_k_rope] + else: + pe_k = freqs[:, :, :num_k_rope] + + k_h = k[:, :num_k_rope].view(B, num_k_rope, num_heads, head_dim).transpose(1, 2) + k_h = apply_rope1(k_h, pe_k) + k = k.clone() + k[:, :num_k_rope] = k_h.transpose(1, 2).reshape(B, num_k_rope, C) + + return q, k + + +def get_1d_sine_pe(pos_inds, dim, temperature=10000): + """1D sinusoidal positional encoding for temporal positions.""" + pe_dim = dim // 2 + dim_t = torch.arange(pe_dim, dtype=torch.float32, device=pos_inds.device) + dim_t = temperature ** (2 * (dim_t // 2) / pe_dim) + pos_embed = pos_inds.unsqueeze(-1) / dim_t + return torch.cat([pos_embed.sin(), pos_embed.cos()], dim=-1) + + +def _pad_to_buckets(tensor, target_buckets): + """Pad a [num_buckets, ...] 
tensor to target_buckets along dim 0 if needed.""" + if tensor.shape[0] >= target_buckets: + return tensor + pad_shape = (target_buckets - tensor.shape[0],) + tensor.shape[1:] + return torch.cat([tensor, torch.zeros(pad_shape, device=tensor.device, dtype=tensor.dtype)], dim=0) + + +def pack_masks(masks): + """Pack binary masks [*, H, W] to bit-packed [*, H, W//8] uint8. W must be divisible by 8.""" + binary = masks > 0 + shifts = torch.arange(8, device=masks.device) + return (binary.view(*masks.shape[:-1], -1, 8) * (1 << shifts)).sum(-1).byte() + + +def unpack_masks(packed): + """Unpack bit-packed [*, H, W//8] uint8 to bool [*, H, W*8].""" + shifts = torch.arange(8, device=packed.device) + return ((packed.unsqueeze(-1) >> shifts) & 1).view(*packed.shape[:-1], -1).bool() + + +def _compute_backbone(backbone_fn, frame, frame_idx=None): + """Compute backbone features for a single frame. Returns (vision_feats, vision_pos, feat_sizes, features, trunk_out).""" + features, positions, trunk_out = backbone_fn(frame, frame_idx=frame_idx) + feat_sizes = [(x.shape[-2], x.shape[-1]) for x in features] + vision_feats = [x.flatten(2).permute(0, 2, 1) for x in features] + vision_pos = [x.flatten(2).permute(0, 2, 1) for x in positions] + return vision_feats, vision_pos, feat_sizes, features, trunk_out + + +def collect_memory_tokens(output_dict, frame_idx, num_maskmem, maskmem_tpos_enc, device, + collect_image_feats=False, tpos_v2=False, num_buckets=None): + """Collect spatial memory, position encodings, and optionally image features from past frames.""" + to_cat_memory, to_cat_memory_pos = [], [] + to_cat_image_feat, to_cat_image_pos = [], [] + + def _append(out, tpos_idx): + feats = out["maskmem_features"].to(device) + if num_buckets is not None: + feats = _pad_to_buckets(feats, num_buckets) + to_cat_memory.append(feats.flatten(2).permute(0, 2, 1)) + enc = out["maskmem_pos_enc"][-1].to(device).flatten(2).permute(0, 2, 1) + if num_buckets is not None: + enc = _pad_to_buckets(enc, num_buckets) + tpos = cast_to_input(maskmem_tpos_enc[tpos_idx], enc) + to_cat_memory_pos.append(enc + tpos) + if collect_image_feats and "image_features" in out: + to_cat_image_feat.append(out["image_features"].to(device)) + to_cat_image_pos.append(out["image_pos_enc"].to(device) + tpos) + + cond_outputs = output_dict["cond_frame_outputs"] + for t, out in cond_outputs.items(): + if tpos_v2: + t_pos = frame_idx - t + tpos_idx = num_maskmem - t_pos - 1 if 0 < t_pos < num_maskmem else num_maskmem - 1 + else: + tpos_idx = num_maskmem - 1 + _append(out, tpos_idx) + + for t_pos in range(1, num_maskmem): + out = output_dict["non_cond_frame_outputs"].get(frame_idx - (num_maskmem - t_pos), None) + if out is None or out.get("maskmem_features") is None: + continue + _append(out, num_maskmem - t_pos - 1) + + return to_cat_memory, to_cat_memory_pos, to_cat_image_feat, to_cat_image_pos, cond_outputs + + +def compute_tpos_enc(rel_pos_list, device, d_model, proj_layer, dtype=None, max_abs_pos=None): + """Temporal position encoding for object pointers.""" + pos_enc = torch.tensor(rel_pos_list, dtype=torch.float32, device=device) / max((max_abs_pos or 2) - 1, 1) + pos_enc = get_1d_sine_pe(pos_enc, dim=d_model) + if dtype is not None: + pos_enc = pos_enc.to(dtype) + return proj_layer(pos_enc) + + +def forward_sam_heads(backbone_features, prompt_encoder, mask_decoder, obj_ptr_proj, no_obj_fn, + image_size, point_inputs=None, mask_inputs=None, box_inputs=None, + high_res_features=None, multimask_output=False): + """Shared SAM prompt encoder + mask 
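decoder pipeline: encodes point/box/mask prompts, decodes masks (picking the best of the three candidates by predicted IoU when multimask_output is set), and projects the chosen output token into an object pointer. This is the single-object mask 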
decoder forward for both SAM3 and SAM3.1 trackers.""" + device = backbone_features.device + # Batch size from inputs (mask_inputs may have N_obj > 1 while backbone is batch 1) + if mask_inputs is not None: + B = mask_inputs.shape[0] + elif box_inputs is not None: + B = box_inputs.shape[0] + elif point_inputs is not None: + B = point_inputs["point_coords"].shape[0] + else: + B = backbone_features.shape[0] + + if point_inputs is not None: + sam_point_coords = point_inputs["point_coords"] + sam_point_labels = point_inputs["point_labels"] + else: + sam_point_coords = torch.zeros(B, 1, 2, device=device) + sam_point_labels = -torch.ones(B, 1, dtype=torch.int32, device=device) + + if mask_inputs is not None: + prompt_size = (prompt_encoder.image_embedding_size[0] * 4, prompt_encoder.image_embedding_size[1] * 4) + if mask_inputs.shape[-2:] != prompt_size: + sam_mask_prompt = F.interpolate(mask_inputs, size=prompt_size, mode="bilinear", align_corners=False, antialias=True) + else: + sam_mask_prompt = mask_inputs + else: + sam_mask_prompt = None + + sparse, dense = prompt_encoder(points=(sam_point_coords, sam_point_labels), boxes=box_inputs, masks=sam_mask_prompt) + sparse = cast_to_input(sparse, backbone_features) + dense = cast_to_input(dense, backbone_features) + image_pe = cast_to_input(prompt_encoder.get_dense_pe(), backbone_features) + + low_res_multimasks, ious, sam_output_tokens, object_score_logits = mask_decoder( + image_embeddings=backbone_features, image_pe=image_pe, + sparse_prompt_embeddings=sparse, dense_prompt_embeddings=dense, + high_res_features=high_res_features, multimask_output=multimask_output, return_all=True, + ) + + is_obj_appearing = object_score_logits > 0 + low_res_multimasks = torch.where(is_obj_appearing[:, None, None], low_res_multimasks, + torch.tensor(NO_OBJ_SCORE, device=device, dtype=low_res_multimasks.dtype)) + high_res_multimasks = F.interpolate(low_res_multimasks, size=(image_size, image_size), mode="bilinear", align_corners=False) + + sam_output_token = sam_output_tokens[:, 0] + if multimask_output: + best_iou_inds = torch.argmax(ious, dim=-1) + batch_inds = torch.arange(B, device=device) + low_res_masks = low_res_multimasks[batch_inds, best_iou_inds].unsqueeze(1) + high_res_masks = high_res_multimasks[batch_inds, best_iou_inds].unsqueeze(1) + if sam_output_tokens.size(1) > 1: + sam_output_token = sam_output_tokens[batch_inds, best_iou_inds] + else: + low_res_masks, high_res_masks = low_res_multimasks, high_res_multimasks + + obj_ptr = obj_ptr_proj(sam_output_token) + obj_ptr = no_obj_fn(obj_ptr, is_obj_appearing) + + return low_res_masks, high_res_masks, obj_ptr, object_score_logits + + +def use_mask_as_output(backbone_features, high_res_features, mask_inputs, mask_downsample, + prompt_encoder, mask_decoder, obj_ptr_proj, no_obj_fn, image_size, backbone_stride): + """Shared mask-as-output for both SAM3 and SAM3.1 trackers.""" + out_scale, out_bias = 20.0, -10.0 + mask_inputs_float = cast_to_input(mask_inputs, backbone_features) + high_res_masks = mask_inputs_float * out_scale + out_bias + low_res_masks = F.interpolate(high_res_masks, size=(image_size // backbone_stride * 4,) * 2, + mode="bilinear", align_corners=False, antialias=True) + _, _, obj_ptr, _ = forward_sam_heads( + backbone_features, prompt_encoder, mask_decoder, obj_ptr_proj, no_obj_fn, + image_size, mask_inputs=mask_downsample(mask_inputs_float), high_res_features=high_res_features, + ) + is_obj_appearing = torch.any(mask_inputs.flatten(1) > 0.0, dim=1)[..., None] + alpha = 
is_obj_appearing.to(obj_ptr.dtype) + object_score_logits = out_scale * alpha + out_bias + return low_res_masks, high_res_masks, obj_ptr, object_score_logits + + +# Split attention with configurable input dims (for asymmetric cross-attention) +class SplitAttn(nn.Module): + def __init__(self, embed_dim, num_heads=1, kv_dim=None, internal_dim=None, device=None, dtype=None, operations=None): + super().__init__() + self.num_heads = num_heads + kv_dim = kv_dim or embed_dim + internal_dim = internal_dim or embed_dim + self.q_proj = operations.Linear(embed_dim, internal_dim, device=device, dtype=dtype) + self.k_proj = operations.Linear(kv_dim, internal_dim, device=device, dtype=dtype) + self.v_proj = operations.Linear(kv_dim, internal_dim, device=device, dtype=dtype) + self.out_proj = operations.Linear(internal_dim, embed_dim, device=device, dtype=dtype) + + def forward(self, q, k=None, v=None, rope=None, num_k_exclude_rope=0): + if k is None: + k = q + if v is None: + v = k + q = self.q_proj(q) + k = self.k_proj(k) + v = self.v_proj(v) + if rope is not None: + q, k = apply_rope_memory(q, k, rope, self.num_heads, num_k_exclude_rope) + out = optimized_attention(q, k, v, self.num_heads) + return self.out_proj(out) + + +class MemoryAttnLayer(nn.Module): + def __init__(self, d_model=256, num_heads=1, kv_dim=64, dim_ff=2048, device=None, dtype=None, operations=None): + super().__init__() + self.num_heads = num_heads + self.self_attn = SplitAttn(d_model, num_heads, device=device, dtype=dtype, operations=operations) + self.cross_attn_image = SplitAttn(d_model, num_heads, kv_dim=kv_dim, device=device, dtype=dtype, operations=operations) + self.linear1 = operations.Linear(d_model, dim_ff, device=device, dtype=dtype) + self.linear2 = operations.Linear(dim_ff, d_model, device=device, dtype=dtype) + self.norm1 = operations.LayerNorm(d_model, device=device, dtype=dtype) + self.norm2 = operations.LayerNorm(d_model, device=device, dtype=dtype) + self.norm3 = operations.LayerNorm(d_model, device=device, dtype=dtype) + + def forward(self, x, memory, memory_pos=None, rope=None, num_k_exclude_rope=0): + x = x + self.self_attn(self.norm1(x), rope=rope) + mem_k = memory + memory_pos if memory_pos is not None else memory + x = x + self.cross_attn_image(self.norm2(x), mem_k, memory, rope=rope, num_k_exclude_rope=num_k_exclude_rope) + normed = self.norm3(x) + x = x + self.linear2(F.relu(self.linear1(normed))) + return x + + +class MemoryAttnEncoder(nn.Module): + def __init__(self, d_model=256, num_heads=1, kv_dim=64, dim_ff=2048, num_layers=4, image_size=1008, patch_size=14, + device=None, dtype=None, operations=None): + super().__init__() + self.layers = nn.ModuleList([ + MemoryAttnLayer(d_model, num_heads, kv_dim, dim_ff, device=device, dtype=dtype, operations=operations) + for _ in range(num_layers) + ]) + self.norm = operations.LayerNorm(d_model, device=device, dtype=dtype) + hw = image_size // patch_size + self.register_buffer("_rope", rope_2d(hw, hw, d_model // num_heads), persistent=False) + + def forward(self, x, memory, src_pos=None, memory_pos=None, num_k_exclude_rope=0): + if src_pos is not None: + x = x + 0.1 * src_pos + + rope = self._rope.to(device=x.device) + for layer in self.layers: + x = layer(x, memory, memory_pos=memory_pos, rope=rope, num_k_exclude_rope=num_k_exclude_rope) + return self.norm(x) + + +class MemoryTransformer(nn.Module): + def __init__(self, d_model=256, num_heads=1, kv_dim=64, dim_ff=2048, num_layers=4, device=None, dtype=None, operations=None): + super().__init__() + self.encoder = 
MemoryAttnEncoder(d_model, num_heads, kv_dim, dim_ff, num_layers, device=device, dtype=dtype, operations=operations) + + +def _upscale_masks(output_upscaling, conv_s0, conv_s1, src_out, high_res_features): + """Shared upscaling for SAM mask decoders: deconv + high-res feature integration.""" + dc1, ln1, act1, dc2, act2 = output_upscaling + if high_res_features is not None: + upscaled = act1(ln1(dc1(src_out) + conv_s1(high_res_features[1]))) + upscaled = act2(dc2(upscaled) + conv_s0(high_res_features[0])) + else: + upscaled = act2(dc2(act1(ln1(dc1(src_out))))) + return upscaled + + +class SAMMaskDecoder(nn.Module): + def __init__(self, d_model=256, num_multimask_outputs=3, device=None, dtype=None, operations=None): + super().__init__() + self.num_mask_tokens = num_multimask_outputs + 1 + + self.transformer = SAMTwoWayTransformer(depth=2, embedding_dim=d_model, num_heads=8, mlp_dim=2048, device=device, dtype=dtype, operations=operations) + + self.iou_token = operations.Embedding(1, d_model, device=device, dtype=dtype) + self.mask_tokens = operations.Embedding(self.num_mask_tokens, d_model, device=device, dtype=dtype) + self.obj_score_token = operations.Embedding(1, d_model, device=device, dtype=dtype) + + # Output upscaling: d_model -> d_model//4 -> d_model//8 at 4x resolution + LN2d = LayerNorm2d_op(operations) + self.output_upscaling = nn.Sequential( + operations.ConvTranspose2d(d_model, d_model // 4, kernel_size=2, stride=2, device=device, dtype=dtype), LN2d(d_model // 4, device=device, dtype=dtype), nn.GELU(), + operations.ConvTranspose2d(d_model // 4, d_model // 8, kernel_size=2, stride=2, device=device, dtype=dtype), nn.GELU(), + ) + + # High-res feature integration + self.conv_s0 = operations.Conv2d(d_model, d_model // 8, kernel_size=1, device=device, dtype=dtype) + self.conv_s1 = operations.Conv2d(d_model, d_model // 4, kernel_size=1, device=device, dtype=dtype) + + # Per-mask hypernetwork MLPs + self.output_hypernetworks_mlps = nn.ModuleList([ + MLP(d_model, d_model, d_model // 8, 3, device=device, dtype=dtype, operations=operations) + for _ in range(self.num_mask_tokens) + ]) + + self.iou_prediction_head = MLP(d_model, d_model, self.num_mask_tokens, 3, device=device, dtype=dtype, operations=operations) + self.pred_obj_score_head = MLP(d_model, d_model, 1, 3, device=device, dtype=dtype, operations=operations) + + def forward(self, image_embeddings, image_pe, sparse_prompt_embeddings, dense_prompt_embeddings, + high_res_features=None, multimask_output=False, return_all=False): + B = sparse_prompt_embeddings.shape[0] + ref = sparse_prompt_embeddings + # Token order: [obj_score(1), iou(1), mask(num_mask_tokens)] + tokens = torch.cat([cast_to_input(self.obj_score_token.weight, ref), + cast_to_input(self.iou_token.weight, ref), + cast_to_input(self.mask_tokens.weight, ref)], dim=0) + tokens = torch.cat([tokens.unsqueeze(0).expand(B, -1, -1), sparse_prompt_embeddings], dim=1) + + src = image_embeddings + if src.shape[0] != B: + src = src.expand(B, -1, -1, -1) + src = src + dense_prompt_embeddings + pos_src = image_pe.expand(B, -1, -1, -1) + + b, c, h, w = src.shape + src_flat = src.flatten(2).permute(0, 2, 1) + pos_flat = pos_src.flatten(2).permute(0, 2, 1) + + hs, src_out = self.transformer(src_flat, pos_flat, tokens) + + obj_score_token_out = hs[:, 0, :] + iou_token_out = hs[:, 1, :] + mask_tokens_out = hs[:, 2:2 + self.num_mask_tokens, :] + + src_out = src_out.permute(0, 2, 1).view(b, c, h, w) + upscaled = _upscale_masks(self.output_upscaling, self.conv_s0, self.conv_s1, src_out, 
high_res_features) + + hyper_in = torch.stack([ + mlp(mask_tokens_out[:, i, :]) for i, mlp in enumerate(self.output_hypernetworks_mlps) + ], dim=1) + + masks = (hyper_in @ upscaled.flatten(2)).view(B, self.num_mask_tokens, upscaled.shape[2], upscaled.shape[3]) + iou_pred = self.iou_prediction_head(iou_token_out) + object_score_logits = self.pred_obj_score_head(obj_score_token_out) + + if multimask_output: + out_masks = masks[:, 1:] + out_iou = iou_pred[:, 1:] + out_tokens = mask_tokens_out[:, 1:] + else: + out_masks = masks[:, 0:1] + out_iou = iou_pred[:, 0:1] + out_tokens = mask_tokens_out[:, 0:1] + + if return_all: + return out_masks, out_iou, out_tokens, object_score_logits + return out_masks, out_iou + + +class SAMPromptEncoder(nn.Module): + def __init__(self, d_model=256, image_embedding_size=(72, 72), input_image_size=(1008, 1008), device=None, dtype=None, operations=None): + super().__init__() + self.embed_dim = d_model + self.image_embedding_size = image_embedding_size + self.input_image_size = input_image_size + + self.pe_layer = PositionEmbeddingRandom(d_model // 2) + self.point_embeddings = nn.ModuleList([ + operations.Embedding(1, d_model, device=device, dtype=dtype) for _ in range(4) + ]) + self.not_a_point_embed = operations.Embedding(1, d_model, device=device, dtype=dtype) + + LN2d = LayerNorm2d_op(operations) + self.mask_downscaling = nn.Sequential( + operations.Conv2d(1, 4, kernel_size=2, stride=2, device=device, dtype=dtype), + LN2d(4, device=device, dtype=dtype), nn.GELU(), + operations.Conv2d(4, 16, kernel_size=2, stride=2, device=device, dtype=dtype), + LN2d(16, device=device, dtype=dtype), nn.GELU(), + operations.Conv2d(16, d_model, kernel_size=1, device=device, dtype=dtype), + ) + self.no_mask_embed = operations.Embedding(1, d_model, device=device, dtype=dtype) + + def get_dense_pe(self): + return self.pe_layer(self.image_embedding_size) + + def forward(self, points=None, boxes=None, masks=None): + ref = points[0] if points is not None else boxes if boxes is not None else masks + B = 1 + sparse = torch.empty((B, 0, self.embed_dim), device=ref.device, dtype=ref.dtype) + + if points is not None: + coords, labels = points + B = coords.shape[0] + # Pad with an extra point (label=-1) when no boxes are provided (matching reference) + if boxes is None: + coords = torch.cat([coords, torch.zeros(B, 1, 2, device=coords.device, dtype=coords.dtype)], dim=1) + labels = torch.cat([labels, -torch.ones(B, 1, device=labels.device, dtype=labels.dtype)], dim=1) + pe = self.pe_layer.forward_with_coords(coords + 0.5, self.input_image_size) + for i in range(4): + pe[labels == i] += cast_to_input(self.point_embeddings[i].weight, ref) + invalid = (labels == -1) + pe[invalid] = 0.0 + pe[invalid] += cast_to_input(self.not_a_point_embed.weight, ref) + sparse = torch.cat([sparse.expand(B, -1, -1), pe], dim=1) + + if boxes is not None: + B = boxes.shape[0] + corners = self.pe_layer.forward_with_coords((boxes.reshape(-1, 2, 2) + 0.5), self.input_image_size) + corners[:, 0] += cast_to_input(self.point_embeddings[2].weight, ref) + corners[:, 1] += cast_to_input(self.point_embeddings[3].weight, ref) + sparse = torch.cat([sparse.expand(B, -1, -1), corners], dim=1) + + if masks is not None: + dense = self.mask_downscaling(masks) + else: + dense = cast_to_input(self.no_mask_embed.weight, ref).reshape(1, -1, 1, 1).expand( + B, -1, self.image_embedding_size[0], self.image_embedding_size[1]) + + return sparse, dense + + +class CXBlock(nn.Module): + def __init__(self, dim=256, kernel_size=7, device=None, 
dtype=None, operations=None): + super().__init__() + self.dwconv = operations.Conv2d(dim, dim, kernel_size=kernel_size, padding=kernel_size // 2, groups=dim, device=device, dtype=dtype) + self.norm = operations.LayerNorm(dim, device=device, dtype=dtype) + self.pwconv1 = operations.Linear(dim, 4 * dim, device=device, dtype=dtype) + self.pwconv2 = operations.Linear(4 * dim, dim, device=device, dtype=dtype) + self.gamma = nn.Parameter(torch.ones(dim, device=device, dtype=dtype)) + + def forward(self, x): + residual = x + x = self.dwconv(x).permute(0, 2, 3, 1) + x = self.pwconv2(F.gelu(self.pwconv1(self.norm(x)))) + x.mul_(cast_to_input(self.gamma, x)) + return residual + x.permute(0, 3, 1, 2) + + +class MaskDownSampler(nn.Module): + def __init__(self, out_dim=256, in_chans=1, channels=None, interpol_size=(1152, 1152), device=None, dtype=None, operations=None): + super().__init__() + self.interpol_size = list(interpol_size) if interpol_size else None + if channels is None: + channels = [4, 16, 64, out_dim] # SAM3 default + LN2d = LayerNorm2d_op(operations) + layers = [] + prev = in_chans + for ch in channels: + layers += [operations.Conv2d(prev, ch, kernel_size=3, stride=2, padding=1, device=device, dtype=dtype), + LN2d(ch, device=device, dtype=dtype), nn.GELU()] + prev = ch + layers.append(operations.Conv2d(prev, out_dim, kernel_size=1, device=device, dtype=dtype)) + self.encoder = nn.Sequential(*layers) + + def forward(self, x): + if self.interpol_size is not None and list(x.shape[-2:]) != self.interpol_size: + x = F.interpolate(x, size=self.interpol_size, mode="bilinear", align_corners=False, antialias=True) + return self.encoder(x) + + +class Fuser(nn.Module): + def __init__(self, dim=256, num_layers=2, device=None, dtype=None, operations=None): + super().__init__() + self.layers = nn.Sequential(*[CXBlock(dim, device=device, dtype=dtype, operations=operations) for _ in range(num_layers)]) + + def forward(self, x): + return self.layers(x) + + +# --- SAM3.1 Multiplex components --- + +class DecoupledMemoryAttnLayer(nn.Module): + """Decoupled cross-attention layer for SAM3.1: fuses image and memory projections.""" + + def __init__(self, d_model=256, num_heads=1, dim_ff=2048, device=None, dtype=None, operations=None): + super().__init__() + self.num_heads = num_heads + # Self-attention projections (flat, not nested) + self.self_attn_q_proj = operations.Linear(d_model, d_model, device=device, dtype=dtype) + self.self_attn_k_proj = operations.Linear(d_model, d_model, device=device, dtype=dtype) + self.self_attn_v_proj = operations.Linear(d_model, d_model, device=device, dtype=dtype) + self.self_attn_out_proj = operations.Linear(d_model, d_model, device=device, dtype=dtype) + # Cross-attention projections + self.cross_attn_q_proj = operations.Linear(d_model, d_model, device=device, dtype=dtype) + self.cross_attn_k_proj = operations.Linear(d_model, d_model, device=device, dtype=dtype) + self.cross_attn_v_proj = operations.Linear(d_model, d_model, device=device, dtype=dtype) + self.cross_attn_out_proj = operations.Linear(d_model, d_model, device=device, dtype=dtype) + # Image cross-attention (q/k only, fused with cross_attn) + self.image_cross_attn_q_proj = operations.Linear(d_model, d_model, device=device, dtype=dtype) + self.image_cross_attn_k_proj = operations.Linear(d_model, d_model, device=device, dtype=dtype) + # FFN + self.linear1 = operations.Linear(d_model, dim_ff, device=device, dtype=dtype) + self.linear2 = operations.Linear(dim_ff, d_model, device=device, dtype=dtype) + self.norm1 = 
operations.LayerNorm(d_model, device=device, dtype=dtype) + self.norm2 = operations.LayerNorm(d_model, device=device, dtype=dtype) + self.norm3 = operations.LayerNorm(d_model, device=device, dtype=dtype) + + def forward(self, image, x, memory_image, memory, memory_image_pos=None, + rope=None, num_k_exclude_rope=0): + # Self-attention with RoPE + normed = self.norm1(x) + q = self.self_attn_q_proj(normed) + k = self.self_attn_k_proj(normed) + v = self.self_attn_v_proj(normed) + if rope is not None: + q, k = apply_rope_memory(q, k, rope, self.num_heads, 0) + x = x + self.self_attn_out_proj(optimized_attention(q, k, v, self.num_heads)) + + # Decoupled cross-attention: fuse image and memory projections + normed = self.norm2(x) + q = self.image_cross_attn_q_proj(image) + self.cross_attn_q_proj(normed) + k = self.image_cross_attn_k_proj(memory_image) + self.cross_attn_k_proj(memory) + if memory_image_pos is not None: + k = k + memory_image_pos + v = self.cross_attn_v_proj(memory) + if rope is not None: + q, k = apply_rope_memory(q, k, rope, self.num_heads, num_k_exclude_rope) + x = x + self.cross_attn_out_proj(optimized_attention(q, k, v, self.num_heads)) + + # FFN + x = x + self.linear2(F.gelu(self.linear1(self.norm3(x)))) + return image, x + + +class DecoupledMemoryEncoder(nn.Module): + """Memory attention encoder for SAM3.1 with decoupled cross-attention.""" + + def __init__(self, d_model=256, num_heads=1, dim_ff=2048, num_layers=4, image_size=1008, patch_size=14, + device=None, dtype=None, operations=None): + super().__init__() + self.layers = nn.ModuleList([ + DecoupledMemoryAttnLayer(d_model, num_heads, dim_ff, device=device, dtype=dtype, operations=operations) + for _ in range(num_layers) + ]) + self.norm = operations.LayerNorm(d_model, device=device, dtype=dtype) + hw = image_size // patch_size + self.register_buffer("_rope", rope_2d(hw, hw, d_model // num_heads), persistent=False) + + def forward(self, x, memory, memory_pos=None, src_pos=None, num_k_exclude_rope=0, + memory_image=None, memory_image_pos=None): + image = x # constant residual for decoupled cross-attention + output = x + if src_pos is not None: + output = output + 0.1 * src_pos + + B, _, C = x.shape + rope = self._rope.to(device=x.device) + + # memory_image: raw backbone features from past frames for decoupled cross-attention + if memory_image is None: + # Fallback: use spatial portion of memory (without obj pointers) + num_spatial = memory.shape[1] - num_k_exclude_rope + memory_image = memory[:, :num_spatial] + memory_image_pos = memory_pos[:, :num_spatial] if memory_pos is not None else None + # Pad memory_image to match memory length (zeros for obj pointer tokens) + if memory_image.shape[1] < memory.shape[1]: + pad_len = memory.shape[1] - memory_image.shape[1] + pad = torch.zeros(B, pad_len, C, device=memory.device, dtype=memory.dtype) + memory_image = torch.cat([memory_image, pad], dim=1) + if memory_image_pos is not None: + ptr_pos = memory_pos[:, -pad_len:] if memory_pos is not None else torch.zeros_like(pad) + memory_image_pos = torch.cat([memory_image_pos, ptr_pos], dim=1) + + for layer in self.layers: + image, output = layer(image, output, memory_image, memory, + memory_image_pos=memory_image_pos, rope=rope, + num_k_exclude_rope=num_k_exclude_rope) + + return self.norm(output) + + +class DecoupledMemoryTransformer(nn.Module): + def __init__(self, d_model=256, num_heads=1, dim_ff=2048, num_layers=4, device=None, dtype=None, operations=None): + super().__init__() + self.encoder = DecoupledMemoryEncoder(d_model, 
num_heads, dim_ff, num_layers, + device=device, dtype=dtype, operations=operations) + + +class MemoryBackbone(nn.Module): + """Memory encoder: downsamples mask, fuses with pixel features, optionally compresses.""" + + def __init__(self, d_model=256, out_dim=None, in_chans=1, channels=None, device=None, dtype=None, operations=None): + super().__init__() + self.mask_downsampler = MaskDownSampler(d_model, in_chans=in_chans, channels=channels, device=device, dtype=dtype, operations=operations) + self.pix_feat_proj = operations.Conv2d(d_model, d_model, kernel_size=1, device=device, dtype=dtype) + self.fuser = Fuser(d_model, num_layers=2, device=device, dtype=dtype, operations=operations) + self.has_out_proj = out_dim is not None and out_dim != d_model + if self.has_out_proj: + self.out_proj = operations.Conv2d(d_model, out_dim, kernel_size=1, device=device, dtype=dtype) + feat_dim = out_dim + else: + feat_dim = d_model + self.position_encoding = PositionEmbeddingSine(num_pos_feats=feat_dim, normalize=True) + + def forward(self, image_features, mask_for_mem, skip_mask_sigmoid=False): + if not skip_mask_sigmoid: + mask_for_mem = mask_for_mem.sigmoid() + mask_features = self.mask_downsampler(cast_to_input(mask_for_mem, image_features)) + if mask_features.shape[-2:] != image_features.shape[-2:]: + mask_features = F.interpolate(mask_features, size=image_features.shape[-2:], mode="bilinear", align_corners=False) + features = self.pix_feat_proj(image_features) + mask_features + features = self.fuser(features) + if self.has_out_proj: + features = self.out_proj(features) + pos = cast_to_input(self.position_encoding(features), features) + return {"vision_features": features, "vision_pos_enc": [pos]} + + +class MultiplexMaskDecoder(nn.Module): + """SAM mask decoder for SAM3.1 multiplex: predicts masks for num_multiplex objects simultaneously. + + Uses multimask_outputs_only=True: num_mask_output_per_object = num_multimask_outputs (no +1). + Hypernetwork MLPs are shared across multiplex objects. + Token order: [obj_score_token(M), iou_token(M), mask_tokens(M*T)]. 
+ """ + + def __init__(self, d_model=256, num_multiplex=16, num_multimask_outputs=3, device=None, dtype=None, operations=None): + super().__init__() + self.num_multiplex = num_multiplex + self.num_mask_output_per_object = num_multimask_outputs # 3 (multimask_outputs_only) + total_mask_tokens = num_multiplex * self.num_mask_output_per_object # 48 + + self.transformer = SAMTwoWayTransformer(depth=2, embedding_dim=d_model, num_heads=8, mlp_dim=2048, device=device, dtype=dtype, operations=operations) + + self.obj_score_token = operations.Embedding(num_multiplex, d_model, device=device, dtype=dtype) + self.iou_token = operations.Embedding(num_multiplex, d_model, device=device, dtype=dtype) + self.mask_tokens = operations.Embedding(total_mask_tokens, d_model, device=device, dtype=dtype) + + LN2d = LayerNorm2d_op(operations) + self.output_upscaling = nn.Sequential( + operations.ConvTranspose2d(d_model, d_model // 4, kernel_size=2, stride=2, device=device, dtype=dtype), + LN2d(d_model // 4, device=device, dtype=dtype), nn.GELU(), + operations.ConvTranspose2d(d_model // 4, d_model // 8, kernel_size=2, stride=2, device=device, dtype=dtype), nn.GELU(), + ) + self.conv_s0 = operations.Conv2d(d_model, d_model // 8, kernel_size=1, device=device, dtype=dtype) + self.conv_s1 = operations.Conv2d(d_model, d_model // 4, kernel_size=1, device=device, dtype=dtype) + + # Shared across all multiplex objects (one per mask output) + self.output_hypernetworks_mlps = nn.ModuleList([ + MLP(d_model, d_model, d_model // 8, 3, device=device, dtype=dtype, operations=operations) + for _ in range(self.num_mask_output_per_object) + ]) + self.iou_prediction_head = MLP(d_model, d_model, self.num_mask_output_per_object, 3, device=device, dtype=dtype, operations=operations) + self.pred_obj_score_head = MLP(d_model, d_model, 1, 3, device=device, dtype=dtype, operations=operations) + + def forward(self, image_embeddings, image_pe, sparse_prompt_embeddings, dense_prompt_embeddings, + high_res_features=None, multimask_output=False, return_all=False, extra_per_object_embeddings=None): + B = sparse_prompt_embeddings.shape[0] + M = self.num_multiplex + T = self.num_mask_output_per_object + + # Token order: [obj_score(M), iou(M), mask(M*T)] + ref = sparse_prompt_embeddings + mask_tokens = cast_to_input(self.mask_tokens.weight, ref) + if extra_per_object_embeddings is not None: + mask_tokens = mask_tokens.view(1, M, T, -1).expand(B, -1, -1, -1) + extra_per_object_embeddings.unsqueeze(2) + mask_tokens = mask_tokens.flatten(1, 2) # [B, M*T, C] + other_tokens = torch.cat([cast_to_input(self.obj_score_token.weight, ref), + cast_to_input(self.iou_token.weight, ref)], dim=0).unsqueeze(0).expand(B, -1, -1) + tokens = torch.cat([other_tokens, mask_tokens, sparse_prompt_embeddings], dim=1) + else: + tokens = torch.cat([cast_to_input(self.obj_score_token.weight, ref), + cast_to_input(self.iou_token.weight, ref), mask_tokens], dim=0) + tokens = torch.cat([tokens.unsqueeze(0).expand(B, -1, -1), sparse_prompt_embeddings], dim=1) + + src = image_embeddings + if src.shape[0] != B: + src = src.expand(B, -1, -1, -1) + src = src + dense_prompt_embeddings + pos_src = image_pe.expand(B, -1, -1, -1) + + b, c, h, w = src.shape + hs, src_out = self.transformer(src.flatten(2).permute(0, 2, 1), pos_src.flatten(2).permute(0, 2, 1), tokens) + + # Parse output tokens + obj_score_token_out = hs[:, :M] + iou_token_out = hs[:, M:2 * M] + mask_tokens_out = hs[:, 2 * M:2 * M + M * T] + + src_out = src_out.permute(0, 2, 1).view(b, c, h, w) + upscaled = 
_upscale_masks(self.output_upscaling, self.conv_s0, self.conv_s1, src_out, high_res_features) + + # Reshape mask tokens to [B, M, T, C] and apply shared hypernetwork MLPs per mask output index + mask_tokens_2d = mask_tokens_out.view(B, M, T, -1) + hyper_in = torch.stack([ + self.output_hypernetworks_mlps[i](mask_tokens_2d[:, :, i, :]) # [B, M, C//8] + for i in range(T) + ], dim=2) # [B, M, T, C//8] + + # Generate masks: [B, M*T, H*W] -> [B, M, T, H, W] + masks = torch.bmm(hyper_in.flatten(1, 2), upscaled.flatten(2)).view(b, M, T, upscaled.shape[2], upscaled.shape[3]) + + # IoU and object scores + iou_pred = self.iou_prediction_head(iou_token_out).view(b, M, T) + object_score_logits = self.pred_obj_score_head(obj_score_token_out) # [B, M, 1] + + # multimask_outputs_only: always output all T masks (no singlemask token) + sam_tokens_out = mask_tokens_2d[:, :, 0:1] # [B, M, 1, C] + + if return_all: + return masks, iou_pred, sam_tokens_out, object_score_logits + return masks, iou_pred + + +class SAM3Tracker(nn.Module): + def __init__(self, d_model=256, mem_dim=64, num_maskmem=7, device=None, dtype=None, operations=None, **kwargs): + super().__init__() + + # Memory attention transformer + self.transformer = MemoryTransformer(d_model, num_heads=1, kv_dim=mem_dim, dim_ff=2048, num_layers=4, + device=device, dtype=dtype, operations=operations) + # SAM components + self.sam_mask_decoder = SAMMaskDecoder(d_model, device=device, dtype=dtype, operations=operations) + self.sam_prompt_encoder = SAMPromptEncoder(d_model, device=device, dtype=dtype, operations=operations) + + # Memory backbone + self.maskmem_backbone = MemoryBackbone(d_model, out_dim=mem_dim, device=device, dtype=dtype, operations=operations) + + # Standalone parameters + self.maskmem_tpos_enc = nn.Parameter(torch.zeros(num_maskmem, 1, 1, mem_dim, device=device, dtype=dtype)) + self.no_mem_embed = nn.Parameter(torch.zeros(1, 1, d_model, device=device, dtype=dtype)) + self.register_buffer("no_mem_pos_enc", torch.zeros(1, 1, d_model, device=device, dtype=dtype)) # checkpoint key, unused in forward + self.no_obj_embed_spatial = nn.Parameter(torch.zeros(1, mem_dim, device=device, dtype=dtype)) + self.no_obj_ptr = nn.Parameter(torch.zeros(1, d_model, device=device, dtype=dtype)) + + # Object pointer projection + self.obj_ptr_proj = MLP(d_model, d_model, d_model, 3, device=device, dtype=dtype, operations=operations) + self.obj_ptr_tpos_proj = operations.Linear(d_model, mem_dim, device=device, dtype=dtype) + + # Mask downsample: Conv2d stride 4 to reduce GT mask to SAM logit scale + self.mask_downsample = operations.Conv2d(1, 1, kernel_size=4, stride=4, device=device, dtype=dtype) + + # Config + self.d_model = d_model + self.mem_dim = mem_dim + self.num_maskmem = num_maskmem + self.image_size = 1008 + self.backbone_stride = 14 + self.max_obj_ptrs_in_encoder = 16 + self.sigmoid_scale_for_mem_enc = 20.0 + self.sigmoid_bias_for_mem_enc = -10.0 + + def _no_obj_blend(self, obj_ptr, is_obj): + alpha = is_obj.to(obj_ptr.dtype) + return torch.lerp(cast_to_input(self.no_obj_ptr, obj_ptr), obj_ptr, alpha) + + def _forward_sam_heads(self, backbone_features, point_inputs=None, mask_inputs=None, box_inputs=None, + high_res_features=None, multimask_output=False): + return forward_sam_heads(backbone_features, self.sam_prompt_encoder, self.sam_mask_decoder, + self.obj_ptr_proj, self._no_obj_blend, self.image_size, + point_inputs, mask_inputs, box_inputs, high_res_features, multimask_output) + + def _use_mask_as_output(self, backbone_features, high_res_features, 
mask_inputs): + return use_mask_as_output(backbone_features, high_res_features, mask_inputs, + self.mask_downsample, self.sam_prompt_encoder, self.sam_mask_decoder, + self.obj_ptr_proj, self._no_obj_blend, self.image_size, self.backbone_stride) + + def _prepare_memory_conditioned_features(self, frame_idx, is_init_cond_frame, current_vision_feats, current_vision_pos_embeds, feat_sizes, output_dict, num_frames): + """Fuse current frame features with memory from previous frames.""" + B = current_vision_feats[-1].shape[0] + C = self.d_model + H, W = feat_sizes[-1] + device = current_vision_feats[-1].device + + if self.num_maskmem == 0: + return current_vision_feats[-1].permute(0, 2, 1).view(B, C, H, W) + + if is_init_cond_frame: + # First conditioning frame: no memory yet, add no_mem_embed + pix_feat = current_vision_feats[-1] + cast_to_input(self.no_mem_embed, current_vision_feats[-1]) + return to_spatial(pix_feat, H, W) + + to_cat_memory, to_cat_memory_pos, _, _, cond_outputs = collect_memory_tokens( + output_dict, frame_idx, self.num_maskmem, self.maskmem_tpos_enc, device) + + max_obj_ptrs = min(num_frames, self.max_obj_ptrs_in_encoder) + pos_and_ptrs = [] + for t, out in cond_outputs.items(): + if t <= frame_idx: + pos_and_ptrs.append(((frame_idx - t), out["obj_ptr"].to(device))) + for t_diff in range(1, max_obj_ptrs): + t = frame_idx - t_diff + if t < 0: + break + out = output_dict["non_cond_frame_outputs"].get(t, None) + if out is not None: + pos_and_ptrs.append((t_diff, out["obj_ptr"].to(device))) + + num_obj_ptr_tokens = 0 + if len(pos_and_ptrs) > 0: + pos_list, ptrs_list = zip(*pos_and_ptrs) + obj_ptrs = torch.stack(ptrs_list, dim=1) # [B, N, C=256] + + # Temporal position encoding for pointers + obj_pos = compute_tpos_enc( + list(pos_list), device, self.d_model, self.obj_ptr_tpos_proj, + max_abs_pos=max_obj_ptrs, dtype=current_vision_feats[-1].dtype + ) # [N, mem_dim=64] + obj_pos = obj_pos.unsqueeze(0).expand(B, -1, -1) # [B, N, 64] + + # Split each 256-dim pointer into 4 x 64-dim tokens + if self.mem_dim < C: + N = obj_ptrs.shape[1] + obj_ptrs = obj_ptrs.view(B, N, C // self.mem_dim, self.mem_dim) # [B, N, 4, 64] + obj_ptrs = obj_ptrs.reshape(B, N * (C // self.mem_dim), self.mem_dim) # [B, N*4, 64] + obj_pos = obj_pos.unsqueeze(2).expand(-1, -1, C // self.mem_dim, -1) + obj_pos = obj_pos.reshape(B, N * (C // self.mem_dim), self.mem_dim) # [B, N*4, 64] + + to_cat_memory.append(obj_ptrs) + to_cat_memory_pos.append(obj_pos) + num_obj_ptr_tokens = obj_ptrs.shape[1] + + if len(to_cat_memory) == 0: + # No memory available yet, add no_mem_embed + pix_feat = current_vision_feats[-1] + cast_to_input(self.no_mem_embed, current_vision_feats[-1]) + return to_spatial(pix_feat, H, W) + + # Concatenate all memory and position encodings [B, total_mem, mem_dim=64] + memory = torch.cat(to_cat_memory, dim=1) + memory_pos = torch.cat(to_cat_memory_pos, dim=1) + + # Run memory attention encoder + pix_feat = current_vision_feats[-1] # [B, HW, C] + src_pos = current_vision_pos_embeds[-1] # [B, HW, C] + + pix_feat_with_mem = self.transformer.encoder( + x=pix_feat, + memory=memory, + src_pos=src_pos, + memory_pos=memory_pos, + num_k_exclude_rope=num_obj_ptr_tokens, + ) + return to_spatial(pix_feat_with_mem, H, W) + + def _encode_new_memory(self, pix_feat, pred_masks_high_res, object_score_logits, is_mask_from_pts=False): + """Encode predicted mask into memory features.""" + if is_mask_from_pts: + mask_for_mem = (pred_masks_high_res > 0).to(pix_feat.dtype) + else: + mask_for_mem = 
torch.sigmoid(pred_masks_high_res) + + mask_for_mem.mul_(self.sigmoid_scale_for_mem_enc).add_(self.sigmoid_bias_for_mem_enc) + + maskmem_out = self.maskmem_backbone(pix_feat, mask_for_mem, skip_mask_sigmoid=True) + maskmem_features = maskmem_out["vision_features"] + maskmem_pos_enc = maskmem_out["vision_pos_enc"] + + # Add no_obj_embed for occluded objects + alpha = (object_score_logits > 0).to(maskmem_features.dtype)[..., None, None] + no_obj = cast_to_input(self.no_obj_embed_spatial, maskmem_features)[..., None, None].expand_as(maskmem_features) + return maskmem_features + (1 - alpha) * no_obj, maskmem_pos_enc + + def track_step(self, frame_idx, is_init_cond_frame, current_vision_feats, current_vision_pos_embeds, feat_sizes, mask_inputs, output_dict, + num_frames, point_inputs=None): + """Track one frame: fuse with memory, predict mask, encode memory.""" + current_out = {} + + # High-res features for SAM head [stride-8, stride-4] + if len(current_vision_feats) > 1: + high_res_features = [ + x.view(x.shape[0], feat_sizes[i][0], feat_sizes[i][1], -1).permute(0, 3, 1, 2) + for i, x in enumerate(current_vision_feats[:-1]) + ] + else: + high_res_features = None + + # Top-level feature for memory + H, W = feat_sizes[-1] + + if mask_inputs is not None: + # Conditioning frame: use mask directly + pix_feat = to_spatial(current_vision_feats[-1], H, W) + sam_outputs = self._use_mask_as_output(pix_feat, high_res_features, mask_inputs) + else: + # Track frame: fuse with memory, then SAM decoder + pix_feat_with_mem = self._prepare_memory_conditioned_features( + frame_idx=frame_idx, + is_init_cond_frame=is_init_cond_frame, + current_vision_feats=current_vision_feats, + current_vision_pos_embeds=current_vision_pos_embeds, + feat_sizes=feat_sizes, + output_dict=output_dict, + num_frames=num_frames, + ) + # Use multimask for point prompts on init frames (picks best of 3 candidates) + num_pts = 0 if point_inputs is None else point_inputs["point_labels"].size(1) + multimask_output = is_init_cond_frame and 0 < num_pts <= 1 + sam_outputs = self._forward_sam_heads( + backbone_features=pix_feat_with_mem, + point_inputs=point_inputs, + high_res_features=high_res_features, + multimask_output=multimask_output, + ) + + (low_res_masks, high_res_masks, obj_ptr, object_score_logits) = sam_outputs + + # Clean low-res masks: remove sprinkles and fill holes + low_res_masks = fill_holes_in_mask_scores(low_res_masks, max_area=200) + high_res_masks = F.interpolate(low_res_masks, size=(self.image_size, self.image_size), mode="bilinear", align_corners=False) + + current_out["pred_masks"] = low_res_masks + current_out["pred_masks_high_res"] = high_res_masks + current_out["obj_ptr"] = obj_ptr + current_out["object_score_logits"] = object_score_logits + + # Encode memory + if self.num_maskmem > 0: + pix_feat = to_spatial(current_vision_feats[-1], H, W) + maskmem_features, maskmem_pos_enc = self._encode_new_memory( + pix_feat=pix_feat, + pred_masks_high_res=high_res_masks, + object_score_logits=object_score_logits, + is_mask_from_pts=(point_inputs is not None), + ) + current_out["maskmem_features"] = maskmem_features + current_out["maskmem_pos_enc"] = maskmem_pos_enc + else: + current_out["maskmem_features"] = None + current_out["maskmem_pos_enc"] = None + + return current_out + + def _compute_backbone_frame(self, backbone_fn, frame, frame_idx=None): + vision_feats, vision_pos, feat_sizes, _, _ = _compute_backbone(backbone_fn, frame, frame_idx) + # SAM3: drop last FPN level + return vision_feats[:-1], vision_pos[:-1], 
feat_sizes[:-1] + + def _track_single_object(self, backbone_fn, images, initial_mask, pbar=None): + """Track one object, computing backbone per frame to save VRAM.""" + N = images.shape[0] + device, dt = images.device, images.dtype + output_dict = {"cond_frame_outputs": {}, "non_cond_frame_outputs": {}} + all_masks = [] + + for frame_idx in tqdm(range(N), desc="tracking"): + vision_feats, vision_pos, feat_sizes = self._compute_backbone_frame( + backbone_fn, images[frame_idx:frame_idx + 1], frame_idx=frame_idx) + mask_input = None + if frame_idx == 0: + mask_input = F.interpolate(initial_mask.to(device=device, dtype=dt), + size=(self.image_size, self.image_size), mode="bilinear", align_corners=False) + mask_input = (mask_input > 0.5).to(dt) + + current_out = self.track_step( + frame_idx=frame_idx, is_init_cond_frame=(frame_idx == 0), + current_vision_feats=vision_feats, current_vision_pos_embeds=vision_pos, + feat_sizes=feat_sizes, mask_inputs=mask_input, output_dict=output_dict, num_frames=N) + + if frame_idx == 0: + output_dict["cond_frame_outputs"][frame_idx] = current_out + else: + output_dict["non_cond_frame_outputs"][frame_idx] = current_out + lookback = max(self.num_maskmem, self.max_obj_ptrs_in_encoder) + for old_idx in list(output_dict["non_cond_frame_outputs"]): + if old_idx < frame_idx - lookback: + del output_dict["non_cond_frame_outputs"][old_idx] + # Move masks to CPU immediately to free VRAM + all_masks.append(current_out["pred_masks_high_res"].to(comfy.model_management.intermediate_device())) + if pbar is not None: + pbar.update(1) + + return torch.cat(all_masks, dim=0) # [N, 1, H, W] + + def track_video(self, backbone_fn, images, initial_masks, pbar=None, **kwargs): + """Track one or more objects across video frames. + + Args: + backbone_fn: callable that returns (sam2_features, sam2_positions, trunk_out) for a frame + images: [N, 3, 1008, 1008] video frames + initial_masks: [N_obj, 1, H, W] binary masks for first frame (one per object) + pbar: optional progress bar + + Returns: + [N, N_obj, image_size, image_size] predicted mask logits per frame per object + """ + N_obj = initial_masks.shape[0] + per_object = [] + for obj_idx in range(N_obj): + obj_masks = self._track_single_object( + backbone_fn, images, initial_masks[obj_idx:obj_idx + 1], pbar=pbar) + per_object.append(obj_masks) + + return torch.cat(per_object, dim=1) # [N, N_obj, H, W] + + +class SAM31Tracker(nn.Module): + """SAM3.1 multiplex tracker: decoupled memory attention, dual decoder, 16-object multiplex.""" + + def __init__(self, d_model=256, mem_dim=256, num_maskmem=7, num_multiplex=16, device=None, dtype=None, operations=None, **kwargs): + super().__init__() + self.d_model = d_model + self.mem_dim = mem_dim + self.num_maskmem = num_maskmem + self.num_multiplex = num_multiplex + self.image_size = 1008 + self.backbone_stride = 14 + self.max_obj_ptrs_in_encoder = 16 + self.sigmoid_scale_for_mem_enc = 2.0 + self.sigmoid_bias_for_mem_enc = -1.0 + + # Memory attention (decoupled cross-attention, 8 heads matching reference) + self.transformer = DecoupledMemoryTransformer(d_model, num_heads=8, dim_ff=2048, num_layers=4, + device=device, dtype=dtype, operations=operations) + + # Propagation decoder (multiplex: 16 objects, multimask_outputs_only) + self.sam_mask_decoder = MultiplexMaskDecoder(d_model, num_multiplex, num_multimask_outputs=3, + device=device, dtype=dtype, operations=operations) + # Interactive decoder (single object, same as SAM3) + self.interactive_sam_mask_decoder = SAMMaskDecoder(d_model, 
num_multimask_outputs=3, + device=device, dtype=dtype, operations=operations) + self.interactive_sam_prompt_encoder = SAMPromptEncoder(d_model, device=device, dtype=dtype, operations=operations) + + # Memory backbone (mem_dim=256, no out_proj compression) + self.maskmem_backbone = MemoryBackbone(d_model, in_chans=num_multiplex * 2, channels=[16, 64, 256, 1024], + device=device, dtype=dtype, operations=operations) + + # Standalone parameters + self.maskmem_tpos_enc = nn.Parameter(torch.zeros(num_maskmem, 1, 1, mem_dim, device=device, dtype=dtype)) + self.no_obj_embed_spatial = nn.Parameter(torch.zeros(num_multiplex, mem_dim, device=device, dtype=dtype)) + self.interactivity_no_mem_embed = nn.Parameter(torch.zeros(1, 1, d_model, device=device, dtype=dtype)) + + # Object pointer projection + self.obj_ptr_proj = MLP(d_model, d_model, d_model, 3, device=device, dtype=dtype, operations=operations) + self.obj_ptr_tpos_proj = operations.Linear(d_model, mem_dim, device=device, dtype=dtype) + self.no_obj_ptr_linear = operations.Linear(d_model, d_model, device=device, dtype=dtype) + self.interactive_obj_ptr_proj = MLP(d_model, d_model, d_model, 3, device=device, dtype=dtype, operations=operations) + + # Interactive mask downsample + self.interactive_mask_downsample = operations.Conv2d(1, 1, kernel_size=4, stride=4, device=device, dtype=dtype) + + # Multiplex validity embeddings + self.output_valid_embed = nn.Parameter(torch.zeros(num_multiplex, d_model, device=device, dtype=dtype)) + self.output_invalid_embed = nn.Parameter(torch.zeros(num_multiplex, d_model, device=device, dtype=dtype)) + + # Position encoding for image (used by multiplex decoder) + self.image_pe_layer = PositionEmbeddingRandom(d_model // 2) + + def _no_obj_blend(self, obj_ptr, is_obj): + alpha = is_obj.to(obj_ptr.dtype) + return torch.lerp(self.no_obj_ptr_linear(obj_ptr), obj_ptr, alpha) + + def _forward_sam_heads(self, backbone_features, point_inputs=None, mask_inputs=None, box_inputs=None, + high_res_features=None, multimask_output=False): + return forward_sam_heads(backbone_features, self.interactive_sam_prompt_encoder, self.interactive_sam_mask_decoder, + self.interactive_obj_ptr_proj, self._no_obj_blend, self.image_size, + point_inputs, mask_inputs, box_inputs, high_res_features, multimask_output) + + def _use_mask_as_output(self, backbone_features, high_res_features, mask_inputs): + return use_mask_as_output(backbone_features, high_res_features, mask_inputs, + self.interactive_mask_downsample, self.interactive_sam_prompt_encoder, + self.interactive_sam_mask_decoder, self.interactive_obj_ptr_proj, + self._no_obj_blend, self.image_size, self.backbone_stride) + + def _prepare_memory_conditioned_features(self, frame_idx, is_init_cond_frame, current_vision_feats, + current_vision_pos_embeds, feat_sizes, output_dict, num_frames, + multiplex_state=None): + B = current_vision_feats[-1].shape[0] + C = self.d_model + H, W = feat_sizes[-1] + device = current_vision_feats[-1].device + num_buc = multiplex_state.num_buckets if multiplex_state is not None else None + + if self.num_maskmem == 0: + return current_vision_feats[-1].permute(0, 2, 1).view(B, C, H, W) + + if is_init_cond_frame: + pix_feat = current_vision_feats[-1] + cast_to_input(self.interactivity_no_mem_embed, current_vision_feats[-1]) + return to_spatial(pix_feat, H, W) + + to_cat_memory, to_cat_memory_pos, to_cat_image_feat, to_cat_image_pos, cond_outputs = collect_memory_tokens( + output_dict, frame_idx, self.num_maskmem, self.maskmem_tpos_enc, device, + 
collect_image_feats=True, tpos_v2=True, num_buckets=num_buc) + + max_obj_ptrs = min(num_frames, self.max_obj_ptrs_in_encoder) + pos_and_ptrs = [] + for t, out in cond_outputs.items(): + if t <= frame_idx and "obj_ptr" in out: + ptr = out["obj_ptr"].to(device) + if num_buc is not None: + ptr = _pad_to_buckets(ptr, num_buc) + pos_and_ptrs.append(((frame_idx - t), ptr)) + for t_diff in range(1, max_obj_ptrs): + t = frame_idx - t_diff + if t < 0: + break + out = output_dict["non_cond_frame_outputs"].get(t, None) + if out is not None and "obj_ptr" in out: + ptr = out["obj_ptr"].to(device) + if num_buc is not None: + ptr = _pad_to_buckets(ptr, num_buc) + pos_and_ptrs.append((t_diff, ptr)) + + num_obj_ptr_tokens = 0 + if len(pos_and_ptrs) > 0: + pos_list, ptrs_list = zip(*pos_and_ptrs) + obj_ptrs = torch.stack(ptrs_list, dim=1) # [num_buckets, N, M, C] + B_ptr = obj_ptrs.shape[0] + N_ptrs = obj_ptrs.shape[1] + M = obj_ptrs.shape[2] + obj_ptrs = obj_ptrs.reshape(B_ptr, N_ptrs * M, -1) + obj_pos = compute_tpos_enc(list(pos_list), device, self.d_model, self.obj_ptr_tpos_proj, + max_abs_pos=max_obj_ptrs, dtype=current_vision_feats[-1].dtype) + obj_pos = obj_pos.unsqueeze(0).expand(B_ptr, -1, -1) + obj_pos = obj_pos.unsqueeze(2).expand(-1, -1, M, -1).reshape(B_ptr, N_ptrs * M, -1) + to_cat_memory.append(obj_ptrs) + to_cat_memory_pos.append(obj_pos) + num_obj_ptr_tokens = obj_ptrs.shape[1] + + if len(to_cat_memory) == 0: + pix_feat = current_vision_feats[-1] + cast_to_input(self.interactivity_no_mem_embed, current_vision_feats[-1]) + return to_spatial(pix_feat, H, W) + + memory = torch.cat(to_cat_memory, dim=1) + memory_pos = torch.cat(to_cat_memory_pos, dim=1) + + # Expand vision features to num_buckets if memory has more buckets than B + mem_B = memory.shape[0] + x = current_vision_feats[-1] + x_pos = current_vision_pos_embeds[-1] + if x.shape[0] < mem_B: + x = x.expand(mem_B, -1, -1) + x_pos = x_pos.expand(mem_B, -1, -1) + + if len(to_cat_image_feat) > 0: + # Decoupled cross-attention: separate image features from memory + memory_image = cast_to_input(torch.cat(to_cat_image_feat, dim=1), x) + memory_image_pos = cast_to_input(torch.cat(to_cat_image_pos, dim=1), x) + if memory_image.shape[0] < mem_B: + memory_image = memory_image.expand(mem_B, -1, -1) + memory_image_pos = memory_image_pos.expand(mem_B, -1, -1) + pix_feat_with_mem = self.transformer.encoder( + x=x, + memory=cast_to_input(memory, x), + memory_pos=cast_to_input(memory_pos, x), + src_pos=cast_to_input(x_pos, x), + num_k_exclude_rope=num_obj_ptr_tokens, + memory_image=memory_image, + memory_image_pos=memory_image_pos, + ) + else: + pix_feat_with_mem = self.transformer.encoder( + x=x, + memory=memory, + memory_pos=memory_pos, + src_pos=x_pos, + num_k_exclude_rope=num_obj_ptr_tokens, + ) + return to_spatial(pix_feat_with_mem, H, W) + + def _encode_new_memory(self, pix_feat, pred_masks_high_res, object_score_logits, is_mask_from_pts=False, + multiplex_state=None, is_conditioning=False, cond_obj_mask=None): + if is_mask_from_pts: + mask_for_mem = (pred_masks_high_res > 0).to(pix_feat.dtype) + else: + mask_for_mem = torch.sigmoid(pred_masks_high_res) + mask_for_mem.mul_(self.sigmoid_scale_for_mem_enc).add_(self.sigmoid_bias_for_mem_enc) + + # Mux masks: [N_obj, 1, H, W] -> [num_buckets, M, H, W] + mux_masks = multiplex_state.mux(mask_for_mem[:, 0]) + + # Conditioning channel: 1.0 = clean mask (trust it), 0.0 = propagation (noisy) + N_obj = mask_for_mem.shape[0] + cond_values = torch.full((N_obj,), 0.0, device=mask_for_mem.device, 
dtype=mask_for_mem.dtype) + if is_conditioning: + cond_values[:] = 1.0 + elif cond_obj_mask is not None: + cond_values[cond_obj_mask] = 1.0 + cond_spatial = cond_values.view(-1, 1, 1, 1).expand_as(mask_for_mem[:, 0:1, :, :]).squeeze(1) + mux_cond = multiplex_state.mux(cond_spatial) # [num_buckets, M, H, W] + mux_input = torch.cat([mux_masks, mux_cond], dim=1) # [num_buckets, 2*M, H, W] + + maskmem_out = self.maskmem_backbone(pix_feat, mux_input, skip_mask_sigmoid=True) + maskmem_features = maskmem_out["vision_features"] + maskmem_pos_enc = maskmem_out["vision_pos_enc"] + + # Add no_obj_embed_spatial for occluded objects + is_obj = (object_score_logits > 0).float() # [N_obj, 1] + mux_is_obj = multiplex_state.mux(is_obj) # [num_buckets, M, 1] + no_obj_embed = cast_to_input(self.no_obj_embed_spatial, maskmem_features) # [M, C] + no_obj_spatial = no_obj_embed.unsqueeze(0)[..., None, None] # [1, M, C, 1, 1] + # Expand and sum across multiplex slots weighted by (1 - is_obj) + alpha = mux_is_obj[..., None, None] # [num_buckets, M, 1, 1, 1] + per_slot_no_obj = ((1 - alpha) * no_obj_spatial).sum(dim=1) # [num_buckets, C, 1, 1] + maskmem_features = maskmem_features + per_slot_no_obj.expand_as(maskmem_features) + + return maskmem_features, maskmem_pos_enc + + def _forward_propagation(self, backbone_features, high_res_features=None, multiplex_state=None): + """Propagation path using the multiplex SAM decoder (no prompts).""" + B = backbone_features.shape[0] + device = backbone_features.device + + # Suppression embeddings from valid object mask + valid_mask = cast_to_input(multiplex_state.get_valid_object_mask().unsqueeze(-1).float(), backbone_features) + output_valid = cast_to_input(self.output_valid_embed, backbone_features).unsqueeze(0) + output_invalid = cast_to_input(self.output_invalid_embed, backbone_features).unsqueeze(0) + extra_embed = valid_mask * output_valid + (1 - valid_mask) * output_invalid + + image_pe = self.image_pe_layer((backbone_features.shape[-2], backbone_features.shape[-1]), device=backbone_features.device) + image_pe = cast_to_input(image_pe, backbone_features) + + masks, iou_pred, sam_tokens_out, object_score_logits = self.sam_mask_decoder( + image_embeddings=backbone_features, image_pe=image_pe, + sparse_prompt_embeddings=torch.empty(B, 0, self.d_model, device=device, dtype=backbone_features.dtype), + dense_prompt_embeddings=torch.zeros(B, self.d_model, *backbone_features.shape[-2:], device=device, dtype=backbone_features.dtype), + high_res_features=high_res_features, multimask_output=True, return_all=True, + extra_per_object_embeddings=extra_embed.expand(B, -1, -1), + ) + # masks: [B=num_buckets, M, T, H, W] + # Demux to per-object: [N_obj, T, H, W] + masks_obj = multiplex_state.demux(masks) + iou_obj = multiplex_state.demux(iou_pred) + score_obj = multiplex_state.demux(object_score_logits) + tokens_obj = multiplex_state.demux(sam_tokens_out) + + # Select best mask by IoU for each object + best_idx = torch.argmax(iou_obj, dim=-1) # [N_obj] + N_obj = masks_obj.shape[0] + obj_range = torch.arange(N_obj, device=device) + low_res_masks = masks_obj[obj_range, best_idx].unsqueeze(1) # [N_obj, 1, H, W] + # Suppress masks for objects with low confidence + is_obj = score_obj > 0 + low_res_masks = torch.where(is_obj[:, :, None, None], low_res_masks, + torch.tensor(NO_OBJ_SCORE, device=device, dtype=low_res_masks.dtype)) + high_res_masks = F.interpolate(low_res_masks.float(), size=(self.image_size, self.image_size), mode="bilinear", align_corners=False) + + # Object pointer: compute 
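obj_ptr_proj on the selected SAM token with the no-object blend applied, 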
per-object, mux for storage as [num_buckets, M, C] + sam_token = tokens_obj[:, 0] # [N_obj, C] + obj_ptr = self.obj_ptr_proj(sam_token) + is_obj = (score_obj > 0).float() + no_obj = self.no_obj_ptr_linear(obj_ptr) + obj_ptr = is_obj * obj_ptr + (1 - is_obj) * no_obj + obj_ptr_muxed = multiplex_state.mux(obj_ptr) # [num_buckets, M, C] + + return low_res_masks, high_res_masks, obj_ptr_muxed, score_obj + + def track_step(self, frame_idx, is_init_cond_frame, current_vision_feats, current_vision_pos_embeds, + feat_sizes, mask_inputs, output_dict, num_frames, point_inputs=None, + interactive_high_res=None, interactive_backbone=None, propagation_high_res=None, + multiplex_state=None, run_mem_encoder=True): + current_out = {} + H, W = feat_sizes[-1] + + if mask_inputs is not None: + # Conditioning frame: use interactive features if available, else propagation + if interactive_backbone is not None: + pix_feat = interactive_backbone + # Add no_mem_embed for interactive path + pix_flat = pix_feat.flatten(2) + bf = pix_flat.permute(0, 2, 1) + cast_to_input(self.interactivity_no_mem_embed, pix_flat) + pix_feat = to_spatial(bf, H, W) + hi_res = interactive_high_res + else: + # Fallback: interactive backbone not available (e.g. called outside track_video). + # Propagation features work but may produce lower-quality conditioning. + pix_feat = to_spatial(current_vision_feats[-1], H, W) + hi_res = propagation_high_res + sam_outputs = self._use_mask_as_output(pix_feat, hi_res, mask_inputs) + elif point_inputs is not None: + # Interactive path: use interactive SAM decoder + pix_feat_with_mem = self._prepare_memory_conditioned_features( + frame_idx=frame_idx, is_init_cond_frame=is_init_cond_frame, + current_vision_feats=current_vision_feats, current_vision_pos_embeds=current_vision_pos_embeds, + feat_sizes=feat_sizes, output_dict=output_dict, num_frames=num_frames, + multiplex_state=multiplex_state, + ) + hi_res = interactive_high_res if interactive_high_res is not None else propagation_high_res + num_pts = point_inputs["point_labels"].size(1) + multimask_output = is_init_cond_frame and 0 < num_pts <= 1 + sam_outputs = self._forward_sam_heads( + backbone_features=pix_feat_with_mem, point_inputs=point_inputs, + high_res_features=hi_res, multimask_output=multimask_output, + ) + else: + # Propagation path: use multiplex SAM decoder with propagation features + pix_feat_with_mem = self._prepare_memory_conditioned_features( + frame_idx=frame_idx, is_init_cond_frame=is_init_cond_frame, + current_vision_feats=current_vision_feats, current_vision_pos_embeds=current_vision_pos_embeds, + feat_sizes=feat_sizes, output_dict=output_dict, num_frames=num_frames, + multiplex_state=multiplex_state, + ) + sam_outputs = self._forward_propagation(pix_feat_with_mem, propagation_high_res, + multiplex_state=multiplex_state) + + (low_res_masks, high_res_masks, obj_ptr, object_score_logits) = sam_outputs + + # Mux obj_ptr if it came from interactive path (shape [B, C]) vs propagation ([num_buckets, M, C]) + if multiplex_state is not None and obj_ptr.dim() == 2: + obj_ptr = multiplex_state.mux(obj_ptr) # [N_obj, C] -> [num_buckets, M, C] + + # Encode memory (can be deferred with run_mem_encoder=False) + if run_mem_encoder and self.num_maskmem > 0: + pix_feat = to_spatial(current_vision_feats[-1], H, W) + maskmem_features, maskmem_pos_enc = self._encode_new_memory( + pix_feat=pix_feat, pred_masks_high_res=high_res_masks, + object_score_logits=object_score_logits, + is_mask_from_pts=(point_inputs is not None), + 
multiplex_state=multiplex_state, + is_conditioning=(mask_inputs is not None), + ) + current_out["maskmem_features"] = maskmem_features + current_out["maskmem_pos_enc"] = maskmem_pos_enc + else: + current_out["maskmem_features"] = None + current_out["maskmem_pos_enc"] = None + + # Store propagation image features for decoupled memory attention + current_out["image_features"] = current_vision_feats[-1] # [B, HW, C] + current_out["image_pos_enc"] = current_vision_pos_embeds[-1] # [B, HW, C] + + current_out["pred_masks"] = low_res_masks + current_out["pred_masks_high_res"] = high_res_masks + current_out["obj_ptr"] = obj_ptr + current_out["object_score_logits"] = object_score_logits + + return current_out + + def _compute_backbone_frame(self, backbone_fn, frame, frame_idx=None): + vision_feats, vision_pos, feat_sizes, features, trunk_out = _compute_backbone(backbone_fn, frame, frame_idx) + return vision_feats, vision_pos, feat_sizes, list(features[:-1]), trunk_out + + @staticmethod + def _suppress_recently_occluded(low_res_masks, last_occluded, frame_idx, threshold=0.3): + """Suppress overlapping masks for objects that were most recently occluded. + Prevents corrupted masks from occluded objects from contaminating other objects.""" + N_obj = low_res_masks.shape[0] + if N_obj <= 1: + return low_res_masks + binary = low_res_masks[:, 0] > 0 # [N_obj, H, W] + iou = _compute_mask_overlap(low_res_masks[:, 0], low_res_masks[:, 0]) + overlapping = torch.triu(iou >= threshold, diagonal=1) # [N, N] upper triangle + last_occ_i = last_occluded.unsqueeze(1) # [N, 1] + last_occ_j = last_occluded.unsqueeze(0) # [1, N] + # Suppress the more recently occluded object in each overlapping pair + suppress_i = overlapping & (last_occ_i > last_occ_j) & (last_occ_j > -1) + suppress_j = overlapping & (last_occ_j > last_occ_i) & (last_occ_i > -1) + to_suppress = suppress_i.any(dim=1) | suppress_j.any(dim=0) + # Update last_occluded for occluded/suppressed objects + is_empty = ~binary.any(dim=(-1, -2)) + newly_occluded = is_empty | to_suppress + last_occluded[newly_occluded] = frame_idx + # Suppress masks + low_res_masks[to_suppress] = -10.0 + return low_res_masks + + def _deferred_memory_encode(self, current_out, N_obj, vision_feats, feat_sizes, mux_state, device, + cond_obj_mask=None): + """Deferred memory encoding for propagation frames. 
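+        Before re-encoding, masks are compared via a per-pixel argmax across
+        objects: if the pixels an object wins retain >= 30% of its original
+        area, the mask is kept as-is; otherwise it is fully suppressed (clamped
+        to -10) so heavily-overlapped masks do not enter memory.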
cond_obj_mask: per-object bool for conditioning.""" + low_res_masks = current_out["pred_masks"] # [N_obj, 1, H_low, W_low] + + if N_obj > 1: + lr = low_res_masks.squeeze(1) # [N_obj, H, W] + max_obj = torch.argmax(lr, dim=0, keepdim=True) + batch_inds = torch.arange(N_obj, device=device)[:, None, None] + pixel_nol = torch.where(max_obj == batch_inds, lr, torch.clamp(lr, max=-10.0)) + area_before = (lr > 0).sum(dim=(-1, -2)).float().clamp(min=1) + area_after = (pixel_nol > 0).sum(dim=(-1, -2)).float() + shrink_ok = (area_after / area_before) >= 0.3 + low_res_masks = torch.where( + shrink_ok[:, None, None, None].expand_as(low_res_masks), + low_res_masks, torch.clamp(low_res_masks, max=-10.0)) + + interpol_size = self.maskmem_backbone.mask_downsampler.interpol_size + mem_masks = F.interpolate(low_res_masks, size=interpol_size, + mode="bilinear", align_corners=False) + + obj_scores = torch.where( + (mem_masks > 0).any(dim=(-1, -2)), 10.0, -10.0) + + pix_feat = to_spatial(vision_feats[-1], feat_sizes[-1][0], feat_sizes[-1][1]) + maskmem_features, maskmem_pos_enc = self._encode_new_memory( + pix_feat=pix_feat, pred_masks_high_res=mem_masks, + object_score_logits=obj_scores, + multiplex_state=mux_state, cond_obj_mask=cond_obj_mask) + current_out["maskmem_features"] = maskmem_features + current_out["maskmem_pos_enc"] = maskmem_pos_enc + + def _add_detected_objects(self, new_masks, mux_state, vision_feats, feat_sizes, current_out): + """Grow MultiplexState with new detections, merge masks, re-encode memory. Modifies current_out.""" + n_old = mux_state.total_valid_entries + mux_state.add_objects(new_masks.shape[0]) + N_obj = mux_state.total_valid_entries + # Stored memory with old bucket counts is padded at read time by _pad_to_buckets + for k in ("pred_masks", "pred_masks_high_res"): + det = F.interpolate(new_masks.unsqueeze(1), size=current_out[k].shape[-2:], + mode="bilinear", align_corners=False) + current_out[k] = torch.cat([current_out[k], det], dim=0) + if self.num_maskmem > 0: + # Mark new objects as conditioning (clean detection masks) so model trusts them + cond_mask = torch.zeros(N_obj, dtype=torch.bool, device=new_masks.device) + cond_mask[n_old:] = True + self._deferred_memory_encode(current_out, N_obj, vision_feats, feat_sizes, + mux_state, new_masks.device, cond_obj_mask=cond_mask) + + def _condition_with_masks(self, masks, frame_idx, vision_feats, vision_pos, feat_sizes, + high_res_prop, output_dict, N, mux_state, backbone_obj, frame, + trunk_out, threshold=0.5): + """Condition tracker with masks on a frame.""" + mask_input = F.interpolate(masks if masks.dim() == 4 else masks.unsqueeze(1), + size=(self.image_size, self.image_size), mode="bilinear", align_corners=False) + mask_input = (mask_input > threshold).to(masks.dtype) + hi_res = lo_feat = None + if backbone_obj is not None and backbone_obj.multiplex: + _, _, itf, _ = backbone_obj(frame, tracker_mode="interactive", cached_trunk=trunk_out, tracker_only=True) + hi_res, lo_feat = itf[:-1], itf[-1] + current_out = self.track_step( + frame_idx=frame_idx, is_init_cond_frame=True, current_vision_feats=vision_feats, + current_vision_pos_embeds=vision_pos, feat_sizes=feat_sizes, mask_inputs=mask_input, + output_dict=output_dict, num_frames=N, interactive_high_res=hi_res, + interactive_backbone=lo_feat, propagation_high_res=high_res_prop, + multiplex_state=mux_state, run_mem_encoder=True) + output_dict["cond_frame_outputs"][frame_idx] = current_out + return current_out + + def _match_and_add_detections(self, det_masks, det_scores, 
current_out, mux_state, + vision_feats, feat_sizes, device, max_objects=0, + keep_alive=None): + """Match detections against tracked masks, add new objects, recondition degraded tracks. + Updates keep_alive counters: +1 for matched tracks, -1 for unmatched.""" + N_obj = mux_state.total_valid_entries + if det_masks.shape[0] == 0: + if keep_alive is not None: + for i in range(N_obj): + keep_alive[i] = max(-4, keep_alive.get(i, 0) - 1) + return [] + + # Match at low-res (like reference) + trk_masks = current_out["pred_masks"][:, 0] # [N_obj, H_low, W_low] + det_resized = F.interpolate(det_masks.unsqueeze(1), size=trk_masks.shape[-2:], + mode="bilinear", align_corners=False)[:, 0] + overlap = _compute_mask_overlap(det_resized, trk_masks) + + # Update keep_alive and find matched tracks + matched = set() + if overlap.shape[1] > 0: + matched = set((overlap >= 0.5).any(dim=0).nonzero(as_tuple=True)[0].tolist()) + if keep_alive is not None: + for i in range(N_obj): + if i in matched: + keep_alive[i] = min(8, keep_alive.get(i, 0) + 1) + else: + keep_alive[i] = max(-4, keep_alive.get(i, 0) - 1) + + # Recondition: high-confidence detections (>=0.8) with high overlap refresh tracked masks + reconditioned = False + if det_scores is not None and overlap.shape[1] > 0: + HIGH_CONF = 0.8 + for det_idx in range(overlap.shape[0]): + if det_scores[det_idx] < HIGH_CONF: + continue + best_trk = overlap[det_idx].argmax().item() + if overlap[det_idx, best_trk] >= 0.5: + # Replace tracked mask with fresh detection mask + current_out["pred_masks"][best_trk] = det_resized[det_idx].unsqueeze(0) + det_hr = F.interpolate(det_masks[det_idx:det_idx+1].unsqueeze(1), + size=current_out["pred_masks_high_res"].shape[-2:], + mode="bilinear", align_corners=False) + current_out["pred_masks_high_res"][best_trk] = det_hr[0] + reconditioned = True + + # Re-encode memory if any tracks were reconditioned + if reconditioned and self.num_maskmem > 0: + self._deferred_memory_encode(current_out, N_obj, vision_feats, feat_sizes, mux_state, device) + + # Add new detections (not matching any track) + if max_objects > 0 and N_obj >= max_objects: + return [] + max_overlap = overlap.max(dim=1)[0] if overlap.shape[1] > 0 else torch.zeros(overlap.shape[0], device=device) + new_dets = max_overlap < 0.5 + if new_dets.any(): + if max_objects > 0: + slots = max_objects - N_obj + new_dets = new_dets & (torch.cumsum(new_dets.int(), 0) <= slots) + self._add_detected_objects(det_masks[new_dets], mux_state, + vision_feats, feat_sizes, current_out) + if keep_alive is not None: + for i in range(N_obj, mux_state.total_valid_entries): + keep_alive[i] = 1 + return det_scores[new_dets].tolist() if det_scores is not None else [0.0] * new_dets.sum().item() + return [] + + def track_video_with_detection(self, backbone_fn, images, initial_masks, detect_fn=None, + new_det_thresh=0.5, max_objects=0, detect_interval=1, + backbone_obj=None, pbar=None): + """Track with optional per-frame detection. 
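+        detect_fn, when given, runs every detect_interval frames on the cached
+        trunk features; detections scoring above new_det_thresh that overlap no
+        current track (mask overlap < 0.5) become new objects, up to max_objects.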
Returns [N, max_N_obj, H, W] mask logits.""" + N, device, dt = images.shape[0], images.device, images.dtype + output_dict = {"cond_frame_outputs": {}, "non_cond_frame_outputs": {}} + all_masks = [] + idev = comfy.model_management.intermediate_device() + mux_state = None + if initial_masks is not None: + mux_state = MultiplexState(initial_masks.shape[0], self.num_multiplex, device, dt) + obj_scores = [] # per-object detection score (1.0 for initial masks) + keep_alive = {} if detect_fn is not None else None + last_occluded = torch.empty(0, device=device, dtype=torch.long) # per-object last occluded frame + + # Prefetch next frame's backbone on a separate CUDA stream + prefetch = False + backbone_stream = None + if comfy.model_management.is_device_cuda(device): + try: + backbone_stream = torch.cuda.Stream(device=device) + prefetch = True + except RuntimeError: + pass + cur_bb = self._compute_backbone_frame(backbone_fn, images[0:1], frame_idx=0) + + for frame_idx in tqdm(range(N), desc="tracking"): + vision_feats, vision_pos, feat_sizes, high_res_prop, trunk_out = cur_bb + + # Start next frame's backbone on separate stream (overlaps with current frame's work) + if prefetch and frame_idx + 1 < N: + backbone_stream.wait_stream(torch.cuda.current_stream(device)) + with torch.cuda.stream(backbone_stream): + next_bb = self._compute_backbone_frame( + backbone_fn, images[frame_idx + 1:frame_idx + 2], frame_idx=frame_idx + 1) + + # Per-frame detection with NMS (skip if no detect_fn, or interval/max not met) + det_masks = torch.empty(0, device=device) + det_scores = None + run_det = (detect_fn is not None + and frame_idx % max(detect_interval, 1) == 0 + and not (max_objects > 0 and mux_state is not None + and mux_state.total_valid_entries >= max_objects)) + if run_det: + det_out = detect_fn(trunk_out) + scores = det_out["scores"][0].sigmoid() + keep = scores > new_det_thresh + det_masks, det_scores = det_out["masks"][0][keep], scores[keep] + if det_masks.shape[0] > 1: + det_masks, det_scores = _nms_masks(det_masks, det_scores) + + if frame_idx == 0 and initial_masks is not None: + current_out = self._condition_with_masks( + initial_masks.to(device=device, dtype=dt), frame_idx, vision_feats, vision_pos, + feat_sizes, high_res_prop, output_dict, N, mux_state, backbone_obj, + images[frame_idx:frame_idx + 1], trunk_out) + last_occluded = torch.full((mux_state.total_valid_entries,), -1, device=device, dtype=torch.long) + obj_scores = [1.0] * mux_state.total_valid_entries + if keep_alive is not None: + for i in range(mux_state.total_valid_entries): + keep_alive[i] = 8 + elif mux_state is None or mux_state.total_valid_entries == 0: + if det_masks.shape[0] > 0: + if max_objects > 0: + det_scores = det_scores[:max_objects] + det_masks = det_masks[:max_objects] + mux_state = MultiplexState(det_masks.shape[0], self.num_multiplex, device, dt) + current_out = self._condition_with_masks( + det_masks, frame_idx, vision_feats, vision_pos, feat_sizes, high_res_prop, + output_dict, N, mux_state, backbone_obj, + images[frame_idx:frame_idx + 1], trunk_out, threshold=0.0) + last_occluded = torch.full((mux_state.total_valid_entries,), -1, device=device, dtype=torch.long) + obj_scores = det_scores[:mux_state.total_valid_entries].tolist() + if keep_alive is not None: + for i in range(mux_state.total_valid_entries): + keep_alive[i] = 1 + else: + all_masks.append(None) + if pbar is not None: + pbar.update(1) + # Skip to backbone advance at end of loop + if frame_idx + 1 < N: + if prefetch: + 
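+                        # Consume the prefetched result: make the default stream
+                        # wait on the side stream before touching next_bb.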
torch.cuda.current_stream(device).wait_stream(backbone_stream) + cur_bb = next_bb + else: + cur_bb = self._compute_backbone_frame(backbone_fn, images[frame_idx + 1:frame_idx + 2], frame_idx=frame_idx + 1) + continue + else: + N_obj = mux_state.total_valid_entries + current_out = self.track_step( + frame_idx=frame_idx, is_init_cond_frame=False, current_vision_feats=vision_feats, + current_vision_pos_embeds=vision_pos, feat_sizes=feat_sizes, mask_inputs=None, + output_dict=output_dict, num_frames=N, propagation_high_res=high_res_prop, + multiplex_state=mux_state, run_mem_encoder=False) + current_out["pred_masks"] = fill_holes_in_mask_scores( + current_out["pred_masks"], max_area=16) + if last_occluded.shape[0] == N_obj and N_obj > 1: + self._suppress_recently_occluded( + current_out["pred_masks"], last_occluded, frame_idx) + if self.num_maskmem > 0: + self._deferred_memory_encode(current_out, N_obj, vision_feats, feat_sizes, mux_state, device) + output_dict["non_cond_frame_outputs"][frame_idx] = current_out + lookback = max(self.num_maskmem, self.max_obj_ptrs_in_encoder) + for old_idx in list(output_dict["non_cond_frame_outputs"]): + if old_idx < frame_idx - lookback: + del output_dict["non_cond_frame_outputs"][old_idx] + n_before = mux_state.total_valid_entries + new_obj_scores = self._match_and_add_detections(det_masks, det_scores, current_out, mux_state, + vision_feats, feat_sizes, device, max_objects, + keep_alive if run_det else None) + n_added = mux_state.total_valid_entries - n_before + if n_added > 0: + last_occluded = torch.cat([last_occluded, + torch.full((n_added,), -1, device=device, dtype=torch.long)]) + obj_scores.extend(new_obj_scores) + + masks_out = current_out["pred_masks_high_res"][:, 0] + if keep_alive is not None: + for i in range(masks_out.shape[0]): + if keep_alive.get(i, 0) <= 0: + masks_out[i] = NO_OBJ_SCORE + N_obj_now = mux_state.total_valid_entries if mux_state is not None else 0 + if N_obj_now > 0: + all_masks.append(pack_masks(masks_out).to(idev)) + else: + all_masks.append(None) + if pbar is not None: + pbar.update(1) + + # Next frame's backbone + if frame_idx + 1 < N: + if prefetch: + torch.cuda.current_stream(device).wait_stream(backbone_stream) + cur_bb = next_bb + else: + cur_bb = self._compute_backbone_frame(backbone_fn, images[frame_idx + 1:frame_idx + 2], frame_idx=frame_idx + 1) + + if not all_masks or all(m is None for m in all_masks): + return {"packed_masks": None, "n_frames": N, "scores": []} + + max_obj = max(m.shape[0] for m in all_masks if m is not None) + sample = next(m for m in all_masks if m is not None) + empty_packed = torch.zeros(max_obj, *sample.shape[1:], dtype=torch.uint8, device=sample.device) + for i, m in enumerate(all_masks): + if m is None: + all_masks[i] = empty_packed + elif m.shape[0] < max_obj: + pad = torch.zeros(max_obj - m.shape[0], *m.shape[1:], dtype=torch.uint8, device=m.device) + all_masks[i] = torch.cat([m, pad], dim=0) + return {"packed_masks": torch.stack(all_masks, dim=0), "n_frames": N, "scores": obj_scores} diff --git a/comfy/model_base.py b/comfy/model_base.py index 1c7695761..787ea1145 100644 --- a/comfy/model_base.py +++ b/comfy/model_base.py @@ -54,6 +54,7 @@ import comfy.ldm.anima.model import comfy.ldm.ace.ace_step15 import comfy.ldm.rt_detr.rtdetr_v4 import comfy.ldm.ernie.model +import comfy.ldm.sam3.detector import comfy.model_management import comfy.patcher_extension @@ -1974,3 +1975,7 @@ class ErnieImage(BaseModel): if cross_attn is not None: out['c_crossattn'] = comfy.conds.CONDRegular(cross_attn) 
return out + +class SAM3(BaseModel): + def __init__(self, model_config, model_type=ModelType.FLOW, device=None): + super().__init__(model_config, model_type, device=device, unet_model=comfy.ldm.sam3.detector.SAM3Model) diff --git a/comfy/model_detection.py b/comfy/model_detection.py index ca06cdd1e..724a241bf 100644 --- a/comfy/model_detection.py +++ b/comfy/model_detection.py @@ -718,6 +718,14 @@ def detect_unet_config(state_dict, key_prefix, metadata=None): dit_config["image_model"] = "ernie" return dit_config + if 'detector.backbone.vision_backbone.trunk.blocks.0.attn.qkv.weight' in state_dict_keys: # SAM3 / SAM3.1 + if 'detector.transformer.decoder.query_embed.weight' in state_dict_keys: + dit_config = {} + dit_config["image_model"] = "SAM3" + if 'detector.backbone.vision_backbone.propagation_convs.0.conv_1x1.weight' in state_dict_keys: + dit_config["image_model"] = "SAM31" + return dit_config + if '{}input_blocks.0.0.weight'.format(key_prefix) not in state_dict_keys: return None @@ -873,6 +881,10 @@ def model_config_from_unet(state_dict, unet_key_prefix, use_base_if_no_match=Fal return model_config def unet_prefix_from_state_dict(state_dict): + # SAM3: detector.* and tracker.* at top level, no common prefix + if any(k.startswith("detector.") for k in state_dict) and any(k.startswith("tracker.") for k in state_dict): + return "" + candidates = ["model.diffusion_model.", #ldm/sgm models "model.model.", #audio models "net.", #cosmos diff --git a/comfy/supported_models.py b/comfy/supported_models.py index 58d4ce731..8886f32d5 100644 --- a/comfy/supported_models.py +++ b/comfy/supported_models.py @@ -1781,6 +1781,57 @@ class ErnieImage(supported_models_base.BASE): return supported_models_base.ClipTarget(comfy.text_encoders.ernie.ErnieTokenizer, comfy.text_encoders.ernie.te(**hunyuan_detect)) -models = [LotusD, Stable_Zero123, SD15_instructpix2pix, SD15, SD20, SD21UnclipL, SD21UnclipH, SDXL_instructpix2pix, SDXLRefiner, SDXL, SSD1B, KOALA_700M, KOALA_1B, Segmind_Vega, SD_X4Upscaler, Stable_Cascade_C, Stable_Cascade_B, SV3D_u, SV3D_p, SD3, StableAudio, AuraFlow, PixArtAlpha, PixArtSigma, HunyuanDiT, HunyuanDiT1, FluxInpaint, Flux, LongCatImage, FluxSchnell, GenmoMochi, LTXV, LTXAV, HunyuanVideo15_SR_Distilled, HunyuanVideo15, HunyuanImage21Refiner, HunyuanImage21, HunyuanVideoSkyreelsI2V, HunyuanVideoI2V, HunyuanVideo, CosmosT2V, CosmosI2V, CosmosT2IPredict2, CosmosI2VPredict2, ZImagePixelSpace, ZImage, Lumina2, WAN22_T2V, WAN21_T2V, WAN21_I2V, WAN21_FunControl2V, WAN21_Vace, WAN21_Camera, WAN22_Camera, WAN22_S2V, WAN21_HuMo, WAN22_Animate, WAN21_FlowRVS, WAN21_SCAIL, Hunyuan3Dv2mini, Hunyuan3Dv2, Hunyuan3Dv2_1, HiDream, Chroma, ChromaRadiance, ACEStep, ACEStep15, Omnigen2, QwenImage, Flux2, Kandinsky5Image, Kandinsky5, Anima, RT_DETR_v4, ErnieImage] +class SAM3(supported_models_base.BASE): + unet_config = {"image_model": "SAM3"} + supported_inference_dtypes = [torch.float16, torch.bfloat16, torch.float32] + text_encoder_key_prefix = ["detector.backbone.language_backbone."] + unet_extra_prefix = "" + + def process_clip_state_dict(self, state_dict): + clip_keys = getattr(self, "_clip_stash", {}) + clip_keys = utils.state_dict_prefix_replace(clip_keys, {"detector.backbone.language_backbone.": "", "backbone.language_backbone.": ""}, filter_keys=True) + clip_keys = utils.clip_text_transformers_convert(clip_keys, "encoder.", "sam3_clip.transformer.") + return {k: v for k, v in clip_keys.items() if not k.startswith("resizer.")} + + def process_unet_state_dict(self, state_dict): + self._clip_stash 
= {k: state_dict.pop(k) for k in list(state_dict.keys()) if "language_backbone" in k and "resizer" not in k} + # SAM3.1: remap tracker.model.* -> tracker.* + for k in list(state_dict.keys()): + if k.startswith("tracker.model."): + state_dict["tracker." + k[len("tracker.model."):]] = state_dict.pop(k) + # SAM3.1: remove per-block freqs_cis buffers (computed dynamically) + for k in [k for k in list(state_dict.keys()) if ".attn.freqs_cis" in k]: + state_dict.pop(k) + # Split fused QKV projections + for k in [k for k in list(state_dict.keys()) if k.endswith((".in_proj_weight", ".in_proj_bias"))]: + t = state_dict.pop(k) + base, suffix = k.rsplit(".in_proj_", 1) + s = ".weight" if suffix == "weight" else ".bias" + d = t.shape[0] // 3 + state_dict[base + ".q_proj" + s] = t[:d] + state_dict[base + ".k_proj" + s] = t[d:2*d] + state_dict[base + ".v_proj" + s] = t[2*d:] + # Remap tracker SAM decoder transformer key names to match sam.py TwoWayTransformer + for k in list(state_dict.keys()): + if "sam_mask_decoder.transformer." not in k: + continue + new_k = k.replace(".mlp.lin1.", ".mlp.0.").replace(".mlp.lin2.", ".mlp.2.").replace(".norm_final_attn.", ".norm_final.") + if new_k != k: + state_dict[new_k] = state_dict.pop(k) + return state_dict + + def get_model(self, state_dict, prefix="", device=None): + return model_base.SAM3(self, device=device) + + def clip_target(self, state_dict={}): + import comfy.text_encoders.sam3_clip + return supported_models_base.ClipTarget(comfy.text_encoders.sam3_clip.SAM3TokenizerWrapper, comfy.text_encoders.sam3_clip.SAM3ClipModelWrapper) + + +class SAM31(SAM3): + unet_config = {"image_model": "SAM31"} + + +models = [LotusD, Stable_Zero123, SD15_instructpix2pix, SD15, SD20, SD21UnclipL, SD21UnclipH, SDXL_instructpix2pix, SDXLRefiner, SDXL, SSD1B, KOALA_700M, KOALA_1B, Segmind_Vega, SD_X4Upscaler, Stable_Cascade_C, Stable_Cascade_B, SV3D_u, SV3D_p, SD3, StableAudio, AuraFlow, PixArtAlpha, PixArtSigma, HunyuanDiT, HunyuanDiT1, FluxInpaint, Flux, LongCatImage, FluxSchnell, GenmoMochi, LTXV, LTXAV, HunyuanVideo15_SR_Distilled, HunyuanVideo15, HunyuanImage21Refiner, HunyuanImage21, HunyuanVideoSkyreelsI2V, HunyuanVideoI2V, HunyuanVideo, CosmosT2V, CosmosI2V, CosmosT2IPredict2, CosmosI2VPredict2, ZImagePixelSpace, ZImage, Lumina2, WAN22_T2V, WAN21_T2V, WAN21_I2V, WAN21_FunControl2V, WAN21_Vace, WAN21_Camera, WAN22_Camera, WAN22_S2V, WAN21_HuMo, WAN22_Animate, WAN21_FlowRVS, WAN21_SCAIL, Hunyuan3Dv2mini, Hunyuan3Dv2, Hunyuan3Dv2_1, HiDream, Chroma, ChromaRadiance, ACEStep, ACEStep15, Omnigen2, QwenImage, Flux2, Kandinsky5Image, Kandinsky5, Anima, RT_DETR_v4, ErnieImage, SAM3, SAM31] models += [SVD_img2vid] diff --git a/comfy/text_encoders/sam3_clip.py b/comfy/text_encoders/sam3_clip.py new file mode 100644 index 000000000..11cb7d9db --- /dev/null +++ b/comfy/text_encoders/sam3_clip.py @@ -0,0 +1,97 @@ +import re +from comfy import sd1_clip + +SAM3_CLIP_CONFIG = { + "architectures": ["CLIPTextModel"], + "hidden_act": "quick_gelu", + "hidden_size": 1024, + "intermediate_size": 4096, + "num_attention_heads": 16, + "num_hidden_layers": 24, + "max_position_embeddings": 32, + "projection_dim": 512, + "vocab_size": 49408, + "layer_norm_eps": 1e-5, + "eos_token_id": 49407, +} + + +class SAM3ClipModel(sd1_clip.SDClipModel): + def __init__(self, device="cpu", dtype=None, model_options={}): + super().__init__(device=device, dtype=dtype, max_length=32, layer="last", textmodel_json_config=SAM3_CLIP_CONFIG, special_tokens={"start": 49406, "end": 49407, "pad": 0}, 
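+                         # The flags below keep per-token attention masks in the
+                         # encoder output; the detector consumes them alongside
+                         # the embeddings (see _extract_text_prompts in nodes_sam3.py).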
return_projected_pooled=False, return_attention_masks=True, enable_attention_masks=True, model_options=model_options) + + +class SAM3Tokenizer(sd1_clip.SDTokenizer): + def __init__(self, embedding_directory=None, tokenizer_data={}): + super().__init__(max_length=32, pad_with_end=False, pad_token=0, embedding_directory=embedding_directory, embedding_size=1024, embedding_key="sam3_clip", tokenizer_data=tokenizer_data) + self.disable_weights = True + + +def _parse_prompts(text): + """Split comma-separated prompts with optional :N max detections per category""" + text = text.replace("(", "").replace(")", "") + parts = [p.strip() for p in text.split(",") if p.strip()] + result = [] + for part in parts: + m = re.match(r'^(.+?)\s*:\s*([\d.]+)\s*$', part) + if m: + text_part = m.group(1).strip() + val = m.group(2) + max_det = max(1, round(float(val))) + result.append((text_part, max_det)) + else: + result.append((part, 1)) + return result + + +class SAM3TokenizerWrapper(sd1_clip.SD1Tokenizer): + def __init__(self, embedding_directory=None, tokenizer_data={}): + super().__init__(embedding_directory=embedding_directory, tokenizer_data=tokenizer_data, clip_name="l", tokenizer=SAM3Tokenizer, name="sam3_clip") + + def tokenize_with_weights(self, text: str, return_word_ids=False, **kwargs): + parsed = _parse_prompts(text) + if len(parsed) <= 1 and (not parsed or parsed[0][1] == 1): + return super().tokenize_with_weights(text, return_word_ids, **kwargs) + # Tokenize each prompt part separately, store per-part batches and metadata + inner = getattr(self, self.clip) + per_prompt = [] + for prompt_text, max_det in parsed: + batches = inner.tokenize_with_weights(prompt_text, return_word_ids, **kwargs) + per_prompt.append((batches, max_det)) + # Main output uses first prompt's tokens (for compatibility) + out = {self.clip_name: per_prompt[0][0], "sam3_per_prompt": per_prompt} + return out + + +class SAM3ClipModelWrapper(sd1_clip.SD1ClipModel): + def __init__(self, device="cpu", dtype=None, model_options={}, **kwargs): + super().__init__(device=device, dtype=dtype, model_options=model_options, clip_name="l", clip_model=SAM3ClipModel, name="sam3_clip") + + def encode_token_weights(self, token_weight_pairs): + per_prompt = token_weight_pairs.pop("sam3_per_prompt", None) + if per_prompt is None: + return super().encode_token_weights(token_weight_pairs) + + # Encode each prompt separately, pack into extra dict + inner = getattr(self, self.clip) + multi_cond = [] + first_pooled = None + for batches, max_det in per_prompt: + out = inner.encode_token_weights(batches) + cond, pooled = out[0], out[1] + extra = out[2] if len(out) > 2 else {} + if first_pooled is None: + first_pooled = pooled + multi_cond.append({ + "cond": cond, + "attention_mask": extra.get("attention_mask"), + "max_detections": max_det, + }) + + # Return first prompt as main (for non-SAM3 consumers), all prompts in metadata + main = multi_cond[0] + main_extra = {} + if main["attention_mask"] is not None: + main_extra["attention_mask"] = main["attention_mask"] + main_extra["sam3_multi_cond"] = multi_cond + return (main["cond"], first_pooled, main_extra) diff --git a/comfy_extras/nodes_sam3.py b/comfy_extras/nodes_sam3.py new file mode 100644 index 000000000..5cf92ccb3 --- /dev/null +++ b/comfy_extras/nodes_sam3.py @@ -0,0 +1,529 @@ +""" +SAM3 (Segment Anything 3) nodes for detection, segmentation, and video tracking. 
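+
+Rough node-wiring sketch (illustrative only, not runnable Python):
+
+    CLIPTextEncode("person, car:3") -> SAM3_Detect        # images: masks + bboxes
+    SAM3_VideoTrack -> SAM3_TrackPreview                  # color-coded video overlay
+    SAM3_VideoTrack -> SAM3_TrackToMask                   # selected objects as MASK
+
+Text prompts are comma-separated categories; an optional ":N" suffix caps how many
+detections are kept per category (see _parse_prompts in comfy/text_encoders/sam3_clip.py).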
+""" + +from typing_extensions import override + +import json +import os +import torch +import torch.nn.functional as F +import comfy.model_management +import comfy.utils +import folder_paths +from comfy_api.latest import ComfyExtension, io, ui +import av +from fractions import Fraction + + +def _extract_text_prompts(conditioning, device, dtype): + """Extract list of (text_embeddings, text_mask) from conditioning.""" + cond_meta = conditioning[0][1] + multi = cond_meta.get("sam3_multi_cond") + prompts = [] + if multi is not None: + for entry in multi: + emb = entry["cond"].to(device=device, dtype=dtype) + mask = entry["attention_mask"].to(device) if entry["attention_mask"] is not None else None + if mask is None: + mask = torch.ones(emb.shape[0], emb.shape[1], dtype=torch.int64, device=device) + prompts.append((emb, mask, entry.get("max_detections", 1))) + else: + emb = conditioning[0][0].to(device=device, dtype=dtype) + mask = cond_meta.get("attention_mask") + if mask is not None: + mask = mask.to(device) + else: + mask = torch.ones(emb.shape[0], emb.shape[1], dtype=torch.int64, device=device) + prompts.append((emb, mask, 1)) + return prompts + + +def _refine_mask(sam3_model, orig_image_hwc, coarse_mask, box_xyxy, H, W, device, dtype, iterations): + """Refine a coarse detector mask via SAM decoder, cropping to the detection box. + + Returns: [1, H, W] binary mask + """ + def _coarse_fallback(): + return (F.interpolate(coarse_mask.unsqueeze(0).unsqueeze(0), size=(H, W), + mode="bilinear", align_corners=False)[0] > 0).float() + + if iterations <= 0: + return _coarse_fallback() + + pad_frac = 0.1 + x1, y1, x2, y2 = box_xyxy.tolist() + bw, bh = x2 - x1, y2 - y1 + cx1 = max(0, int(x1 - bw * pad_frac)) + cy1 = max(0, int(y1 - bh * pad_frac)) + cx2 = min(W, int(x2 + bw * pad_frac)) + cy2 = min(H, int(y2 + bh * pad_frac)) + if cx2 <= cx1 or cy2 <= cy1: + return _coarse_fallback() + + crop = orig_image_hwc[cy1:cy2, cx1:cx2, :3] + crop_1008 = comfy.utils.common_upscale(crop.unsqueeze(0).movedim(-1, 1), 1008, 1008, "bilinear", crop="disabled") + crop_frame = crop_1008.to(device=device, dtype=dtype) + crop_h, crop_w = cy2 - cy1, cx2 - cx1 + + # Crop coarse mask and refine via SAM on the cropped image + mask_h, mask_w = coarse_mask.shape[-2:] + mx1, my1 = int(cx1 / W * mask_w), int(cy1 / H * mask_h) + mx2, my2 = int(cx2 / W * mask_w), int(cy2 / H * mask_h) + if mx2 <= mx1 or my2 <= my1: + return _coarse_fallback() + mask_logit = coarse_mask[..., my1:my2, mx1:mx2].unsqueeze(0).unsqueeze(0) + for _ in range(iterations): + coarse_input = F.interpolate(mask_logit, size=(1008, 1008), mode="bilinear", align_corners=False) + mask_logit = sam3_model.forward_segment(crop_frame, mask_inputs=coarse_input) + + refined_crop = F.interpolate(mask_logit, size=(crop_h, crop_w), mode="bilinear", align_corners=False) + full_mask = torch.zeros(1, 1, H, W, device=device, dtype=dtype) + full_mask[:, :, cy1:cy2, cx1:cx2] = refined_crop + coarse_full = F.interpolate(coarse_mask.unsqueeze(0).unsqueeze(0), size=(H, W), mode="bilinear", align_corners=False) + return ((full_mask[0] > 0) | (coarse_full[0] > 0)).float() + + + +class SAM3_Detect(io.ComfyNode): + """Open-vocabulary detection and segmentation using text, box, or point prompts.""" + + @classmethod + def define_schema(cls): + return io.Schema( + node_id="SAM3_Detect", + display_name="SAM3 Detect", + category="detection/", + search_aliases=["sam3", "segment anything", "open vocabulary", "text detection", "segment"], + inputs=[ + io.Model.Input("model", 
display_name="model"), + io.Image.Input("image", display_name="image"), + io.Conditioning.Input("conditioning", display_name="conditioning", optional=True, tooltip="Text conditioning from CLIPTextEncode"), + io.BoundingBox.Input("bboxes", display_name="bboxes", force_input=True, optional=True, tooltip="Bounding boxes to segment within"), + io.String.Input("positive_coords", display_name="positive_coords", force_input=True, optional=True, tooltip="Positive point prompts as JSON [{\"x\": int, \"y\": int}, ...] (pixel coords)"), + io.String.Input("negative_coords", display_name="negative_coords", force_input=True, optional=True, tooltip="Negative point prompts as JSON [{\"x\": int, \"y\": int}, ...] (pixel coords)"), + io.Float.Input("threshold", display_name="threshold", default=0.5, min=0.0, max=1.0, step=0.01), + io.Int.Input("refine_iterations", display_name="refine_iterations", default=2, min=0, max=5, tooltip="SAM decoder refinement passes (0=use raw detector masks)"), + io.Boolean.Input("individual_masks", display_name="individual_masks", default=False, tooltip="Output per-object masks instead of union"), + ], + outputs=[ + io.Mask.Output("masks"), + io.BoundingBox.Output("bboxes"), + ], + ) + + @classmethod + def execute(cls, model, image, conditioning=None, bboxes=None, positive_coords=None, negative_coords=None, threshold=0.5, refine_iterations=2, individual_masks=False) -> io.NodeOutput: + B, H, W, C = image.shape + image_in = comfy.utils.common_upscale(image[..., :3].movedim(-1, 1), 1008, 1008, "bilinear", crop="disabled") + + # Convert bboxes to normalized cxcywh format, per-frame list of [1, N, 4] tensors. + # Supports: single dict (all frames), list[dict] (all frames), list[list[dict]] (per-frame). + def _boxes_to_tensor(box_list): + coords = [] + for d in box_list: + cx = (d["x"] + d["width"] / 2) / W + cy = (d["y"] + d["height"] / 2) / H + coords.append([cx, cy, d["width"] / W, d["height"] / H]) + return torch.tensor([coords], dtype=torch.float32) # [1, N, 4] + + per_frame_boxes = None + if bboxes is not None: + if isinstance(bboxes, dict): + # Single box → same for all frames + shared = _boxes_to_tensor([bboxes]) + per_frame_boxes = [shared] * B + elif isinstance(bboxes, list) and len(bboxes) > 0 and isinstance(bboxes[0], list): + # list[list[dict]] → per-frame boxes + per_frame_boxes = [_boxes_to_tensor(frame_boxes) if frame_boxes else None for frame_boxes in bboxes] + # Pad to B if fewer frames provided + while len(per_frame_boxes) < B: + per_frame_boxes.append(per_frame_boxes[-1] if per_frame_boxes else None) + elif isinstance(bboxes, list) and len(bboxes) > 0: + # list[dict] → same boxes for all frames + shared = _boxes_to_tensor(bboxes) + per_frame_boxes = [shared] * B + + # Parse point prompts from JSON (KJNodes PointsEditor format: [{"x": int, "y": int}, ...]) + pos_pts = json.loads(positive_coords) if positive_coords else [] + neg_pts = json.loads(negative_coords) if negative_coords else [] + has_points = len(pos_pts) > 0 or len(neg_pts) > 0 + + comfy.model_management.load_model_gpu(model) + device = comfy.model_management.get_torch_device() + dtype = model.model.get_dtype() + sam3_model = model.model.diffusion_model + + # Build point inputs for tracker SAM decoder path + point_inputs = None + if has_points: + all_coords = [[p["x"] / W * 1008, p["y"] / H * 1008] for p in pos_pts] + \ + [[p["x"] / W * 1008, p["y"] / H * 1008] for p in neg_pts] + all_labels = [1] * len(pos_pts) + [0] * len(neg_pts) + point_inputs = { + "point_coords": torch.tensor([all_coords], 
dtype=dtype, device=device), + "point_labels": torch.tensor([all_labels], dtype=torch.int32, device=device), + } + + cond_list = _extract_text_prompts(conditioning, device, dtype) if conditioning is not None and len(conditioning) > 0 else [] + has_text = len(cond_list) > 0 + + # Run per-image through detector (text/boxes) and/or tracker (points) + all_bbox_dicts = [] + all_masks = [] + pbar = comfy.utils.ProgressBar(B) + + for b in range(B): + frame = image_in[b:b+1].to(device=device, dtype=dtype) + b_boxes = None + if per_frame_boxes is not None and per_frame_boxes[b] is not None: + b_boxes = per_frame_boxes[b].to(device=device, dtype=dtype) + + frame_bbox_dicts = [] + frame_masks = [] + + # Point prompts: tracker SAM decoder path with iterative refinement + if point_inputs is not None: + mask_logit = sam3_model.forward_segment(frame, point_inputs=point_inputs) + for _ in range(max(0, refine_iterations - 1)): + mask_logit = sam3_model.forward_segment(frame, mask_inputs=mask_logit) + mask = F.interpolate(mask_logit, size=(H, W), mode="bilinear", align_corners=False) + frame_masks.append((mask[0] > 0).float()) + + # Box prompts: SAM decoder path (segment inside each box) + if b_boxes is not None and not has_text: + for box_cxcywh in b_boxes[0]: + cx, cy, bw, bh = box_cxcywh.tolist() + # Convert cxcywh normalized → xyxy in 1008 space → [1, 2, 2] corners + sam_box = torch.tensor([[[(cx - bw/2) * 1008, (cy - bh/2) * 1008], + [(cx + bw/2) * 1008, (cy + bh/2) * 1008]]], + device=device, dtype=dtype) + mask_logit = sam3_model.forward_segment(frame, box_inputs=sam_box) + for _ in range(max(0, refine_iterations - 1)): + mask_logit = sam3_model.forward_segment(frame, mask_inputs=mask_logit) + mask = F.interpolate(mask_logit, size=(H, W), mode="bilinear", align_corners=False) + frame_masks.append((mask[0] > 0).float()) + + # Text prompts: run detector per text prompt (each detects one category) + for text_embeddings, text_mask, max_det in cond_list: + results = sam3_model( + frame, text_embeddings=text_embeddings, text_mask=text_mask, + boxes=b_boxes, threshold=threshold, orig_size=(H, W)) + + pred_boxes = results["boxes"][0] + scores = results["scores"][0] + masks = results["masks"][0] + + probs = scores.sigmoid() + keep = probs > threshold + kept_boxes = pred_boxes[keep].cpu() + kept_scores = probs[keep].cpu() + kept_masks = masks[keep] + + order = kept_scores.argsort(descending=True)[:max_det] + kept_boxes = kept_boxes[order] + kept_scores = kept_scores[order] + kept_masks = kept_masks[order] + + for box, score in zip(kept_boxes, kept_scores): + frame_bbox_dicts.append({ + "x": float(box[0]), "y": float(box[1]), + "width": float(box[2] - box[0]), "height": float(box[3] - box[1]), + "score": float(score), + }) + for m, box in zip(kept_masks, kept_boxes): + frame_masks.append(_refine_mask( + sam3_model, image[b], m, box, H, W, device, dtype, refine_iterations)) + + all_bbox_dicts.append(frame_bbox_dicts) + if len(frame_masks) > 0: + combined = torch.cat(frame_masks, dim=0) # [N_obj, H, W] + if individual_masks: + all_masks.append(combined) + else: + all_masks.append((combined > 0).any(dim=0).float()) + else: + if individual_masks: + all_masks.append(torch.zeros(0, H, W, device=comfy.model_management.intermediate_device())) + else: + all_masks.append(torch.zeros(H, W, device=comfy.model_management.intermediate_device())) + pbar.update(1) + + idev = comfy.model_management.intermediate_device() + all_masks = [m.to(idev) for m in all_masks] + mask_out = torch.cat(all_masks, dim=0) if individual_masks 
else torch.stack(all_masks) + return io.NodeOutput(mask_out, all_bbox_dicts) + + +SAM3TrackData = io.Custom("SAM3_TRACK_DATA") + +class SAM3_VideoTrack(io.ComfyNode): + """Track objects across video frames using SAM3's memory-based tracker.""" + + @classmethod + def define_schema(cls): + return io.Schema( + node_id="SAM3_VideoTrack", + display_name="SAM3 Video Track", + category="detection/", + search_aliases=["sam3", "video", "track", "propagate"], + inputs=[ + io.Image.Input("images", display_name="images", tooltip="Video frames as batched images"), + io.Model.Input("model", display_name="model"), + io.Mask.Input("initial_mask", display_name="initial_mask", optional=True, tooltip="Mask(s) for the first frame to track (one per object)"), + io.Conditioning.Input("conditioning", display_name="conditioning", optional=True, tooltip="Text conditioning for detecting new objects during tracking"), + io.Float.Input("detection_threshold", display_name="detection_threshold", default=0.5, min=0.0, max=1.0, step=0.01, tooltip="Score threshold for text-prompted detection"), + io.Int.Input("max_objects", display_name="max_objects", default=0, min=0, tooltip="Max tracked objects (0=unlimited). Initial masks count toward this limit."), + io.Int.Input("detect_interval", display_name="detect_interval", default=1, min=1, tooltip="Run detection every N frames (1=every frame). Higher values save compute."), + ], + outputs=[ + SAM3TrackData.Output("track_data", display_name="track_data"), + ], + ) + + @classmethod + def execute(cls, images, model, initial_mask=None, conditioning=None, detection_threshold=0.5, max_objects=0, detect_interval=1) -> io.NodeOutput: + N, H, W, C = images.shape + + comfy.model_management.load_model_gpu(model) + device = comfy.model_management.get_torch_device() + dtype = model.model.get_dtype() + sam3_model = model.model.diffusion_model + + frames = images[..., :3].movedim(-1, 1) + frames_in = comfy.utils.common_upscale(frames, 1008, 1008, "bilinear", crop="disabled").to(device=device, dtype=dtype) + + init_masks = None + if initial_mask is not None: + init_masks = initial_mask.unsqueeze(1).to(device=device, dtype=dtype) + + pbar = comfy.utils.ProgressBar(N) + + text_prompts = None + if conditioning is not None and len(conditioning) > 0: + text_prompts = [(emb, mask) for emb, mask, _ in _extract_text_prompts(conditioning, device, dtype)] + elif initial_mask is None: + raise ValueError("Either initial_mask or conditioning must be provided") + + result = sam3_model.forward_video( + images=frames_in, initial_masks=init_masks, pbar=pbar, text_prompts=text_prompts, + new_det_thresh=detection_threshold, max_objects=max_objects, + detect_interval=detect_interval) + result["orig_size"] = (H, W) + return io.NodeOutput(result) + + +class SAM3_TrackPreview(io.ComfyNode): + """Visualize tracked objects with distinct colors as a video preview. 
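+    Each tracked object gets a fixed palette color; its index is drawn at the
+    mask centroid, and objects with a detection score below 1.0 also get the
+    score rendered as a percentage beneath the index.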
No tensor output — saves to temp video.""" + + @classmethod + def define_schema(cls): + return io.Schema( + node_id="SAM3_TrackPreview", + display_name="SAM3 Track Preview", + category="detection/", + inputs=[ + SAM3TrackData.Input("track_data", display_name="track_data"), + io.Image.Input("images", display_name="images", optional=True), + io.Float.Input("opacity", display_name="opacity", default=0.5, min=0.0, max=1.0, step=0.05), + io.Float.Input("fps", display_name="fps", default=24.0, min=1.0, max=120.0, step=1.0), + ], + is_output_node=True, + ) + + COLORS = [ + (0.12, 0.47, 0.71), (1.0, 0.5, 0.05), (0.17, 0.63, 0.17), (0.84, 0.15, 0.16), + (0.58, 0.4, 0.74), (0.55, 0.34, 0.29), (0.89, 0.47, 0.76), (0.5, 0.5, 0.5), + (0.74, 0.74, 0.13), (0.09, 0.75, 0.81), (0.94, 0.76, 0.06), (0.42, 0.68, 0.84), + ] + + # 5x3 bitmap font atlas for digits 0-9 [10, 5, 3] + _glyph_cache = {} # (device, scale) -> (glyphs, outlines, gh, gw, oh, ow) + + @staticmethod + def _get_glyphs(device, scale=3): + key = (device, scale) + if key in SAM3_TrackPreview._glyph_cache: + return SAM3_TrackPreview._glyph_cache[key] + atlas = torch.tensor([ + [[1,1,1],[1,0,1],[1,0,1],[1,0,1],[1,1,1]], + [[0,1,0],[1,1,0],[0,1,0],[0,1,0],[1,1,1]], + [[1,1,1],[0,0,1],[1,1,1],[1,0,0],[1,1,1]], + [[1,1,1],[0,0,1],[1,1,1],[0,0,1],[1,1,1]], + [[1,0,1],[1,0,1],[1,1,1],[0,0,1],[0,0,1]], + [[1,1,1],[1,0,0],[1,1,1],[0,0,1],[1,1,1]], + [[1,1,1],[1,0,0],[1,1,1],[1,0,1],[1,1,1]], + [[1,1,1],[0,0,1],[0,0,1],[0,0,1],[0,0,1]], + [[1,1,1],[1,0,1],[1,1,1],[1,0,1],[1,1,1]], + [[1,1,1],[1,0,1],[1,1,1],[0,0,1],[1,1,1]], + ], dtype=torch.bool) + glyphs, outlines = [], [] + for d in range(10): + g = atlas[d].repeat_interleave(scale, 0).repeat_interleave(scale, 1) + padded = F.pad(g.float().unsqueeze(0).unsqueeze(0), (1,1,1,1)) + o = (F.max_pool2d(padded, 3, stride=1, padding=1)[0, 0] > 0) + glyphs.append(g.to(device)) + outlines.append(o.to(device)) + gh, gw = glyphs[0].shape + oh, ow = outlines[0].shape + SAM3_TrackPreview._glyph_cache[key] = (glyphs, outlines, gh, gw, oh, ow) + return SAM3_TrackPreview._glyph_cache[key] + + @staticmethod + def _draw_number_gpu(frame, number, cx, cy, color, scale=3): + """Draw a number on a GPU tensor [H, W, 3] float 0-1 at (cx, cy) with outline.""" + H, W = frame.shape[:2] + device = frame.device + glyphs, outlines, gh, gw, oh, ow = SAM3_TrackPreview._get_glyphs(device, scale) + color_t = torch.tensor(color, device=device, dtype=frame.dtype) + digs = [int(d) for d in str(number)] + total_w = len(digs) * (gw + scale) - scale + x0 = cx - total_w // 2 + y0 = cy - gh // 2 + for i, d in enumerate(digs): + dx = x0 + i * (gw + scale) + # Black outline + oy0, ox0 = y0 - 1, dx - 1 + osy1, osx1 = max(0, -oy0), max(0, -ox0) + osy2, osx2 = min(oh, H - oy0), min(ow, W - ox0) + if osy2 > osy1 and osx2 > osx1: + fy1, fx1 = oy0 + osy1, ox0 + osx1 + frame[fy1:fy1+(osy2-osy1), fx1:fx1+(osx2-osx1)][outlines[d][osy1:osy2, osx1:osx2]] = 0 + # Colored fill + sy1, sx1 = max(0, -y0), max(0, -dx) + sy2, sx2 = min(gh, H - y0), min(gw, W - dx) + if sy2 > sy1 and sx2 > sx1: + fy1, fx1 = y0 + sy1, dx + sx1 + frame[fy1:fy1+(sy2-sy1), fx1:fx1+(sx2-sx1)][glyphs[d][sy1:sy2, sx1:sx2]] = color_t + + @classmethod + def execute(cls, track_data, images=None, opacity=0.5, fps=24.0) -> io.NodeOutput: + + from comfy.ldm.sam3.tracker import unpack_masks + packed = track_data["packed_masks"] + H, W = track_data["orig_size"] + if images is not None: + H, W = images.shape[1], images.shape[2] + if packed is None: + N, N_obj = track_data["n_frames"], 0 + else: + 
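+            # packed_masks is [N, N_obj, ...] uint8 produced by tracker.pack_masks;
+            # unpack_masks below restores per-object boolean masks.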
N, N_obj = packed.shape[0], packed.shape[1] + + import uuid + gpu = comfy.model_management.get_torch_device() + temp_dir = folder_paths.get_temp_directory() + filename = f"sam3_track_preview_{uuid.uuid4().hex[:8]}.mp4" + filepath = os.path.join(temp_dir, filename) + with av.open(filepath, mode='w') as output: + stream = output.add_stream('h264', rate=Fraction(round(fps * 1000), 1000)) + stream.width = W + stream.height = H + stream.pix_fmt = 'yuv420p' + + frame_cpu = torch.empty(H, W, 3, dtype=torch.uint8) + frame_np = frame_cpu.numpy() + if N_obj > 0: + colors_t = torch.tensor([cls.COLORS[i % len(cls.COLORS)] for i in range(N_obj)], + device=gpu, dtype=torch.float32) + grid_y = torch.arange(H, device=gpu).view(1, H, 1) + grid_x = torch.arange(W, device=gpu).view(1, 1, W) + for t in range(N): + if images is not None and t < images.shape[0]: + frame = images[t].clone() + else: + frame = torch.zeros(H, W, 3) + + if N_obj > 0: + frame_binary = unpack_masks(packed[t:t+1].to(gpu)) # [1, N_obj, H, W] bool + frame_masks = F.interpolate(frame_binary.float(), size=(H, W), mode="nearest")[0] + frame_gpu = frame.to(gpu) + bool_masks = frame_masks > 0.5 + any_mask = bool_masks.any(dim=0) + if any_mask.any(): + obj_idx_map = bool_masks.to(torch.uint8).argmax(dim=0) + color_overlay = colors_t[obj_idx_map] + mask_3d = any_mask.unsqueeze(-1) + frame_gpu = torch.where(mask_3d, frame_gpu * (1 - opacity) + color_overlay * opacity, frame_gpu) + area = bool_masks.sum(dim=(-1, -2)).clamp_(min=1) + cy = (bool_masks * grid_y).sum(dim=(-1, -2)) // area + cx = (bool_masks * grid_x).sum(dim=(-1, -2)) // area + has = area > 1 + scores = track_data.get("scores", []) + for obj_idx in range(N_obj): + if has[obj_idx]: + _cx, _cy = int(cx[obj_idx]), int(cy[obj_idx]) + color = cls.COLORS[obj_idx % len(cls.COLORS)] + SAM3_TrackPreview._draw_number_gpu(frame_gpu, obj_idx, _cx, _cy, color) + if obj_idx < len(scores) and scores[obj_idx] < 1.0: + SAM3_TrackPreview._draw_number_gpu(frame_gpu, int(scores[obj_idx] * 100), + _cx, _cy + 5 * 3 + 3, color, scale=2) + frame_cpu.copy_(frame_gpu.clamp_(0, 1).mul_(255).byte()) + else: + frame_cpu.copy_(frame.clamp_(0, 1).mul_(255).byte()) + + vframe = av.VideoFrame.from_ndarray(frame_np, format='rgb24') + output.mux(stream.encode(vframe.reformat(format='yuv420p'))) + output.mux(stream.encode(None)) + return io.NodeOutput(ui=ui.PreviewVideo([ui.SavedResult(filename, "", io.FolderType.temp)])) + + +class SAM3_TrackToMask(io.ComfyNode): + """Select tracked objects by index and output as mask.""" + + @classmethod + def define_schema(cls): + return io.Schema( + node_id="SAM3_TrackToMask", + display_name="SAM3 Track to Mask", + category="detection/", + inputs=[ + SAM3TrackData.Input("track_data", display_name="track_data"), + io.String.Input("object_indices", display_name="object_indices", default="", + tooltip="Comma-separated object indices to include (e.g. '0,2,3'). 
Empty = all objects."), + ], + outputs=[ + io.Mask.Output("masks", display_name="masks"), + ], + ) + + @classmethod + def execute(cls, track_data, object_indices="") -> io.NodeOutput: + from comfy.ldm.sam3.tracker import unpack_masks + packed = track_data["packed_masks"] + H, W = track_data["orig_size"] + + if packed is None: + N = track_data["n_frames"] + return io.NodeOutput(torch.zeros(N, H, W, device=comfy.model_management.intermediate_device())) + + N, N_obj = packed.shape[0], packed.shape[1] + + if object_indices.strip(): + indices = [int(i.strip()) for i in object_indices.split(",") if i.strip().isdigit()] + indices = [i for i in indices if 0 <= i < N_obj] + else: + indices = list(range(N_obj)) + + if not indices: + return io.NodeOutput(torch.zeros(N, H, W, device=comfy.model_management.intermediate_device())) + + selected = packed[:, indices] + binary = unpack_masks(selected) # [N, len(indices), Hm, Wm] bool + union = binary.any(dim=1, keepdim=True).float() + mask_out = F.interpolate(union, size=(H, W), mode="bilinear", align_corners=False)[:, 0] + return io.NodeOutput(mask_out) + + +class SAM3Extension(ComfyExtension): + @override + async def get_node_list(self) -> list[type[io.ComfyNode]]: + return [ + SAM3_Detect, + SAM3_VideoTrack, + SAM3_TrackPreview, + SAM3_TrackToMask, + ] + + +async def comfy_entrypoint() -> SAM3Extension: + return SAM3Extension() diff --git a/nodes.py b/nodes.py index bb38e07b8..fb83da896 100644 --- a/nodes.py +++ b/nodes.py @@ -2459,6 +2459,7 @@ async def init_builtin_extra_nodes(): "nodes_curve.py", "nodes_rtdetr.py", "nodes_frame_interpolation.py", + "nodes_sam3.py" ] import_failed = [] From 3cdc0d523f080deb22fee24bfb0080180cde4f6e Mon Sep 17 00:00:00 2001 From: Alexander Piskun <13381981+bigcat88@users.noreply.github.com> Date: Thu, 23 Apr 2026 08:47:33 +0300 Subject: [PATCH 13/81] [Partner Nodes] GPTImage: fix price badges, add new resolutions (#13519) * fix(api-nodes): fixed price badges, add new resolutions Signed-off-by: bigcat88 * proper calculate the total run cost when "n > 1" Signed-off-by: bigcat88 --------- Signed-off-by: bigcat88 --- comfy_api_nodes/nodes_openai.py | 59 +++++++++++++++++++++++++-------- 1 file changed, 46 insertions(+), 13 deletions(-) diff --git a/comfy_api_nodes/nodes_openai.py b/comfy_api_nodes/nodes_openai.py index 90a29c2f2..bbb758068 100644 --- a/comfy_api_nodes/nodes_openai.py +++ b/comfy_api_nodes/nodes_openai.py @@ -357,6 +357,10 @@ def calculate_tokens_price_image_1_5(response: OpenAIImageGenerationResponse) -> return ((response.usage.input_tokens * 8.0) + (response.usage.output_tokens * 32.0)) / 1_000_000.0 +def calculate_tokens_price_image_2_0(response: OpenAIImageGenerationResponse) -> float | None: + return ((response.usage.input_tokens * 8.0) + (response.usage.output_tokens * 30.0)) / 1_000_000.0 + + class OpenAIGPTImage1(IO.ComfyNode): @classmethod @@ -401,7 +405,17 @@ class OpenAIGPTImage1(IO.ComfyNode): IO.Combo.Input( "size", default="auto", - options=["auto", "1024x1024", "1024x1536", "1536x1024"], + options=[ + "auto", + "1024x1024", + "1024x1536", + "1536x1024", + "2048x2048", + "2048x1152", + "1152x2048", + "3840x2160", + "2160x3840", + ], tooltip="Image size", optional=True, ), @@ -427,7 +441,7 @@ class OpenAIGPTImage1(IO.ComfyNode): ), IO.Combo.Input( "model", - options=["gpt-image-1", "gpt-image-1.5", 'gpt-image-2'], + options=["gpt-image-1", "gpt-image-1.5", "gpt-image-2"], default="gpt-image-2", optional=True, ), @@ -442,23 +456,36 @@ class OpenAIGPTImage1(IO.ComfyNode): ], is_api_node=True, 
price_badge=IO.PriceBadge( - depends_on=IO.PriceBadgeDepends(widgets=["quality", "n"]), + depends_on=IO.PriceBadgeDepends(widgets=["quality", "n", "model"]), expr=""" ( $ranges := { - "low": [0.011, 0.02], - "medium": [0.046, 0.07], - "high": [0.167, 0.3] + "gpt-image-1": { + "low": [0.011, 0.02], + "medium": [0.042, 0.07], + "high": [0.167, 0.25] + }, + "gpt-image-1.5": { + "low": [0.009, 0.02], + "medium": [0.034, 0.062], + "high": [0.133, 0.22] + }, + "gpt-image-2": { + "low": [0.0048, 0.012], + "medium": [0.041, 0.112], + "high": [0.165, 0.43] + } }; - $range := $lookup($ranges, widgets.quality); - $n := widgets.n; + $range := $lookup($lookup($ranges, widgets.model), widgets.quality); + $nRaw := widgets.n; + $n := ($nRaw != null and $nRaw != 0) ? $nRaw : 1; ($n = 1) - ? {"type":"range_usd","min_usd": $range[0], "max_usd": $range[1]} + ? {"type":"range_usd","min_usd": $range[0], "max_usd": $range[1], "format": {"approximate": true}} : { "type":"range_usd", - "min_usd": $range[0], - "max_usd": $range[1], - "format": { "suffix": " x " & $string($n) & "/Run" } + "min_usd": $range[0] * $n, + "max_usd": $range[1] * $n, + "format": { "suffix": "/Run", "approximate": true } } ) """, @@ -483,12 +510,18 @@ class OpenAIGPTImage1(IO.ComfyNode): if mask is not None and image is None: raise ValueError("Cannot use a mask without an input image") + if model in ("gpt-image-1", "gpt-image-1.5"): + if size not in ("auto", "1024x1024", "1024x1536", "1536x1024"): + raise ValueError(f"Resolution {size} is only supported by GPT Image 2 model") + if model == "gpt-image-1": price_extractor = calculate_tokens_price_image_1 elif model == "gpt-image-1.5": price_extractor = calculate_tokens_price_image_1_5 elif model == "gpt-image-2": - price_extractor = calculate_tokens_price_image_1_5 + price_extractor = calculate_tokens_price_image_2_0 + if background == "transparent": + raise ValueError("Transparent background is not supported for GPT Image 2 model") else: raise ValueError(f"Unknown model: {model}") From 5edbdf4364c6c89c3c6a5c6630807b59cb7652ba Mon Sep 17 00:00:00 2001 From: "Daxiong (Lin)" Date: Thu, 23 Apr 2026 22:51:20 +0800 Subject: [PATCH 14/81] chore: update workflow templates to v0.9.61 (#13533) --- requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/requirements.txt b/requirements.txt index a25bc0667..8a6ecf6d8 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,5 +1,5 @@ comfyui-frontend-package==1.42.14 -comfyui-workflow-templates==0.9.59 +comfyui-workflow-templates==0.9.61 comfyui-embedded-docs==0.4.3 torch torchsde From 2a14e1e96afdb8ca744663e3f3f5970c5d023f5b Mon Sep 17 00:00:00 2001 From: "Daxiong (Lin)" Date: Thu, 23 Apr 2026 23:15:47 +0800 Subject: [PATCH 15/81] chore: update embedded docs to v0.4.4 (#13535) --- requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/requirements.txt b/requirements.txt index 8a6ecf6d8..419124f48 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,6 +1,6 @@ comfyui-frontend-package==1.42.14 comfyui-workflow-templates==0.9.61 -comfyui-embedded-docs==0.4.3 +comfyui-embedded-docs==0.4.4 torch torchsde torchvision From abf3d56f27948b122dbcba35847b59e5ff299030 Mon Sep 17 00:00:00 2001 From: Alexander Piskun <13381981+bigcat88@users.noreply.github.com> Date: Thu, 23 Apr 2026 18:49:54 +0300 Subject: [PATCH 16/81] add 4K resolution to Kling nodes (#13536) Signed-off-by: bigcat88 --- comfy_api_nodes/nodes_kling.py | 91 ++++++++++++++++++++++++++-------- 1 file changed, 70 insertions(+), 21 
deletions(-) diff --git a/comfy_api_nodes/nodes_kling.py b/comfy_api_nodes/nodes_kling.py index 9a37ccc53..709b3726c 100644 --- a/comfy_api_nodes/nodes_kling.py +++ b/comfy_api_nodes/nodes_kling.py @@ -276,6 +276,7 @@ async def finish_omni_video_task(cls: type[IO.ComfyNode], response: TaskStatusRe cls, ApiEndpoint(path=f"/proxy/kling/v1/videos/omni-video/{response.data.task_id}"), response_model=TaskStatusResponse, + max_poll_attempts=280, status_extractor=lambda r: (r.data.task_status if r.data else None), ) return IO.NodeOutput(await download_url_to_video_output(final_response.data.task_result.videos[0].url)) @@ -862,7 +863,7 @@ class OmniProTextToVideoNode(IO.ComfyNode): ), IO.Combo.Input("aspect_ratio", options=["16:9", "9:16", "1:1"]), IO.Int.Input("duration", default=5, min=3, max=15, display_mode=IO.NumberDisplay.slider), - IO.Combo.Input("resolution", options=["1080p", "720p"], optional=True), + IO.Combo.Input("resolution", options=["4k", "1080p", "720p"], default="1080p", optional=True), IO.DynamicCombo.Input( "storyboards", options=[ @@ -904,12 +905,13 @@ class OmniProTextToVideoNode(IO.ComfyNode): depends_on=IO.PriceBadgeDepends(widgets=["duration", "resolution", "model_name", "generate_audio"]), expr=""" ( - $mode := (widgets.resolution = "720p") ? "std" : "pro"; + $res := widgets.resolution; + $mode := $res = "4k" ? "4k" : ($res = "720p" ? "std" : "pro"); $isV3 := $contains(widgets.model_name, "v3"); $audio := $isV3 and widgets.generate_audio; $rates := $audio - ? {"std": 0.112, "pro": 0.14} - : {"std": 0.084, "pro": 0.112}; + ? {"std": 0.112, "pro": 0.14, "4k": 0.42} + : {"std": 0.084, "pro": 0.112, "4k": 0.42}; {"type":"usd","usd": $lookup($rates, $mode) * widgets.duration} ) """, @@ -934,6 +936,8 @@ class OmniProTextToVideoNode(IO.ComfyNode): raise ValueError("kling-video-o1 only supports durations of 5 or 10 seconds.") if generate_audio: raise ValueError("kling-video-o1 does not support audio generation.") + if resolution == "4k": + raise ValueError("kling-video-o1 does not support 4k resolution.") stories_enabled = storyboards is not None and storyboards["storyboards"] != "disabled" if stories_enabled and model_name == "kling-video-o1": raise ValueError("kling-video-o1 does not support storyboards.") @@ -963,6 +967,12 @@ class OmniProTextToVideoNode(IO.ComfyNode): f"must equal the global duration ({duration}s)." ) + if resolution == "4k": + mode = "4k" + elif resolution == "1080p": + mode = "pro" + else: + mode = "std" response = await sync_op( cls, ApiEndpoint(path="/proxy/kling/v1/videos/omni-video", method="POST"), @@ -972,7 +982,7 @@ class OmniProTextToVideoNode(IO.ComfyNode): prompt=prompt, aspect_ratio=aspect_ratio, duration=str(duration), - mode="pro" if resolution == "1080p" else "std", + mode=mode, multi_shot=multi_shot, multi_prompt=multi_prompt_list, shot_type="customize" if multi_shot else None, @@ -1014,7 +1024,7 @@ class OmniProFirstLastFrameNode(IO.ComfyNode): optional=True, tooltip="Up to 6 additional reference images.", ), - IO.Combo.Input("resolution", options=["1080p", "720p"], optional=True), + IO.Combo.Input("resolution", options=["4k", "1080p", "720p"], default="1080p", optional=True), IO.DynamicCombo.Input( "storyboards", options=[ @@ -1061,12 +1071,13 @@ class OmniProFirstLastFrameNode(IO.ComfyNode): depends_on=IO.PriceBadgeDepends(widgets=["duration", "resolution", "model_name", "generate_audio"]), expr=""" ( - $mode := (widgets.resolution = "720p") ? "std" : "pro"; + $res := widgets.resolution; + $mode := $res = "4k" ? "4k" : ($res = "720p" ? 
"std" : "pro"); $isV3 := $contains(widgets.model_name, "v3"); $audio := $isV3 and widgets.generate_audio; $rates := $audio - ? {"std": 0.112, "pro": 0.14} - : {"std": 0.084, "pro": 0.112}; + ? {"std": 0.112, "pro": 0.14, "4k": 0.42} + : {"std": 0.084, "pro": 0.112, "4k": 0.42}; {"type":"usd","usd": $lookup($rates, $mode) * widgets.duration} ) """, @@ -1093,6 +1104,8 @@ class OmniProFirstLastFrameNode(IO.ComfyNode): raise ValueError("kling-video-o1 does not support durations greater than 10 seconds.") if generate_audio: raise ValueError("kling-video-o1 does not support audio generation.") + if resolution == "4k": + raise ValueError("kling-video-o1 does not support 4k resolution.") stories_enabled = storyboards is not None and storyboards["storyboards"] != "disabled" if stories_enabled and model_name == "kling-video-o1": raise ValueError("kling-video-o1 does not support storyboards.") @@ -1161,6 +1174,12 @@ class OmniProFirstLastFrameNode(IO.ComfyNode): validate_image_aspect_ratio(i, (1, 2.5), (2.5, 1)) for i in await upload_images_to_comfyapi(cls, reference_images, wait_label="Uploading reference frame(s)"): image_list.append(OmniParamImage(image_url=i)) + if resolution == "4k": + mode = "4k" + elif resolution == "1080p": + mode = "pro" + else: + mode = "std" response = await sync_op( cls, ApiEndpoint(path="/proxy/kling/v1/videos/omni-video", method="POST"), @@ -1170,7 +1189,7 @@ class OmniProFirstLastFrameNode(IO.ComfyNode): prompt=prompt, duration=str(duration), image_list=image_list, - mode="pro" if resolution == "1080p" else "std", + mode=mode, sound="on" if generate_audio else "off", multi_shot=multi_shot, multi_prompt=multi_prompt_list, @@ -1204,7 +1223,7 @@ class OmniProImageToVideoNode(IO.ComfyNode): "reference_images", tooltip="Up to 7 reference images.", ), - IO.Combo.Input("resolution", options=["1080p", "720p"], optional=True), + IO.Combo.Input("resolution", options=["4k", "1080p", "720p"], default="1080p", optional=True), IO.DynamicCombo.Input( "storyboards", options=[ @@ -1251,12 +1270,13 @@ class OmniProImageToVideoNode(IO.ComfyNode): depends_on=IO.PriceBadgeDepends(widgets=["duration", "resolution", "model_name", "generate_audio"]), expr=""" ( - $mode := (widgets.resolution = "720p") ? "std" : "pro"; + $res := widgets.resolution; + $mode := $res = "4k" ? "4k" : ($res = "720p" ? "std" : "pro"); $isV3 := $contains(widgets.model_name, "v3"); $audio := $isV3 and widgets.generate_audio; $rates := $audio - ? {"std": 0.112, "pro": 0.14} - : {"std": 0.084, "pro": 0.112}; + ? 
{"std": 0.112, "pro": 0.14, "4k": 0.42} + : {"std": 0.084, "pro": 0.112, "4k": 0.42}; {"type":"usd","usd": $lookup($rates, $mode) * widgets.duration} ) """, @@ -1282,6 +1302,8 @@ class OmniProImageToVideoNode(IO.ComfyNode): raise ValueError("kling-video-o1 does not support durations greater than 10 seconds.") if generate_audio: raise ValueError("kling-video-o1 does not support audio generation.") + if resolution == "4k": + raise ValueError("kling-video-o1 does not support 4k resolution.") stories_enabled = storyboards is not None and storyboards["storyboards"] != "disabled" if stories_enabled and model_name == "kling-video-o1": raise ValueError("kling-video-o1 does not support storyboards.") @@ -1320,6 +1342,12 @@ class OmniProImageToVideoNode(IO.ComfyNode): image_list: list[OmniParamImage] = [] for i in await upload_images_to_comfyapi(cls, reference_images, wait_label="Uploading reference image"): image_list.append(OmniParamImage(image_url=i)) + if resolution == "4k": + mode = "4k" + elif resolution == "1080p": + mode = "pro" + else: + mode = "std" response = await sync_op( cls, ApiEndpoint(path="/proxy/kling/v1/videos/omni-video", method="POST"), @@ -1330,7 +1358,7 @@ class OmniProImageToVideoNode(IO.ComfyNode): aspect_ratio=aspect_ratio, duration=str(duration), image_list=image_list, - mode="pro" if resolution == "1080p" else "std", + mode=mode, sound="on" if generate_audio else "off", multi_shot=multi_shot, multi_prompt=multi_prompt_list, @@ -2860,7 +2888,7 @@ class KlingVideoNode(IO.ComfyNode): IO.DynamicCombo.Option( "kling-v3", [ - IO.Combo.Input("resolution", options=["1080p", "720p"]), + IO.Combo.Input("resolution", options=["4k", "1080p", "720p"], default="1080p"), IO.Combo.Input( "aspect_ratio", options=["16:9", "9:16", "1:1"], @@ -2913,7 +2941,11 @@ class KlingVideoNode(IO.ComfyNode): ), expr=""" ( - $rates := {"1080p": {"off": 0.112, "on": 0.168}, "720p": {"off": 0.084, "on": 0.126}}; + $rates := { + "4k": {"off": 0.42, "on": 0.42}, + "1080p": {"off": 0.112, "on": 0.168}, + "720p": {"off": 0.084, "on": 0.126} + }; $res := $lookup(widgets, "model.resolution"); $audio := widgets.generate_audio ? 
"on" : "off"; $rate := $lookup($lookup($rates, $res), $audio); @@ -2943,7 +2975,12 @@ class KlingVideoNode(IO.ComfyNode): start_frame: Input.Image | None = None, ) -> IO.NodeOutput: _ = seed - mode = "pro" if model["resolution"] == "1080p" else "std" + if model["resolution"] == "4k": + mode = "4k" + elif model["resolution"] == "1080p": + mode = "pro" + else: + mode = "std" custom_multi_shot = False if multi_shot["multi_shot"] == "disabled": shot_type = None @@ -3025,6 +3062,7 @@ class KlingVideoNode(IO.ComfyNode): cls, ApiEndpoint(path=poll_path), response_model=TaskStatusResponse, + max_poll_attempts=280, status_extractor=lambda r: (r.data.task_status if r.data else None), ) return IO.NodeOutput(await download_url_to_video_output(final_response.data.task_result.videos[0].url)) @@ -3057,7 +3095,7 @@ class KlingFirstLastFrameNode(IO.ComfyNode): IO.DynamicCombo.Option( "kling-v3", [ - IO.Combo.Input("resolution", options=["1080p", "720p"]), + IO.Combo.Input("resolution", options=["4k", "1080p", "720p"], default="1080p"), ], ), ], @@ -3089,7 +3127,11 @@ class KlingFirstLastFrameNode(IO.ComfyNode): ), expr=""" ( - $rates := {"1080p": {"off": 0.112, "on": 0.168}, "720p": {"off": 0.084, "on": 0.126}}; + $rates := { + "4k": {"off": 0.42, "on": 0.42}, + "1080p": {"off": 0.112, "on": 0.168}, + "720p": {"off": 0.084, "on": 0.126} + }; $res := $lookup(widgets, "model.resolution"); $audio := widgets.generate_audio ? "on" : "off"; $rate := $lookup($lookup($rates, $res), $audio); @@ -3118,6 +3160,12 @@ class KlingFirstLastFrameNode(IO.ComfyNode): validate_image_aspect_ratio(end_frame, (1, 2.5), (2.5, 1)) image_url = await upload_image_to_comfyapi(cls, first_frame, wait_label="Uploading first frame") image_tail_url = await upload_image_to_comfyapi(cls, end_frame, wait_label="Uploading end frame") + if model["resolution"] == "4k": + mode = "4k" + elif model["resolution"] == "1080p": + mode = "pro" + else: + mode = "std" response = await sync_op( cls, ApiEndpoint(path="/proxy/kling/v1/videos/image2video", method="POST"), @@ -3127,7 +3175,7 @@ class KlingFirstLastFrameNode(IO.ComfyNode): image=image_url, image_tail=image_tail_url, prompt=prompt, - mode="pro" if model["resolution"] == "1080p" else "std", + mode=mode, duration=str(duration), sound="on" if generate_audio else "off", ), @@ -3140,6 +3188,7 @@ class KlingFirstLastFrameNode(IO.ComfyNode): cls, ApiEndpoint(path=f"/proxy/kling/v1/videos/image2video/{response.data.task_id}"), response_model=TaskStatusResponse, + max_poll_attempts=280, status_extractor=lambda r: (r.data.task_status if r.data else None), ) return IO.NodeOutput(await download_url_to_video_output(final_response.data.task_result.videos[0].url)) From 6fbb6b6f49ccd1d7d336368540b71248e3701dde Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jukka=20Sepp=C3=A4nen?= <40791699+kijai@users.noreply.github.com> Date: Thu, 23 Apr 2026 21:13:17 +0300 Subject: [PATCH 17/81] Fix LTXV Reference Audio node (#13531) --- comfy_extras/nodes_lt.py | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/comfy_extras/nodes_lt.py b/comfy_extras/nodes_lt.py index d7c2e8744..19d8a387f 100644 --- a/comfy_extras/nodes_lt.py +++ b/comfy_extras/nodes_lt.py @@ -1,6 +1,7 @@ import nodes import node_helpers import torch +import torchaudio import comfy.model_management import comfy.model_sampling import comfy.samplers @@ -711,7 +712,14 @@ class LTXVReferenceAudio(io.ComfyNode): @classmethod def execute(cls, model, positive, negative, reference_audio, audio_vae, identity_guidance_scale, start_percent, 
end_percent) -> io.NodeOutput: # Encode reference audio to latents and patchify - audio_latents = audio_vae.encode(reference_audio) + sample_rate = reference_audio["sample_rate"] + vae_sample_rate = getattr(audio_vae, "audio_sample_rate", 44100) + if vae_sample_rate != sample_rate: + waveform = torchaudio.functional.resample(reference_audio["waveform"], sample_rate, vae_sample_rate) + else: + waveform = reference_audio["waveform"] + + audio_latents = audio_vae.encode(waveform.movedim(1, -1)) b, c, t, f = audio_latents.shape ref_tokens = audio_latents.permute(0, 2, 1, 3).reshape(b, t, c * f) ref_audio = {"tokens": ref_tokens} From ef8f3cbcdc214b3b1647d3ad845aae99a3bf95d1 Mon Sep 17 00:00:00 2001 From: rattus <46076784+rattus128@users.noreply.github.com> Date: Fri, 24 Apr 2026 04:14:13 +1000 Subject: [PATCH 18/81] comfy-aimdo 0.2.14: Hotfix async allocator estimations (#13534) This was over-estimating the VRAM used by the async allocator when lots of small tensors were in play. Also change the versioning scheme to == so we can roll aimdo forward without worrying about regressions in stable ComfyUI core downstream. --- requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/requirements.txt b/requirements.txt index 419124f48..7a2e4e0a2 100644 --- a/requirements.txt +++ b/requirements.txt @@ -23,7 +23,7 @@ SQLAlchemy>=2.0 filelock av>=14.2.0 comfy-kitchen>=0.2.8 -comfy-aimdo>=0.2.12 +comfy-aimdo==0.2.14 requests simpleeval>=1.0.0 blake3 From 084e08c6e2d1c2c450fb74ec4f2ac39c31ea69bc Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jukka=20Sepp=C3=A4nen?= <40791699+kijai@users.noreply.github.com> Date: Thu, 23 Apr 2026 21:14:42 +0300 Subject: [PATCH 19/81] Disable sageattention for SAM3 (#13529) Causes NaNs --- comfy/ldm/sam3/detector.py | 2 +- comfy/ldm/sam3/sam.py | 4 ++-- comfy/ldm/sam3/tracker.py | 6 +++--- 3 files changed, 6 insertions(+), 6 deletions(-) diff --git a/comfy/ldm/sam3/detector.py b/comfy/ldm/sam3/detector.py index 6ae919a79..12d3a01ab 100644 --- a/comfy/ldm/sam3/detector.py +++ b/comfy/ldm/sam3/detector.py @@ -54,7 +54,7 @@ class SplitMHA(nn.Module): if mask is not None and mask.ndim == 2: mask = mask[:, None, None, :] # [B, T] -> [B, 1, 1, T] for SDPA broadcast dtype = q.dtype # manual_cast may produce mixed dtypes - out = optimized_attention(q, k.to(dtype), v.to(dtype), self.num_heads, mask=mask) + out = optimized_attention(q, k.to(dtype), v.to(dtype), self.num_heads, mask=mask, low_precision_attention=False) return self.out_proj(out) diff --git a/comfy/ldm/sam3/sam.py b/comfy/ldm/sam3/sam.py index 272781d45..75cb457cf 100644 --- a/comfy/ldm/sam3/sam.py +++ b/comfy/ldm/sam3/sam.py @@ -40,7 +40,7 @@ class SAMAttention(nn.Module): q = self.q_proj(q) k = self.k_proj(k) v = self.v_proj(v) - return self.out_proj(optimized_attention(q, k, v, self.num_heads)) + return self.out_proj(optimized_attention(q, k, v, self.num_heads, low_precision_attention=False)) class TwoWayAttentionBlock(nn.Module): @@ -179,7 +179,7 @@ class Attention(nn.Module): q, k, v = qkv.permute(2, 0, 3, 1, 4).unbind(dim=0) if self.use_rope and freqs_cis is not None: q, k = apply_rope(q, k, freqs_cis) - return self.proj(optimized_attention(q, k, v, self.num_heads, skip_reshape=True)) + return self.proj(optimized_attention(q, k, v, self.num_heads, skip_reshape=True, low_precision_attention=False)) class Block(nn.Module): diff --git a/comfy/ldm/sam3/tracker.py b/comfy/ldm/sam3/tracker.py index 6ff6369d1..8f7481003 100644 --- a/comfy/ldm/sam3/tracker.py +++ b/comfy/ldm/sam3/tracker.py
@@ -364,7 +364,7 @@ class SplitAttn(nn.Module): v = self.v_proj(v) if rope is not None: q, k = apply_rope_memory(q, k, rope, self.num_heads, num_k_exclude_rope) - out = optimized_attention(q, k, v, self.num_heads) + out = optimized_attention(q, k, v, self.num_heads, low_precision_attention=False) return self.out_proj(out) @@ -657,7 +657,7 @@ class DecoupledMemoryAttnLayer(nn.Module): v = self.self_attn_v_proj(normed) if rope is not None: q, k = apply_rope_memory(q, k, rope, self.num_heads, 0) - x = x + self.self_attn_out_proj(optimized_attention(q, k, v, self.num_heads)) + x = x + self.self_attn_out_proj(optimized_attention(q, k, v, self.num_heads, low_precision_attention=False)) # Decoupled cross-attention: fuse image and memory projections normed = self.norm2(x) @@ -668,7 +668,7 @@ v = self.cross_attn_v_proj(memory) if rope is not None: q, k = apply_rope_memory(q, k, rope, self.num_heads, num_k_exclude_rope) - x = x + self.cross_attn_out_proj(optimized_attention(q, k, v, self.num_heads)) + x = x + self.cross_attn_out_proj(optimized_attention(q, k, v, self.num_heads, low_precision_attention=False)) # FFN x = x + self.linear2(F.gelu(self.linear1(self.norm3(x)))) From 2327fa1c908602076318e5ffca02a45d4a7e6af8 Mon Sep 17 00:00:00 2001 From: rattus <46076784+rattus128@users.noreply.github.com> Date: Fri, 24 Apr 2026 08:20:24 +1000 Subject: [PATCH 20/81] execution: Add anti-cycle validation (#13169) Currently, if the graph contains a cycle, validation just recurses infinitely, hits a catch-all, and then throws a generic error against the output node that seeded the validation. Instead, fail the offending cycling node chain and handle it as an error in its own right. Co-authored-by: guill --- execution.py | 38 ++++++++++++++++++++++++++++++++------ 1 file changed, 32 insertions(+), 6 deletions(-) diff --git a/execution.py b/execution.py index 5e02dffb2..e15eb4bda 100644 --- a/execution.py +++ b/execution.py @@ -811,11 +811,30 @@ class PromptExecutor: self._notify_prompt_lifecycle("end", prompt_id) -async def validate_inputs(prompt_id, prompt, item, validated): +async def validate_inputs(prompt_id, prompt, item, validated, visiting=None): + if visiting is None: + visiting = [] + unique_id = item if unique_id in validated: return validated[unique_id] + if unique_id in visiting: + cycle_path_nodes = visiting[visiting.index(unique_id):] + [unique_id] + cycle_nodes = list(dict.fromkeys(cycle_path_nodes)) + cycle_path = " -> ".join(f"{node_id} ({prompt[node_id]['class_type']})" for node_id in cycle_path_nodes) + for node_id in cycle_nodes: + validated[node_id] = (False, [{ + "type": "dependency_cycle", + "message": "Dependency cycle detected", + "details": cycle_path, + "extra_info": { + "node_id": node_id, + "cycle_nodes": cycle_nodes, + } + }], node_id) + return validated[unique_id] + inputs = prompt[unique_id]['inputs'] class_type = prompt[unique_id]['class_type'] obj_class = nodes.NODE_CLASS_MAPPINGS[class_type] @@ -899,7 +918,11 @@ async def validate_inputs(prompt_id, prompt, item, validated): errors.append(error) continue try: - r = await validate_inputs(prompt_id, prompt, o_id, validated) + visiting.append(unique_id) + try: + r = await validate_inputs(prompt_id, prompt, o_id, validated, visiting) + finally: + visiting.pop() if r[0] is False: # `r` will be set in `validated[o_id]` already valid = False @@ -1048,10 +1071,13 @@ async def validate_inputs(prompt_id, prompt, item, validated): errors.append(error) continue - if len(errors) > 0 or valid is not True: -
ret = (False, errors, unique_id) - else: - ret = (True, [], unique_id) + ret = validated.get(unique_id, (True, [], unique_id)) + # Recursive cycle detection may have already populated an error on us. Join it. + ret = ( + ret[0] and valid is True and not errors, + ret[1] + [error for error in errors if error not in ret[1]], + unique_id, + ) validated[unique_id] = ret return ret From 47ccecaee009cce148e8c2a5bdc2ecb302cc52ee Mon Sep 17 00:00:00 2001 From: "Daxiong (Lin)" Date: Fri, 24 Apr 2026 07:56:13 +0800 Subject: [PATCH 21/81] chore: update workflow templates to v0.9.62 (#13539) --- requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/requirements.txt b/requirements.txt index 7a2e4e0a2..346ce4b76 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,5 +1,5 @@ comfyui-frontend-package==1.42.14 -comfyui-workflow-templates==0.9.61 +comfyui-workflow-templates==0.9.62 comfyui-embedded-docs==0.4.4 torch torchsde From c5d9edacd0d92cf2b6d9f82e6b60d6250c269e9e Mon Sep 17 00:00:00 2001 From: comfyanonymous <121283862+comfyanonymous@users.noreply.github.com> Date: Thu, 23 Apr 2026 19:19:00 -0700 Subject: [PATCH 22/81] Print more tensor values in the preview any node. (#13544) --- comfy_extras/nodes_preview_any.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/comfy_extras/nodes_preview_any.py b/comfy_extras/nodes_preview_any.py index 0a1558f2b..17e25d514 100644 --- a/comfy_extras/nodes_preview_any.py +++ b/comfy_extras/nodes_preview_any.py @@ -1,5 +1,6 @@ import json from comfy.comfy_types.node_typing import IO +import torch # Preview Any - original implement from # https://github.com/rgthree/rgthree-comfy/blob/main/py/display_any.py @@ -19,6 +20,7 @@ class PreviewAny(): SEARCH_ALIASES = ["show output", "inspect", "debug", "print value", "show text"] def main(self, source=None): + torch.set_printoptions(edgeitems=6) value = 'None' if isinstance(source, str): value = source @@ -33,6 +35,7 @@ class PreviewAny(): except Exception: value = 'source exists, but could not be serialized.' 
+ torch.set_printoptions() return {"ui": {"text": (value,)}, "result": (value,)} NODE_CLASS_MAPPINGS = { From 00d2f4047db3de6c14f965f6f34354d5ed5d0ccc Mon Sep 17 00:00:00 2001 From: Terry Jia Date: Thu, 23 Apr 2026 23:42:22 -0400 Subject: [PATCH 23/81] fix: use textureSize instead of u_resolution for texel size in blur/sharpen shaders (#13347) * fix: use textureSize instead of u_resolution for texel size in blur/sharpen shaders * fix: remove unused u_resolution uniform and fix Glow shader texelSize --------- Co-authored-by: guill --- blueprints/.glsl/Glow_30.frag | 3 +-- blueprints/.glsl/Image_Blur_1.frag | 3 +-- blueprints/.glsl/Sharpen_23.frag | 3 +-- blueprints/.glsl/Unsharp_Mask_26.frag | 3 +-- blueprints/Glow.json | 2 +- blueprints/Image Blur.json | 2 +- blueprints/Sharpen.json | 2 +- blueprints/Unsharp Mask.json | 2 +- 8 files changed, 8 insertions(+), 12 deletions(-) diff --git a/blueprints/.glsl/Glow_30.frag b/blueprints/.glsl/Glow_30.frag index 0ee152628..f3c85a212 100644 --- a/blueprints/.glsl/Glow_30.frag +++ b/blueprints/.glsl/Glow_30.frag @@ -2,7 +2,6 @@ precision mediump float; uniform sampler2D u_image0; -uniform vec2 u_resolution; uniform int u_int0; // Blend mode uniform int u_int1; // Color tint uniform float u_float0; // Intensity @@ -75,7 +74,7 @@ void main() { float t0 = threshold - 0.15; float t1 = threshold + 0.15; - vec2 texelSize = 1.0 / u_resolution; + vec2 texelSize = 1.0 / vec2(textureSize(u_image0, 0)); float radius2 = radius * radius; float sampleScale = clamp(radius * 0.75, 0.35, 1.0); diff --git a/blueprints/.glsl/Image_Blur_1.frag b/blueprints/.glsl/Image_Blur_1.frag index 83238111d..1819e1695 100644 --- a/blueprints/.glsl/Image_Blur_1.frag +++ b/blueprints/.glsl/Image_Blur_1.frag @@ -12,7 +12,6 @@ const int RADIAL_SAMPLES = 12; const float RADIAL_STRENGTH = 0.0003; uniform sampler2D u_image0; -uniform vec2 u_resolution; uniform int u_int0; // Blur type (BLUR_GAUSSIAN, BLUR_BOX, BLUR_RADIAL) uniform float u_float0; // Blur radius/amount uniform int u_pass; // Pass index (0 = horizontal, 1 = vertical) @@ -25,7 +24,7 @@ float gaussian(float x, float sigma) { } void main() { - vec2 texelSize = 1.0 / u_resolution; + vec2 texelSize = 1.0 / vec2(textureSize(u_image0, 0)); float radius = max(u_float0, 0.0); // Radial (angular) blur - single pass, doesn't use separable diff --git a/blueprints/.glsl/Sharpen_23.frag b/blueprints/.glsl/Sharpen_23.frag index c03f94b66..e7463a329 100644 --- a/blueprints/.glsl/Sharpen_23.frag +++ b/blueprints/.glsl/Sharpen_23.frag @@ -2,14 +2,13 @@ precision highp float; uniform sampler2D u_image0; -uniform vec2 u_resolution; uniform float u_float0; // strength [0.0 – 2.0] typical: 0.3–1.0 in vec2 v_texCoord; layout(location = 0) out vec4 fragColor0; void main() { - vec2 texel = 1.0 / u_resolution; + vec2 texel = 1.0 / vec2(textureSize(u_image0, 0)); // Sample center and neighbors vec4 center = texture(u_image0, v_texCoord); diff --git a/blueprints/.glsl/Unsharp_Mask_26.frag b/blueprints/.glsl/Unsharp_Mask_26.frag index f5990cb4a..d968c9c03 100644 --- a/blueprints/.glsl/Unsharp_Mask_26.frag +++ b/blueprints/.glsl/Unsharp_Mask_26.frag @@ -2,7 +2,6 @@ precision highp float; uniform sampler2D u_image0; -uniform vec2 u_resolution; uniform float u_float0; // amount [0.0 - 3.0] typical: 0.5-1.5 uniform float u_float1; // radius [0.5 - 10.0] blur radius in pixels uniform float u_float2; // threshold [0.0 - 0.1] min difference to sharpen @@ -19,7 +18,7 @@ float getLuminance(vec3 color) { } void main() { - vec2 texel = 1.0 / u_resolution; + vec2 
texel = 1.0 / vec2(textureSize(u_image0, 0)); float radius = max(u_float1, 0.5); float amount = u_float0; float threshold = u_float2; diff --git a/blueprints/Glow.json b/blueprints/Glow.json index 8c690fc68..1dafb2d35 100644 --- a/blueprints/Glow.json +++ b/blueprints/Glow.json @@ -268,7 +268,7 @@ "Node name for S&R": "GLSLShader" }, "widgets_values": [ - "#version 300 es\nprecision mediump float;\n\nuniform sampler2D u_image0;\nuniform vec2 u_resolution;\nuniform int u_int0; // Blend mode\nuniform int u_int1; // Color tint\nuniform float u_float0; // Intensity\nuniform float u_float1; // Radius\nuniform float u_float2; // Threshold\n\nin vec2 v_texCoord;\nout vec4 fragColor;\n\nconst int BLEND_ADD = 0;\nconst int BLEND_SCREEN = 1;\nconst int BLEND_SOFT = 2;\nconst int BLEND_OVERLAY = 3;\nconst int BLEND_LIGHTEN = 4;\n\nconst float GOLDEN_ANGLE = 2.39996323;\nconst int MAX_SAMPLES = 48;\nconst vec3 LUMA = vec3(0.299, 0.587, 0.114);\n\nfloat hash(vec2 p) {\n p = fract(p * vec2(123.34, 456.21));\n p += dot(p, p + 45.32);\n return fract(p.x * p.y);\n}\n\nvec3 hexToRgb(int h) {\n return vec3(\n float((h >> 16) & 255),\n float((h >> 8) & 255),\n float(h & 255)\n ) * (1.0 / 255.0);\n}\n\nvec3 blend(vec3 base, vec3 glow, int mode) {\n if (mode == BLEND_SCREEN) {\n return 1.0 - (1.0 - base) * (1.0 - glow);\n }\n if (mode == BLEND_SOFT) {\n return mix(\n base - (1.0 - 2.0 * glow) * base * (1.0 - base),\n base + (2.0 * glow - 1.0) * (sqrt(base) - base),\n step(0.5, glow)\n );\n }\n if (mode == BLEND_OVERLAY) {\n return mix(\n 2.0 * base * glow,\n 1.0 - 2.0 * (1.0 - base) * (1.0 - glow),\n step(0.5, base)\n );\n }\n if (mode == BLEND_LIGHTEN) {\n return max(base, glow);\n }\n return base + glow;\n}\n\nvoid main() {\n vec4 original = texture(u_image0, v_texCoord);\n \n float intensity = u_float0 * 0.05;\n float radius = u_float1 * u_float1 * 0.012;\n \n if (intensity < 0.001 || radius < 0.1) {\n fragColor = original;\n return;\n }\n \n float threshold = 1.0 - u_float2 * 0.01;\n float t0 = threshold - 0.15;\n float t1 = threshold + 0.15;\n \n vec2 texelSize = 1.0 / u_resolution;\n float radius2 = radius * radius;\n \n float sampleScale = clamp(radius * 0.75, 0.35, 1.0);\n int samples = int(float(MAX_SAMPLES) * sampleScale);\n \n float noise = hash(gl_FragCoord.xy);\n float angleOffset = noise * GOLDEN_ANGLE;\n float radiusJitter = 0.85 + noise * 0.3;\n \n float ca = cos(GOLDEN_ANGLE);\n float sa = sin(GOLDEN_ANGLE);\n vec2 dir = vec2(cos(angleOffset), sin(angleOffset));\n \n vec3 glow = vec3(0.0);\n float totalWeight = 0.0;\n \n // Center tap\n float centerMask = smoothstep(t0, t1, dot(original.rgb, LUMA));\n glow += original.rgb * centerMask * 2.0;\n totalWeight += 2.0;\n \n for (int i = 1; i < MAX_SAMPLES; i++) {\n if (i >= samples) break;\n \n float fi = float(i);\n float dist = sqrt(fi / float(samples)) * radius * radiusJitter;\n \n vec2 offset = dir * dist * texelSize;\n vec3 c = texture(u_image0, v_texCoord + offset).rgb;\n float mask = smoothstep(t0, t1, dot(c, LUMA));\n \n float w = 1.0 - (dist * dist) / (radius2 * 1.5);\n w = max(w, 0.0);\n w *= w;\n \n glow += c * mask * w;\n totalWeight += w;\n \n dir = vec2(\n dir.x * ca - dir.y * sa,\n dir.x * sa + dir.y * ca\n );\n }\n \n glow *= intensity / max(totalWeight, 0.001);\n \n if (u_int1 > 0) {\n glow *= hexToRgb(u_int1);\n }\n \n vec3 result = blend(original.rgb, glow, u_int0);\n result += (noise - 0.5) * (1.0 / 255.0);\n \n fragColor = vec4(clamp(result, 0.0, 1.0), original.a);\n}", + "#version 300 es\nprecision mediump float;\n\nuniform 
sampler2D u_image0;\nuniform int u_int0; // Blend mode\nuniform int u_int1; // Color tint\nuniform float u_float0; // Intensity\nuniform float u_float1; // Radius\nuniform float u_float2; // Threshold\n\nin vec2 v_texCoord;\nout vec4 fragColor;\n\nconst int BLEND_ADD = 0;\nconst int BLEND_SCREEN = 1;\nconst int BLEND_SOFT = 2;\nconst int BLEND_OVERLAY = 3;\nconst int BLEND_LIGHTEN = 4;\n\nconst float GOLDEN_ANGLE = 2.39996323;\nconst int MAX_SAMPLES = 48;\nconst vec3 LUMA = vec3(0.299, 0.587, 0.114);\n\nfloat hash(vec2 p) {\n p = fract(p * vec2(123.34, 456.21));\n p += dot(p, p + 45.32);\n return fract(p.x * p.y);\n}\n\nvec3 hexToRgb(int h) {\n return vec3(\n float((h >> 16) & 255),\n float((h >> 8) & 255),\n float(h & 255)\n ) * (1.0 / 255.0);\n}\n\nvec3 blend(vec3 base, vec3 glow, int mode) {\n if (mode == BLEND_SCREEN) {\n return 1.0 - (1.0 - base) * (1.0 - glow);\n }\n if (mode == BLEND_SOFT) {\n return mix(\n base - (1.0 - 2.0 * glow) * base * (1.0 - base),\n base + (2.0 * glow - 1.0) * (sqrt(base) - base),\n step(0.5, glow)\n );\n }\n if (mode == BLEND_OVERLAY) {\n return mix(\n 2.0 * base * glow,\n 1.0 - 2.0 * (1.0 - base) * (1.0 - glow),\n step(0.5, base)\n );\n }\n if (mode == BLEND_LIGHTEN) {\n return max(base, glow);\n }\n return base + glow;\n}\n\nvoid main() {\n vec4 original = texture(u_image0, v_texCoord);\n \n float intensity = u_float0 * 0.05;\n float radius = u_float1 * u_float1 * 0.012;\n \n if (intensity < 0.001 || radius < 0.1) {\n fragColor = original;\n return;\n }\n \n float threshold = 1.0 - u_float2 * 0.01;\n float t0 = threshold - 0.15;\n float t1 = threshold + 0.15;\n \n vec2 texelSize = 1.0 / vec2(textureSize(u_image0, 0));\n float radius2 = radius * radius;\n \n float sampleScale = clamp(radius * 0.75, 0.35, 1.0);\n int samples = int(float(MAX_SAMPLES) * sampleScale);\n \n float noise = hash(gl_FragCoord.xy);\n float angleOffset = noise * GOLDEN_ANGLE;\n float radiusJitter = 0.85 + noise * 0.3;\n \n float ca = cos(GOLDEN_ANGLE);\n float sa = sin(GOLDEN_ANGLE);\n vec2 dir = vec2(cos(angleOffset), sin(angleOffset));\n \n vec3 glow = vec3(0.0);\n float totalWeight = 0.0;\n \n // Center tap\n float centerMask = smoothstep(t0, t1, dot(original.rgb, LUMA));\n glow += original.rgb * centerMask * 2.0;\n totalWeight += 2.0;\n \n for (int i = 1; i < MAX_SAMPLES; i++) {\n if (i >= samples) break;\n \n float fi = float(i);\n float dist = sqrt(fi / float(samples)) * radius * radiusJitter;\n \n vec2 offset = dir * dist * texelSize;\n vec3 c = texture(u_image0, v_texCoord + offset).rgb;\n float mask = smoothstep(t0, t1, dot(c, LUMA));\n \n float w = 1.0 - (dist * dist) / (radius2 * 1.5);\n w = max(w, 0.0);\n w *= w;\n \n glow += c * mask * w;\n totalWeight += w;\n \n dir = vec2(\n dir.x * ca - dir.y * sa,\n dir.x * sa + dir.y * ca\n );\n }\n \n glow *= intensity / max(totalWeight, 0.001);\n \n if (u_int1 > 0) {\n glow *= hexToRgb(u_int1);\n }\n \n vec3 result = blend(original.rgb, glow, u_int0);\n result += (noise - 0.5) * (1.0 / 255.0);\n \n fragColor = vec4(clamp(result, 0.0, 1.0), original.a);\n}", "from_input" ] }, diff --git a/blueprints/Image Blur.json b/blueprints/Image Blur.json index b1d449e32..3c7a784b0 100644 --- a/blueprints/Image Blur.json +++ b/blueprints/Image Blur.json @@ -331,7 +331,7 @@ "Node name for S&R": "GLSLShader" }, "widgets_values": [ - "#version 300 es\n#pragma passes 2\nprecision highp float;\n\n// Blur type constants\nconst int BLUR_GAUSSIAN = 0;\nconst int BLUR_BOX = 1;\nconst int BLUR_RADIAL = 2;\n\n// Radial blur config\nconst int 
RADIAL_SAMPLES = 12;\nconst float RADIAL_STRENGTH = 0.0003;\n\nuniform sampler2D u_image0;\nuniform vec2 u_resolution;\nuniform int u_int0; // Blur type (BLUR_GAUSSIAN, BLUR_BOX, BLUR_RADIAL)\nuniform float u_float0; // Blur radius/amount\nuniform int u_pass; // Pass index (0 = horizontal, 1 = vertical)\n\nin vec2 v_texCoord;\nlayout(location = 0) out vec4 fragColor0;\n\nfloat gaussian(float x, float sigma) {\n return exp(-(x * x) / (2.0 * sigma * sigma));\n}\n\nvoid main() {\n vec2 texelSize = 1.0 / u_resolution;\n float radius = max(u_float0, 0.0);\n\n // Radial (angular) blur - single pass, doesn't use separable\n if (u_int0 == BLUR_RADIAL) {\n // Only execute on first pass\n if (u_pass > 0) {\n fragColor0 = texture(u_image0, v_texCoord);\n return;\n }\n\n vec2 center = vec2(0.5);\n vec2 dir = v_texCoord - center;\n float dist = length(dir);\n\n if (dist < 1e-4) {\n fragColor0 = texture(u_image0, v_texCoord);\n return;\n }\n\n vec4 sum = vec4(0.0);\n float totalWeight = 0.0;\n float angleStep = radius * RADIAL_STRENGTH;\n\n dir /= dist;\n\n float cosStep = cos(angleStep);\n float sinStep = sin(angleStep);\n\n float negAngle = -float(RADIAL_SAMPLES) * angleStep;\n vec2 rotDir = vec2(\n dir.x * cos(negAngle) - dir.y * sin(negAngle),\n dir.x * sin(negAngle) + dir.y * cos(negAngle)\n );\n\n for (int i = -RADIAL_SAMPLES; i <= RADIAL_SAMPLES; i++) {\n vec2 uv = center + rotDir * dist;\n float w = 1.0 - abs(float(i)) / float(RADIAL_SAMPLES);\n sum += texture(u_image0, uv) * w;\n totalWeight += w;\n\n rotDir = vec2(\n rotDir.x * cosStep - rotDir.y * sinStep,\n rotDir.x * sinStep + rotDir.y * cosStep\n );\n }\n\n fragColor0 = sum / max(totalWeight, 0.001);\n return;\n }\n\n // Separable Gaussian / Box blur\n int samples = int(ceil(radius));\n\n if (samples == 0) {\n fragColor0 = texture(u_image0, v_texCoord);\n return;\n }\n\n // Direction: pass 0 = horizontal, pass 1 = vertical\n vec2 dir = (u_pass == 0) ? 
vec2(1.0, 0.0) : vec2(0.0, 1.0);\n\n vec4 color = vec4(0.0);\n float totalWeight = 0.0;\n float sigma = radius / 2.0;\n\n for (int i = -samples; i <= samples; i++) {\n vec2 offset = dir * float(i) * texelSize;\n vec4 sample_color = texture(u_image0, v_texCoord + offset);\n\n float weight;\n if (u_int0 == BLUR_GAUSSIAN) {\n weight = gaussian(float(i), sigma);\n } else {\n // BLUR_BOX\n weight = 1.0;\n }\n\n color += sample_color * weight;\n totalWeight += weight;\n }\n\n fragColor0 = color / totalWeight;\n}\n", + "#version 300 es\n#pragma passes 2\nprecision highp float;\n\n// Blur type constants\nconst int BLUR_GAUSSIAN = 0;\nconst int BLUR_BOX = 1;\nconst int BLUR_RADIAL = 2;\n\n// Radial blur config\nconst int RADIAL_SAMPLES = 12;\nconst float RADIAL_STRENGTH = 0.0003;\n\nuniform sampler2D u_image0;\nuniform int u_int0; // Blur type (BLUR_GAUSSIAN, BLUR_BOX, BLUR_RADIAL)\nuniform float u_float0; // Blur radius/amount\nuniform int u_pass; // Pass index (0 = horizontal, 1 = vertical)\n\nin vec2 v_texCoord;\nlayout(location = 0) out vec4 fragColor0;\n\nfloat gaussian(float x, float sigma) {\n return exp(-(x * x) / (2.0 * sigma * sigma));\n}\n\nvoid main() {\n vec2 texelSize = 1.0 / vec2(textureSize(u_image0, 0));\n float radius = max(u_float0, 0.0);\n\n // Radial (angular) blur - single pass, doesn't use separable\n if (u_int0 == BLUR_RADIAL) {\n // Only execute on first pass\n if (u_pass > 0) {\n fragColor0 = texture(u_image0, v_texCoord);\n return;\n }\n\n vec2 center = vec2(0.5);\n vec2 dir = v_texCoord - center;\n float dist = length(dir);\n\n if (dist < 1e-4) {\n fragColor0 = texture(u_image0, v_texCoord);\n return;\n }\n\n vec4 sum = vec4(0.0);\n float totalWeight = 0.0;\n float angleStep = radius * RADIAL_STRENGTH;\n\n dir /= dist;\n\n float cosStep = cos(angleStep);\n float sinStep = sin(angleStep);\n\n float negAngle = -float(RADIAL_SAMPLES) * angleStep;\n vec2 rotDir = vec2(\n dir.x * cos(negAngle) - dir.y * sin(negAngle),\n dir.x * sin(negAngle) + dir.y * cos(negAngle)\n );\n\n for (int i = -RADIAL_SAMPLES; i <= RADIAL_SAMPLES; i++) {\n vec2 uv = center + rotDir * dist;\n float w = 1.0 - abs(float(i)) / float(RADIAL_SAMPLES);\n sum += texture(u_image0, uv) * w;\n totalWeight += w;\n\n rotDir = vec2(\n rotDir.x * cosStep - rotDir.y * sinStep,\n rotDir.x * sinStep + rotDir.y * cosStep\n );\n }\n\n fragColor0 = sum / max(totalWeight, 0.001);\n return;\n }\n\n // Separable Gaussian / Box blur\n int samples = int(ceil(radius));\n\n if (samples == 0) {\n fragColor0 = texture(u_image0, v_texCoord);\n return;\n }\n\n // Direction: pass 0 = horizontal, pass 1 = vertical\n vec2 dir = (u_pass == 0) ? 
vec2(1.0, 0.0) : vec2(0.0, 1.0);\n\n vec4 color = vec4(0.0);\n float totalWeight = 0.0;\n float sigma = radius / 2.0;\n\n for (int i = -samples; i <= samples; i++) {\n vec2 offset = dir * float(i) * texelSize;\n vec4 sample_color = texture(u_image0, v_texCoord + offset);\n\n float weight;\n if (u_int0 == BLUR_GAUSSIAN) {\n weight = gaussian(float(i), sigma);\n } else {\n // BLUR_BOX\n weight = 1.0;\n }\n\n color += sample_color * weight;\n totalWeight += weight;\n }\n\n fragColor0 = color / totalWeight;\n}\n", "from_input" ] } diff --git a/blueprints/Sharpen.json b/blueprints/Sharpen.json index bb79f61fc..f332400fd 100644 --- a/blueprints/Sharpen.json +++ b/blueprints/Sharpen.json @@ -267,7 +267,7 @@ "Node name for S&R": "GLSLShader" }, "widgets_values": [ - "#version 300 es\nprecision highp float;\n\nuniform sampler2D u_image0;\nuniform vec2 u_resolution;\nuniform float u_float0; // strength [0.0 – 2.0] typical: 0.3–1.0\n\nin vec2 v_texCoord;\nlayout(location = 0) out vec4 fragColor0;\n\nvoid main() {\n vec2 texel = 1.0 / u_resolution;\n \n // Sample center and neighbors\n vec4 center = texture(u_image0, v_texCoord);\n vec4 top = texture(u_image0, v_texCoord + vec2( 0.0, -texel.y));\n vec4 bottom = texture(u_image0, v_texCoord + vec2( 0.0, texel.y));\n vec4 left = texture(u_image0, v_texCoord + vec2(-texel.x, 0.0));\n vec4 right = texture(u_image0, v_texCoord + vec2( texel.x, 0.0));\n \n // Edge enhancement (Laplacian)\n vec4 edges = center * 4.0 - top - bottom - left - right;\n \n // Add edges back scaled by strength\n vec4 sharpened = center + edges * u_float0;\n \n fragColor0 = vec4(clamp(sharpened.rgb, 0.0, 1.0), center.a);\n}", + "#version 300 es\nprecision highp float;\n\nuniform sampler2D u_image0;\nuniform float u_float0; // strength [0.0 – 2.0] typical: 0.3–1.0\n\nin vec2 v_texCoord;\nlayout(location = 0) out vec4 fragColor0;\n\nvoid main() {\n vec2 texel = 1.0 / vec2(textureSize(u_image0, 0));\n \n // Sample center and neighbors\n vec4 center = texture(u_image0, v_texCoord);\n vec4 top = texture(u_image0, v_texCoord + vec2( 0.0, -texel.y));\n vec4 bottom = texture(u_image0, v_texCoord + vec2( 0.0, texel.y));\n vec4 left = texture(u_image0, v_texCoord + vec2(-texel.x, 0.0));\n vec4 right = texture(u_image0, v_texCoord + vec2( texel.x, 0.0));\n \n // Edge enhancement (Laplacian)\n vec4 edges = center * 4.0 - top - bottom - left - right;\n \n // Add edges back scaled by strength\n vec4 sharpened = center + edges * u_float0;\n \n fragColor0 = vec4(clamp(sharpened.rgb, 0.0, 1.0), center.a);\n}", "from_input" ] } diff --git a/blueprints/Unsharp Mask.json b/blueprints/Unsharp Mask.json index b673eb703..137acaa43 100644 --- a/blueprints/Unsharp Mask.json +++ b/blueprints/Unsharp Mask.json @@ -383,7 +383,7 @@ "Node name for S&R": "GLSLShader" }, "widgets_values": [ - "#version 300 es\nprecision highp float;\n\nuniform sampler2D u_image0;\nuniform vec2 u_resolution;\nuniform float u_float0; // amount [0.0 - 3.0] typical: 0.5-1.5\nuniform float u_float1; // radius [0.5 - 10.0] blur radius in pixels\nuniform float u_float2; // threshold [0.0 - 0.1] min difference to sharpen\n\nin vec2 v_texCoord;\nlayout(location = 0) out vec4 fragColor0;\n\nfloat gaussian(float x, float sigma) {\n return exp(-(x * x) / (2.0 * sigma * sigma));\n}\n\nfloat getLuminance(vec3 color) {\n return dot(color, vec3(0.2126, 0.7152, 0.0722));\n}\n\nvoid main() {\n vec2 texel = 1.0 / u_resolution;\n float radius = max(u_float1, 0.5);\n float amount = u_float0;\n float threshold = u_float2;\n\n vec4 original = 
texture(u_image0, v_texCoord);\n\n // Gaussian blur for the \"unsharp\" mask\n int samples = int(ceil(radius));\n float sigma = radius / 2.0;\n\n vec4 blurred = vec4(0.0);\n float totalWeight = 0.0;\n\n for (int x = -samples; x <= samples; x++) {\n for (int y = -samples; y <= samples; y++) {\n vec2 offset = vec2(float(x), float(y)) * texel;\n vec4 sample_color = texture(u_image0, v_texCoord + offset);\n\n float dist = length(vec2(float(x), float(y)));\n float weight = gaussian(dist, sigma);\n blurred += sample_color * weight;\n totalWeight += weight;\n }\n }\n blurred /= totalWeight;\n\n // Unsharp mask = original - blurred\n vec3 mask = original.rgb - blurred.rgb;\n\n // Luminance-based threshold with smooth falloff\n float lumaDelta = abs(getLuminance(original.rgb) - getLuminance(blurred.rgb));\n float thresholdScale = smoothstep(0.0, threshold, lumaDelta);\n mask *= thresholdScale;\n\n // Sharpen: original + mask * amount\n vec3 sharpened = original.rgb + mask * amount;\n\n fragColor0 = vec4(clamp(sharpened, 0.0, 1.0), original.a);\n}\n", + "#version 300 es\nprecision highp float;\n\nuniform sampler2D u_image0;\nuniform float u_float0; // amount [0.0 - 3.0] typical: 0.5-1.5\nuniform float u_float1; // radius [0.5 - 10.0] blur radius in pixels\nuniform float u_float2; // threshold [0.0 - 0.1] min difference to sharpen\n\nin vec2 v_texCoord;\nlayout(location = 0) out vec4 fragColor0;\n\nfloat gaussian(float x, float sigma) {\n return exp(-(x * x) / (2.0 * sigma * sigma));\n}\n\nfloat getLuminance(vec3 color) {\n return dot(color, vec3(0.2126, 0.7152, 0.0722));\n}\n\nvoid main() {\n vec2 texel = 1.0 / vec2(textureSize(u_image0, 0));\n float radius = max(u_float1, 0.5);\n float amount = u_float0;\n float threshold = u_float2;\n\n vec4 original = texture(u_image0, v_texCoord);\n\n // Gaussian blur for the \"unsharp\" mask\n int samples = int(ceil(radius));\n float sigma = radius / 2.0;\n\n vec4 blurred = vec4(0.0);\n float totalWeight = 0.0;\n\n for (int x = -samples; x <= samples; x++) {\n for (int y = -samples; y <= samples; y++) {\n vec2 offset = vec2(float(x), float(y)) * texel;\n vec4 sample_color = texture(u_image0, v_texCoord + offset);\n\n float dist = length(vec2(float(x), float(y)));\n float weight = gaussian(dist, sigma);\n blurred += sample_color * weight;\n totalWeight += weight;\n }\n }\n blurred /= totalWeight;\n\n // Unsharp mask = original - blurred\n vec3 mask = original.rgb - blurred.rgb;\n\n // Luminance-based threshold with smooth falloff\n float lumaDelta = abs(getLuminance(original.rgb) - getLuminance(blurred.rgb));\n float thresholdScale = smoothstep(0.0, threshold, lumaDelta);\n mask *= thresholdScale;\n\n // Sharpen: original + mask * amount\n vec3 sharpened = original.rgb + mask * amount;\n\n fragColor0 = vec4(clamp(sharpened, 0.0, 1.0), original.a);\n}\n", "from_input" ] } From 2e0503780d8cd4285d2b883ba5ba1ea152eb194e Mon Sep 17 00:00:00 2001 From: Terry Jia Date: Thu, 23 Apr 2026 23:51:34 -0400 Subject: [PATCH 24/81] range type (#13322) Co-authored-by: guill --- comfy_api/input/__init__.py | 2 + comfy_api/latest/_input/__init__.py | 2 + comfy_api/latest/_input/range_types.py | 70 ++++++++++++++++++++++++++ comfy_api/latest/_io.py | 38 ++++++++++++++ 4 files changed, 112 insertions(+) create mode 100644 comfy_api/latest/_input/range_types.py diff --git a/comfy_api/input/__init__.py b/comfy_api/input/__init__.py index 16d4acfd1..dc33533cc 100644 --- a/comfy_api/input/__init__.py +++ b/comfy_api/input/__init__.py @@ -9,6 +9,7 @@ from comfy_api.latest._input import ( 
CurveInput, MonotoneCubicCurve, LinearCurve, + RangeInput, ) __all__ = [ @@ -21,4 +22,5 @@ __all__ = [ "CurveInput", "MonotoneCubicCurve", "LinearCurve", + "RangeInput", ] diff --git a/comfy_api/latest/_input/__init__.py b/comfy_api/latest/_input/__init__.py index 05cd3d40a..f0229717e 100644 --- a/comfy_api/latest/_input/__init__.py +++ b/comfy_api/latest/_input/__init__.py @@ -1,5 +1,6 @@ from .basic_types import ImageInput, AudioInput, MaskInput, LatentInput from .curve_types import CurvePoint, CurveInput, MonotoneCubicCurve, LinearCurve +from .range_types import RangeInput from .video_types import VideoInput __all__ = [ @@ -12,4 +13,5 @@ __all__ = [ "CurveInput", "MonotoneCubicCurve", "LinearCurve", + "RangeInput", ] diff --git a/comfy_api/latest/_input/range_types.py b/comfy_api/latest/_input/range_types.py new file mode 100644 index 000000000..f4c5cb290 --- /dev/null +++ b/comfy_api/latest/_input/range_types.py @@ -0,0 +1,70 @@ +from __future__ import annotations + +import logging +import math +import numpy as np + +logger = logging.getLogger(__name__) + + +class RangeInput: + """Represents a levels/range adjustment: input range [min, max] with + optional midpoint (gamma control). + + Generates a 1D LUT identical to GIMP's levels mapping: + 1. Normalize input to [0, 1] using [min, max] + 2. Apply gamma correction: pow(value, 1/gamma) + 3. Clamp to [0, 1] + + The midpoint field is a position in [0, 1] representing where the + midtone falls within [min, max]. It maps to gamma via: + gamma = -log2(midpoint) + So midpoint=0.5 → gamma=1.0 (linear). + """ + + def __init__(self, min_val: float, max_val: float, midpoint: float | None = None): + self.min_val = min_val + self.max_val = max_val + self.midpoint = midpoint + + @staticmethod + def from_raw(data) -> RangeInput: + if isinstance(data, RangeInput): + return data + if isinstance(data, dict): + return RangeInput( + min_val=float(data.get("min", 0.0)), + max_val=float(data.get("max", 1.0)), + midpoint=float(data["midpoint"]) if data.get("midpoint") is not None else None, + ) + raise TypeError(f"Cannot convert {type(data)} to RangeInput") + + def to_lut(self, size: int = 256) -> np.ndarray: + """Generate a float64 lookup table mapping [0, 1] input through this + levels adjustment. + + The LUT maps normalized input values (0..1) to output values (0..1), + matching the GIMP levels formula. 
+ """ + xs = np.linspace(0.0, 1.0, size, dtype=np.float64) + + in_range = self.max_val - self.min_val + if abs(in_range) < 1e-10: + return np.where(xs >= self.min_val, 1.0, 0.0).astype(np.float64) + + # Normalize: map [min, max] → [0, 1] + result = (xs - self.min_val) / in_range + result = np.clip(result, 0.0, 1.0) + + # Gamma correction from midpoint + if self.midpoint is not None and self.midpoint > 0 and self.midpoint != 0.5: + gamma = max(-math.log2(self.midpoint), 0.001) + inv_gamma = 1.0 / gamma + mask = result > 0 + result[mask] = np.power(result[mask], inv_gamma) + + return result + + def __repr__(self) -> str: + mid = f", midpoint={self.midpoint}" if self.midpoint is not None else "" + return f"RangeInput(min={self.min_val}, max={self.max_val}{mid})" diff --git a/comfy_api/latest/_io.py b/comfy_api/latest/_io.py index fdeffea2d..4942ed46c 100644 --- a/comfy_api/latest/_io.py +++ b/comfy_api/latest/_io.py @@ -1266,6 +1266,43 @@ class Histogram(ComfyTypeIO): Type = list[int] +@comfytype(io_type="RANGE") +class Range(ComfyTypeIO): + from comfy_api.input import RangeInput + if TYPE_CHECKING: + Type = RangeInput + + class Input(WidgetInput): + def __init__(self, id: str, display_name: str=None, optional=False, tooltip: str=None, + socketless: bool=True, default: dict=None, + display: str=None, + gradient_stops: list=None, + show_midpoint: bool=None, + midpoint_scale: str=None, + value_min: float=None, + value_max: float=None, + advanced: bool=None): + super().__init__(id, display_name, optional, tooltip, None, default, socketless, None, None, None, None, advanced) + if default is None: + self.default = {"min": 0.0, "max": 1.0} + self.display = display + self.gradient_stops = gradient_stops + self.show_midpoint = show_midpoint + self.midpoint_scale = midpoint_scale + self.value_min = value_min + self.value_max = value_max + + def as_dict(self): + return super().as_dict() | prune_dict({ + "display": self.display, + "gradient_stops": self.gradient_stops, + "show_midpoint": self.show_midpoint, + "midpoint_scale": self.midpoint_scale, + "value_min": self.value_min, + "value_max": self.value_max, + }) + + DYNAMIC_INPUT_LOOKUP: dict[str, Callable[[dict[str, Any], dict[str, Any], tuple[str, dict[str, Any]], str, list[str] | None], None]] = {} def register_dynamic_input_func(io_type: str, func: Callable[[dict[str, Any], dict[str, Any], tuple[str, dict[str, Any]], str, list[str] | None], None]): DYNAMIC_INPUT_LOOKUP[io_type] = func @@ -2276,5 +2313,6 @@ __all__ = [ "BoundingBox", "Curve", "Histogram", + "Range", "NodeReplace", ] From 443074eee92fb0f41b38b83404010069fdb25860 Mon Sep 17 00:00:00 2001 From: Matt Miller Date: Thu, 23 Apr 2026 21:00:25 -0700 Subject: [PATCH 25/81] Add OpenAPI 3.1 specification for ComfyUI API (#13397) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * Add OpenAPI 3.1 specification for ComfyUI API Adds a comprehensive OpenAPI 3.1 spec documenting all HTTP endpoints exposed by ComfyUI's server, including prompt execution, queue management, file uploads, userdata, settings, system stats, object info, assets, and internal routes. The spec was validated against the source code with adversarial review from multiple models, and passes Spectral linting with zero errors. Also removes openapi.yaml from .gitignore so the spec is tracked. 
* Mark /api/history endpoints as deprecated Address Jacob's review feedback on PR #13397 by explicitly marking the three /api/history operations as deprecated in the OpenAPI spec: * GET /api/history -> superseded by GET /api/jobs * POST /api/history -> superseded by /api/jobs management * GET /api/history/{prompt_id} -> superseded by GET /api/jobs/{job_id} Each operation gains deprecated: true plus a description that names the replacement. A formal sunset timeline (RFC 8594 Deprecation and RFC 8553 Sunset headers, minimum-runway policy) is being defined separately and will be applied as a follow-up. * Address Spectral lint findings in openapi.yaml - Add operation descriptions to 52 endpoints (prompt, queue, upload, view, models, userdata, settings, assets, internal, etc.) - Add schema descriptions to 22 component schemas - Add parameter descriptions to 8 path parameters that were missing them - Remove 6 unused component schemas: TaskOutput, EmbeddingsResponse, ExtensionsResponse, LogRawResponse, UserInfo, UserDataFullInfo No wire/shape changes. Reduces Spectral findings from 92 to 4. The remaining 4 are real issues (WebSocket 101 on /ws, loose error schema, and two snake_case warnings on real wire field names) and are worth addressing separately. * fix(openapi): address jtreminio oneOf review on /api/userdata Restructure the UserData response schemas to address the review feedback on the `oneOf` without a discriminator, and fix two accuracy bugs found while doing it. Changes - GET /api/userdata response: extract the inline `oneOf` to a named schema (`ListUserdataResponse`) and add the missing third variant returned when `split=true` and `full_info=false` (array of `[relative_path, ...path_components]`). Previously only two of the three actual server response shapes were described. - UserDataResponse (POST endpoints): correct the description — this schema is a single item, not a list — and point at the canonical `GetUserDataResponseFullFile` schema instead of the duplicate `UserDataResponseFull`. Also removes the malformed blank line in `UserDataResponseShort`. - Delete the now-unused `UserDataResponseFull` and `UserDataResponseShort` schemas (replaced by reuse of `GetUserDataResponseFullFile` and an inline string variant). - Add an `x-variant-selector` vendor extension to both `oneOf` sites documenting which query-parameter combination selects which branch, since a true OpenAPI `discriminator` is not applicable (the variants are type-disjoint and the selector lives in the request, not the response body). This keeps the shapes the server actually emits (no wire-breaking change) while making the selection rule explicit for SDK generators and readers. --------- Co-authored-by: guill --- .gitignore | 1 - openapi.yaml | 3231 ++++++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 3231 insertions(+), 1 deletion(-) create mode 100644 openapi.yaml diff --git a/.gitignore b/.gitignore index 2700ad5c2..0ab4ba75e 100644 --- a/.gitignore +++ b/.gitignore @@ -21,6 +21,5 @@ venv*/ *.log web_custom_versions/ .DS_Store -openapi.yaml filtered-openapi.yaml uv.lock diff --git a/openapi.yaml b/openapi.yaml new file mode 100644 index 000000000..77d0e2318 --- /dev/null +++ b/openapi.yaml @@ -0,0 +1,3231 @@ +openapi: 3.1.0 +info: + title: ComfyUI API + description: | + API for ComfyUI - A powerful and modular stable diffusion GUI and backend. 
+ + This API allows you to interact with ComfyUI programmatically, including: + - Submitting and managing workflow executions + - Querying node/object information + - Uploading and viewing files + - Managing user settings and data + - Asset management (feature-gated) + + ## Dual-path routing + Every route registered via `self.routes` in the ComfyUI server is available at + both its bare path (e.g. `/prompt`) and an `/api`-prefixed path (e.g. `/api/prompt`). + This spec uses the `/api`-prefixed versions as canonical. + + ## Multi-user mode + When ComfyUI is started with `--multi-user`, the `Comfy-User` header identifies + the active user for settings, userdata, and history isolation. This is **not** a + security mechanism — it is an organisational convenience with no authentication + or authorisation behind it. + version: 1.0.0 + license: + name: GNU General Public License v3.0 + url: https://github.com/comfyanonymous/ComfyUI/blob/master/LICENSE + +servers: + - url: / + description: Default ComfyUI server (typically http://127.0.0.1:8188) + +tags: + - name: prompt + description: Workflow submission and prompt info + - name: queue + description: Queue inspection and management + - name: history + description: Execution history + - name: upload + description: File upload endpoints + - name: view + description: File viewing / download + - name: system + description: System stats and feature flags + - name: node + description: Node / object_info definitions + - name: model + description: Model folder and file listing + - name: user + description: User management (multi-user mode) + - name: userdata + description: Per-user file storage + - name: settings + description: Per-user settings + - name: extensions + description: Frontend extension JS files + - name: subgraph + description: Global subgraph blueprints + - name: internal + description: Internal / debug endpoints + - name: assets + description: Asset management (feature-gated behind enable-assets) + +paths: + # --------------------------------------------------------------------------- + # WebSocket + # --------------------------------------------------------------------------- + /ws: + get: + operationId: connectWebSocket + tags: [system] + summary: WebSocket connection for real-time updates + description: | + Upgrades to a WebSocket connection that streams execution progress, + node status, and output messages. The server sends an initial `status` + message with the session ID (SID) on connect. + + ## Message types (server → client) + The server sends JSON messages with a `type` field. See the + `x-websocket-messages` list below for the schema of each message type. + parameters: + - name: clientId + in: query + required: false + schema: + type: string + description: Client identifier. If omitted the server assigns one. 
+ responses: + "101": + description: WebSocket upgrade successful + x-websocket-messages: + - type: status + schema: + $ref: "#/components/schemas/StatusWsMessage" + - type: progress + schema: + $ref: "#/components/schemas/ProgressWsMessage" + - type: progress_text + schema: + $ref: "#/components/schemas/ProgressTextWsMessage" + - type: progress_state + schema: + $ref: "#/components/schemas/ProgressStateWsMessage" + - type: executing + schema: + $ref: "#/components/schemas/ExecutingWsMessage" + - type: executed + schema: + $ref: "#/components/schemas/ExecutedWsMessage" + - type: execution_start + schema: + $ref: "#/components/schemas/ExecutionStartWsMessage" + - type: execution_success + schema: + $ref: "#/components/schemas/ExecutionSuccessWsMessage" + - type: execution_cached + schema: + $ref: "#/components/schemas/ExecutionCachedWsMessage" + - type: execution_interrupted + schema: + $ref: "#/components/schemas/ExecutionInterruptedWsMessage" + - type: execution_error + schema: + $ref: "#/components/schemas/ExecutionErrorWsMessage" + - type: logs + schema: + $ref: "#/components/schemas/LogsWsMessage" + - type: notification + schema: + $ref: "#/components/schemas/NotificationWsMessage" + - type: feature_flags + schema: + $ref: "#/components/schemas/FeatureFlagsWsMessage" + - type: asset_download + schema: + $ref: "#/components/schemas/AssetDownloadWsMessage" + - type: asset_export + schema: + $ref: "#/components/schemas/AssetExportWsMessage" + + # --------------------------------------------------------------------------- + # Prompt + # --------------------------------------------------------------------------- + /api/prompt: + get: + operationId: getPromptInfo + tags: [prompt] + summary: Get queue status + description: Returns how many items remain in the execution queue. + responses: + "200": + description: Queue info + content: + application/json: + schema: + $ref: "#/components/schemas/PromptInfo" + post: + operationId: executePrompt + tags: [prompt] + summary: Submit a workflow for execution + description: Submits a workflow for execution. The server validates the graph, assigns a `prompt_id`, and enqueues it. Clients listen on `/ws` for execution progress and output messages. + requestBody: + required: true + content: + application/json: + schema: + $ref: "#/components/schemas/PromptRequest" + responses: + "200": + description: Prompt accepted + content: + application/json: + schema: + $ref: "#/components/schemas/PromptResponse" + "400": + description: Validation or node errors + content: + application/json: + schema: + $ref: "#/components/schemas/PromptErrorResponse" + + # --------------------------------------------------------------------------- + # Queue + # --------------------------------------------------------------------------- + /api/queue: + get: + operationId: getQueue + tags: [queue] + summary: Get running and pending queue items + description: Returns the server's current execution queue, split into the currently-running prompt and the list of pending prompts. + responses: + "200": + description: Queue contents + content: + application/json: + schema: + $ref: "#/components/schemas/QueueInfo" + post: + operationId: manageQueue + tags: [queue] + summary: Clear or delete items from the queue + description: Mutates the execution queue. Supports clearing all queued prompts or deleting individual prompts by ID. 
+ requestBody: + required: true + content: + application/json: + schema: + $ref: "#/components/schemas/QueueManageRequest" + responses: + "200": + description: Queue updated + + /api/interrupt: + post: + operationId: interruptExecution + tags: [queue] + summary: Interrupt current execution + description: Interrupts the prompt that is currently executing. The next queued prompt (if any) will start immediately after. + requestBody: + required: false + content: + application/json: + schema: + type: object + properties: + prompt_id: + type: string + format: uuid + description: "If provided, only interrupts this specific running prompt. Otherwise interrupts all." + responses: + "200": + description: Interrupt signal sent + + /api/free: + post: + operationId: freeMemory + tags: [queue] + summary: Free GPU memory and/or unload models + description: Frees GPU memory by unloading models and/or freeing the resident model cache, controlled by the request flags. + requestBody: + required: false + content: + application/json: + schema: + type: object + properties: + unload_models: + type: boolean + description: Unload all models from VRAM/RAM + free_memory: + type: boolean + description: Run garbage collection and free cached memory + responses: + "200": + description: Memory freed + + # --------------------------------------------------------------------------- + # Jobs + # --------------------------------------------------------------------------- + /api/jobs: + get: + operationId: listJobs + tags: [queue] + summary: List jobs with filtering and pagination + description: Returns a paginated list of completed prompt executions, newest first. + parameters: + - name: status + in: query + schema: + type: string + description: Filter by job status + - name: workflow_id + in: query + schema: + type: string + description: Filter by workflow ID + - name: sort_by + in: query + schema: + type: string + description: Field to sort by + - name: sort_order + in: query + schema: + type: string + enum: [asc, desc] + description: Sort direction + - name: limit + in: query + schema: + type: integer + description: Maximum number of results (default is unlimited/None) + - name: offset + in: query + schema: + type: integer + default: 0 + description: Pagination offset + responses: + "200": + description: Jobs list + content: + application/json: + schema: + type: object + properties: + jobs: + type: array + items: + $ref: "#/components/schemas/JobEntry" + pagination: + $ref: "#/components/schemas/PaginationInfo" + + /api/jobs/{job_id}: + get: + operationId: getJob + tags: [queue] + summary: Get a single job by ID + description: Returns the full record for a single completed prompt execution, including its outputs, status, and metadata. + parameters: + - name: job_id + in: path + description: The job (prompt) ID to fetch. + required: true + schema: + type: string + format: uuid + responses: + "200": + description: Job detail + content: + application/json: + schema: + $ref: "#/components/schemas/JobDetailResponse" + "404": + description: Job not found + + # --------------------------------------------------------------------------- + # History + # --------------------------------------------------------------------------- + /api/history: + get: + operationId: getHistory + tags: [history] + summary: Get execution history + deprecated: true + description: | + **Deprecated.** Superseded by `GET /api/jobs`, which returns the same + execution records in a paginated, filterable format. 
Planned for removal + no earlier than a future major release; sunset timeline TBD. + + Returns a dictionary keyed by prompt_id. Each value is a HistoryEntry + containing prompt metadata, outputs, status, and node meta. + parameters: + - $ref: "#/components/parameters/ComfyUserHeader" + - name: max_items + in: query + schema: + type: integer + description: Maximum number of history entries to return + - name: offset + in: query + schema: + type: integer + description: Pagination offset (number of entries to skip) + responses: + "200": + description: History dictionary keyed by prompt_id + content: + application/json: + schema: + type: object + additionalProperties: + $ref: "#/components/schemas/HistoryEntry" + post: + operationId: manageHistory + tags: [history] + summary: Clear or delete history entries + deprecated: true + description: | + **Deprecated.** Superseded by the forthcoming job-management endpoints + under `/api/jobs`. Planned for removal no earlier than a future major + release; sunset timeline TBD. + parameters: + - $ref: "#/components/parameters/ComfyUserHeader" + requestBody: + required: true + content: + application/json: + schema: + $ref: "#/components/schemas/HistoryManageRequest" + responses: + "200": + description: History updated + + /api/history/{prompt_id}: + get: + operationId: getHistoryByPromptId + tags: [history] + summary: Get history for a specific prompt + deprecated: true + description: | + **Deprecated.** Superseded by `GET /api/jobs/{job_id}`, which returns + the same execution record. Planned for removal no earlier than a future + major release; sunset timeline TBD. + parameters: + - $ref: "#/components/parameters/ComfyUserHeader" + - name: prompt_id + in: path + description: The prompt ID to fetch history for. + required: true + schema: + type: string + format: uuid + responses: + "200": + description: Single-entry history dictionary. Returns an empty object `{}` if the prompt_id is not found. + content: + application/json: + schema: + type: object + additionalProperties: + $ref: "#/components/schemas/HistoryEntry" + + # --------------------------------------------------------------------------- + # Upload + # --------------------------------------------------------------------------- + /api/upload/image: + post: + operationId: uploadImage + tags: [upload] + summary: Upload an image file + description: Uploads an image file into one of the input/output/temp directories so it can be referenced by workflow nodes. + requestBody: + required: true + content: + multipart/form-data: + schema: + type: object + required: + - image + properties: + image: + type: string + format: binary + description: Image file to upload + type: + type: string + enum: [input, temp, output] + default: input + description: Target directory type + overwrite: + type: string + description: 'Set to "true" to overwrite existing files' + subfolder: + type: string + description: Subfolder within the target directory + responses: + "200": + description: Upload result + content: + application/json: + schema: + $ref: "#/components/schemas/UploadResult" + "400": + description: No file provided or invalid request + + /api/upload/mask: + post: + operationId: uploadMask + tags: [upload] + summary: Upload a mask image + description: Uploads a mask image associated with a previously-uploaded reference image. 
+ requestBody: + required: true + content: + multipart/form-data: + schema: + type: object + required: + - image + - original_ref + properties: + image: + type: string + format: binary + description: Mask image (alpha channel is used) + original_ref: + type: object + description: Reference to the original image file + required: + - filename + properties: + filename: + type: string + description: Filename of the original image + additionalProperties: true + type: + type: string + enum: [input, temp, output] + default: input + description: Target directory type + overwrite: + type: string + description: 'Set to "true" to overwrite existing files' + subfolder: + type: string + description: Subfolder within the target directory + responses: + "200": + description: Upload result + content: + application/json: + schema: + $ref: "#/components/schemas/UploadResult" + "400": + description: No file provided or invalid request + + # --------------------------------------------------------------------------- + # View + # --------------------------------------------------------------------------- + /api/view: + get: + operationId: viewFile + tags: [view] + summary: View or download a file + description: Serves a file (image, audio, or video) from the input/output/temp directory identified by the query parameters. + parameters: + - name: filename + in: query + required: true + schema: + type: string + description: Name of the file to view + - name: type + in: query + schema: + type: string + enum: [input, output, temp] + default: output + description: Directory type + - name: subfolder + in: query + schema: + type: string + description: Subfolder within the directory + - name: preview + in: query + schema: + type: string + description: Preview format hint (e.g. "webp;90") + - name: channel + in: query + schema: + type: string + enum: [rgba, rgb, a] + description: Channel extraction mode + responses: + "200": + description: File content + content: + image/*: + schema: + type: string + format: binary + video/*: + schema: + type: string + format: binary + audio/*: + schema: + type: string + format: binary + application/octet-stream: + schema: + type: string + format: binary + "404": + description: File not found + + /api/view_metadata/{folder_name}: + get: + operationId: viewMetadata + tags: [view] + summary: Get metadata for a file (e.g. safetensors header) + description: Returns embedded metadata parsed from a file in the given folder — for example, the header of a safetensors model. + parameters: + - name: folder_name + in: path + required: true + schema: + type: string + description: Folder type (output, input, temp, etc.) + - name: filename + in: query + required: true + schema: + type: string + description: Filename to read metadata from + responses: + "200": + description: File metadata + content: + application/json: + schema: + type: object + additionalProperties: true + "404": + description: File or metadata not found + + # --------------------------------------------------------------------------- + # System + # --------------------------------------------------------------------------- + /api/system_stats: + get: + operationId: getSystemStats + tags: [system] + summary: Get system statistics + description: Returns hardware, Python, VRAM, and runtime statistics for the running ComfyUI process. 
+ responses: + "200": + description: System stats + content: + application/json: + schema: + $ref: "#/components/schemas/SystemStatsResponse" + + /api/features: + get: + operationId: getFeatures + tags: [system] + summary: Get enabled feature flags + description: Returns a dictionary of feature flag names to their enabled state. + responses: + "200": + description: Feature flags + content: + application/json: + schema: + type: object + additionalProperties: + type: boolean + + # --------------------------------------------------------------------------- + # Node / Object Info + # --------------------------------------------------------------------------- + /api/object_info: + get: + operationId: getObjectInfo + tags: [node] + summary: Get all node definitions + description: | + Returns a dictionary of every registered node class, keyed by class name. + Each value is a NodeInfo object describing inputs, outputs, category, etc. + responses: + "200": + description: All node definitions + content: + application/json: + schema: + type: object + additionalProperties: + $ref: "#/components/schemas/NodeInfo" + + /api/object_info/{node_class}: + get: + operationId: getObjectInfoByClass + tags: [node] + summary: Get a single node definition + description: Returns the `NodeInfo` definition for a single registered node class. + parameters: + - name: node_class + in: path + required: true + schema: + type: string + description: Node class name (e.g. "KSampler") + responses: + "200": + description: Single node definition + content: + application/json: + schema: + type: object + additionalProperties: + $ref: "#/components/schemas/NodeInfo" + "404": + description: Node class not found + + /api/embeddings: + get: + operationId: getEmbeddings + tags: [node] + summary: List available embedding names + description: Returns the list of text-encoder embeddings available on disk. + responses: + "200": + description: Embedding names + content: + application/json: + schema: + type: array + items: + type: string + + # --------------------------------------------------------------------------- + # Models + # --------------------------------------------------------------------------- + /api/models: + get: + operationId: getModelTypes + tags: [model] + summary: List model folder type names + description: Returns an array of model type names (e.g. checkpoints, loras, vae). + responses: + "200": + description: Model type names + content: + application/json: + schema: + type: array + items: + type: string + + /api/models/{folder}: + get: + operationId: getModelsByFolder + tags: [model] + summary: List model filenames in a folder + description: Returns the names of model files in the given folder. This endpoint predates `/api/experiment/models/{folder}` and returns names only — prefer the experiment endpoint for new integrations. + parameters: + - name: folder + in: path + required: true + schema: + type: string + description: Model folder type name + responses: + "200": + description: Model filenames + content: + application/json: + schema: + type: array + items: + type: string + "404": + description: Unknown folder type + + /api/experiment/models: + get: + operationId: getExperimentModels + tags: [model] + summary: List model folders with paths + description: Returns an array of model folder objects with name and folder paths. 
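+ # Illustrative response (folder names and paths depend on the local install):
+ #   [{"name": "checkpoints", "folders": ["/path/to/models/checkpoints"]}]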
+ responses: + "200": + description: Model folders + content: + application/json: + schema: + type: array + items: + $ref: "#/components/schemas/ModelFolder" + + /api/experiment/models/{folder}: + get: + operationId: getExperimentModelsByFolder + tags: [model] + summary: List model files with metadata + description: Returns the model files in the given folder with richer metadata (path index, mtime, size) than the legacy `/api/models/{folder}` endpoint. + parameters: + - name: folder + in: path + required: true + schema: + type: string + description: Model folder type name + responses: + "200": + description: Model files with metadata + content: + application/json: + schema: + type: array + items: + $ref: "#/components/schemas/ModelFile" + "404": + description: Unknown folder type + + /api/experiment/models/preview/{folder}/{path_index}/{filename}: + get: + operationId: getModelPreview + tags: [model] + summary: Get model preview image + description: Returns the preview image associated with a model file, if one exists alongside the model on disk. + parameters: + - name: folder + in: path + required: true + schema: + type: string + description: Model folder type name + - name: path_index + in: path + required: true + schema: + type: integer + description: Path index within the folder + - name: filename + in: path + required: true + schema: + type: string + description: Model filename + responses: + "200": + description: Preview image (WebP) + content: + image/webp: + schema: + type: string + format: binary + "404": + description: Preview not found + + # --------------------------------------------------------------------------- + # Users + # --------------------------------------------------------------------------- + /api/users: + get: + operationId: getUsers + tags: [user] + summary: Get user storage info + description: | + Returns user storage configuration. In single-user mode returns + `{"storage": "server", "migrated": true/false}`. In multi-user mode + returns `{"storage": "server", "users": {"user_id": "user_dir", ...}}`. + parameters: + - $ref: "#/components/parameters/ComfyUserHeader" + responses: + "200": + description: User info + content: + application/json: + schema: + type: object + properties: + storage: + type: string + description: Storage backend type (always "server") + migrated: + type: boolean + description: Whether migration from browser storage is complete (single-user) + users: + type: object + additionalProperties: + type: string + description: Map of user_id to directory name (multi-user) + post: + operationId: createUser + tags: [user] + summary: Create a new user (multi-user mode) + description: Creates a new user entry. Only meaningful when ComfyUI is running in multi-user mode. 
+ parameters: + - $ref: "#/components/parameters/ComfyUserHeader" + requestBody: + required: true + content: + application/json: + schema: + type: object + required: + - username + properties: + username: + type: string + description: Username for the new user + responses: + "200": + description: Created user ID + content: + application/json: + schema: + type: string + description: The generated user_id + "400": + description: Username already exists or invalid + + # --------------------------------------------------------------------------- + # Userdata + # --------------------------------------------------------------------------- + /api/userdata: + get: + operationId: listUserdata + tags: [userdata] + summary: List files in a userdata directory + description: Lists files in the authenticated user's data directory. Returns either filename strings or full objects depending on the `full_info` query parameter. + parameters: + - $ref: "#/components/parameters/ComfyUserHeader" + - name: dir + in: query + required: true + schema: + type: string + description: Directory path relative to the user's data folder + - name: recurse + in: query + schema: + type: boolean + description: Recurse into subdirectories + - name: full_info + in: query + schema: + type: boolean + description: Return full file info objects instead of just names + - name: split + in: query + schema: + type: boolean + description: Split paths into directory components + responses: + "200": + description: File listing + content: + application/json: + schema: + $ref: "#/components/schemas/ListUserdataResponse" + "404": + description: Directory not found + + /api/v2/userdata: + get: + operationId: listUserdataV2 + tags: [userdata] + summary: List files in userdata (v2 format) + description: Lists files in the authenticated user's data directory using the v2 response shape, which always returns full objects. + parameters: + - $ref: "#/components/parameters/ComfyUserHeader" + - name: path + in: query + schema: + type: string + description: Directory path relative to user data root + responses: + "200": + description: File listing with metadata + content: + application/json: + schema: + type: array + items: + type: object + properties: + name: + type: string + path: + type: string + type: + type: string + enum: [file, directory] + size: + type: integer + modified: + type: number + description: Unix timestamp + + /api/userdata/{file}: + get: + operationId: getUserdataFile + tags: [userdata] + summary: Read a userdata file + description: Reads the contents of a file from the authenticated user's data directory. + parameters: + - $ref: "#/components/parameters/ComfyUserHeader" + - name: file + in: path + required: true + schema: + type: string + description: File path relative to user data directory + responses: + "200": + description: File content + content: + application/octet-stream: + schema: + type: string + format: binary + "404": + description: File not found + post: + operationId: writeUserdataFile + tags: [userdata] + summary: Write or create a userdata file + description: Writes (creates or replaces) a file in the authenticated user's data directory. 
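+ # Illustrative call (path and body are examples, not normative):
+ #   POST /api/userdata/workflows%2Fexample.json?overwrite=true
+ #   body: raw file bytes; the response is the stored path, or file info when full_info=true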
+ parameters:
+ - $ref: "#/components/parameters/ComfyUserHeader"
+ - name: file
+ in: path
+ required: true
+ schema:
+ type: string
+ description: File path relative to user data directory
+ - name: overwrite
+ in: query
+ schema:
+ type: boolean
+ description: Allow overwriting existing files
+ - name: full_info
+ in: query
+ schema:
+ type: boolean
+ description: Return full file info in response
+ requestBody:
+ required: true
+ content:
+ application/octet-stream:
+ schema:
+ type: string
+ format: binary
+ application/json:
+ schema: {}
+ responses:
+ "200":
+ description: File written
+ content:
+ application/json:
+ schema:
+ $ref: "#/components/schemas/UserDataResponse"
+ "409":
+ description: File exists and overwrite not set
+ delete:
+ operationId: deleteUserdataFile
+ tags: [userdata]
+ summary: Delete a userdata file
+ description: Deletes a file from the authenticated user's data directory.
+ parameters:
+ - $ref: "#/components/parameters/ComfyUserHeader"
+ - name: file
+ in: path
+ required: true
+ schema:
+ type: string
+ description: File path relative to user data directory
+ responses:
+ "204":
+ description: File deleted
+ "404":
+ description: File not found
+
+ /api/userdata/{file}/move/{dest}:
+ post:
+ operationId: moveUserdataFile
+ tags: [userdata]
+ summary: Move or rename a userdata file
+ description: Renames or moves a file within the authenticated user's data directory.
+ parameters:
+ - $ref: "#/components/parameters/ComfyUserHeader"
+ - name: file
+ in: path
+ required: true
+ schema:
+ type: string
+ description: Source file path
+ - name: dest
+ in: path
+ required: true
+ schema:
+ type: string
+ description: Destination file path
+ - name: overwrite
+ in: query
+ schema:
+ type: boolean
+ description: Allow overwriting at destination
+ - name: full_info
+ in: query
+ schema:
+ type: boolean
+ description: Return full file info in response
+ responses:
+ "200":
+ description: File moved
+ content:
+ application/json:
+ schema:
+ $ref: "#/components/schemas/UserDataResponse"
+ "404":
+ description: Source file not found
+ "409":
+ description: Destination exists and overwrite not set
+
+ # ---------------------------------------------------------------------------
+ # Settings
+ # ---------------------------------------------------------------------------
+ /api/settings:
+ get:
+ operationId: getSettings
+ tags: [settings]
+ summary: Get all user settings
+ description: Returns all settings for the authenticated user.
+ parameters:
+ - $ref: "#/components/parameters/ComfyUserHeader"
+ responses:
+ "200":
+ description: Settings object
+ content:
+ application/json:
+ schema:
+ type: object
+ additionalProperties: true
+ post:
+ operationId: updateSettings
+ tags: [settings]
+ summary: Update user settings (partial merge)
+ description: Merges the provided settings object into the authenticated user's existing settings; top-level keys in the request overwrite stored values.
+ parameters:
+ - $ref: "#/components/parameters/ComfyUserHeader"
+ requestBody:
+ required: true
+ content:
+ application/json:
+ schema:
+ type: object
+ additionalProperties: true
+ description: Partial settings to merge
+ responses:
+ "200":
+ description: Settings updated
+
+ /api/settings/{id}:
+ get:
+ operationId: getSetting
+ tags: [settings]
+ summary: Get a single setting by key
+ description: Returns the value of a single setting, identified by key.
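+ # Illustrative call (the setting key shown is hypothetical):
+ #   GET /api/settings/Comfy.SomeSetting  ->  "some value"  (null if unset)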
+ parameters:
+ - $ref: "#/components/parameters/ComfyUserHeader"
+ - name: id
+ in: path
+ required: true
+ schema:
+ type: string
+ description: Setting key
+ responses:
+ "200":
+ description: Setting value (null if the setting does not exist)
+ content:
+ application/json:
+ schema:
+ nullable: true
+ description: The setting value (any JSON type), or null if not set
+ post:
+ operationId: updateSetting
+ tags: [settings]
+ summary: Set a single setting value
+ description: Sets the value of a single setting, identified by key.
+ parameters:
+ - $ref: "#/components/parameters/ComfyUserHeader"
+ - name: id
+ in: path
+ required: true
+ schema:
+ type: string
+ description: Setting key
+ requestBody:
+ required: true
+ content:
+ application/json:
+ schema:
+ description: The setting value (any JSON type)
+ responses:
+ "200":
+ description: Setting updated
+
+ # ---------------------------------------------------------------------------
+ # Extensions / Templates / i18n
+ # ---------------------------------------------------------------------------
+ /api/extensions:
+ get:
+ operationId: getExtensions
+ tags: [extensions]
+ summary: List frontend extension JS file paths
+ description: Returns the list of frontend extension JS URLs registered by custom nodes, to be loaded by the frontend on startup.
+ responses:
+ "200":
+ description: Array of JS file paths
+ content:
+ application/json:
+ schema:
+ type: array
+ items:
+ type: string
+ description: Relative path to extension JS file
+
+ /api/workflow_templates:
+ get:
+ operationId: getWorkflowTemplates
+ tags: [extensions]
+ summary: Get workflow template mappings
+ description: Returns a map of custom node names to their provided workflow template names.
+ responses:
+ "200":
+ description: Template mappings
+ content:
+ application/json:
+ schema:
+ type: object
+ additionalProperties:
+ type: array
+ items:
+ type: string
+ description: Map of node pack name to array of template names
+
+ /api/i18n:
+ get:
+ operationId: getI18n
+ tags: [extensions]
+ summary: Get internationalisation translation strings
+ description: Returns translation strings contributed by custom nodes, keyed by locale.
+ responses:
+ "200":
+ description: Translation map
+ content:
+ application/json:
+ schema:
+ type: object
+ additionalProperties: true
+ description: Nested map of locale to translation key-value pairs
+
+ # ---------------------------------------------------------------------------
+ # Subgraphs
+ # ---------------------------------------------------------------------------
+ /api/global_subgraphs:
+ get:
+ operationId: getGlobalSubgraphs
+ tags: [subgraph]
+ summary: List global subgraph blueprints
+ description: Returns a dictionary of subgraph IDs to their metadata.
+ responses:
+ "200":
+ description: Subgraph metadata dictionary
+ content:
+ application/json:
+ schema:
+ type: object
+ additionalProperties:
+ $ref: "#/components/schemas/GlobalSubgraphInfo"
+
+ /api/global_subgraphs/{id}:
+ get:
+ operationId: getGlobalSubgraph
+ tags: [subgraph]
+ summary: Get a global subgraph with full data
+ description: Returns the blueprint for a globally-registered subgraph, used by the frontend to materialize the subgraph node.
+ parameters:
+ - name: id
+ in: path
+ required: true
+ schema:
+ type: string
+ description: Subgraph identifier
+ responses:
+ "200":
+ description: Full subgraph data
+ content:
+ application/json:
+ schema:
+ $ref: "#/components/schemas/GlobalSubgraphData"
+ "404":
+ description: Subgraph not found
+
+ # ---------------------------------------------------------------------------
+ # Node Replacements
+ # ---------------------------------------------------------------------------
+ /api/node_replacements:
+ get:
+ operationId: getNodeReplacements
+ tags: [node]
+ summary: Get node replacement mappings
+ description: |
+ Returns a dictionary mapping deprecated or replaced node class names
+ to their replacement node information.
+ responses:
+ "200":
+ description: Replacement mappings
+ content:
+ application/json:
+ schema:
+ type: object
+ additionalProperties: true
+
+ # ---------------------------------------------------------------------------
+ # Internal (x-internal: true)
+ # ---------------------------------------------------------------------------
+ /internal/logs:
+ get:
+ operationId: getInternalLogs
+ tags: [internal]
+ summary: Get server logs as text
+ description: Returns the contents of the in-memory ComfyUI log buffer as plain text.
+ x-internal: true
+ responses:
+ "200":
+ description: Log text
+ content:
+ text/plain:
+ schema:
+ type: string
+
+ /internal/logs/raw:
+ get:
+ operationId: getInternalLogsRaw
+ tags: [internal]
+ summary: Get raw structured log entries
+ description: Returns structured log entries from the in-memory log buffer, together with the current terminal size.
+ x-internal: true
+ responses:
+ "200":
+ description: Structured log data
+ content:
+ application/json:
+ schema:
+ type: object
+ properties:
+ entries:
+ type: array
+ items:
+ type: object
+ properties:
+ t:
+ type: number
+ description: Timestamp
+ m:
+ type: string
+ description: Message
+ size:
+ type: object
+ properties:
+ cols:
+ type: integer
+ rows:
+ type: integer
+
+ /internal/logs/subscribe:
+ patch:
+ operationId: subscribeToLogs
+ tags: [internal]
+ summary: Subscribe or unsubscribe a WebSocket client to log streaming
+ description: Subscribes or unsubscribes the current client from live log streaming over the WebSocket.
+ x-internal: true
+ requestBody:
+ required: true
+ content:
+ application/json:
+ schema:
+ type: object
+ required:
+ - clientId
+ - enabled
+ properties:
+ clientId:
+ type: string
+ description: WebSocket client ID
+ enabled:
+ type: boolean
+ description: Enable or disable log streaming for this client
+ responses:
+ "200":
+ description: Subscription updated
+
+ /internal/folder_paths:
+ get:
+ operationId: getInternalFolderPaths
+ tags: [internal]
+ summary: Get configured folder paths
+ description: Returns the filesystem paths ComfyUI is configured to load models and other assets from, keyed by folder type.
+ x-internal: true
+ responses:
+ "200":
+ description: Dictionary of folder type to paths
+ content:
+ application/json:
+ schema:
+ type: object
+ additionalProperties:
+ type: array
+ items:
+ type: array
+ items:
+ type: string
+ description: Map of folder type name to list of [path, ...] entries
+
+ /internal/files/{directory_type}:
+ get:
+ operationId: getInternalFiles
+ tags: [internal]
+ summary: List files in a directory type
+ description: Lists the files present in one of ComfyUI's known directories (input, output, or temp).
+ x-internal: true
+ parameters:
+ - name: directory_type
+ in: path
+ required: true
+ schema:
+ type: string
+ description: Directory type (e.g. output, input, temp)
+ responses:
+ "200":
+ description: Array of filenames
+ content:
+ application/json:
+ schema:
+ type: array
+ items:
+ type: string
+
+ # ---------------------------------------------------------------------------
+ # Assets (x-feature-gate: enable-assets)
+ # ---------------------------------------------------------------------------
+ /api/assets/hash/{hash}:
+ head:
+ operationId: checkAssetByHash
+ tags: [assets]
+ summary: Check if an asset with the given hash exists
+ description: Returns 204 if an asset with the given content hash already exists, 404 otherwise. Used by clients to deduplicate uploads before transferring bytes.
+ x-feature-gate: enable-assets
+ parameters:
+ - name: hash
+ in: path
+ required: true
+ schema:
+ type: string
+ description: "Blake3 hash of the asset (e.g. blake3:abc123...)"
+ responses:
+ "204":
+ description: Asset exists (no body)
+ "404":
+ description: No asset with this hash
+
+ /api/assets:
+ get:
+ operationId: listAssets
+ tags: [assets]
+ summary: List assets with filtering and pagination
+ description: Returns a paginated list of assets, optionally filtered by tags, name, or other query parameters.
+ x-feature-gate: enable-assets
+ parameters:
+ - name: limit
+ in: query
+ schema:
+ type: integer
+ default: 50
+ - name: offset
+ in: query
+ schema:
+ type: integer
+ default: 0
+ - name: include_tags
+ in: query
+ schema:
+ type: array
+ items:
+ type: string
+ style: form
+ explode: true
+ description: Tags that assets must have (AND logic)
+ - name: exclude_tags
+ in: query
+ schema:
+ type: array
+ items:
+ type: string
+ style: form
+ explode: true
+ description: Tags that assets must not have
+ - name: name_contains
+ in: query
+ schema:
+ type: string
+ description: Filter assets whose name contains this substring
+ - name: metadata_filter
+ in: query
+ schema:
+ type: string
+ description: JSON-encoded metadata key/value filter
+ - name: sort
+ in: query
+ schema:
+ type: string
+ description: Field to sort by
+ - name: order
+ in: query
+ schema:
+ type: string
+ enum: [asc, desc]
+ description: Sort direction
+ responses:
+ "200":
+ description: Asset list
+ content:
+ application/json:
+ schema:
+ $ref: "#/components/schemas/ListAssetsResponse"
+ post:
+ operationId: createAsset
+ tags: [assets]
+ summary: Upload a new asset
+ description: Uploads a new asset (binary content plus metadata) and registers it in the asset database.
+ x-feature-gate: enable-assets
+ requestBody:
+ required: true
+ content:
+ multipart/form-data:
+ schema:
+ type: object
+ required:
+ - file
+ properties:
+ file:
+ type: string
+ format: binary
+ description: Asset file to upload
+ name:
+ type: string
+ description: Display name for the asset
+ tags:
+ type: string
+ description: Comma-separated tags
+ user_metadata:
+ type: string
+ description: JSON-encoded user metadata
+ hash:
+ type: string
+ description: "Blake3 hash of the file content (e.g.
blake3:abc123...)" + mime_type: + type: string + description: MIME type of the file (overrides auto-detected type) + preview_id: + type: string + format: uuid + description: ID of an existing asset to use as the preview image + responses: + "201": + description: Asset created + content: + application/json: + schema: + $ref: "#/components/schemas/AssetCreated" + + /api/assets/from-hash: + post: + operationId: createAssetFromHash + tags: [assets] + summary: Create an asset reference from an existing hash + description: Registers a new asset that references existing content by hash, without re-uploading the bytes. + x-feature-gate: enable-assets + requestBody: + required: true + content: + application/json: + schema: + type: object + required: + - hash + - name + properties: + hash: + type: string + description: Blake3 hash of existing content + name: + type: string + description: Display name + tags: + type: array + items: + type: string + user_metadata: + type: object + additionalProperties: true + responses: + "201": + description: Asset created from hash + content: + application/json: + schema: + $ref: "#/components/schemas/AssetCreated" + + /api/assets/{id}: + get: + operationId: getAsset + tags: [assets] + summary: Get asset metadata + description: Returns the metadata for a single asset. + x-feature-gate: enable-assets + parameters: + - name: id + in: path + description: The asset ID. + required: true + schema: + type: string + format: uuid + responses: + "200": + description: Asset metadata + content: + application/json: + schema: + $ref: "#/components/schemas/Asset" + "404": + description: Asset not found + put: + operationId: updateAsset + tags: [assets] + summary: Update asset metadata + description: Updates the mutable metadata of an asset (name, tags, etc.). Binary content is immutable. + x-feature-gate: enable-assets + parameters: + - name: id + in: path + description: The asset ID. + required: true + schema: + type: string + format: uuid + requestBody: + required: true + content: + application/json: + schema: + type: object + properties: + name: + type: string + description: New display name for the asset + user_metadata: + type: object + additionalProperties: true + description: Custom user metadata to set + preview_id: + type: string + format: uuid + description: ID of the asset to use as the preview + responses: + "200": + description: Asset updated + content: + application/json: + schema: + $ref: "#/components/schemas/AssetUpdated" + delete: + operationId: deleteAsset + tags: [assets] + summary: Delete an asset + description: Removes an asset entry. Depending on the server configuration, the underlying content may also be deleted. + x-feature-gate: enable-assets + parameters: + - name: id + in: path + description: The asset ID. + required: true + schema: + type: string + format: uuid + - name: delete_content + in: query + schema: + type: boolean + description: Also delete the underlying content file + responses: + "204": + description: Asset deleted + + /api/assets/{id}/content: + get: + operationId: getAssetContent + tags: [assets] + summary: Download asset file content + description: Returns the binary content of an asset. Supports range requests. + x-feature-gate: enable-assets + parameters: + - name: id + in: path + description: The asset ID. 
+ required: true + schema: + type: string + format: uuid + responses: + "200": + description: Asset file content + content: + application/octet-stream: + schema: + type: string + format: binary + "404": + description: Asset not found + + /api/assets/{id}/tags: + post: + operationId: addAssetTags + tags: [assets] + summary: Add tags to an asset + description: Adds one or more tags to an asset. + x-feature-gate: enable-assets + parameters: + - name: id + in: path + description: The asset ID. + required: true + schema: + type: string + format: uuid + requestBody: + required: true + content: + application/json: + schema: + type: object + required: + - tags + properties: + tags: + type: array + items: + type: string + responses: + "200": + description: Tags added + content: + application/json: + schema: + $ref: "#/components/schemas/TagsModificationResponse" + delete: + operationId: removeAssetTags + tags: [assets] + summary: Remove tags from an asset + description: Removes one or more tags from an asset. + x-feature-gate: enable-assets + parameters: + - name: id + in: path + description: The asset ID. + required: true + schema: + type: string + format: uuid + requestBody: + required: true + content: + application/json: + schema: + type: object + required: + - tags + properties: + tags: + type: array + items: + type: string + responses: + "200": + description: Tags removed + content: + application/json: + schema: + $ref: "#/components/schemas/TagsModificationResponse" + + /api/tags: + get: + operationId: listTags + tags: [assets] + summary: List all known tags with counts + description: Returns the list of all tags known to the asset database, with counts. + x-feature-gate: enable-assets + parameters: + - name: limit + in: query + schema: + type: integer + - name: offset + in: query + schema: + type: integer + - name: search + in: query + schema: + type: string + description: Search term for tag name + responses: + "200": + description: Tag list + content: + application/json: + schema: + $ref: "#/components/schemas/ListTagsResponse" + + /api/assets/tags/refine: + get: + operationId: refineAssetTags + tags: [assets] + summary: Get tag counts for assets matching current filters + description: Returns suggested additional tags that would refine a filtered asset query, together with the count of assets each tag would select. 
+ x-feature-gate: enable-assets
+ parameters:
+ - name: include_tags
+ in: query
+ schema:
+ type: array
+ items:
+ type: string
+ style: form
+ explode: true
+ description: Tags that assets must have (AND logic)
+ - name: exclude_tags
+ in: query
+ schema:
+ type: array
+ items:
+ type: string
+ style: form
+ explode: true
+ description: Tags that assets must not have
+ - name: name_contains
+ in: query
+ schema:
+ type: string
+ description: Filter assets whose name contains this substring
+ - name: metadata_filter
+ in: query
+ schema:
+ type: string
+ description: JSON-encoded metadata key/value filter
+ - name: limit
+ in: query
+ schema:
+ type: integer
+ - name: offset
+ in: query
+ schema:
+ type: integer
+ - name: sort
+ in: query
+ schema:
+ type: string
+ description: Field to sort by
+ - name: order
+ in: query
+ schema:
+ type: string
+ enum: [asc, desc]
+ description: Sort direction
+ responses:
+ "200":
+ description: Tag histogram
+ content:
+ application/json:
+ schema:
+ $ref: "#/components/schemas/AssetTagHistogramResponse"
+
+ /api/assets/seed:
+ post:
+ operationId: seedAssets
+ tags: [assets]
+ summary: Trigger asset scan/seed from filesystem
+ description: Starts a background job that scans the configured directories and registers any assets not yet present in the asset database.
+ x-feature-gate: enable-assets
+ requestBody:
+ required: false
+ content:
+ application/json:
+ schema:
+ type: object
+ properties:
+ roots:
+ type: array
+ items:
+ type: string
+ description: Root folder paths to scan (if omitted, scans all)
+ responses:
+ "200":
+ description: Seed started
+ content:
+ application/json:
+ schema:
+ type: object
+ properties:
+ status:
+ type: string
+
+ /api/assets/seed/status:
+ get:
+ operationId: getAssetSeedStatus
+ tags: [assets]
+ summary: Get asset scan progress
+ description: Returns the progress and status of the most recently-started asset seed job.
+ x-feature-gate: enable-assets
+ responses:
+ "200":
+ description: Scan progress
+ content:
+ application/json:
+ schema:
+ type: object
+ additionalProperties: true
+ description: Scan progress details (files scanned, total, status, etc.)
+
+ /api/assets/seed/cancel:
+ post:
+ operationId: cancelAssetSeed
+ tags: [assets]
+ summary: Cancel an in-progress asset scan
+ description: Requests cancellation of the currently-running asset seed job.
+ x-feature-gate: enable-assets
+ responses:
+ "200":
+ description: Scan cancelled
+ content:
+ application/json:
+ schema:
+ type: object
+ properties:
+ status:
+ type: string
+
+ /api/assets/prune:
+ post:
+ operationId: pruneAssets
+ tags: [assets]
+ summary: Mark assets whose backing files no longer exist on disk
+ description: Starts a background job that marks asset entries whose underlying content no longer exists on disk as missing.
+ x-feature-gate: enable-assets
+ responses:
+ "200":
+ description: Prune result
+ content:
+ application/json:
+ schema:
+ type: object
+ properties:
+ status:
+ type: string
+ marked:
+ type: integer
+ description: Number of assets marked as missing
+
+components:
+ parameters:
+ ComfyUserHeader:
+ name: Comfy-User
+ in: header
+ required: false
+ schema:
+ type: string
+ description: |
+ Identifies the active user in multi-user mode. Used for settings,
+ userdata, and history isolation. This is not a security mechanism —
+ it is an organisational convenience with no authentication behind it.
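+ # Illustrative usage (user id is an example, not normative):
+ #   curl -H "Comfy-User: default" http://127.0.0.1:8188/api/settings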
+
+ schemas:
+ # -------------------------------------------------------------------
+ # Prompt
+ # -------------------------------------------------------------------
+ PromptRequest:
+ type: object
+ description: A workflow submission. Wraps the prompt graph plus optional client identifier and extra per-request data.
+ required:
+ - prompt
+ properties:
+ prompt:
+ type: object
+ description: |
+ The workflow graph to execute. Keys are node IDs (strings);
+ values are objects with class_type and inputs.
+ additionalProperties: true
+ number:
+ type: number
+ description: Priority number for the queue (lower numbers have higher priority)
+ front:
+ type: boolean
+ description: If true, adds the prompt to the front of the queue
+ extra_data:
+ type: object
+ description: Extra data associated with the prompt (e.g. extra_pnginfo)
+ additionalProperties: true
+ client_id:
+ type: string
+ description: WebSocket client ID to receive progress updates
+ prompt_id:
+ type: string
+ format: uuid
+ description: "Client-supplied prompt ID. Server generates a UUID if omitted."
+ partial_execution_targets:
+ type: array
+ items:
+ type: string
+ description: List of node IDs to execute (partial graph execution)
+
+ PromptResponse:
+ type: object
+ description: Server acknowledgement of a workflow submission. Includes the assigned `prompt_id` and current queue position.
+ properties:
+ prompt_id:
+ type: string
+ format: uuid
+ description: Unique identifier for the prompt execution
+ number:
+ type: number
+ description: Priority number in the queue
+ node_errors:
+ type: object
+ description: Validation errors keyed by node ID
+ additionalProperties:
+ $ref: "#/components/schemas/NodeError"
+ error:
+ description: Top-level prompt error (string message or structured error)
+ oneOf:
+ - type: string
+ - $ref: "#/components/schemas/PromptError"
+
+ PromptErrorResponse:
+ type: object
+ description: Error response when prompt validation fails
+ additionalProperties: true
+
+ PromptError:
+ type: object
+ description: Structured prompt validation error
+ properties:
+ type:
+ type: string
+ message:
+ type: string
+ details:
+ type: string
+
+ Error:
+ type: object
+ description: Detailed node-level error
+ properties:
+ type:
+ type: string
+ message:
+ type: string
+ details:
+ type: string
+ extra_info:
+ type: object
+ properties:
+ input_name:
+ type: string
+ additionalProperties: true
+
+ NodeError:
+ type: object
+ description: Error details for a single node
+ properties:
+ errors:
+ type: array
+ items:
+ $ref: "#/components/schemas/Error"
+ class_type:
+ type: string
+ description: The node's class type
+ dependent_outputs:
+ type: array
+ items: {}
+
+ PromptInfo:
+ type: object
+ description: Queue status returned by `GET /api/prompt`, reporting how many items remain in the execution queue.
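+ # Illustrative payload: {"exec_info": {"queue_remaining": 2}}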
+ properties: + exec_info: + type: object + properties: + queue_remaining: + type: integer + description: Number of items remaining in the queue + + # ------------------------------------------------------------------- + # Queue + # ------------------------------------------------------------------- + QueueInfo: + type: object + description: Queue information with pending and running items + properties: + queue_running: + type: array + description: Currently running queue items + items: + type: array + description: | + Queue item tuple: [number, prompt_id, prompt, extra_data, outputs_to_execute, sensitive] + items: {} + prefixItems: + - type: number + description: Priority number + - type: string + format: uuid + description: prompt_id + - type: object + description: prompt graph + additionalProperties: true + - type: object + description: extra_data + additionalProperties: true + - type: array + description: outputs_to_execute (list of output node IDs) + items: + type: string + - type: object + description: sensitive data (may be omitted) + additionalProperties: true + queue_pending: + type: array + description: Pending queue items (oldest first) + items: + type: array + description: | + Queue item tuple: [number, prompt_id, prompt, extra_data, outputs_to_execute, sensitive] + items: {} + prefixItems: + - type: number + description: Priority number + - type: string + format: uuid + description: prompt_id + - type: object + description: prompt graph + additionalProperties: true + - type: object + description: extra_data + additionalProperties: true + - type: array + description: outputs_to_execute (list of output node IDs) + items: + type: string + - type: object + description: sensitive data (may be omitted) + additionalProperties: true + + QueueManageRequest: + type: object + description: Request to clear or delete from queue + properties: + clear: + type: boolean + description: If true, clear all pending items + delete: + type: array + items: + type: string + description: Array of prompt IDs to delete from queue + + # ------------------------------------------------------------------- + # History + # ------------------------------------------------------------------- + HistoryEntry: + type: object + description: A single execution history entry + properties: + prompt: + type: array + description: | + Prompt tuple: [number, prompt_id, prompt_graph, extra_data, output_node_ids] + items: {} + outputs: + type: object + description: Output data from execution keyed by node ID + additionalProperties: true + status: + type: object + description: Execution status (status_str, completed, messages, etc.) 
+ additionalProperties: true + meta: + type: object + description: Metadata about the execution and nodes + additionalProperties: true + + HistoryManageRequest: + type: object + description: Request to clear or delete history entries + properties: + clear: + type: boolean + description: If true, clear all history + delete: + type: array + items: + type: string + description: Array of prompt IDs to delete from history + + # ------------------------------------------------------------------- + # Jobs + # ------------------------------------------------------------------- + JobEntry: + type: object + description: Lightweight job data for list views + required: + - id + - status + properties: + id: + type: string + format: uuid + description: Unique job identifier (same as prompt_id) + status: + type: string + description: Current job status + create_time: + type: number + description: Job creation timestamp + execution_start_time: + type: number + description: Workflow execution start timestamp + execution_end_time: + type: number + description: Workflow execution end timestamp + preview_output: + type: object + additionalProperties: true + description: Primary preview output + outputs_count: + type: integer + description: Total number of output files + + JobDetailResponse: + type: object + description: Full job details including workflow and outputs + required: + - id + - status + properties: + id: + type: string + format: uuid + status: + type: string + workflow: + type: object + additionalProperties: true + description: Full ComfyUI workflow + outputs: + type: object + additionalProperties: true + description: Full outputs object from execution + execution_error: + $ref: "#/components/schemas/ExecutionError" + create_time: + type: number + update_time: + type: number + execution_start_time: + type: number + execution_end_time: + type: number + preview_output: + type: object + additionalProperties: true + outputs_count: + type: integer + execution_status: + type: object + additionalProperties: true + execution_meta: + type: object + additionalProperties: true + + ExecutionError: + type: object + description: Detailed execution error from ComfyUI + properties: + node_id: + type: string + description: ID of the node that failed + node_type: + type: string + description: Type name of the node + exception_message: + type: string + description: Human-readable error message + exception_type: + type: string + description: Python exception type + traceback: + type: array + items: + type: string + description: Traceback lines + current_inputs: + type: object + additionalProperties: true + current_outputs: + type: object + additionalProperties: true + + PaginationInfo: + type: object + description: Pagination metadata returned alongside list responses. + properties: + offset: + type: integer + limit: + type: integer + total: + type: integer + has_more: + type: boolean + + # ------------------------------------------------------------------- + # Upload / View + # ------------------------------------------------------------------- + UploadResult: + type: object + description: Response body returned by the image/mask upload endpoints, describing where the uploaded file now lives. 
+ properties: + name: + type: string + description: Saved filename (may be renamed to avoid collisions) + subfolder: + type: string + description: Subfolder the file was saved to + type: + type: string + description: Directory type (input, temp) + + # ------------------------------------------------------------------- + # System + # ------------------------------------------------------------------- + DeviceStats: + type: object + description: GPU/compute device statistics + required: + - name + - type + - index + properties: + name: + type: string + description: Device name + type: + type: string + description: Device type (cuda, mps, cpu, etc.) + index: + type: number + description: Device index + vram_total: + type: number + description: Total VRAM in bytes + vram_free: + type: number + description: Free VRAM in bytes + torch_vram_total: + type: number + description: Total PyTorch-managed VRAM in bytes + torch_vram_free: + type: number + description: Free PyTorch-managed VRAM in bytes + + SystemStatsResponse: + type: object + description: Hardware, VRAM, Python, and ComfyUI version information for the running process. + required: + - system + - devices + properties: + system: + type: object + required: + - os + - python_version + - embedded_python + - comfyui_version + - pytorch_version + - argv + - ram_total + - ram_free + properties: + os: + type: string + description: Operating system + python_version: + type: string + description: Python version + embedded_python: + type: boolean + description: Whether using embedded Python + comfyui_version: + type: string + description: ComfyUI version string + pytorch_version: + type: string + description: PyTorch version + required_frontend_version: + type: string + description: Required frontend version + argv: + type: array + items: + type: string + description: Command line arguments + ram_total: + type: number + description: Total RAM in bytes + ram_free: + type: number + description: Free RAM in bytes + installed_templates_version: + type: string + nullable: true + description: Version of the currently installed workflow templates + required_templates_version: + type: string + nullable: true + description: Minimum required workflow templates version for this ComfyUI build + devices: + type: array + items: + $ref: "#/components/schemas/DeviceStats" + + # ------------------------------------------------------------------- + # Node / Object Info + # ------------------------------------------------------------------- + NodeInfo: + type: object + description: 'Definition of a registered node class: its inputs, outputs, category, and display metadata.' 
+ properties: + input: + type: object + description: Input specifications (required and optional groups) + additionalProperties: true + input_order: + type: object + description: Ordered input names per group + additionalProperties: + type: array + items: + type: string + output: + type: array + items: + type: string + description: Output type names + output_is_list: + type: array + items: + type: boolean + description: Whether each output is a list + output_name: + type: array + items: + type: string + description: Display names of outputs + name: + type: string + description: Internal class name + display_name: + type: string + description: Human-readable display name + description: + type: string + description: Node description + python_module: + type: string + description: Python module implementing the node + category: + type: string + description: Node category path + output_node: + type: boolean + description: Whether this is an output node + output_tooltips: + type: array + items: + type: string + description: Tooltips for each output + deprecated: + type: boolean + description: Whether the node is deprecated + experimental: + type: boolean + description: Whether the node is experimental + api_node: + type: boolean + description: Whether this is an API node + is_input_list: + type: boolean + description: Whether the node accepts list inputs + dev_only: + type: boolean + description: Whether the node is developer-only (hidden in production UI) + has_intermediate_output: + type: boolean + description: Whether the node emits intermediate output during execution + search_aliases: + type: array + items: + type: string + description: Alternative search terms for finding this node + essentials_category: + type: string + description: Category override used by the essentials pack + + # ------------------------------------------------------------------- + # Models + # ------------------------------------------------------------------- + ModelFolder: + type: object + description: A configured model folder and the list of disk paths it resolves to. + required: + - name + - folders + properties: + name: + type: string + description: Model folder type name (e.g. "checkpoints") + folders: + type: array + items: + type: string + description: Filesystem paths for this model type + + ModelFile: + type: object + description: A single model file in a folder, with filesystem metadata. 
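+ # Illustrative entry (values are examples, not normative):
+ #   {"name": "example.safetensors", "pathIndex": 0, "modified": 1700000000.0, "size": 1234567}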
+ required: + - name + - pathIndex + properties: + name: + type: string + description: Model filename + pathIndex: + type: integer + description: Index into the folder's paths array + modified: + type: number + description: File modification timestamp + created: + type: number + description: File creation timestamp + size: + type: integer + format: int64 + description: File size in bytes + + # ------------------------------------------------------------------- + # Subgraphs + # ------------------------------------------------------------------- + GlobalSubgraphInfo: + type: object + description: Metadata for a global subgraph blueprint (without full data) + required: + - source + - name + - info + properties: + source: + type: string + description: Source type ("templates" or "custom_node") + name: + type: string + description: Display name of the subgraph blueprint + info: + type: object + description: Additional information about the subgraph + required: + - node_pack + properties: + node_pack: + type: string + description: The node pack/module providing this subgraph + data: + type: string + description: The full subgraph JSON data (may be empty in list view) + + GlobalSubgraphData: + type: object + description: Full data for a global subgraph blueprint + required: + - source + - name + - info + - data + properties: + source: + type: string + description: Source type ("templates" or "custom_node") + name: + type: string + description: Display name of the subgraph blueprint + info: + type: object + description: Additional information about the subgraph + required: + - node_pack + properties: + node_pack: + type: string + description: The node pack/module providing this subgraph + data: + type: string + description: The full subgraph JSON data as a string + + # ------------------------------------------------------------------- + # Userdata + # ------------------------------------------------------------------- + UserDataResponse: + description: | + Response body for the POST endpoints `/api/userdata/{file}` and + `/api/userdata/{file}/move/{dest}`. Returns a single item whose + shape depends on the `full_info` query parameter. + x-variant-selector: + full_info=true: file-info object (`GetUserDataResponseFullFile`) + default: relative path string + oneOf: + - $ref: "#/components/schemas/GetUserDataResponseFullFile" + - type: string + description: Relative path of the written or moved file. Returned when `full_info` is absent or false. + + ListUserdataResponse: + description: | + Response body for `GET /api/userdata`. The array item shape is + determined by the `full_info` and `split` query parameters. + x-variant-selector: + full_info=true: array of file-info objects (`GetUserDataResponseFullFile`) + split=true: array of `[relative_path, ...path_components]` arrays + default: array of relative path strings + oneOf: + - type: array + items: + $ref: "#/components/schemas/GetUserDataResponseFullFile" + description: Returned when `full_info=true`. + - type: array + items: + type: array + items: + type: string + minItems: 2 + description: | + Returned when `split=true` and `full_info=false`. Each inner + array is `[relative_path, ...path_components]`. + - type: array + items: + type: string + description: Default shape — array of file paths relative to the user data root. + + GetUserDataResponseFullFile: + type: object + description: A single entry in a full-info user data listing. 
+ properties: + path: + type: string + description: File name or path relative to the user directory + created: + type: number + description: Unix timestamp of file creation + size: + type: integer + description: File size in bytes + modified: + type: integer + format: int64 + description: Unix timestamp of last modification in milliseconds + + # ------------------------------------------------------------------- + # Assets + # ------------------------------------------------------------------- + Asset: + type: object + description: A registered asset — an input/output file tracked in the asset database with content hash and metadata. + required: + - id + - name + - size + - created_at + - updated_at + properties: + id: + type: string + format: uuid + description: Unique identifier for the asset + name: + type: string + description: Name of the asset file + asset_hash: + type: string + description: Blake3 hash of the asset content + pattern: "^blake3:[a-f0-9]{64}$" + size: + type: integer + format: int64 + description: Size of the asset in bytes + mime_type: + type: string + description: MIME type of the asset + tags: + type: array + items: + type: string + description: Tags associated with the asset + user_metadata: + type: object + description: Custom user metadata + additionalProperties: true + metadata: + type: object + description: System-managed metadata (read-only) + additionalProperties: true + readOnly: true + preview_url: + type: string + format: uri + description: URL for asset preview/thumbnail + preview_id: + type: string + format: uuid + description: ID of the preview asset if available + prompt_id: + type: string + format: uuid + description: ID of the prompt that created this asset + created_at: + type: string + format: date-time + updated_at: + type: string + format: date-time + last_access_time: + type: string + format: date-time + is_immutable: + type: boolean + description: Whether this asset is immutable + + AssetCreated: + description: Response body returned after successfully registering a new asset. + allOf: + - $ref: "#/components/schemas/Asset" + - type: object + required: + - created_new + properties: + created_new: + type: boolean + description: Whether this was a new creation (true) or returned existing (false) + + AssetUpdated: + type: object + description: Response body returned after updating an asset's metadata. + required: + - id + - updated_at + properties: + id: + type: string + format: uuid + name: + type: string + asset_hash: + type: string + pattern: "^blake3:[a-f0-9]{64}$" + tags: + type: array + items: + type: string + mime_type: + type: string + user_metadata: + type: object + additionalProperties: true + updated_at: + type: string + format: date-time + + ListAssetsResponse: + type: object + description: Paginated list of assets. + required: + - assets + - total + - has_more + properties: + assets: + type: array + items: + $ref: "#/components/schemas/Asset" + total: + type: integer + has_more: + type: boolean + + TagInfo: + type: object + description: A tag known to the asset database, with the number of assets bearing it. + required: + - name + - count + properties: + name: + type: string + count: + type: integer + + ListTagsResponse: + type: object + description: Flat list of all tags, with counts. 
+ required: + - tags + - total + - has_more + properties: + tags: + type: array + items: + $ref: "#/components/schemas/TagInfo" + total: + type: integer + has_more: + type: boolean + + AssetTagHistogramResponse: + type: object + description: Tags that would refine a filtered asset query, with the count of assets each tag would additionally select. + required: + - tag_counts + properties: + tag_counts: + type: object + additionalProperties: + type: integer + description: Map of tag names to occurrence counts + + TagsModificationResponse: + type: object + description: Response body returned after adding or removing tags on an asset. + required: + - total_tags + properties: + added: + type: array + items: + type: string + description: Tags successfully added + removed: + type: array + items: + type: string + description: Tags successfully removed + already_present: + type: array + items: + type: string + description: Tags already present (for add) + not_present: + type: array + items: + type: string + description: Tags not present (for remove) + total_tags: + type: array + items: + type: string + description: All tags on the asset after the operation + + # ------------------------------------------------------------------- + # Result / Output types + # ------------------------------------------------------------------- + ResultItem: + type: object + description: A single output file reference + properties: + filename: + type: string + subfolder: + type: string + type: + type: string + enum: [input, output, temp] + display_name: + type: string + + NodeOutputs: + type: object + description: | + Outputs from a single node execution. Known keys are listed below, + but custom nodes may add arbitrary keys (additionalProperties). + properties: + images: + type: array + items: + $ref: "#/components/schemas/ResultItem" + audio: + type: array + items: + $ref: "#/components/schemas/ResultItem" + video: + type: array + items: + $ref: "#/components/schemas/ResultItem" + animated: + type: array + items: + type: boolean + text: + oneOf: + - type: string + - type: array + items: + type: string + additionalProperties: true + + TerminalSize: + type: object + description: Terminal dimensions + properties: + cols: + type: number + row: + type: number + + LogEntry: + type: object + description: A single log entry + properties: + t: + type: string + description: Timestamp + m: + type: string + description: Log message + + StatusWsMessageStatus: + type: object + description: Inner payload of a `status` WebSocket message, describing the execution queue state. 
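+      # A minimal illustrative payload (assumed value): {"exec_info": {"queue_remaining": 2}}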
+ properties: + exec_info: + type: object + required: + - queue_remaining + properties: + queue_remaining: + type: integer + + StatusWsMessage: + type: object + description: Initial status message sent on connect + queue status updates + properties: + status: + $ref: "#/components/schemas/StatusWsMessageStatus" + sid: + type: string + description: Session ID assigned by the server + + ProgressWsMessage: + type: object + description: Node execution progress (step N of M) + required: + - value + - max + - prompt_id + - node + properties: + value: + type: integer + description: Current step + max: + type: integer + description: Total steps + prompt_id: + type: string + node: + type: string + description: Node ID currently executing + + ProgressTextWsMessage: + type: object + description: Text-based progress update from a node + properties: + nodeId: + type: string + text: + type: string + prompt_id: + type: string + + NodeProgressState: + type: object + description: Progress state for a single node + properties: + value: + type: number + max: + type: number + state: + type: string + enum: [pending, running, finished, error] + node_id: + type: string + prompt_id: + type: string + display_node_id: + type: string + parent_node_id: + type: string + real_node_id: + type: string + + ProgressStateWsMessage: + type: object + description: Bulk progress state for all nodes in a prompt + required: + - prompt_id + - nodes + properties: + prompt_id: + type: string + nodes: + type: object + description: Map of node ID to progress state + additionalProperties: + $ref: "#/components/schemas/NodeProgressState" + + ExecutingWsMessage: + type: object + description: Fired when a node begins execution + required: + - node + - display_node + - prompt_id + properties: + node: + type: string + description: Node ID + display_node: + type: string + description: Display node ID (may differ for subgraphs) + prompt_id: + type: string + + ExecutedWsMessage: + type: object + description: Fired when a node completes execution with output + required: + - node + - display_node + - prompt_id + - output + properties: + node: + type: string + display_node: + type: string + prompt_id: + type: string + output: + $ref: "#/components/schemas/NodeOutputs" + merge: + type: boolean + description: Whether to merge with existing output + + ExecutionWsMessageBase: + type: object + description: Base fields for execution lifecycle messages + required: + - prompt_id + - timestamp + properties: + prompt_id: + type: string + timestamp: + type: integer + description: Unix timestamp in milliseconds + + ExecutionStartWsMessage: + allOf: + - $ref: "#/components/schemas/ExecutionWsMessageBase" + description: Fired when prompt execution begins + + ExecutionSuccessWsMessage: + allOf: + - $ref: "#/components/schemas/ExecutionWsMessageBase" + description: Fired when prompt execution completes successfully + + ExecutionCachedWsMessage: + allOf: + - $ref: "#/components/schemas/ExecutionWsMessageBase" + - type: object + properties: + nodes: + type: array + items: + type: string + description: List of node IDs that were cached + description: Fired when nodes are served from cache + + ExecutionInterruptedWsMessage: + allOf: + - $ref: "#/components/schemas/ExecutionWsMessageBase" + - type: object + properties: + node_id: + type: string + node_type: + type: string + executed: + type: array + items: + type: string + description: Node IDs that completed before interruption + description: Fired when execution is interrupted by user + + ExecutionErrorWsMessage: + 
allOf: + - $ref: "#/components/schemas/ExecutionWsMessageBase" + - type: object + properties: + node_id: + type: string + node_type: + type: string + executed: + type: array + items: + type: string + exception_message: + type: string + exception_type: + type: string + traceback: + type: array + items: + type: string + current_inputs: {} + current_outputs: {} + description: Fired when a node throws an exception during execution + + LogsWsMessage: + type: object + description: Streaming log entries from the server + properties: + size: + $ref: "#/components/schemas/TerminalSize" + entries: + type: array + items: + $ref: "#/components/schemas/LogEntry" + + NotificationWsMessage: + type: object + description: Server notification (e.g. model download complete) + properties: + value: + type: string + id: + type: string + + FeatureFlagsWsMessage: + type: object + description: Feature flags sent on connect + additionalProperties: true + + AssetDownloadWsMessage: + type: object + description: Asset download progress + required: + - task_id + - asset_name + - bytes_total + - bytes_downloaded + - progress + - status + properties: + task_id: + type: string + asset_name: + type: string + bytes_total: + type: number + bytes_downloaded: + type: number + progress: + type: number + description: 0.0 to 1.0 + status: + type: string + enum: [created, running, completed, failed] + asset_id: + type: string + error: + type: string + + AssetExportWsMessage: + type: object + description: Bulk asset export progress + required: + - task_id + - assets_total + - assets_attempted + - assets_failed + - bytes_total + - bytes_processed + - progress + - status + properties: + task_id: + type: string + export_name: + type: string + assets_total: + type: number + assets_attempted: + type: number + assets_failed: + type: number + bytes_total: + type: number + bytes_processed: + type: number + progress: + type: number + description: 0.0 to 1.0 + status: + type: string + enum: [created, running, completed, failed] + error: + type: string From 7636599389a6798e813d6036fb0dcf08295e7971 Mon Sep 17 00:00:00 2001 From: Alexander Piskun <13381981+bigcat88@users.noreply.github.com> Date: Fri, 24 Apr 2026 16:54:10 +0300 Subject: [PATCH 26/81] chore(api-nodes): add upcoming-deprecation notice to Sora nodes (#13549) --- comfy_api_nodes/nodes_sora.py | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/comfy_api_nodes/nodes_sora.py b/comfy_api_nodes/nodes_sora.py index afc18bb25..4d9075dcf 100644 --- a/comfy_api_nodes/nodes_sora.py +++ b/comfy_api_nodes/nodes_sora.py @@ -33,9 +33,13 @@ class OpenAIVideoSora2(IO.ComfyNode): def define_schema(cls): return IO.Schema( node_id="OpenAIVideoSora2", - display_name="OpenAI Sora - Video", + display_name="OpenAI Sora - Video (Deprecated)", category="api node/video/Sora", - description="OpenAI video and audio generation.", + description=( + "OpenAI video and audio generation.\n\n" + "DEPRECATION NOTICE: OpenAI will stop serving the Sora v2 API in September 2026. " + "This node will be removed from ComfyUI at that time." + ), inputs=[ IO.Combo.Input( "model", From 4304c15e9b4acb45fa9241e8e1723f8ce6397550 Mon Sep 17 00:00:00 2001 From: comfyanonymous <121283862+comfyanonymous@users.noreply.github.com> Date: Fri, 24 Apr 2026 13:46:10 -0700 Subject: [PATCH 27/81] Properly load higher bit depth videos. 
(#13542) --- comfy_api/latest/_input_impl/video_types.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/comfy_api/latest/_input_impl/video_types.py b/comfy_api/latest/_input_impl/video_types.py index 1b4993aa7..bd8090635 100644 --- a/comfy_api/latest/_input_impl/video_types.py +++ b/comfy_api/latest/_input_impl/video_types.py @@ -248,8 +248,8 @@ class VideoFromFile(VideoInput): continue if self.__duration and frame.pts >= end_pts: break - img = frame.to_ndarray(format='rgb24') # shape: (H, W, 3) - img = torch.from_numpy(img) / 255.0 # shape: (H, W, 3) + img = frame.to_ndarray(format='gbrpf32le') # shape: (H, W, 3) + img = torch.from_numpy(img) frames.append(img) images = torch.stack(frames) if len(frames) > 0 else torch.zeros(0, 3, 0, 0) From 5e3f15a830ff27d3563ef4b43e9f6a0321ea36cd Mon Sep 17 00:00:00 2001 From: Comfy Org PR Bot Date: Sat, 25 Apr 2026 09:21:39 +0900 Subject: [PATCH 28/81] Bump comfyui-frontend-package to 1.42.15 (#13556) --- requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/requirements.txt b/requirements.txt index 346ce4b76..6c7457e03 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,4 +1,4 @@ -comfyui-frontend-package==1.42.14 +comfyui-frontend-package==1.42.15 comfyui-workflow-templates==0.9.62 comfyui-embedded-docs==0.4.4 torch From df22bcd5e192ce0b1ae09eaf2e423d0a12cf6638 Mon Sep 17 00:00:00 2001 From: comfyanonymous <121283862+comfyanonymous@users.noreply.github.com> Date: Sat, 25 Apr 2026 18:02:58 -0700 Subject: [PATCH 29/81] Support loading the alpha channel of videos. (#13564) Not exposed in nodes yet. --- comfy_api/latest/_input_impl/video_types.py | 25 ++++++++++++++++----- comfy_api/latest/_util/video_types.py | 5 ++--- 2 files changed, 22 insertions(+), 8 deletions(-) diff --git a/comfy_api/latest/_input_impl/video_types.py b/comfy_api/latest/_input_impl/video_types.py index bd8090635..eb4d3701d 100644 --- a/comfy_api/latest/_input_impl/video_types.py +++ b/comfy_api/latest/_input_impl/video_types.py @@ -240,19 +240,34 @@ class VideoFromFile(VideoInput): start_time = self.__start_time # Get video frames frames = [] + alphas = None start_pts = int(start_time / video_stream.time_base) end_pts = int((start_time + self.__duration) / video_stream.time_base) container.seek(start_pts, stream=video_stream) + image_format = 'gbrpf32le' for frame in container.decode(video_stream): + if alphas is None: + for comp in frame.format.components: + if comp.is_alpha: + alphas = [] + image_format = 'gbrapf32le' + break + if frame.pts < start_pts: continue if self.__duration and frame.pts >= end_pts: break - img = frame.to_ndarray(format='gbrpf32le') # shape: (H, W, 3) - img = torch.from_numpy(img) - frames.append(img) - images = torch.stack(frames) if len(frames) > 0 else torch.zeros(0, 3, 0, 0) + img = frame.to_ndarray(format=image_format) # shape: (H, W, 4) + if alphas is None: + frames.append(torch.from_numpy(img)) + else: + frames.append(torch.from_numpy(img[..., :-1])) + alphas.append(torch.from_numpy(img[..., -1:])) + + images = torch.stack(frames) if len(frames) > 0 else torch.zeros(0, 0, 0, 3) + if alphas is not None: + alphas = torch.stack(alphas) if len(alphas) > 0 else torch.zeros(0, 0, 0, 1) # Get frame rate frame_rate = Fraction(video_stream.average_rate) if video_stream.average_rate else Fraction(1) @@ -295,7 +310,7 @@ class VideoFromFile(VideoInput): }) metadata = container.metadata - return VideoComponents(images=images, audio=audio, frame_rate=frame_rate, metadata=metadata) + return 
VideoComponents(images=images, alpha=alphas, audio=audio, frame_rate=frame_rate, metadata=metadata)
 
     def get_components(self) -> VideoComponents:
         if isinstance(self.__file, io.BytesIO):
diff --git a/comfy_api/latest/_util/video_types.py b/comfy_api/latest/_util/video_types.py
index fd3b5a510..c92477f08 100644
--- a/comfy_api/latest/_util/video_types.py
+++ b/comfy_api/latest/_util/video_types.py
@@ -3,7 +3,7 @@ from dataclasses import dataclass
 from enum import Enum
 from fractions import Fraction
 from typing import Optional
-from .._input import ImageInput, AudioInput
+from .._input import ImageInput, AudioInput, MaskInput
 
 class VideoCodec(str, Enum):
     AUTO = "auto"
@@ -48,5 +48,4 @@ class VideoComponents:
     frame_rate: Fraction
     audio: Optional[AudioInput] = None
     metadata: Optional[dict] = None
-
-
+    alpha: Optional[MaskInput] = None

From 7385eb28004a45c723345b132cd4dcd327aabf9a Mon Sep 17 00:00:00 2001
From: "Daxiong (Lin)"
Date: Sun, 26 Apr 2026 22:59:16 +0800
Subject: [PATCH 30/81] Add new ComfyUI blueprints and fix subgraph naming
 (#13371)

* Remove local tag from subgraph name
* New Subgraph blueprints
* Remove duplicate blueprint
* Update Subgraph size
* Update subgraph
* Update Blueprint
* Update LTX 2.0 Pose to Video
* Fix crop blueprint split coverage

Made-with: Cursor

* Clean up image edit blueprint metadata

Made-with: Cursor

* Update subgraph blueprints

---------

Co-authored-by: Jedrzej Kosinski
---
 blueprints/Crop Images 2x2.json | 1620 +++++++
 blueprints/Crop Images 3x3.json | 2957 ++++++++++++
 .../Depth to Image (Z-Image-Turbo).json | 4 +-
 blueprints/Depth to Video (ltx 2.0).json | 4 +-
 .../First-Last-Frame to Video (LTX-2.3).json | 3360 +++++++++++++
 .../Image Edit (FireRed Image Edit 1.1).json | 2148 +++++++++
 blueprints/Image Edit (Flux.2 Klein 4B).json | 4 +-
 .../Image Edit (LongCat Image Edit).json | 1427 ++++++
 .../Image Inpainting (Flux.1 Fill Dev).json | 1205 +++++
 blueprints/Image Inpainting (Qwen-image).json | 4 +-
 .../Image Outpainting (Qwen-Image).json | 4 +-
 ...
Image to Layers(Qwen-Image-Layered).json} | 418 +- blueprints/Image to Video (LTX-2.3).json | 4233 ++++++++++++++++ blueprints/Pose to Video (LTX 2.0).json | 2004 ++++---- blueprints/Text to Image (Flux.1 Dev).json | 1046 ++++ .../Text to Image (Flux.1 Krea Dev).json | 1040 ++++ .../Text to Image (NetaYume Lumina).json | 1468 ++++++ .../Text to Image (Qwen-Image 2512).json | 1951 ++++++++ blueprints/Text to Image (Qwen-Image).json | 1881 ++++++++ blueprints/Text to Video (LTX-2.3).json | 4296 +++++++++++++++++ 20 files changed, 29993 insertions(+), 1081 deletions(-) create mode 100644 blueprints/Crop Images 2x2.json create mode 100644 blueprints/Crop Images 3x3.json create mode 100644 blueprints/First-Last-Frame to Video (LTX-2.3).json create mode 100644 blueprints/Image Edit (FireRed Image Edit 1.1).json create mode 100644 blueprints/Image Edit (LongCat Image Edit).json create mode 100644 blueprints/Image Inpainting (Flux.1 Fill Dev).json rename blueprints/{Image to Layers(Qwen-Image Layered).json => Image to Layers(Qwen-Image-Layered).json} (83%) create mode 100644 blueprints/Image to Video (LTX-2.3).json create mode 100644 blueprints/Text to Image (Flux.1 Dev).json create mode 100644 blueprints/Text to Image (Flux.1 Krea Dev).json create mode 100644 blueprints/Text to Image (NetaYume Lumina).json create mode 100644 blueprints/Text to Image (Qwen-Image 2512).json create mode 100644 blueprints/Text to Image (Qwen-Image).json create mode 100644 blueprints/Text to Video (LTX-2.3).json diff --git a/blueprints/Crop Images 2x2.json b/blueprints/Crop Images 2x2.json new file mode 100644 index 000000000..2aa42cfc3 --- /dev/null +++ b/blueprints/Crop Images 2x2.json @@ -0,0 +1,1620 @@ +{ + "revision": 0, + "last_node_id": 139, + "last_link_id": 0, + "nodes": [ + { + "id": 135, + "type": "3b5ed000-6ab3-4458-91f7-8d6d366b0b40", + "pos": [ + -2479.9999801712506, + 2019.9999372732784 + ], + "size": [ + 230, + 170 + ], + "flags": {}, + "order": 3, + "mode": 0, + "inputs": [ + { + "localized_name": "image", + "name": "image", + "type": "IMAGE", + "link": null + } + ], + "outputs": [ + { + "label": "top_left", + "localized_name": "IMAGE", + "name": "IMAGE", + "type": "IMAGE", + "links": [] + }, + { + "label": "bottom_left", + "localized_name": "IMAGE_1", + "name": "IMAGE_1", + "type": "IMAGE", + "links": [] + }, + { + "label": "top_right", + "localized_name": "IMAGE_2", + "name": "IMAGE_2", + "type": "IMAGE", + "links": [] + }, + { + "label": "bottom_right", + "localized_name": "IMAGE_3", + "name": "IMAGE_3", + "type": "IMAGE", + "links": [] + }, + { + "label": "images", + "name": "IMAGE_4", + "type": "IMAGE", + "links": [] + } + ], + "properties": { + "proxyWidgets": [], + "ue_properties": { + "widget_ue_connectable": {}, + "input_ue_unconnectable": {}, + "version": "7.7" + }, + "cnr_id": "comfy-core", + "ver": "0.18.1" + }, + "widgets_values": [], + "title": "Crop Images 2x2" + } + ], + "links": [], + "version": 0.4, + "definitions": { + "subgraphs": [ + { + "id": "3b5ed000-6ab3-4458-91f7-8d6d366b0b40", + "version": 1, + "state": { + "lastGroupId": 3, + "lastNodeId": 142, + "lastLinkId": 245, + "lastRerouteId": 0 + }, + "revision": 0, + "config": {}, + "name": "Crop Images 2x2", + "inputNode": { + "id": -10, + "bounding": [ + -10, + 1570, + 120, + 60 + ] + }, + "outputNode": { + "id": -20, + "bounding": [ + 2919.9998608196274, + 1435, + 120, + 140 + ] + }, + "inputs": [ + { + "id": "741854dd-bfb1-4700-ba8c-3b9dea59d021", + "name": "image", + "type": "IMAGE", + "linkIds": [ + 2, + 11, + 13, + 30, + 32 
+ ], + "localized_name": "image", + "pos": [ + 90, + 1590 + ] + } + ], + "outputs": [ + { + "id": "0eaca6d4-679a-433e-9703-bfa6dceacb18", + "name": "IMAGE", + "type": "IMAGE", + "linkIds": [ + 41 + ], + "localized_name": "IMAGE", + "label": "top_left", + "pos": [ + 2939.9998608196274, + 1455 + ] + }, + { + "id": "fff5a1ad-3a74-4c87-938c-ee0fff55f840", + "name": "IMAGE_1", + "type": "IMAGE", + "linkIds": [ + 42 + ], + "localized_name": "IMAGE_1", + "label": "bottom_left", + "pos": [ + 2939.9998608196274, + 1475 + ] + }, + { + "id": "08f40978-fb25-4d98-b716-b61e43b16043", + "name": "IMAGE_2", + "type": "IMAGE", + "linkIds": [ + 43 + ], + "localized_name": "IMAGE_2", + "label": "top_right", + "pos": [ + 2939.9998608196274, + 1495 + ] + }, + { + "id": "17b9416f-3369-43c1-b62f-3e31fc2a7e32", + "name": "IMAGE_3", + "type": "IMAGE", + "linkIds": [ + 44 + ], + "localized_name": "IMAGE_3", + "label": "bottom_right", + "pos": [ + 2939.9998608196274, + 1515 + ] + }, + { + "id": "430e2f3b-c617-4549-9daf-3ebf5be423a3", + "name": "IMAGE_4", + "type": "IMAGE", + "linkIds": [ + 240 + ], + "label": "images", + "pos": [ + 2939.9998608196274, + 1535 + ] + } + ], + "widgets": [], + "nodes": [ + { + "id": 7, + "type": "ComfyMathExpression", + "pos": [ + 740, + 1390 + ], + "size": [ + 370, + 190 + ], + "flags": {}, + "order": 1, + "mode": 0, + "inputs": [ + { + "label": "a", + "localized_name": "values.a", + "name": "values.a", + "type": "FLOAT,INT", + "link": 3 + }, + { + "label": "b", + "localized_name": "values.b", + "name": "values.b", + "shape": 7, + "type": "FLOAT,INT", + "link": 4 + }, + { + "label": "c", + "localized_name": "values.c", + "name": "values.c", + "shape": 7, + "type": "FLOAT,INT", + "link": null + }, + { + "localized_name": "expression", + "name": "expression", + "type": "STRING", + "widget": { + "name": "expression" + }, + "link": null + } + ], + "outputs": [ + { + "localized_name": "FLOAT", + "name": "FLOAT", + "type": "FLOAT", + "links": null + }, + { + "localized_name": "INT", + "name": "INT", + "type": "INT", + "links": [ + 7, + 14, + 28, + 40, + 242 + ] + } + ], + "properties": { + "ue_properties": { + "widget_ue_connectable": {}, + "version": "7.7", + "input_ue_unconnectable": {} + }, + "cnr_id": "comfy-core", + "ver": "0.18.1", + "Node name for S&R": "ComfyMathExpression" + }, + "widgets_values": [ + "max(1, int(a/b))" + ] + }, + { + "id": 8, + "type": "GetImageSize", + "pos": [ + 390, + 1450 + ], + "size": [ + 230, + 120 + ], + "flags": {}, + "order": 2, + "mode": 0, + "inputs": [ + { + "localized_name": "image", + "name": "image", + "type": "IMAGE", + "link": 2 + } + ], + "outputs": [ + { + "localized_name": "width", + "name": "width", + "type": "INT", + "links": [ + 3, + 241 + ] + }, + { + "localized_name": "height", + "name": "height", + "type": "INT", + "links": [ + 5, + 245 + ] + }, + { + "localized_name": "batch_size", + "name": "batch_size", + "type": "INT", + "links": null + } + ], + "properties": { + "ue_properties": { + "widget_ue_connectable": {}, + "version": "7.7", + "input_ue_unconnectable": {} + }, + "cnr_id": "comfy-core", + "ver": "0.18.1", + "Node name for S&R": "GetImageSize" + } + }, + { + "id": 9, + "type": "PrimitiveInt", + "pos": [ + 390, + 1650 + ], + "size": [ + 230, + 110 + ], + "flags": {}, + "order": 0, + "mode": 0, + "inputs": [ + { + "localized_name": "value", + "name": "value", + "type": "INT", + "widget": { + "name": "value" + }, + "link": null + } + ], + "outputs": [ + { + "localized_name": "INT", + "name": "INT", + "type": "INT", + "links": [ + 4, 
+ 6 + ] + } + ], + "properties": { + "ue_properties": { + "widget_ue_connectable": {}, + "version": "7.7", + "input_ue_unconnectable": {} + }, + "cnr_id": "comfy-core", + "ver": "0.18.1", + "Node name for S&R": "PrimitiveInt" + }, + "widgets_values": [ + 2, + "fixed" + ] + }, + { + "id": 10, + "type": "ImageCropV2", + "pos": [ + 1710, + 430 + ], + "size": [ + 300, + 480 + ], + "flags": {}, + "order": 3, + "mode": 0, + "inputs": [ + { + "localized_name": "image", + "name": "image", + "type": "IMAGE", + "link": 11 + }, + { + "localized_name": "crop_region", + "name": "crop_region", + "type": "BOUNDING_BOX", + "widget": { + "name": "crop_region" + }, + "link": 9 + } + ], + "outputs": [ + { + "localized_name": "IMAGE", + "name": "IMAGE", + "type": "IMAGE", + "links": [ + 41, + 236 + ] + } + ], + "properties": { + "ue_properties": { + "widget_ue_connectable": {}, + "input_ue_unconnectable": {}, + "version": "7.7" + }, + "cnr_id": "comfy-core", + "ver": "0.18.1", + "Node name for S&R": "ImageCropV2" + }, + "widgets_values": [ + { + "x": 0, + "y": 0, + "width": 512, + "height": 512 + }, + 0, + 0, + 512, + 512 + ] + }, + { + "id": 12, + "type": "PrimitiveBoundingBox", + "pos": [ + 1370, + 570 + ], + "size": [ + 270, + 200 + ], + "flags": {}, + "order": 4, + "mode": 0, + "inputs": [ + { + "localized_name": "x", + "name": "x", + "type": "INT", + "widget": { + "name": "x" + }, + "link": null + }, + { + "localized_name": "y", + "name": "y", + "type": "INT", + "widget": { + "name": "y" + }, + "link": null + }, + { + "localized_name": "width", + "name": "width", + "type": "INT", + "widget": { + "name": "width" + }, + "link": 7 + }, + { + "localized_name": "height", + "name": "height", + "type": "INT", + "widget": { + "name": "height" + }, + "link": 8 + } + ], + "outputs": [ + { + "localized_name": "BOUNDING_BOX", + "name": "BOUNDING_BOX", + "type": "BOUNDING_BOX", + "links": [ + 9 + ] + } + ], + "properties": { + "ue_properties": { + "widget_ue_connectable": {}, + "input_ue_unconnectable": {}, + "version": "7.7" + }, + "cnr_id": "comfy-core", + "ver": "0.18.1", + "Node name for S&R": "PrimitiveBoundingBox" + }, + "widgets_values": [ + 0, + 0, + 512, + 512 + ] + }, + { + "id": 13, + "type": "ComfyMathExpression", + "pos": [ + 750, + 1650 + ], + "size": [ + 370, + 190 + ], + "flags": {}, + "order": 5, + "mode": 0, + "inputs": [ + { + "label": "a", + "localized_name": "values.a", + "name": "values.a", + "type": "FLOAT,INT", + "link": 5 + }, + { + "label": "b", + "localized_name": "values.b", + "name": "values.b", + "shape": 7, + "type": "FLOAT,INT", + "link": 6 + }, + { + "label": "c", + "localized_name": "values.c", + "name": "values.c", + "shape": 7, + "type": "FLOAT,INT", + "link": null + }, + { + "localized_name": "expression", + "name": "expression", + "type": "STRING", + "widget": { + "name": "expression" + }, + "link": null + } + ], + "outputs": [ + { + "localized_name": "FLOAT", + "name": "FLOAT", + "type": "FLOAT", + "links": null + }, + { + "localized_name": "INT", + "name": "INT", + "type": "INT", + "links": [ + 8, + 23, + 27, + 39, + 246 + ] + } + ], + "properties": { + "ue_properties": { + "widget_ue_connectable": {}, + "version": "7.7", + "input_ue_unconnectable": {} + }, + "cnr_id": "comfy-core", + "ver": "0.18.1", + "Node name for S&R": "ComfyMathExpression" + }, + "widgets_values": [ + "max(1, int(a/b))" + ] + }, + { + "id": 138, + "type": "ComfyMathExpression", + "pos": [ + 1170, + 1210 + ], + "size": [ + 420, + 190 + ], + "flags": {}, + "order": 13, + "mode": 0, + "inputs": [ + { + 
"label": "a", + "localized_name": "values.a", + "name": "values.a", + "type": "FLOAT,INT", + "link": 241 + }, + { + "label": "b", + "localized_name": "values.b", + "name": "values.b", + "shape": 7, + "type": "FLOAT,INT", + "link": 242 + }, + { + "label": "c", + "localized_name": "values.c", + "name": "values.c", + "shape": 7, + "type": "FLOAT,INT", + "link": null + }, + { + "localized_name": "expression", + "name": "expression", + "type": "STRING", + "widget": { + "name": "expression" + }, + "link": null + } + ], + "outputs": [ + { + "localized_name": "FLOAT", + "name": "FLOAT", + "type": "FLOAT", + "links": null + }, + { + "localized_name": "INT", + "name": "INT", + "type": "INT", + "links": [ + 243, + 244 + ] + } + ], + "title": "Math Expression (Right Width)", + "properties": { + "ue_properties": { + "widget_ue_connectable": {}, + "version": "7.7", + "input_ue_unconnectable": {} + }, + "cnr_id": "comfy-core", + "ver": "0.18.1", + "Node name for S&R": "ComfyMathExpression" + }, + "widgets_values": [ + "max(1, a - b)" + ] + }, + { + "id": 139, + "type": "ComfyMathExpression", + "pos": [ + 1170, + 1860 + ], + "size": [ + 420, + 190 + ], + "flags": {}, + "order": 14, + "mode": 0, + "inputs": [ + { + "label": "a", + "localized_name": "values.a", + "name": "values.a", + "type": "FLOAT,INT", + "link": 245 + }, + { + "label": "b", + "localized_name": "values.b", + "name": "values.b", + "shape": 7, + "type": "FLOAT,INT", + "link": 246 + }, + { + "label": "c", + "localized_name": "values.c", + "name": "values.c", + "shape": 7, + "type": "FLOAT,INT", + "link": null + }, + { + "localized_name": "expression", + "name": "expression", + "type": "STRING", + "widget": { + "name": "expression" + }, + "link": null + } + ], + "outputs": [ + { + "localized_name": "FLOAT", + "name": "FLOAT", + "type": "FLOAT", + "links": null + }, + { + "localized_name": "INT", + "name": "INT", + "type": "INT", + "links": [ + 247, + 248 + ] + } + ], + "title": "Math Expression (Bottom Height)", + "properties": { + "ue_properties": { + "widget_ue_connectable": {}, + "version": "7.7", + "input_ue_unconnectable": {} + }, + "cnr_id": "comfy-core", + "ver": "0.18.1", + "Node name for S&R": "ComfyMathExpression" + }, + "widgets_values": [ + "max(1, a - b)" + ] + }, + { + "id": 15, + "type": "ImageCropV2", + "pos": [ + 1740, + 1600 + ], + "size": [ + 300, + 480 + ], + "flags": {}, + "order": 6, + "mode": 0, + "inputs": [ + { + "localized_name": "image", + "name": "image", + "type": "IMAGE", + "link": 13 + }, + { + "localized_name": "crop_region", + "name": "crop_region", + "type": "BOUNDING_BOX", + "widget": { + "name": "crop_region" + }, + "link": 12 + } + ], + "outputs": [ + { + "localized_name": "IMAGE", + "name": "IMAGE", + "type": "IMAGE", + "links": [ + 42, + 238 + ] + } + ], + "properties": { + "ue_properties": { + "widget_ue_connectable": {}, + "input_ue_unconnectable": {}, + "version": "7.7" + }, + "cnr_id": "comfy-core", + "ver": "0.18.1", + "Node name for S&R": "ImageCropV2" + }, + "widgets_values": [ + { + "x": 0, + "y": 0, + "width": 512, + "height": 512 + }, + 0, + 0, + 512, + 512 + ] + }, + { + "id": 16, + "type": "PrimitiveBoundingBox", + "pos": [ + 1350, + 1780 + ], + "size": [ + 270, + 200 + ], + "flags": {}, + "order": 7, + "mode": 0, + "inputs": [ + { + "localized_name": "x", + "name": "x", + "type": "INT", + "widget": { + "name": "x" + }, + "link": null + }, + { + "localized_name": "y", + "name": "y", + "type": "INT", + "widget": { + "name": "y" + }, + "link": 23 + }, + { + "localized_name": "width", + "name": 
"width", + "type": "INT", + "widget": { + "name": "width" + }, + "link": 14 + }, + { + "localized_name": "height", + "name": "height", + "type": "INT", + "widget": { + "name": "height" + }, + "link": 247 + } + ], + "outputs": [ + { + "localized_name": "BOUNDING_BOX", + "name": "BOUNDING_BOX", + "type": "BOUNDING_BOX", + "links": [ + 12 + ] + } + ], + "properties": { + "ue_properties": { + "widget_ue_connectable": {}, + "input_ue_unconnectable": {}, + "version": "7.7" + }, + "cnr_id": "comfy-core", + "ver": "0.18.1", + "Node name for S&R": "PrimitiveBoundingBox" + }, + "widgets_values": [ + 0, + 0, + 512, + 512 + ] + }, + { + "id": 25, + "type": "PrimitiveBoundingBox", + "pos": [ + 1350, + 1200 + ], + "size": [ + 270, + 200 + ], + "flags": {}, + "order": 8, + "mode": 0, + "inputs": [ + { + "localized_name": "x", + "name": "x", + "type": "INT", + "widget": { + "name": "x" + }, + "link": 28 + }, + { + "localized_name": "y", + "name": "y", + "type": "INT", + "widget": { + "name": "y" + }, + "link": null + }, + { + "localized_name": "width", + "name": "width", + "type": "INT", + "widget": { + "name": "width" + }, + "link": 243 + }, + { + "localized_name": "height", + "name": "height", + "type": "INT", + "widget": { + "name": "height" + }, + "link": 27 + } + ], + "outputs": [ + { + "localized_name": "BOUNDING_BOX", + "name": "BOUNDING_BOX", + "type": "BOUNDING_BOX", + "links": [ + 29 + ] + } + ], + "properties": { + "ue_properties": { + "widget_ue_connectable": {}, + "input_ue_unconnectable": {}, + "version": "7.7" + }, + "cnr_id": "comfy-core", + "ver": "0.18.1", + "Node name for S&R": "PrimitiveBoundingBox" + }, + "widgets_values": [ + 6, + 0, + 512, + 512 + ] + }, + { + "id": 26, + "type": "ImageCropV2", + "pos": [ + 1720, + 1050 + ], + "size": [ + 300, + 480 + ], + "flags": {}, + "order": 9, + "mode": 0, + "inputs": [ + { + "localized_name": "image", + "name": "image", + "type": "IMAGE", + "link": 30 + }, + { + "localized_name": "crop_region", + "name": "crop_region", + "type": "BOUNDING_BOX", + "widget": { + "name": "crop_region" + }, + "link": 29 + } + ], + "outputs": [ + { + "localized_name": "IMAGE", + "name": "IMAGE", + "type": "IMAGE", + "links": [ + 43, + 237 + ] + } + ], + "properties": { + "ue_properties": { + "widget_ue_connectable": {}, + "input_ue_unconnectable": {}, + "version": "7.7" + }, + "cnr_id": "comfy-core", + "ver": "0.18.1", + "Node name for S&R": "ImageCropV2" + }, + "widgets_values": [ + { + "x": 0, + "y": 0, + "width": 512, + "height": 512 + }, + 0, + 0, + 512, + 512 + ] + }, + { + "id": 30, + "type": "ImageCropV2", + "pos": [ + 1740, + 2130 + ], + "size": [ + 300, + 480 + ], + "flags": {}, + "order": 10, + "mode": 0, + "inputs": [ + { + "localized_name": "image", + "name": "image", + "type": "IMAGE", + "link": 32 + }, + { + "localized_name": "crop_region", + "name": "crop_region", + "type": "BOUNDING_BOX", + "widget": { + "name": "crop_region" + }, + "link": 35 + } + ], + "outputs": [ + { + "localized_name": "IMAGE", + "name": "IMAGE", + "type": "IMAGE", + "links": [ + 44, + 239 + ] + } + ], + "properties": { + "ue_properties": { + "widget_ue_connectable": {}, + "input_ue_unconnectable": {}, + "version": "7.7" + }, + "cnr_id": "comfy-core", + "ver": "0.18.1", + "Node name for S&R": "ImageCropV2" + }, + "widgets_values": [ + { + "x": 0, + "y": 0, + "width": 512, + "height": 512 + }, + 0, + 0, + 512, + 512 + ] + }, + { + "id": 32, + "type": "PrimitiveBoundingBox", + "pos": [ + 1370, + 2280 + ], + "size": [ + 270, + 200 + ], + "flags": {}, + "order": 11, + "mode": 0, + 
"inputs": [ + { + "localized_name": "x", + "name": "x", + "type": "INT", + "widget": { + "name": "x" + }, + "link": 40 + }, + { + "localized_name": "y", + "name": "y", + "type": "INT", + "widget": { + "name": "y" + }, + "link": 39 + }, + { + "localized_name": "width", + "name": "width", + "type": "INT", + "widget": { + "name": "width" + }, + "link": 244 + }, + { + "localized_name": "height", + "name": "height", + "type": "INT", + "widget": { + "name": "height" + }, + "link": 248 + } + ], + "outputs": [ + { + "localized_name": "BOUNDING_BOX", + "name": "BOUNDING_BOX", + "type": "BOUNDING_BOX", + "links": [ + 35 + ] + } + ], + "properties": { + "ue_properties": { + "widget_ue_connectable": {}, + "input_ue_unconnectable": {}, + "version": "7.7" + }, + "cnr_id": "comfy-core", + "ver": "0.18.1", + "Node name for S&R": "PrimitiveBoundingBox" + }, + "widgets_values": [ + 6, + 0, + 512, + 512 + ] + }, + { + "id": 137, + "type": "BatchImagesNode", + "pos": [ + 2520, + 1540 + ], + "size": [ + 230, + 170 + ], + "flags": {}, + "order": 12, + "mode": 0, + "inputs": [ + { + "label": "image0", + "localized_name": "images.image0", + "name": "images.image0", + "type": "IMAGE", + "link": 236 + }, + { + "label": "image1", + "localized_name": "images.image1", + "name": "images.image1", + "type": "IMAGE", + "link": 237 + }, + { + "label": "image2", + "localized_name": "images.image2", + "name": "images.image2", + "shape": 7, + "type": "IMAGE", + "link": 238 + }, + { + "label": "image3", + "localized_name": "images.image3", + "name": "images.image3", + "shape": 7, + "type": "IMAGE", + "link": 239 + }, + { + "label": "image4", + "localized_name": "images.image4", + "name": "images.image4", + "shape": 7, + "type": "IMAGE", + "link": null + } + ], + "outputs": [ + { + "localized_name": "IMAGE", + "name": "IMAGE", + "type": "IMAGE", + "links": [ + 240 + ] + } + ], + "properties": { + "ue_properties": { + "widget_ue_connectable": {}, + "input_ue_unconnectable": {}, + "version": "7.7" + }, + "cnr_id": "comfy-core", + "ver": "0.18.1", + "Node name for S&R": "BatchImagesNode" + } + } + ], + "groups": [ + { + "id": 1, + "title": "Crop Images 2x2", + "bounding": [ + 380, + 360, + 1710, + 2270 + ], + "color": "#3f789e", + "font_size": 24, + "flags": {} + } + ], + "links": [ + { + "id": 3, + "origin_id": 8, + "origin_slot": 0, + "target_id": 7, + "target_slot": 0, + "type": "INT" + }, + { + "id": 4, + "origin_id": 9, + "origin_slot": 0, + "target_id": 7, + "target_slot": 1, + "type": "INT" + }, + { + "id": 9, + "origin_id": 12, + "origin_slot": 0, + "target_id": 10, + "target_slot": 1, + "type": "BOUNDING_BOX" + }, + { + "id": 7, + "origin_id": 7, + "origin_slot": 1, + "target_id": 12, + "target_slot": 2, + "type": "INT" + }, + { + "id": 8, + "origin_id": 13, + "origin_slot": 1, + "target_id": 12, + "target_slot": 3, + "type": "INT" + }, + { + "id": 5, + "origin_id": 8, + "origin_slot": 1, + "target_id": 13, + "target_slot": 0, + "type": "INT" + }, + { + "id": 6, + "origin_id": 9, + "origin_slot": 0, + "target_id": 13, + "target_slot": 1, + "type": "INT" + }, + { + "id": 12, + "origin_id": 16, + "origin_slot": 0, + "target_id": 15, + "target_slot": 1, + "type": "BOUNDING_BOX" + }, + { + "id": 23, + "origin_id": 13, + "origin_slot": 1, + "target_id": 16, + "target_slot": 1, + "type": "INT" + }, + { + "id": 14, + "origin_id": 7, + "origin_slot": 1, + "target_id": 16, + "target_slot": 2, + "type": "INT" + }, + { + "id": 247, + "origin_id": 139, + "origin_slot": 1, + "target_id": 16, + "target_slot": 3, + "type": "INT" + }, + 
{ + "id": 28, + "origin_id": 7, + "origin_slot": 1, + "target_id": 25, + "target_slot": 0, + "type": "INT" + }, + { + "id": 243, + "origin_id": 138, + "origin_slot": 1, + "target_id": 25, + "target_slot": 2, + "type": "INT" + }, + { + "id": 27, + "origin_id": 13, + "origin_slot": 1, + "target_id": 25, + "target_slot": 3, + "type": "INT" + }, + { + "id": 29, + "origin_id": 25, + "origin_slot": 0, + "target_id": 26, + "target_slot": 1, + "type": "BOUNDING_BOX" + }, + { + "id": 35, + "origin_id": 32, + "origin_slot": 0, + "target_id": 30, + "target_slot": 1, + "type": "BOUNDING_BOX" + }, + { + "id": 40, + "origin_id": 7, + "origin_slot": 1, + "target_id": 32, + "target_slot": 0, + "type": "INT" + }, + { + "id": 39, + "origin_id": 13, + "origin_slot": 1, + "target_id": 32, + "target_slot": 1, + "type": "INT" + }, + { + "id": 244, + "origin_id": 138, + "origin_slot": 1, + "target_id": 32, + "target_slot": 2, + "type": "INT" + }, + { + "id": 248, + "origin_id": 139, + "origin_slot": 1, + "target_id": 32, + "target_slot": 3, + "type": "INT" + }, + { + "id": 241, + "origin_id": 8, + "origin_slot": 0, + "target_id": 138, + "target_slot": 0, + "type": "INT" + }, + { + "id": 242, + "origin_id": 7, + "origin_slot": 1, + "target_id": 138, + "target_slot": 1, + "type": "INT" + }, + { + "id": 245, + "origin_id": 8, + "origin_slot": 1, + "target_id": 139, + "target_slot": 0, + "type": "INT" + }, + { + "id": 246, + "origin_id": 13, + "origin_slot": 1, + "target_id": 139, + "target_slot": 1, + "type": "INT" + }, + { + "id": 2, + "origin_id": -10, + "origin_slot": 0, + "target_id": 8, + "target_slot": 0, + "type": "IMAGE" + }, + { + "id": 11, + "origin_id": -10, + "origin_slot": 0, + "target_id": 10, + "target_slot": 0, + "type": "IMAGE" + }, + { + "id": 13, + "origin_id": -10, + "origin_slot": 0, + "target_id": 15, + "target_slot": 0, + "type": "IMAGE" + }, + { + "id": 30, + "origin_id": -10, + "origin_slot": 0, + "target_id": 26, + "target_slot": 0, + "type": "IMAGE" + }, + { + "id": 32, + "origin_id": -10, + "origin_slot": 0, + "target_id": 30, + "target_slot": 0, + "type": "IMAGE" + }, + { + "id": 41, + "origin_id": 10, + "origin_slot": 0, + "target_id": -20, + "target_slot": 0, + "type": "IMAGE" + }, + { + "id": 42, + "origin_id": 15, + "origin_slot": 0, + "target_id": -20, + "target_slot": 1, + "type": "IMAGE" + }, + { + "id": 43, + "origin_id": 26, + "origin_slot": 0, + "target_id": -20, + "target_slot": 2, + "type": "IMAGE" + }, + { + "id": 44, + "origin_id": 30, + "origin_slot": 0, + "target_id": -20, + "target_slot": 3, + "type": "IMAGE" + }, + { + "id": 236, + "origin_id": 10, + "origin_slot": 0, + "target_id": 137, + "target_slot": 0, + "type": "IMAGE" + }, + { + "id": 237, + "origin_id": 26, + "origin_slot": 0, + "target_id": 137, + "target_slot": 1, + "type": "IMAGE" + }, + { + "id": 238, + "origin_id": 15, + "origin_slot": 0, + "target_id": 137, + "target_slot": 2, + "type": "IMAGE" + }, + { + "id": 239, + "origin_id": 30, + "origin_slot": 0, + "target_id": 137, + "target_slot": 3, + "type": "IMAGE" + }, + { + "id": 240, + "origin_id": 137, + "origin_slot": 0, + "target_id": -20, + "target_slot": 4, + "type": "IMAGE" + } + ], + "extra": {}, + "category": "Image Tools/Crop" + } + ] + }, + "extra": { + "ue_links": [], + "links_added_by_ue": [] + } +} \ No newline at end of file diff --git a/blueprints/Crop Images 3x3.json b/blueprints/Crop Images 3x3.json new file mode 100644 index 000000000..3a3615ac8 --- /dev/null +++ b/blueprints/Crop Images 3x3.json @@ -0,0 +1,2957 @@ +{ + "revision": 0, + 
"last_node_id": 141, + "last_link_id": 0, + "nodes": [ + { + "id": 134, + "type": "7fd47bca-ff89-476c-a98d-ca6f7cf756fe", + "pos": [ + -2620, + 1620 + ], + "size": [ + 230, + 290 + ], + "flags": {}, + "order": 2, + "mode": 0, + "inputs": [ + { + "localized_name": "image", + "name": "image", + "type": "IMAGE", + "link": null + } + ], + "outputs": [ + { + "label": "top_left", + "localized_name": "IMAGE", + "name": "IMAGE", + "type": "IMAGE", + "links": [] + }, + { + "label": "top_center", + "name": "IMAGE_1", + "type": "IMAGE", + "links": [] + }, + { + "label": "top_right", + "name": "IMAGE_2", + "type": "IMAGE", + "links": [] + }, + { + "label": "middle_left", + "name": "IMAGE_3", + "type": "IMAGE", + "links": [] + }, + { + "label": "middle_center", + "name": "IMAGE_4", + "type": "IMAGE", + "links": [] + }, + { + "label": "middle_right", + "name": "IMAGE_5", + "type": "IMAGE", + "links": [] + }, + { + "label": "bottom_left", + "name": "IMAGE_6", + "type": "IMAGE", + "links": [] + }, + { + "label": "bottom_center", + "name": "IMAGE_7", + "type": "IMAGE", + "links": [] + }, + { + "label": "bottom_right", + "name": "IMAGE_8", + "type": "IMAGE", + "links": [] + }, + { + "label": "images", + "name": "IMAGE_9", + "type": "IMAGE", + "links": [] + } + ], + "properties": { + "proxyWidgets": [], + "ue_properties": { + "widget_ue_connectable": {}, + "input_ue_unconnectable": {}, + "version": "7.7" + }, + "cnr_id": "comfy-core", + "ver": "0.18.1" + }, + "widgets_values": [], + "title": "Crop Images 3x3" + } + ], + "links": [], + "version": 0.4, + "definitions": { + "subgraphs": [ + { + "id": "7fd47bca-ff89-476c-a98d-ca6f7cf756fe", + "version": 1, + "state": { + "lastGroupId": 3, + "lastNodeId": 142, + "lastLinkId": 245, + "lastRerouteId": 0 + }, + "revision": 0, + "config": {}, + "name": "Crop Images 3x3", + "inputNode": { + "id": -10, + "bounding": [ + -710, + 5440, + 120, + 60 + ] + }, + "outputNode": { + "id": -20, + "bounding": [ + 3430, + 5270, + 121.720703125, + 240 + ] + }, + "inputs": [ + { + "id": "e54e8e8b-6ce6-4f80-a38f-87a77d990efc", + "name": "image", + "type": "IMAGE", + "linkIds": [ + 74, + 75, + 82, + 91, + 94, + 117, + 129, + 137, + 148, + 157 + ], + "localized_name": "image", + "pos": [ + -610, + 5460 + ] + } + ], + "outputs": [ + { + "id": "3dd8abe2-a7da-4052-a556-9ae157ff3cf4", + "name": "IMAGE", + "type": "IMAGE", + "linkIds": [ + 101 + ], + "localized_name": "IMAGE", + "label": "top_left", + "pos": [ + 3450, + 5290 + ] + }, + { + "id": "aa220733-759b-474e-9d29-634a3a23c5da", + "name": "IMAGE_1", + "type": "IMAGE", + "linkIds": [ + 192 + ], + "label": "top_center", + "pos": [ + 3450, + 5310 + ] + }, + { + "id": "f1911df1-d50c-4bf8-9623-5e581d2a8902", + "name": "IMAGE_2", + "type": "IMAGE", + "linkIds": [ + 193 + ], + "label": "top_right", + "pos": [ + 3450, + 5330 + ] + }, + { + "id": "71ebb807-e7e9-438f-990d-511e0745d10d", + "name": "IMAGE_3", + "type": "IMAGE", + "linkIds": [ + 194 + ], + "label": "middle_left", + "pos": [ + 3450, + 5350 + ] + }, + { + "id": "4fb9c99c-3340-4de5-ba2d-51a653aab0b3", + "name": "IMAGE_4", + "type": "IMAGE", + "linkIds": [ + 195 + ], + "label": "middle_center", + "pos": [ + 3450, + 5370 + ] + }, + { + "id": "398643e8-e349-4d59-9c68-6403b7a2772d", + "name": "IMAGE_5", + "type": "IMAGE", + "linkIds": [ + 196 + ], + "label": "middle_right", + "pos": [ + 3450, + 5390 + ] + }, + { + "id": "5b11949c-f4cc-4525-86ae-690e30d3dada", + "name": "IMAGE_6", + "type": "IMAGE", + "linkIds": [ + 197 + ], + "label": "bottom_left", + "pos": [ + 3450, + 5410 + ] + }, + { 
+ "id": "82c69fd9-de36-4c8f-8311-a9e49159640b", + "name": "IMAGE_7", + "type": "IMAGE", + "linkIds": [ + 198 + ], + "label": "bottom_center", + "pos": [ + 3450, + 5430 + ] + }, + { + "id": "aef678db-20aa-47d4-be8a-978065f078c6", + "name": "IMAGE_8", + "type": "IMAGE", + "linkIds": [ + 199 + ], + "label": "bottom_right", + "pos": [ + 3450, + 5450 + ] + }, + { + "id": "77574277-edde-439c-8720-7daa849f4f27", + "name": "IMAGE_9", + "type": "IMAGE", + "linkIds": [ + 226 + ], + "label": "images", + "pos": [ + 3450, + 5470 + ] + } + ], + "widgets": [], + "nodes": [ + { + "id": 50, + "type": "ComfyMathExpression", + "pos": [ + 770, + 5310 + ], + "size": [ + 370, + 190 + ], + "flags": {}, + "order": 1, + "mode": 0, + "inputs": [ + { + "label": "a", + "localized_name": "values.a", + "name": "values.a", + "type": "FLOAT,INT", + "link": 73 + }, + { + "label": "b", + "localized_name": "values.b", + "name": "values.b", + "shape": 7, + "type": "FLOAT,INT", + "link": 108 + }, + { + "label": "c", + "localized_name": "values.c", + "name": "values.c", + "shape": 7, + "type": "FLOAT,INT", + "link": null + }, + { + "localized_name": "expression", + "name": "expression", + "type": "STRING", + "widget": { + "name": "expression" + }, + "link": null + } + ], + "outputs": [ + { + "localized_name": "FLOAT", + "name": "FLOAT", + "type": "FLOAT", + "links": null + }, + { + "localized_name": "INT", + "name": "INT", + "type": "INT", + "links": [ + 77, + 85, + 89, + 97, + 99, + 127, + 142, + 146, + 152, + 300 + ] + } + ], + "title": "Math Expression (Width)", + "properties": { + "ue_properties": { + "widget_ue_connectable": {}, + "version": "7.7", + "input_ue_unconnectable": {} + }, + "cnr_id": "comfy-core", + "ver": "0.18.1", + "Node name for S&R": "ComfyMathExpression" + }, + "widgets_values": [ + "max(1, int(a/b))" + ] + }, + { + "id": 51, + "type": "GetImageSize", + "pos": [ + 440, + 5390 + ], + "size": [ + 230, + 120 + ], + "flags": {}, + "order": 2, + "mode": 0, + "inputs": [ + { + "localized_name": "image", + "name": "image", + "type": "IMAGE", + "link": 74 + } + ], + "outputs": [ + { + "localized_name": "width", + "name": "width", + "type": "INT", + "links": [ + 73, + 300 + ] + }, + { + "localized_name": "height", + "name": "height", + "type": "INT", + "links": [ + 79, + 305 + ] + }, + { + "localized_name": "batch_size", + "name": "batch_size", + "type": "INT", + "links": null + } + ], + "properties": { + "ue_properties": { + "widget_ue_connectable": {}, + "version": "7.7", + "input_ue_unconnectable": {} + }, + "cnr_id": "comfy-core", + "ver": "0.18.1", + "Node name for S&R": "GetImageSize" + } + }, + { + "id": 52, + "type": "PrimitiveInt", + "pos": [ + 440, + 5590 + ], + "size": [ + 230, + 110 + ], + "flags": {}, + "order": 0, + "mode": 0, + "inputs": [ + { + "localized_name": "value", + "name": "value", + "type": "INT", + "widget": { + "name": "value" + }, + "link": null + } + ], + "outputs": [ + { + "localized_name": "INT", + "name": "INT", + "type": "INT", + "links": [ + 80, + 108 + ] + } + ], + "properties": { + "ue_properties": { + "widget_ue_connectable": {}, + "version": "7.7", + "input_ue_unconnectable": {} + }, + "cnr_id": "comfy-core", + "ver": "0.18.1", + "Node name for S&R": "PrimitiveInt" + }, + "widgets_values": [ + 3, + "fixed" + ] + }, + { + "id": 53, + "type": "ImageCropV2", + "pos": [ + 2080, + 3020 + ], + "size": [ + 300, + 480 + ], + "flags": {}, + "order": 3, + "mode": 0, + "inputs": [ + { + "localized_name": "image", + "name": "image", + "type": "IMAGE", + "link": 75 + }, + { + 
"localized_name": "crop_region", + "name": "crop_region", + "type": "BOUNDING_BOX", + "widget": { + "name": "crop_region" + }, + "link": 76 + } + ], + "outputs": [ + { + "localized_name": "IMAGE", + "name": "IMAGE", + "type": "IMAGE", + "links": [ + 101, + 227 + ] + } + ], + "properties": { + "ue_properties": { + "widget_ue_connectable": {}, + "input_ue_unconnectable": {}, + "version": "7.7" + }, + "cnr_id": "comfy-core", + "ver": "0.18.1", + "Node name for S&R": "ImageCropV2" + }, + "widgets_values": [ + { + "x": 0, + "y": 0, + "width": 512, + "height": 512 + }, + 0, + 0, + 512, + 512 + ] + }, + { + "id": 54, + "type": "PrimitiveBoundingBox", + "pos": [ + 1740, + 3160 + ], + "size": [ + 270, + 200 + ], + "flags": {}, + "order": 4, + "mode": 0, + "inputs": [ + { + "localized_name": "x", + "name": "x", + "type": "INT", + "widget": { + "name": "x" + }, + "link": null + }, + { + "localized_name": "y", + "name": "y", + "type": "INT", + "widget": { + "name": "y" + }, + "link": null + }, + { + "localized_name": "width", + "name": "width", + "type": "INT", + "widget": { + "name": "width" + }, + "link": 77 + }, + { + "localized_name": "height", + "name": "height", + "type": "INT", + "widget": { + "name": "height" + }, + "link": 78 + } + ], + "outputs": [ + { + "localized_name": "BOUNDING_BOX", + "name": "BOUNDING_BOX", + "type": "BOUNDING_BOX", + "links": [ + 76 + ] + } + ], + "properties": { + "ue_properties": { + "widget_ue_connectable": {}, + "input_ue_unconnectable": {}, + "version": "7.7" + }, + "cnr_id": "comfy-core", + "ver": "0.18.1", + "Node name for S&R": "PrimitiveBoundingBox" + }, + "widgets_values": [ + 0, + 0, + 512, + 512 + ] + }, + { + "id": 55, + "type": "ComfyMathExpression", + "pos": [ + 780, + 5570 + ], + "size": [ + 370, + 190 + ], + "flags": {}, + "order": 5, + "mode": 0, + "inputs": [ + { + "label": "a", + "localized_name": "values.a", + "name": "values.a", + "type": "FLOAT,INT", + "link": 79 + }, + { + "label": "b", + "localized_name": "values.b", + "name": "values.b", + "shape": 7, + "type": "FLOAT,INT", + "link": 80 + }, + { + "label": "c", + "localized_name": "values.c", + "name": "values.c", + "shape": 7, + "type": "FLOAT,INT", + "link": null + }, + { + "localized_name": "expression", + "name": "expression", + "type": "STRING", + "widget": { + "name": "expression" + }, + "link": null + } + ], + "outputs": [ + { + "localized_name": "FLOAT", + "name": "FLOAT", + "type": "FLOAT", + "links": null + }, + { + "localized_name": "INT", + "name": "INT", + "type": "INT", + "links": [ + 78, + 84, + 86, + 88, + 90, + 98, + 100, + 121, + 123, + 126, + 161 + ] + } + ], + "title": "Math Expression(Height)", + "properties": { + "ue_properties": { + "widget_ue_connectable": {}, + "version": "7.7", + "input_ue_unconnectable": {} + }, + "cnr_id": "comfy-core", + "ver": "0.18.1", + "Node name for S&R": "ComfyMathExpression" + }, + "widgets_values": [ + "max(1, int(a/b))" + ] + }, + { + "id": 57, + "type": "ImageCropV2", + "pos": [ + 2080, + 4700 + ], + "size": [ + 300, + 480 + ], + "flags": {}, + "order": 6, + "mode": 0, + "inputs": [ + { + "localized_name": "image", + "name": "image", + "type": "IMAGE", + "link": 82 + }, + { + "localized_name": "crop_region", + "name": "crop_region", + "type": "BOUNDING_BOX", + "widget": { + "name": "crop_region" + }, + "link": 83 + } + ], + "outputs": [ + { + "localized_name": "IMAGE", + "name": "IMAGE", + "type": "IMAGE", + "links": [ + 194, + 230 + ] + } + ], + "properties": { + "ue_properties": { + "widget_ue_connectable": {}, + 
"input_ue_unconnectable": {}, + "version": "7.7" + }, + "cnr_id": "comfy-core", + "ver": "0.18.1", + "Node name for S&R": "ImageCropV2" + }, + "widgets_values": [ + { + "x": 0, + "y": 0, + "width": 512, + "height": 512 + }, + 0, + 0, + 512, + 512 + ] + }, + { + "id": 58, + "type": "PrimitiveBoundingBox", + "pos": [ + 1740, + 4830 + ], + "size": [ + 270, + 200 + ], + "flags": {}, + "order": 7, + "mode": 0, + "inputs": [ + { + "localized_name": "x", + "name": "x", + "type": "INT", + "widget": { + "name": "x" + }, + "link": null + }, + { + "localized_name": "y", + "name": "y", + "type": "INT", + "widget": { + "name": "y" + }, + "link": 84 + }, + { + "localized_name": "width", + "name": "width", + "type": "INT", + "widget": { + "name": "width" + }, + "link": 85 + }, + { + "localized_name": "height", + "name": "height", + "type": "INT", + "widget": { + "name": "height" + }, + "link": 86 + } + ], + "outputs": [ + { + "localized_name": "BOUNDING_BOX", + "name": "BOUNDING_BOX", + "type": "BOUNDING_BOX", + "links": [ + 83 + ] + } + ], + "properties": { + "ue_properties": { + "widget_ue_connectable": {}, + "input_ue_unconnectable": {}, + "version": "7.7" + }, + "cnr_id": "comfy-core", + "ver": "0.18.1", + "Node name for S&R": "PrimitiveBoundingBox" + }, + "widgets_values": [ + 0, + 0, + 512, + 512 + ] + }, + { + "id": 60, + "type": "PrimitiveBoundingBox", + "pos": [ + 1740, + 3700 + ], + "size": [ + 270, + 200 + ], + "flags": {}, + "order": 8, + "mode": 0, + "inputs": [ + { + "localized_name": "x", + "name": "x", + "type": "INT", + "widget": { + "name": "x" + }, + "link": 88 + }, + { + "localized_name": "y", + "name": "y", + "type": "INT", + "widget": { + "name": "y" + }, + "link": null + }, + { + "localized_name": "width", + "name": "width", + "type": "INT", + "widget": { + "name": "width" + }, + "link": 89 + }, + { + "localized_name": "height", + "name": "height", + "type": "INT", + "widget": { + "name": "height" + }, + "link": 90 + } + ], + "outputs": [ + { + "localized_name": "BOUNDING_BOX", + "name": "BOUNDING_BOX", + "type": "BOUNDING_BOX", + "links": [ + 92 + ] + } + ], + "properties": { + "ue_properties": { + "widget_ue_connectable": {}, + "input_ue_unconnectable": {}, + "version": "7.7" + }, + "cnr_id": "comfy-core", + "ver": "0.18.1", + "Node name for S&R": "PrimitiveBoundingBox" + }, + "widgets_values": [ + 6, + 0, + 512, + 512 + ] + }, + { + "id": 61, + "type": "ImageCropV2", + "pos": [ + 2100, + 3570 + ], + "size": [ + 300, + 480 + ], + "flags": {}, + "order": 9, + "mode": 0, + "inputs": [ + { + "localized_name": "image", + "name": "image", + "type": "IMAGE", + "link": 91 + }, + { + "localized_name": "crop_region", + "name": "crop_region", + "type": "BOUNDING_BOX", + "widget": { + "name": "crop_region" + }, + "link": 92 + } + ], + "outputs": [ + { + "localized_name": "IMAGE", + "name": "IMAGE", + "type": "IMAGE", + "links": [ + 192, + 228 + ] + } + ], + "properties": { + "ue_properties": { + "widget_ue_connectable": {}, + "input_ue_unconnectable": {}, + "version": "7.7" + }, + "cnr_id": "comfy-core", + "ver": "0.18.1", + "Node name for S&R": "ImageCropV2" + }, + "widgets_values": [ + { + "x": 0, + "y": 0, + "width": 512, + "height": 512 + }, + 0, + 0, + 512, + 512 + ] + }, + { + "id": 63, + "type": "ImageCropV2", + "pos": [ + 2080, + 5310 + ], + "size": [ + 300, + 480 + ], + "flags": {}, + "order": 10, + "mode": 0, + "inputs": [ + { + "localized_name": "image", + "name": "image", + "type": "IMAGE", + "link": 94 + }, + { + "localized_name": "crop_region", + "name": "crop_region", + 
"type": "BOUNDING_BOX", + "widget": { + "name": "crop_region" + }, + "link": 95 + } + ], + "outputs": [ + { + "localized_name": "IMAGE", + "name": "IMAGE", + "type": "IMAGE", + "links": [ + 195, + 231 + ] + } + ], + "properties": { + "ue_properties": { + "widget_ue_connectable": {}, + "input_ue_unconnectable": {}, + "version": "7.7" + }, + "cnr_id": "comfy-core", + "ver": "0.18.1", + "Node name for S&R": "ImageCropV2" + }, + "widgets_values": [ + { + "x": 0, + "y": 0, + "width": 512, + "height": 512 + }, + 0, + 0, + 512, + 512 + ] + }, + { + "id": 65, + "type": "PrimitiveBoundingBox", + "pos": [ + 1750, + 5330 + ], + "size": [ + 270, + 200 + ], + "flags": {}, + "order": 11, + "mode": 0, + "inputs": [ + { + "localized_name": "x", + "name": "x", + "type": "INT", + "widget": { + "name": "x" + }, + "link": 97 + }, + { + "localized_name": "y", + "name": "y", + "type": "INT", + "widget": { + "name": "y" + }, + "link": 98 + }, + { + "localized_name": "width", + "name": "width", + "type": "INT", + "widget": { + "name": "width" + }, + "link": 99 + }, + { + "localized_name": "height", + "name": "height", + "type": "INT", + "widget": { + "name": "height" + }, + "link": 100 + } + ], + "outputs": [ + { + "localized_name": "BOUNDING_BOX", + "name": "BOUNDING_BOX", + "type": "BOUNDING_BOX", + "links": [ + 95 + ] + } + ], + "properties": { + "ue_properties": { + "widget_ue_connectable": {}, + "input_ue_unconnectable": {}, + "version": "7.7" + }, + "cnr_id": "comfy-core", + "ver": "0.18.1", + "Node name for S&R": "PrimitiveBoundingBox" + }, + "widgets_values": [ + 6, + 0, + 512, + 512 + ] + }, + { + "id": 71, + "type": "ComfyMathExpression", + "pos": [ + 780, + 6090 + ], + "size": [ + 400, + 190 + ], + "flags": {}, + "order": 12, + "mode": 0, + "inputs": [ + { + "label": "a", + "localized_name": "values.a", + "name": "values.a", + "type": "FLOAT,INT", + "link": 126 + }, + { + "label": "b", + "localized_name": "values.b", + "name": "values.b", + "shape": 7, + "type": "FLOAT,INT", + "link": null + }, + { + "localized_name": "expression", + "name": "expression", + "type": "STRING", + "widget": { + "name": "expression" + }, + "link": null + } + ], + "outputs": [ + { + "localized_name": "FLOAT", + "name": "FLOAT", + "type": "FLOAT", + "links": null + }, + { + "localized_name": "INT", + "name": "INT", + "type": "INT", + "links": [ + 136, + 147, + 156, + 306 + ] + } + ], + "title": "Math Expression(height)", + "properties": { + "ue_properties": { + "widget_ue_connectable": {}, + "version": "7.7", + "input_ue_unconnectable": {} + }, + "cnr_id": "comfy-core", + "ver": "0.18.1", + "Node name for S&R": "ComfyMathExpression" + }, + "widgets_values": [ + "2 * a" + ] + }, + { + "id": 75, + "type": "ImageCropV2", + "pos": [ + 2100, + 5900 + ], + "size": [ + 300, + 480 + ], + "flags": {}, + "order": 13, + "mode": 0, + "inputs": [ + { + "localized_name": "image", + "name": "image", + "type": "IMAGE", + "link": 117 + }, + { + "localized_name": "crop_region", + "name": "crop_region", + "type": "BOUNDING_BOX", + "widget": { + "name": "crop_region" + }, + "link": 118 + } + ], + "outputs": [ + { + "localized_name": "IMAGE", + "name": "IMAGE", + "type": "IMAGE", + "links": [ + 196, + 232 + ] + } + ], + "properties": { + "ue_properties": { + "widget_ue_connectable": {}, + "input_ue_unconnectable": {}, + "version": "7.7" + }, + "cnr_id": "comfy-core", + "ver": "0.18.1", + "Node name for S&R": "ImageCropV2" + }, + "widgets_values": [ + { + "x": 0, + "y": 0, + "width": 512, + "height": 512 + }, + 0, + 0, + 512, + 512 + ] + }, + { + 
"id": 77, + "type": "PrimitiveBoundingBox", + "pos": [ + 1750, + 5970 + ], + "size": [ + 270, + 200 + ], + "flags": {}, + "order": 14, + "mode": 0, + "inputs": [ + { + "localized_name": "x", + "name": "x", + "type": "INT", + "widget": { + "name": "x" + }, + "link": 128 + }, + { + "localized_name": "y", + "name": "y", + "type": "INT", + "widget": { + "name": "y" + }, + "link": 121 + }, + { + "localized_name": "width", + "name": "width", + "type": "INT", + "widget": { + "name": "width" + }, + "link": 302 + }, + { + "localized_name": "height", + "name": "height", + "type": "INT", + "widget": { + "name": "height" + }, + "link": 123 + } + ], + "outputs": [ + { + "localized_name": "BOUNDING_BOX", + "name": "BOUNDING_BOX", + "type": "BOUNDING_BOX", + "links": [ + 118 + ] + } + ], + "properties": { + "ue_properties": { + "widget_ue_connectable": {}, + "input_ue_unconnectable": {}, + "version": "7.7" + }, + "cnr_id": "comfy-core", + "ver": "0.18.1", + "Node name for S&R": "PrimitiveBoundingBox" + }, + "widgets_values": [ + 6, + 0, + 512, + 512 + ] + }, + { + "id": 78, + "type": "ComfyMathExpression", + "pos": [ + 780, + 5820 + ], + "size": [ + 400, + 200 + ], + "flags": {}, + "order": 15, + "mode": 0, + "inputs": [ + { + "label": "a", + "localized_name": "values.a", + "name": "values.a", + "type": "FLOAT,INT", + "link": 127 + }, + { + "label": "b", + "localized_name": "values.b", + "name": "values.b", + "shape": 7, + "type": "FLOAT,INT", + "link": null + }, + { + "localized_name": "expression", + "name": "expression", + "type": "STRING", + "widget": { + "name": "expression" + }, + "link": null + } + ], + "outputs": [ + { + "localized_name": "FLOAT", + "name": "FLOAT", + "type": "FLOAT", + "links": null + }, + { + "localized_name": "INT", + "name": "INT", + "type": "INT", + "links": [ + 128, + 132, + 163, + 301 + ] + } + ], + "title": "Math Expression(width)", + "properties": { + "ue_properties": { + "widget_ue_connectable": {}, + "input_ue_unconnectable": {}, + "version": "7.7" + }, + "cnr_id": "comfy-core", + "ver": "0.18.1", + "Node name for S&R": "ComfyMathExpression" + }, + "widgets_values": [ + "2 * a" + ] + }, + { + "id": 140, + "type": "ComfyMathExpression", + "pos": [ + 1240, + 5640 + ], + "size": [ + 420, + 190 + ], + "flags": {}, + "order": 24, + "mode": 0, + "inputs": [ + { + "label": "a", + "localized_name": "values.a", + "name": "values.a", + "type": "FLOAT,INT", + "link": 300 + }, + { + "label": "b", + "localized_name": "values.b", + "name": "values.b", + "shape": 7, + "type": "FLOAT,INT", + "link": 301 + }, + { + "label": "c", + "localized_name": "values.c", + "name": "values.c", + "shape": 7, + "type": "FLOAT,INT", + "link": null + }, + { + "localized_name": "expression", + "name": "expression", + "type": "STRING", + "widget": { + "name": "expression" + }, + "link": null + } + ], + "outputs": [ + { + "localized_name": "FLOAT", + "name": "FLOAT", + "type": "FLOAT", + "links": null + }, + { + "localized_name": "INT", + "name": "INT", + "type": "INT", + "links": [ + 302, + 303, + 304 + ] + } + ], + "title": "Math Expression (Right Width)", + "properties": { + "ue_properties": { + "widget_ue_connectable": {}, + "version": "7.7", + "input_ue_unconnectable": {} + }, + "cnr_id": "comfy-core", + "ver": "0.18.1", + "Node name for S&R": "ComfyMathExpression" + }, + "widgets_values": [ + "max(1, a - b)" + ] + }, + { + "id": 141, + "type": "ComfyMathExpression", + "pos": [ + 1230, + 6340 + ], + "size": [ + 420, + 190 + ], + "flags": {}, + "order": 25, + "mode": 0, + "inputs": [ + { + "label": 
"a", + "localized_name": "values.a", + "name": "values.a", + "type": "FLOAT,INT", + "link": 305 + }, + { + "label": "b", + "localized_name": "values.b", + "name": "values.b", + "shape": 7, + "type": "FLOAT,INT", + "link": 306 + }, + { + "label": "c", + "localized_name": "values.c", + "name": "values.c", + "shape": 7, + "type": "FLOAT,INT", + "link": null + }, + { + "localized_name": "expression", + "name": "expression", + "type": "STRING", + "widget": { + "name": "expression" + }, + "link": null + } + ], + "outputs": [ + { + "localized_name": "FLOAT", + "name": "FLOAT", + "type": "FLOAT", + "links": null + }, + { + "localized_name": "INT", + "name": "INT", + "type": "INT", + "links": [ + 307, + 308, + 309 + ] + } + ], + "title": "Math Expression (Bottom Height)", + "properties": { + "ue_properties": { + "widget_ue_connectable": {}, + "version": "7.7", + "input_ue_unconnectable": {} + }, + "cnr_id": "comfy-core", + "ver": "0.18.1", + "Node name for S&R": "ComfyMathExpression" + }, + "widgets_values": [ + "max(1, a - b)" + ] + }, + { + "id": 79, + "type": "ImageCropV2", + "pos": [ + 2120, + 7580 + ], + "size": [ + 300, + 480 + ], + "flags": {}, + "order": 16, + "mode": 0, + "inputs": [ + { + "localized_name": "image", + "name": "image", + "type": "IMAGE", + "link": 129 + }, + { + "localized_name": "crop_region", + "name": "crop_region", + "type": "BOUNDING_BOX", + "widget": { + "name": "crop_region" + }, + "link": 130 + } + ], + "outputs": [ + { + "localized_name": "IMAGE", + "name": "IMAGE", + "type": "IMAGE", + "links": [ + 199, + 235 + ] + } + ], + "properties": { + "ue_properties": { + "widget_ue_connectable": {}, + "input_ue_unconnectable": {}, + "version": "7.7" + }, + "cnr_id": "comfy-core", + "ver": "0.18.1", + "Node name for S&R": "ImageCropV2" + }, + "widgets_values": [ + { + "x": 0, + "y": 0, + "width": 512, + "height": 512 + }, + 0, + 0, + 512, + 512 + ] + }, + { + "id": 81, + "type": "PrimitiveBoundingBox", + "pos": [ + 1720, + 7620 + ], + "size": [ + 270, + 200 + ], + "flags": {}, + "order": 17, + "mode": 0, + "inputs": [ + { + "localized_name": "x", + "name": "x", + "type": "INT", + "widget": { + "name": "x" + }, + "link": 132 + }, + { + "localized_name": "y", + "name": "y", + "type": "INT", + "widget": { + "name": "y" + }, + "link": 136 + }, + { + "localized_name": "width", + "name": "width", + "type": "INT", + "widget": { + "name": "width" + }, + "link": 303 + }, + { + "localized_name": "height", + "name": "height", + "type": "INT", + "widget": { + "name": "height" + }, + "link": 307 + } + ], + "outputs": [ + { + "localized_name": "BOUNDING_BOX", + "name": "BOUNDING_BOX", + "type": "BOUNDING_BOX", + "links": [ + 130 + ] + } + ], + "properties": { + "ue_properties": { + "widget_ue_connectable": {}, + "input_ue_unconnectable": {}, + "version": "7.7" + }, + "cnr_id": "comfy-core", + "ver": "0.18.1", + "Node name for S&R": "PrimitiveBoundingBox" + }, + "widgets_values": [ + 6, + 0, + 512, + 512 + ] + }, + { + "id": 82, + "type": "ImageCropV2", + "pos": [ + 2120, + 7040 + ], + "size": [ + 300, + 480 + ], + "flags": {}, + "order": 18, + "mode": 0, + "inputs": [ + { + "localized_name": "image", + "name": "image", + "type": "IMAGE", + "link": 137 + }, + { + "localized_name": "crop_region", + "name": "crop_region", + "type": "BOUNDING_BOX", + "widget": { + "name": "crop_region" + }, + "link": 138 + } + ], + "outputs": [ + { + "localized_name": "IMAGE", + "name": "IMAGE", + "type": "IMAGE", + "links": [ + 198, + 234 + ] + } + ], + "properties": { + "ue_properties": { + 
"widget_ue_connectable": {}, + "input_ue_unconnectable": {}, + "version": "7.7" + }, + "cnr_id": "comfy-core", + "ver": "0.18.1", + "Node name for S&R": "ImageCropV2" + }, + "widgets_values": [ + { + "x": 0, + "y": 0, + "width": 512, + "height": 512 + }, + 0, + 0, + 512, + 512 + ] + }, + { + "id": 84, + "type": "PrimitiveBoundingBox", + "pos": [ + 1720, + 7080 + ], + "size": [ + 270, + 200 + ], + "flags": {}, + "order": 19, + "mode": 0, + "inputs": [ + { + "localized_name": "x", + "name": "x", + "type": "INT", + "widget": { + "name": "x" + }, + "link": 146 + }, + { + "localized_name": "y", + "name": "y", + "type": "INT", + "widget": { + "name": "y" + }, + "link": 147 + }, + { + "localized_name": "width", + "name": "width", + "type": "INT", + "widget": { + "name": "width" + }, + "link": 142 + }, + { + "localized_name": "height", + "name": "height", + "type": "INT", + "widget": { + "name": "height" + }, + "link": 308 + } + ], + "outputs": [ + { + "localized_name": "BOUNDING_BOX", + "name": "BOUNDING_BOX", + "type": "BOUNDING_BOX", + "links": [ + 138 + ] + } + ], + "properties": { + "ue_properties": { + "widget_ue_connectable": {}, + "input_ue_unconnectable": {}, + "version": "7.7" + }, + "cnr_id": "comfy-core", + "ver": "0.18.1", + "Node name for S&R": "PrimitiveBoundingBox" + }, + "widgets_values": [ + 6, + 0, + 512, + 512 + ] + }, + { + "id": 85, + "type": "ImageCropV2", + "pos": [ + 2110, + 6480 + ], + "size": [ + 300, + 480 + ], + "flags": {}, + "order": 20, + "mode": 0, + "inputs": [ + { + "localized_name": "image", + "name": "image", + "type": "IMAGE", + "link": 148 + }, + { + "localized_name": "crop_region", + "name": "crop_region", + "type": "BOUNDING_BOX", + "widget": { + "name": "crop_region" + }, + "link": 149 + } + ], + "outputs": [ + { + "localized_name": "IMAGE", + "name": "IMAGE", + "type": "IMAGE", + "links": [ + 197, + 233 + ] + } + ], + "properties": { + "ue_properties": { + "widget_ue_connectable": {}, + "input_ue_unconnectable": {}, + "version": "7.7" + }, + "cnr_id": "comfy-core", + "ver": "0.18.1", + "Node name for S&R": "ImageCropV2" + }, + "widgets_values": [ + { + "x": 0, + "y": 0, + "width": 512, + "height": 512 + }, + 0, + 0, + 512, + 512 + ] + }, + { + "id": 86, + "type": "PrimitiveBoundingBox", + "pos": [ + 1670, + 6570 + ], + "size": [ + 270, + 200 + ], + "flags": {}, + "order": 21, + "mode": 0, + "inputs": [ + { + "localized_name": "x", + "name": "x", + "type": "INT", + "widget": { + "name": "x" + }, + "link": null + }, + { + "localized_name": "y", + "name": "y", + "type": "INT", + "widget": { + "name": "y" + }, + "link": 156 + }, + { + "localized_name": "width", + "name": "width", + "type": "INT", + "widget": { + "name": "width" + }, + "link": 152 + }, + { + "localized_name": "height", + "name": "height", + "type": "INT", + "widget": { + "name": "height" + }, + "link": 309 + } + ], + "outputs": [ + { + "localized_name": "BOUNDING_BOX", + "name": "BOUNDING_BOX", + "type": "BOUNDING_BOX", + "links": [ + 149 + ] + } + ], + "properties": { + "ue_properties": { + "widget_ue_connectable": {}, + "input_ue_unconnectable": {}, + "version": "7.7" + }, + "cnr_id": "comfy-core", + "ver": "0.18.1", + "Node name for S&R": "PrimitiveBoundingBox" + }, + "widgets_values": [ + 0, + 0, + 512, + 512 + ] + }, + { + "id": 88, + "type": "ImageCropV2", + "pos": [ + 2060, + 4140 + ], + "size": [ + 300, + 480 + ], + "flags": {}, + "order": 22, + "mode": 0, + "inputs": [ + { + "localized_name": "image", + "name": "image", + "type": "IMAGE", + "link": 157 + }, + { + "localized_name": 
"crop_region", + "name": "crop_region", + "type": "BOUNDING_BOX", + "widget": { + "name": "crop_region" + }, + "link": 158 + } + ], + "outputs": [ + { + "localized_name": "IMAGE", + "name": "IMAGE", + "type": "IMAGE", + "links": [ + 193, + 229 + ] + } + ], + "properties": { + "ue_properties": { + "widget_ue_connectable": {}, + "input_ue_unconnectable": {}, + "version": "7.7" + }, + "cnr_id": "comfy-core", + "ver": "0.18.1", + "Node name for S&R": "ImageCropV2" + }, + "widgets_values": [ + { + "x": 0, + "y": 0, + "width": 512, + "height": 512 + }, + 0, + 0, + 512, + 512 + ] + }, + { + "id": 89, + "type": "PrimitiveBoundingBox", + "pos": [ + 1720, + 4150 + ], + "size": [ + 270, + 200 + ], + "flags": {}, + "order": 23, + "mode": 0, + "inputs": [ + { + "localized_name": "x", + "name": "x", + "type": "INT", + "widget": { + "name": "x" + }, + "link": 163 + }, + { + "localized_name": "y", + "name": "y", + "type": "INT", + "widget": { + "name": "y" + }, + "link": null + }, + { + "localized_name": "width", + "name": "width", + "type": "INT", + "widget": { + "name": "width" + }, + "link": 304 + }, + { + "localized_name": "height", + "name": "height", + "type": "INT", + "widget": { + "name": "height" + }, + "link": 161 + } + ], + "outputs": [ + { + "localized_name": "BOUNDING_BOX", + "name": "BOUNDING_BOX", + "type": "BOUNDING_BOX", + "links": [ + 158 + ] + } + ], + "properties": { + "ue_properties": { + "widget_ue_connectable": {}, + "input_ue_unconnectable": {}, + "version": "7.7" + }, + "cnr_id": "comfy-core", + "ver": "0.18.1", + "Node name for S&R": "PrimitiveBoundingBox" + }, + "widgets_values": [ + 6, + 0, + 512, + 512 + ] + }, + { + "id": 136, + "type": "BatchImagesNode", + "pos": [ + 3170, + 5640 + ], + "size": [ + 230, + 290 + ], + "flags": {}, + "order": 24, + "mode": 0, + "inputs": [ + { + "label": "image0", + "localized_name": "images.image0", + "name": "images.image0", + "type": "IMAGE", + "link": 227 + }, + { + "label": "image1", + "localized_name": "images.image1", + "name": "images.image1", + "type": "IMAGE", + "link": 228 + }, + { + "label": "image2", + "localized_name": "images.image2", + "name": "images.image2", + "shape": 7, + "type": "IMAGE", + "link": 229 + }, + { + "label": "image3", + "localized_name": "images.image3", + "name": "images.image3", + "shape": 7, + "type": "IMAGE", + "link": 230 + }, + { + "label": "image4", + "localized_name": "images.image4", + "name": "images.image4", + "shape": 7, + "type": "IMAGE", + "link": 231 + }, + { + "label": "image5", + "localized_name": "images.image5", + "name": "images.image5", + "shape": 7, + "type": "IMAGE", + "link": 232 + }, + { + "label": "image6", + "localized_name": "images.image6", + "name": "images.image6", + "shape": 7, + "type": "IMAGE", + "link": 233 + }, + { + "label": "image7", + "localized_name": "images.image7", + "name": "images.image7", + "shape": 7, + "type": "IMAGE", + "link": 234 + }, + { + "label": "image8", + "localized_name": "images.image8", + "name": "images.image8", + "shape": 7, + "type": "IMAGE", + "link": 235 + }, + { + "label": "image9", + "localized_name": "images.image9", + "name": "images.image9", + "shape": 7, + "type": "IMAGE", + "link": null + } + ], + "outputs": [ + { + "localized_name": "IMAGE", + "name": "IMAGE", + "type": "IMAGE", + "links": [ + 226 + ] + } + ], + "properties": { + "ue_properties": { + "widget_ue_connectable": {}, + "input_ue_unconnectable": {}, + "version": "7.7" + }, + "cnr_id": "comfy-core", + "ver": "0.18.1", + "Node name for S&R": "BatchImagesNode" + } + } + ], + 
"groups": [ + { + "id": 3, + "title": "Crop Images 3x3", + "bounding": [ + 100, + 2700, + 2640, + 5480 + ], + "color": "#3f789e", + "font_size": 24, + "flags": {} + } + ], + "links": [ + { + "id": 73, + "origin_id": 51, + "origin_slot": 0, + "target_id": 50, + "target_slot": 0, + "type": "INT" + }, + { + "id": 108, + "origin_id": 52, + "origin_slot": 0, + "target_id": 50, + "target_slot": 1, + "type": "INT" + }, + { + "id": 76, + "origin_id": 54, + "origin_slot": 0, + "target_id": 53, + "target_slot": 1, + "type": "BOUNDING_BOX" + }, + { + "id": 77, + "origin_id": 50, + "origin_slot": 1, + "target_id": 54, + "target_slot": 2, + "type": "INT" + }, + { + "id": 78, + "origin_id": 55, + "origin_slot": 1, + "target_id": 54, + "target_slot": 3, + "type": "INT" + }, + { + "id": 79, + "origin_id": 51, + "origin_slot": 1, + "target_id": 55, + "target_slot": 0, + "type": "INT" + }, + { + "id": 80, + "origin_id": 52, + "origin_slot": 0, + "target_id": 55, + "target_slot": 1, + "type": "INT" + }, + { + "id": 83, + "origin_id": 58, + "origin_slot": 0, + "target_id": 57, + "target_slot": 1, + "type": "BOUNDING_BOX" + }, + { + "id": 84, + "origin_id": 55, + "origin_slot": 1, + "target_id": 58, + "target_slot": 1, + "type": "INT" + }, + { + "id": 85, + "origin_id": 50, + "origin_slot": 1, + "target_id": 58, + "target_slot": 2, + "type": "INT" + }, + { + "id": 86, + "origin_id": 55, + "origin_slot": 1, + "target_id": 58, + "target_slot": 3, + "type": "INT" + }, + { + "id": 88, + "origin_id": 50, + "origin_slot": 1, + "target_id": 60, + "target_slot": 0, + "type": "INT" + }, + { + "id": 89, + "origin_id": 50, + "origin_slot": 1, + "target_id": 60, + "target_slot": 2, + "type": "INT" + }, + { + "id": 90, + "origin_id": 55, + "origin_slot": 1, + "target_id": 60, + "target_slot": 3, + "type": "INT" + }, + { + "id": 92, + "origin_id": 60, + "origin_slot": 0, + "target_id": 61, + "target_slot": 1, + "type": "BOUNDING_BOX" + }, + { + "id": 95, + "origin_id": 65, + "origin_slot": 0, + "target_id": 63, + "target_slot": 1, + "type": "BOUNDING_BOX" + }, + { + "id": 97, + "origin_id": 50, + "origin_slot": 1, + "target_id": 65, + "target_slot": 0, + "type": "INT" + }, + { + "id": 98, + "origin_id": 55, + "origin_slot": 1, + "target_id": 65, + "target_slot": 1, + "type": "INT" + }, + { + "id": 99, + "origin_id": 50, + "origin_slot": 1, + "target_id": 65, + "target_slot": 2, + "type": "INT" + }, + { + "id": 100, + "origin_id": 55, + "origin_slot": 1, + "target_id": 65, + "target_slot": 3, + "type": "INT" + }, + { + "id": 126, + "origin_id": 55, + "origin_slot": 1, + "target_id": 71, + "target_slot": 0, + "type": "INT" + }, + { + "id": 118, + "origin_id": 77, + "origin_slot": 0, + "target_id": 75, + "target_slot": 1, + "type": "BOUNDING_BOX" + }, + { + "id": 128, + "origin_id": 78, + "origin_slot": 1, + "target_id": 77, + "target_slot": 0, + "type": "INT" + }, + { + "id": 121, + "origin_id": 55, + "origin_slot": 1, + "target_id": 77, + "target_slot": 1, + "type": "INT" + }, + { + "id": 302, + "origin_id": 140, + "origin_slot": 1, + "target_id": 77, + "target_slot": 2, + "type": "INT" + }, + { + "id": 123, + "origin_id": 55, + "origin_slot": 1, + "target_id": 77, + "target_slot": 3, + "type": "INT" + }, + { + "id": 127, + "origin_id": 50, + "origin_slot": 1, + "target_id": 78, + "target_slot": 0, + "type": "INT" + }, + { + "id": 130, + "origin_id": 81, + "origin_slot": 0, + "target_id": 79, + "target_slot": 1, + "type": "BOUNDING_BOX" + }, + { + "id": 132, + "origin_id": 78, + "origin_slot": 1, + "target_id": 81, + 
"target_slot": 0, + "type": "INT" + }, + { + "id": 136, + "origin_id": 71, + "origin_slot": 1, + "target_id": 81, + "target_slot": 1, + "type": "INT" + }, + { + "id": 303, + "origin_id": 140, + "origin_slot": 1, + "target_id": 81, + "target_slot": 2, + "type": "INT" + }, + { + "id": 307, + "origin_id": 141, + "origin_slot": 1, + "target_id": 81, + "target_slot": 3, + "type": "INT" + }, + { + "id": 138, + "origin_id": 84, + "origin_slot": 0, + "target_id": 82, + "target_slot": 1, + "type": "BOUNDING_BOX" + }, + { + "id": 146, + "origin_id": 50, + "origin_slot": 1, + "target_id": 84, + "target_slot": 0, + "type": "INT" + }, + { + "id": 147, + "origin_id": 71, + "origin_slot": 1, + "target_id": 84, + "target_slot": 1, + "type": "INT" + }, + { + "id": 142, + "origin_id": 50, + "origin_slot": 1, + "target_id": 84, + "target_slot": 2, + "type": "INT" + }, + { + "id": 308, + "origin_id": 141, + "origin_slot": 1, + "target_id": 84, + "target_slot": 3, + "type": "INT" + }, + { + "id": 149, + "origin_id": 86, + "origin_slot": 0, + "target_id": 85, + "target_slot": 1, + "type": "BOUNDING_BOX" + }, + { + "id": 156, + "origin_id": 71, + "origin_slot": 1, + "target_id": 86, + "target_slot": 1, + "type": "INT" + }, + { + "id": 152, + "origin_id": 50, + "origin_slot": 1, + "target_id": 86, + "target_slot": 2, + "type": "INT" + }, + { + "id": 309, + "origin_id": 141, + "origin_slot": 1, + "target_id": 86, + "target_slot": 3, + "type": "INT" + }, + { + "id": 158, + "origin_id": 89, + "origin_slot": 0, + "target_id": 88, + "target_slot": 1, + "type": "BOUNDING_BOX" + }, + { + "id": 163, + "origin_id": 78, + "origin_slot": 1, + "target_id": 89, + "target_slot": 0, + "type": "INT" + }, + { + "id": 304, + "origin_id": 140, + "origin_slot": 1, + "target_id": 89, + "target_slot": 2, + "type": "INT" + }, + { + "id": 161, + "origin_id": 55, + "origin_slot": 1, + "target_id": 89, + "target_slot": 3, + "type": "INT" + }, + { + "id": 300, + "origin_id": 51, + "origin_slot": 0, + "target_id": 140, + "target_slot": 0, + "type": "INT" + }, + { + "id": 301, + "origin_id": 78, + "origin_slot": 1, + "target_id": 140, + "target_slot": 1, + "type": "INT" + }, + { + "id": 305, + "origin_id": 51, + "origin_slot": 1, + "target_id": 141, + "target_slot": 0, + "type": "INT" + }, + { + "id": 306, + "origin_id": 71, + "origin_slot": 1, + "target_id": 141, + "target_slot": 1, + "type": "INT" + }, + { + "id": 74, + "origin_id": -10, + "origin_slot": 0, + "target_id": 51, + "target_slot": 0, + "type": "IMAGE" + }, + { + "id": 75, + "origin_id": -10, + "origin_slot": 0, + "target_id": 53, + "target_slot": 0, + "type": "IMAGE" + }, + { + "id": 82, + "origin_id": -10, + "origin_slot": 0, + "target_id": 57, + "target_slot": 0, + "type": "IMAGE" + }, + { + "id": 91, + "origin_id": -10, + "origin_slot": 0, + "target_id": 61, + "target_slot": 0, + "type": "IMAGE" + }, + { + "id": 94, + "origin_id": -10, + "origin_slot": 0, + "target_id": 63, + "target_slot": 0, + "type": "IMAGE" + }, + { + "id": 117, + "origin_id": -10, + "origin_slot": 0, + "target_id": 75, + "target_slot": 0, + "type": "IMAGE" + }, + { + "id": 129, + "origin_id": -10, + "origin_slot": 0, + "target_id": 79, + "target_slot": 0, + "type": "IMAGE" + }, + { + "id": 137, + "origin_id": -10, + "origin_slot": 0, + "target_id": 82, + "target_slot": 0, + "type": "IMAGE" + }, + { + "id": 148, + "origin_id": -10, + "origin_slot": 0, + "target_id": 85, + "target_slot": 0, + "type": "IMAGE" + }, + { + "id": 157, + "origin_id": -10, + "origin_slot": 0, + "target_id": 88, + "target_slot": 
0, + "type": "IMAGE" + }, + { + "id": 101, + "origin_id": 53, + "origin_slot": 0, + "target_id": -20, + "target_slot": 0, + "type": "IMAGE" + }, + { + "id": 192, + "origin_id": 61, + "origin_slot": 0, + "target_id": -20, + "target_slot": 1, + "type": "IMAGE" + }, + { + "id": 193, + "origin_id": 88, + "origin_slot": 0, + "target_id": -20, + "target_slot": 2, + "type": "IMAGE" + }, + { + "id": 194, + "origin_id": 57, + "origin_slot": 0, + "target_id": -20, + "target_slot": 3, + "type": "IMAGE" + }, + { + "id": 195, + "origin_id": 63, + "origin_slot": 0, + "target_id": -20, + "target_slot": 4, + "type": "IMAGE" + }, + { + "id": 196, + "origin_id": 75, + "origin_slot": 0, + "target_id": -20, + "target_slot": 5, + "type": "IMAGE" + }, + { + "id": 197, + "origin_id": 85, + "origin_slot": 0, + "target_id": -20, + "target_slot": 6, + "type": "IMAGE" + }, + { + "id": 198, + "origin_id": 82, + "origin_slot": 0, + "target_id": -20, + "target_slot": 7, + "type": "IMAGE" + }, + { + "id": 199, + "origin_id": 79, + "origin_slot": 0, + "target_id": -20, + "target_slot": 8, + "type": "IMAGE" + }, + { + "id": 226, + "origin_id": 136, + "origin_slot": 0, + "target_id": -20, + "target_slot": 9, + "type": "IMAGE" + }, + { + "id": 227, + "origin_id": 53, + "origin_slot": 0, + "target_id": 136, + "target_slot": 0, + "type": "IMAGE" + }, + { + "id": 228, + "origin_id": 61, + "origin_slot": 0, + "target_id": 136, + "target_slot": 1, + "type": "IMAGE" + }, + { + "id": 229, + "origin_id": 88, + "origin_slot": 0, + "target_id": 136, + "target_slot": 2, + "type": "IMAGE" + }, + { + "id": 230, + "origin_id": 57, + "origin_slot": 0, + "target_id": 136, + "target_slot": 3, + "type": "IMAGE" + }, + { + "id": 231, + "origin_id": 63, + "origin_slot": 0, + "target_id": 136, + "target_slot": 4, + "type": "IMAGE" + }, + { + "id": 232, + "origin_id": 75, + "origin_slot": 0, + "target_id": 136, + "target_slot": 5, + "type": "IMAGE" + }, + { + "id": 233, + "origin_id": 85, + "origin_slot": 0, + "target_id": 136, + "target_slot": 6, + "type": "IMAGE" + }, + { + "id": 234, + "origin_id": 82, + "origin_slot": 0, + "target_id": 136, + "target_slot": 7, + "type": "IMAGE" + }, + { + "id": 235, + "origin_id": 79, + "origin_slot": 0, + "target_id": 136, + "target_slot": 8, + "type": "IMAGE" + } + ], + "extra": {}, + "category": "Image Tools/Crop" + } + ] + }, + "extra": { + "ue_links": [], + "links_added_by_ue": [] + } +} \ No newline at end of file diff --git a/blueprints/Depth to Image (Z-Image-Turbo).json b/blueprints/Depth to Image (Z-Image-Turbo).json index 0b657534f..4f69a8149 100644 --- a/blueprints/Depth to Image (Z-Image-Turbo).json +++ b/blueprints/Depth to Image (Z-Image-Turbo).json @@ -160,7 +160,7 @@ }, "revision": 0, "config": {}, - "name": "local-Depth to Image (Z-Image-Turbo)", + "name": "Depth to Image (Z-Image-Turbo)", "inputNode": { "id": -10, "bounding": [ @@ -2482,4 +2482,4 @@ "VHS_KeepIntermediate": true }, "version": 0.4 -} +} \ No newline at end of file diff --git a/blueprints/Depth to Video (ltx 2.0).json b/blueprints/Depth to Video (ltx 2.0).json index 98c39eea5..f15212520 100644 --- a/blueprints/Depth to Video (ltx 2.0).json +++ b/blueprints/Depth to Video (ltx 2.0).json @@ -261,7 +261,7 @@ }, "revision": 0, "config": {}, - "name": "local-Depth to Video (LTX 2.0)", + "name": "Depth to Video (LTX 2.0)", "inputNode": { "id": -10, "bounding": [ @@ -5208,4 +5208,4 @@ "workflowRendererVersion": "LG" }, "version": 0.4 -} +} \ No newline at end of file diff --git a/blueprints/First-Last-Frame to Video (LTX-2.3).json 
b/blueprints/First-Last-Frame to Video (LTX-2.3).json new file mode 100644 index 000000000..8ec9ed61a --- /dev/null +++ b/blueprints/First-Last-Frame to Video (LTX-2.3).json @@ -0,0 +1,3360 @@ +{ + "revision": 0, + "last_node_id": 228, + "last_link_id": 0, + "nodes": [ + { + "id": 228, + "type": "a5982aee-8136-4819-86a0-cf9d9e510ad6", + "pos": [ + 1490, + 4730 + ], + "size": [ + 274.8169921875, + 276 + ], + "flags": {}, + "order": 3, + "mode": 0, + "inputs": [ + { + "label": "first_frame", + "localized_name": "input", + "name": "input", + "type": "IMAGE,MASK", + "link": null + }, + { + "label": "last_frame", + "localized_name": "input_1", + "name": "input_1", + "type": "IMAGE,MASK", + "link": null + }, + { + "name": "text", + "type": "STRING", + "widget": { + "name": "text" + }, + "link": null + }, + { + "label": "width", + "name": "value", + "type": "INT", + "widget": { + "name": "value" + }, + "link": null + }, + { + "label": "height", + "name": "value_1", + "type": "INT", + "widget": { + "name": "value_1" + }, + "link": null + }, + { + "label": "duration", + "name": "value_2", + "type": "INT", + "widget": { + "name": "value_2" + }, + "link": null + }, + { + "label": "fps", + "name": "value_3", + "type": "INT", + "widget": { + "name": "value_3" + }, + "link": null + }, + { + "name": "noise_seed", + "type": "INT", + "widget": { + "name": "noise_seed" + }, + "link": null + }, + { + "label": "ckpt_name", + "name": "ckpt_name_1", + "type": "COMBO", + "widget": { + "name": "ckpt_name_1" + }, + "link": null + }, + { + "name": "text_encoder", + "type": "COMBO", + "widget": { + "name": "text_encoder" + }, + "link": null + } + ], + "outputs": [ + { + "localized_name": "VIDEO", + "name": "VIDEO", + "type": "VIDEO", + "links": [] + } + ], + "properties": { + "proxyWidgets": [ + [ + "222", + "text" + ], + [ + "215", + "value" + ], + [ + "216", + "value" + ], + [ + "198", + "value" + ], + [ + "205", + "value" + ], + [ + "196", + "noise_seed" + ], + [ + "224", + "ckpt_name" + ], + [ + "225", + "text_encoder" + ] + ], + "ue_properties": { + "widget_ue_connectable": {}, + "input_ue_unconnectable": {}, + "version": "7.7" + }, + "cnr_id": "comfy-core", + "ver": "0.18.1" + }, + "widgets_values": [], + "title": "First-Last-Frame to Video (LTX-2.3)" + } + ], + "links": [], + "version": 0.4, + "definitions": { + "subgraphs": [ + { + "id": "a5982aee-8136-4819-86a0-cf9d9e510ad6", + "version": 1, + "state": { + "lastGroupId": 22, + "lastNodeId": 228, + "lastLinkId": 276, + "lastRerouteId": 0 + }, + "revision": 0, + "config": {}, + "name": "First-Last-Frame to Video (LTX-2.3)", + "inputNode": { + "id": -10, + "bounding": [ + 270, + 3100, + 120, + 240 + ] + }, + "outputNode": { + "id": -20, + "bounding": [ + 3620, + 3120, + 120, + 60 + ] + }, + "inputs": [ + { + "id": "6fe179c4-d96f-4383-b202-844f6de4922e", + "name": "input", + "type": "IMAGE,MASK", + "linkIds": [ + 251 + ], + "localized_name": "input", + "label": "first_frame", + "pos": [ + 370, + 3120 + ] + }, + { + "id": "e80df1ae-5f39-4f86-91bd-0467635e2f2d", + "name": "input_1", + "type": "IMAGE,MASK", + "linkIds": [ + 253 + ], + "localized_name": "input_1", + "label": "last_frame", + "pos": [ + 370, + 3140 + ] + }, + { + "id": "433148fa-bf73-4ab1-81d9-09e2e38ed861", + "name": "text", + "type": "STRING", + "linkIds": [ + 265 + ], + "pos": [ + 370, + 3160 + ] + }, + { + "id": "36915bc8-a6ed-4d48-8619-e0e8723228e9", + "name": "value", + "type": "INT", + "linkIds": [ + 266 + ], + "label": "width", + "pos": [ + 370, + 3180 + ] + }, + { + "id": 
"425a36b8-91ab-41b7-81e9-496eba064ec8", + "name": "value_1", + "type": "INT", + "linkIds": [ + 267 + ], + "label": "height", + "pos": [ + 370, + 3200 + ] + }, + { + "id": "0c9e003b-bd07-4b7d-aa6d-789e138ed161", + "name": "value_2", + "type": "INT", + "linkIds": [ + 268 + ], + "label": "duration", + "pos": [ + 370, + 3220 + ] + }, + { + "id": "581b52ff-21c5-4774-ac2a-8f69a7e09e2e", + "name": "value_3", + "type": "INT", + "linkIds": [ + 269 + ], + "label": "fps", + "pos": [ + 370, + 3240 + ] + }, + { + "id": "d03cc171-45da-4658-99aa-77252bbcf522", + "name": "noise_seed", + "type": "INT", + "linkIds": [ + 270 + ], + "pos": [ + 370, + 3260 + ] + }, + { + "id": "e68e61c8-905e-43ac-8c76-65ac52270a08", + "name": "ckpt_name_1", + "type": "COMBO", + "linkIds": [ + 272, + 275, + 276 + ], + "label": "ckpt_name", + "pos": [ + 370, + 3280 + ] + }, + { + "id": "5d065f3b-891b-499f-950b-c2df0be24536", + "name": "text_encoder", + "type": "COMBO", + "linkIds": [ + 273 + ], + "pos": [ + 370, + 3300 + ] + } + ], + "outputs": [ + { + "id": "0c8c2dc0-c67c-4bc2-9e57-6aa00db2e3a9", + "name": "VIDEO", + "type": "VIDEO", + "linkIds": [ + 252 + ], + "localized_name": "VIDEO", + "pos": [ + 3640, + 3140 + ] + } + ], + "widgets": [], + "nodes": [ + { + "id": 195, + "type": "LTXVPreprocess", + "pos": [ + 1480, + 3780 + ], + "size": [ + 230, + 110 + ], + "flags": { + "collapsed": false + }, + "order": 2, + "mode": 0, + "inputs": [ + { + "localized_name": "image", + "name": "image", + "type": "IMAGE", + "link": 203 + }, + { + "localized_name": "img_compression", + "name": "img_compression", + "type": "INT", + "widget": { + "name": "img_compression" + }, + "link": null + } + ], + "outputs": [ + { + "localized_name": "output_image", + "name": "output_image", + "type": "IMAGE", + "links": [ + 229 + ] + } + ], + "properties": { + "ue_properties": { + "widget_ue_connectable": {}, + "version": "7.7", + "input_ue_unconnectable": {} + }, + "cnr_id": "comfy-core", + "ver": "0.7.0", + "Node name for S&R": "LTXVPreprocess", + "enableTabs": false, + "tabWidth": 65, + "tabXOffset": 10, + "hasSecondTab": false, + "secondTabText": "Send Back", + "secondTabOffset": 80, + "secondTabWidth": 65 + }, + "widgets_values": [ + 25 + ] + }, + { + "id": 196, + "type": "RandomNoise", + "pos": [ + 1990, + 2320 + ], + "size": [ + 280, + 110 + ], + "flags": {}, + "order": 3, + "mode": 0, + "inputs": [ + { + "localized_name": "noise_seed", + "name": "noise_seed", + "type": "INT", + "widget": { + "name": "noise_seed" + }, + "link": 270 + } + ], + "outputs": [ + { + "localized_name": "NOISE", + "name": "NOISE", + "type": "NOISE", + "links": [ + 246 + ] + } + ], + "properties": { + "ue_properties": { + "widget_ue_connectable": { + "noise_seed": true + }, + "version": "7.7", + "input_ue_unconnectable": {} + }, + "cnr_id": "comfy-core", + "ver": "0.14.1", + "Node name for S&R": "RandomNoise", + "enableTabs": false, + "tabWidth": 65, + "tabXOffset": 10, + "hasSecondTab": false, + "secondTabText": "Send Back", + "secondTabOffset": 80, + "secondTabWidth": 65 + }, + "widgets_values": [ + 315253765879496, + "randomize" + ] + }, + { + "id": 197, + "type": "LTXVEmptyLatentAudio", + "pos": [ + 2090, + 3820 + ], + "size": [ + 280, + 170 + ], + "flags": {}, + "order": 4, + "mode": 0, + "inputs": [ + { + "localized_name": "audio_vae", + "name": "audio_vae", + "type": "VAE", + "link": 205 + }, + { + "localized_name": "frames_number", + "name": "frames_number", + "type": "INT", + "widget": { + "name": "frames_number" + }, + "link": 262 + }, + { + "localized_name": 
"frame_rate", + "name": "frame_rate", + "type": "INT", + "widget": { + "name": "frame_rate" + }, + "link": 207 + }, + { + "localized_name": "batch_size", + "name": "batch_size", + "type": "INT", + "widget": { + "name": "batch_size" + }, + "link": null + } + ], + "outputs": [ + { + "localized_name": "Latent", + "name": "Latent", + "type": "LATENT", + "links": [ + 245 + ] + } + ], + "properties": { + "ue_properties": { + "widget_ue_connectable": {}, + "version": "7.5.2", + "input_ue_unconnectable": {} + }, + "cnr_id": "comfy-core", + "ver": "0.3.68", + "Node name for S&R": "LTXVEmptyLatentAudio", + "enableTabs": false, + "tabWidth": 65, + "tabXOffset": 10, + "hasSecondTab": false, + "secondTabText": "Send Back", + "secondTabOffset": 80, + "secondTabWidth": 65 + }, + "widgets_values": [ + 97, + 25, + 1 + ] + }, + { + "id": 198, + "type": "PrimitiveInt", + "pos": [ + 760, + 3650 + ], + "size": [ + 230, + 110 + ], + "flags": {}, + "order": 5, + "mode": 0, + "inputs": [ + { + "localized_name": "value", + "name": "value", + "type": "INT", + "widget": { + "name": "value" + }, + "link": 268 + } + ], + "outputs": [ + { + "localized_name": "INT", + "name": "INT", + "type": "INT", + "links": [ + 260 + ] + } + ], + "title": "Duration", + "properties": { + "ue_properties": { + "widget_ue_connectable": {}, + "version": "7.5.2", + "input_ue_unconnectable": {} + }, + "cnr_id": "comfy-core", + "ver": "0.7.0", + "Node name for S&R": "PrimitiveInt", + "enableTabs": false, + "tabWidth": 65, + "tabXOffset": 10, + "hasSecondTab": false, + "secondTabText": "Send Back", + "secondTabOffset": 80, + "secondTabWidth": 65 + }, + "widgets_values": [ + 5, + "fixed" + ] + }, + { + "id": 199, + "type": "LTXVPreprocess", + "pos": [ + 1480, + 3340 + ], + "size": [ + 230, + 110 + ], + "flags": { + "collapsed": false + }, + "order": 6, + "mode": 0, + "inputs": [ + { + "localized_name": "image", + "name": "image", + "type": "IMAGE", + "link": 210 + }, + { + "localized_name": "img_compression", + "name": "img_compression", + "type": "INT", + "widget": { + "name": "img_compression" + }, + "link": null + } + ], + "outputs": [ + { + "localized_name": "output_image", + "name": "output_image", + "type": "IMAGE", + "links": [ + 240 + ] + } + ], + "properties": { + "ue_properties": { + "widget_ue_connectable": {}, + "version": "7.7", + "input_ue_unconnectable": {} + }, + "cnr_id": "comfy-core", + "ver": "0.7.0", + "Node name for S&R": "LTXVPreprocess", + "enableTabs": false, + "tabWidth": 65, + "tabXOffset": 10, + "hasSecondTab": false, + "secondTabText": "Send Back", + "secondTabOffset": 80, + "secondTabWidth": 65 + }, + "widgets_values": [ + 25 + ] + }, + { + "id": 200, + "type": "LTXVCropGuides", + "pos": [ + 2820, + 2450 + ], + "size": [ + 280, + 120 + ], + "flags": {}, + "order": 7, + "mode": 0, + "inputs": [ + { + "localized_name": "positive", + "name": "positive", + "type": "CONDITIONING", + "link": 213 + }, + { + "localized_name": "negative", + "name": "negative", + "type": "CONDITIONING", + "link": 214 + }, + { + "localized_name": "latent", + "name": "latent", + "type": "LATENT", + "link": 215 + } + ], + "outputs": [ + { + "localized_name": "positive", + "name": "positive", + "type": "CONDITIONING", + "links": [] + }, + { + "localized_name": "negative", + "name": "negative", + "type": "CONDITIONING", + "links": [] + }, + { + "localized_name": "latent", + "name": "latent", + "type": "LATENT", + "links": [ + 211 + ] + } + ], + "properties": { + "ue_properties": { + "widget_ue_connectable": {}, + "input_ue_unconnectable": {}, + 
"version": "7.5.2" + }, + "cnr_id": "comfy-core", + "ver": "0.8.2", + "Node name for S&R": "LTXVCropGuides", + "enableTabs": false, + "tabWidth": 65, + "tabXOffset": 10, + "hasSecondTab": false, + "secondTabText": "Send Back", + "secondTabOffset": 80, + "secondTabWidth": 65 + } + }, + { + "id": 201, + "type": "EmptyLTXVLatentVideo", + "pos": [ + 2090, + 3580 + ], + "size": [ + 280, + 200 + ], + "flags": {}, + "order": 8, + "mode": 0, + "inputs": [ + { + "localized_name": "width", + "name": "width", + "type": "INT", + "widget": { + "name": "width" + }, + "link": 218 + }, + { + "localized_name": "height", + "name": "height", + "type": "INT", + "widget": { + "name": "height" + }, + "link": 219 + }, + { + "localized_name": "length", + "name": "length", + "type": "INT", + "widget": { + "name": "length" + }, + "link": 263 + }, + { + "localized_name": "batch_size", + "name": "batch_size", + "type": "INT", + "widget": { + "name": "batch_size" + }, + "link": null + } + ], + "outputs": [ + { + "localized_name": "LATENT", + "name": "LATENT", + "type": "LATENT", + "links": [ + 239 + ] + } + ], + "properties": { + "ue_properties": { + "widget_ue_connectable": {}, + "version": "7.5.2", + "input_ue_unconnectable": {} + }, + "cnr_id": "comfy-core", + "ver": "0.3.60", + "Node name for S&R": "EmptyLTXVLatentVideo", + "enableTabs": false, + "tabWidth": 65, + "tabXOffset": 10, + "hasSecondTab": false, + "secondTabText": "Send Back", + "secondTabOffset": 80, + "secondTabWidth": 65 + }, + "widgets_values": [ + 768, + 512, + 97, + 1 + ] + }, + { + "id": 202, + "type": "LTXVConditioning", + "pos": [ + 2090, + 3400 + ], + "size": [ + 280, + 130 + ], + "flags": {}, + "order": 9, + "mode": 0, + "inputs": [ + { + "localized_name": "positive", + "name": "positive", + "type": "CONDITIONING", + "link": 221 + }, + { + "localized_name": "negative", + "name": "negative", + "type": "CONDITIONING", + "link": 222 + }, + { + "localized_name": "frame_rate", + "name": "frame_rate", + "type": "FLOAT", + "widget": { + "name": "frame_rate" + }, + "link": 223 + } + ], + "outputs": [ + { + "localized_name": "positive", + "name": "positive", + "type": "CONDITIONING", + "links": [ + 236 + ] + }, + { + "localized_name": "negative", + "name": "negative", + "type": "CONDITIONING", + "links": [ + 237 + ] + } + ], + "properties": { + "ue_properties": { + "widget_ue_connectable": {}, + "version": "7.5.2", + "input_ue_unconnectable": {} + }, + "cnr_id": "comfy-core", + "ver": "0.3.56", + "Node name for S&R": "LTXVConditioning", + "enableTabs": false, + "tabWidth": 65, + "tabXOffset": 10, + "hasSecondTab": false, + "secondTabText": "Send Back", + "secondTabOffset": 80, + "secondTabWidth": 65 + }, + "widgets_values": [ + 25 + ] + }, + { + "id": 203, + "type": "GetImageSize", + "pos": [ + 1480, + 3500 + ], + "size": [ + 230, + 130 + ], + "flags": {}, + "order": 10, + "mode": 0, + "inputs": [ + { + "localized_name": "image", + "name": "image", + "type": "IMAGE", + "link": 224 + } + ], + "outputs": [ + { + "localized_name": "width", + "name": "width", + "type": "INT", + "links": [ + 218 + ] + }, + { + "localized_name": "height", + "name": "height", + "type": "INT", + "links": [ + 219 + ] + }, + { + "localized_name": "batch_size", + "name": "batch_size", + "type": "INT", + "links": [] + } + ], + "properties": { + "ue_properties": { + "widget_ue_connectable": {}, + "version": "7.7", + "input_ue_unconnectable": {} + }, + "cnr_id": "comfy-core", + "ver": "0.14.1", + "Node name for S&R": "GetImageSize", + "enableTabs": false, + "tabWidth": 65, + 
"tabXOffset": 10, + "hasSecondTab": false, + "secondTabText": "Send Back", + "secondTabOffset": 80, + "secondTabWidth": 65 + } + }, + { + "id": 204, + "type": "LTXVAddGuide", + "pos": [ + 2750, + 3700 + ], + "size": [ + 280, + 240 + ], + "flags": {}, + "order": 11, + "mode": 0, + "inputs": [ + { + "localized_name": "positive", + "name": "positive", + "type": "CONDITIONING", + "link": 225 + }, + { + "localized_name": "negative", + "name": "negative", + "type": "CONDITIONING", + "link": 226 + }, + { + "localized_name": "vae", + "name": "vae", + "type": "VAE", + "link": 227 + }, + { + "localized_name": "latent", + "name": "latent", + "type": "LATENT", + "link": 228 + }, + { + "localized_name": "image", + "name": "image", + "type": "IMAGE", + "link": 229 + }, + { + "localized_name": "frame_idx", + "name": "frame_idx", + "type": "INT", + "widget": { + "name": "frame_idx" + }, + "link": null + }, + { + "localized_name": "strength", + "name": "strength", + "type": "FLOAT", + "widget": { + "name": "strength" + }, + "link": null + } + ], + "outputs": [ + { + "localized_name": "positive", + "name": "positive", + "type": "CONDITIONING", + "links": [ + 213, + 242 + ] + }, + { + "localized_name": "negative", + "name": "negative", + "type": "CONDITIONING", + "links": [ + 214, + 243 + ] + }, + { + "localized_name": "latent", + "name": "latent", + "type": "LATENT", + "links": [ + 244 + ] + } + ], + "properties": { + "ue_properties": { + "widget_ue_connectable": {}, + "version": "7.7", + "input_ue_unconnectable": {} + }, + "cnr_id": "comfy-core", + "ver": "0.12.3", + "Node name for S&R": "LTXVAddGuide", + "enableTabs": false, + "tabWidth": 65, + "tabXOffset": 10, + "hasSecondTab": false, + "secondTabText": "Send Back", + "secondTabOffset": 80, + "secondTabWidth": 65 + }, + "widgets_values": [ + -1, + 0.7 + ] + }, + { + "id": 205, + "type": "PrimitiveInt", + "pos": [ + 760, + 3800 + ], + "size": [ + 230, + 110 + ], + "flags": {}, + "order": 12, + "mode": 0, + "inputs": [ + { + "localized_name": "value", + "name": "value", + "type": "INT", + "widget": { + "name": "value" + }, + "link": 269 + } + ], + "outputs": [ + { + "localized_name": "INT", + "name": "INT", + "type": "INT", + "links": [ + 207, + 235, + 261 + ] + } + ], + "title": "Frame Rate(int)", + "properties": { + "ue_properties": { + "widget_ue_connectable": {}, + "version": "7.5.2", + "input_ue_unconnectable": {} + }, + "cnr_id": "comfy-core", + "ver": "0.7.0", + "Node name for S&R": "PrimitiveInt", + "enableTabs": false, + "tabWidth": 65, + "tabXOffset": 10, + "hasSecondTab": false, + "secondTabText": "Send Back", + "secondTabOffset": 80, + "secondTabWidth": 65 + }, + "widgets_values": [ + 25, + "fixed" + ] + }, + { + "id": 206, + "type": "LTXVAddGuide", + "pos": [ + 2750, + 3430 + ], + "size": [ + 280, + 240 + ], + "flags": {}, + "order": 13, + "mode": 0, + "inputs": [ + { + "localized_name": "positive", + "name": "positive", + "type": "CONDITIONING", + "link": 236 + }, + { + "localized_name": "negative", + "name": "negative", + "type": "CONDITIONING", + "link": 237 + }, + { + "localized_name": "vae", + "name": "vae", + "type": "VAE", + "link": 238 + }, + { + "localized_name": "latent", + "name": "latent", + "type": "LATENT", + "link": 239 + }, + { + "localized_name": "image", + "name": "image", + "type": "IMAGE", + "link": 240 + }, + { + "localized_name": "frame_idx", + "name": "frame_idx", + "type": "INT", + "widget": { + "name": "frame_idx" + }, + "link": null + }, + { + "localized_name": "strength", + "name": "strength", + "type": "FLOAT", + 
"widget": { + "name": "strength" + }, + "link": null + } + ], + "outputs": [ + { + "localized_name": "positive", + "name": "positive", + "type": "CONDITIONING", + "links": [ + 225 + ] + }, + { + "localized_name": "negative", + "name": "negative", + "type": "CONDITIONING", + "links": [ + 226 + ] + }, + { + "localized_name": "latent", + "name": "latent", + "type": "LATENT", + "links": [ + 228 + ] + } + ], + "properties": { + "ue_properties": { + "widget_ue_connectable": {}, + "version": "7.7", + "input_ue_unconnectable": {} + }, + "cnr_id": "comfy-core", + "ver": "0.12.3", + "Node name for S&R": "LTXVAddGuide", + "enableTabs": false, + "tabWidth": 65, + "tabXOffset": 10, + "hasSecondTab": false, + "secondTabText": "Send Back", + "secondTabOffset": 80, + "secondTabWidth": 65 + }, + "widgets_values": [ + 0, + 0.7 + ] + }, + { + "id": 207, + "type": "CFGGuider", + "pos": [ + 1990, + 2500 + ], + "size": [ + 280, + 160 + ], + "flags": {}, + "order": 14, + "mode": 0, + "inputs": [ + { + "localized_name": "model", + "name": "model", + "type": "MODEL", + "link": 241 + }, + { + "localized_name": "positive", + "name": "positive", + "type": "CONDITIONING", + "link": 242 + }, + { + "localized_name": "negative", + "name": "negative", + "type": "CONDITIONING", + "link": 243 + }, + { + "localized_name": "cfg", + "name": "cfg", + "type": "FLOAT", + "widget": { + "name": "cfg" + }, + "link": null + } + ], + "outputs": [ + { + "localized_name": "GUIDER", + "name": "GUIDER", + "type": "GUIDER", + "links": [ + 247 + ] + } + ], + "properties": { + "ue_properties": { + "widget_ue_connectable": {}, + "version": "7.7", + "input_ue_unconnectable": {} + }, + "cnr_id": "comfy-core", + "ver": "0.14.1", + "Node name for S&R": "CFGGuider", + "enableTabs": false, + "tabWidth": 65, + "tabXOffset": 10, + "hasSecondTab": false, + "secondTabText": "Send Back", + "secondTabOffset": 80, + "secondTabWidth": 65 + }, + "widgets_values": [ + 1 + ] + }, + { + "id": 208, + "type": "SamplerEulerAncestral", + "pos": [ + 1990, + 2720 + ], + "size": [ + 280, + 120 + ], + "flags": {}, + "order": 0, + "mode": 0, + "inputs": [ + { + "localized_name": "eta", + "name": "eta", + "type": "FLOAT", + "widget": { + "name": "eta" + }, + "link": null + }, + { + "localized_name": "s_noise", + "name": "s_noise", + "type": "FLOAT", + "widget": { + "name": "s_noise" + }, + "link": null + } + ], + "outputs": [ + { + "localized_name": "SAMPLER", + "name": "SAMPLER", + "type": "SAMPLER", + "links": [ + 248 + ] + } + ], + "properties": { + "ue_properties": { + "widget_ue_connectable": {}, + "version": "7.7", + "input_ue_unconnectable": {} + }, + "cnr_id": "comfy-core", + "ver": "0.14.1", + "Node name for S&R": "SamplerEulerAncestral", + "enableTabs": false, + "tabWidth": 65, + "tabXOffset": 10, + "hasSecondTab": false, + "secondTabText": "Send Back", + "secondTabOffset": 80, + "secondTabWidth": 65 + }, + "widgets_values": [ + 0, + 1 + ] + }, + { + "id": 209, + "type": "ManualSigmas", + "pos": [ + 1990, + 2910 + ], + "size": [ + 280, + 110 + ], + "flags": {}, + "order": 1, + "mode": 0, + "inputs": [ + { + "localized_name": "sigmas", + "name": "sigmas", + "type": "STRING", + "widget": { + "name": "sigmas" + }, + "link": null + } + ], + "outputs": [ + { + "localized_name": "SIGMAS", + "name": "SIGMAS", + "type": "SIGMAS", + "links": [ + 249 + ] + } + ], + "properties": { + "ue_properties": { + "widget_ue_connectable": {}, + "version": "7.7", + "input_ue_unconnectable": {} + }, + "cnr_id": "comfy-core", + "ver": "0.14.1", + "Node name for S&R": "ManualSigmas", 
+ "enableTabs": false, + "tabWidth": 65, + "tabXOffset": 10, + "hasSecondTab": false, + "secondTabText": "Send Back", + "secondTabOffset": 80, + "secondTabWidth": 65 + }, + "widgets_values": [ + "1., 0.99375, 0.9875, 0.98125, 0.975, 0.909375, 0.725, 0.421875, 0.0" + ] + }, + { + "id": 210, + "type": "LTXVConcatAVLatent", + "pos": [ + 1990, + 3090 + ], + "size": [ + 280, + 100 + ], + "flags": {}, + "order": 15, + "mode": 0, + "inputs": [ + { + "localized_name": "video_latent", + "name": "video_latent", + "type": "LATENT", + "link": 244 + }, + { + "localized_name": "audio_latent", + "name": "audio_latent", + "type": "LATENT", + "link": 245 + } + ], + "outputs": [ + { + "localized_name": "latent", + "name": "latent", + "type": "LATENT", + "links": [ + 250 + ] + } + ], + "properties": { + "ue_properties": { + "widget_ue_connectable": {}, + "version": "7.5.2", + "input_ue_unconnectable": {} + }, + "cnr_id": "comfy-core", + "ver": "0.7.0", + "Node name for S&R": "LTXVConcatAVLatent", + "enableTabs": false, + "tabWidth": 65, + "tabXOffset": 10, + "hasSecondTab": false, + "secondTabText": "Send Back", + "secondTabOffset": 80, + "secondTabWidth": 65 + } + }, + { + "id": 211, + "type": "SamplerCustomAdvanced", + "pos": [ + 2460, + 2330 + ], + "size": [ + 230, + 170 + ], + "flags": {}, + "order": 16, + "mode": 0, + "inputs": [ + { + "localized_name": "noise", + "name": "noise", + "type": "NOISE", + "link": 246 + }, + { + "localized_name": "guider", + "name": "guider", + "type": "GUIDER", + "link": 247 + }, + { + "localized_name": "sampler", + "name": "sampler", + "type": "SAMPLER", + "link": 248 + }, + { + "localized_name": "sigmas", + "name": "sigmas", + "type": "SIGMAS", + "link": 249 + }, + { + "localized_name": "latent_image", + "name": "latent_image", + "type": "LATENT", + "link": 250 + } + ], + "outputs": [ + { + "localized_name": "output", + "name": "output", + "type": "LATENT", + "links": [] + }, + { + "localized_name": "denoised_output", + "name": "denoised_output", + "type": "LATENT", + "links": [ + 204 + ] + } + ], + "properties": { + "ue_properties": { + "widget_ue_connectable": {}, + "version": "7.7", + "input_ue_unconnectable": {} + }, + "cnr_id": "comfy-core", + "ver": "0.14.1", + "Node name for S&R": "SamplerCustomAdvanced", + "enableTabs": false, + "tabWidth": 65, + "tabXOffset": 10, + "hasSecondTab": false, + "secondTabText": "Send Back", + "secondTabOffset": 80, + "secondTabWidth": 65 + } + }, + { + "id": 212, + "type": "ComfyMathExpression", + "pos": [ + 760, + 3970 + ], + "size": [ + 230, + 170 + ], + "flags": { + "collapsed": true + }, + "order": 17, + "mode": 0, + "inputs": [ + { + "label": "a", + "localized_name": "values.a", + "name": "values.a", + "type": "FLOAT,INT", + "link": 235 + }, + { + "label": "b", + "localized_name": "values.b", + "name": "values.b", + "shape": 7, + "type": "FLOAT,INT", + "link": null + }, + { + "localized_name": "expression", + "name": "expression", + "type": "STRING", + "widget": { + "name": "expression" + }, + "link": null + } + ], + "outputs": [ + { + "localized_name": "FLOAT", + "name": "FLOAT", + "type": "FLOAT", + "links": [ + 223, + 234 + ] + }, + { + "localized_name": "INT", + "name": "INT", + "type": "INT", + "links": [] + } + ], + "properties": { + "ue_properties": { + "widget_ue_connectable": {}, + "version": "7.7", + "input_ue_unconnectable": {} + }, + "cnr_id": "comfy-core", + "ver": "0.17.0", + "Node name for S&R": "ComfyMathExpression", + "enableTabs": false, + "tabWidth": 65, + "tabXOffset": 10, + "hasSecondTab": false, + 
"secondTabText": "Send Back", + "secondTabOffset": 80, + "secondTabWidth": 65 + }, + "widgets_values": [ + "a" + ] + }, + { + "id": 213, + "type": "ResizeImageMaskNode", + "pos": [ + 1130, + 3340 + ], + "size": [ + 280, + 160 + ], + "flags": {}, + "order": 18, + "mode": 0, + "inputs": [ + { + "localized_name": "input", + "name": "input", + "type": "IMAGE,MASK", + "link": 251 + }, + { + "localized_name": "resize_type", + "name": "resize_type", + "type": "COMFY_DYNAMICCOMBO_V3", + "widget": { + "name": "resize_type" + }, + "link": null + }, + { + "localized_name": "width", + "name": "resize_type.width", + "type": "INT", + "widget": { + "name": "resize_type.width" + }, + "link": 208 + }, + { + "localized_name": "height", + "name": "resize_type.height", + "type": "INT", + "widget": { + "name": "resize_type.height" + }, + "link": 209 + }, + { + "localized_name": "crop", + "name": "resize_type.crop", + "type": "COMBO", + "widget": { + "name": "resize_type.crop" + }, + "link": null + }, + { + "localized_name": "scale_method", + "name": "scale_method", + "type": "COMBO", + "widget": { + "name": "scale_method" + }, + "link": null + } + ], + "outputs": [ + { + "localized_name": "resized", + "name": "resized", + "type": "*", + "links": [ + 210, + 224 + ] + } + ], + "properties": { + "ue_properties": { + "widget_ue_connectable": { + "resize_type.width": true, + "resize_type.height": true + }, + "version": "7.7", + "input_ue_unconnectable": {} + }, + "cnr_id": "comfy-core", + "ver": "0.14.1", + "Node name for S&R": "ResizeImageMaskNode", + "enableTabs": false, + "tabWidth": 65, + "tabXOffset": 10, + "hasSecondTab": false, + "secondTabText": "Send Back", + "secondTabOffset": 80, + "secondTabWidth": 65 + }, + "widgets_values": [ + "scale dimensions", + 640, + 360, + "center", + "nearest-exact" + ] + }, + { + "id": 214, + "type": "ResizeImageMaskNode", + "pos": [ + 1130, + 3780 + ], + "size": [ + 280, + 160 + ], + "flags": {}, + "order": 19, + "mode": 0, + "inputs": [ + { + "localized_name": "input", + "name": "input", + "type": "IMAGE,MASK", + "link": 253 + }, + { + "localized_name": "resize_type", + "name": "resize_type", + "type": "COMFY_DYNAMICCOMBO_V3", + "widget": { + "name": "resize_type" + }, + "link": null + }, + { + "localized_name": "width", + "name": "resize_type.width", + "type": "INT", + "widget": { + "name": "resize_type.width" + }, + "link": 201 + }, + { + "localized_name": "height", + "name": "resize_type.height", + "type": "INT", + "widget": { + "name": "resize_type.height" + }, + "link": 202 + }, + { + "localized_name": "crop", + "name": "resize_type.crop", + "type": "COMBO", + "widget": { + "name": "resize_type.crop" + }, + "link": null + }, + { + "localized_name": "scale_method", + "name": "scale_method", + "type": "COMBO", + "widget": { + "name": "scale_method" + }, + "link": null + } + ], + "outputs": [ + { + "localized_name": "resized", + "name": "resized", + "type": "*", + "links": [ + 203 + ] + } + ], + "properties": { + "ue_properties": { + "widget_ue_connectable": { + "resize_type.width": true, + "resize_type.height": true + }, + "version": "7.7", + "input_ue_unconnectable": {} + }, + "cnr_id": "comfy-core", + "ver": "0.14.1", + "Node name for S&R": "ResizeImageMaskNode", + "enableTabs": false, + "tabWidth": 65, + "tabXOffset": 10, + "hasSecondTab": false, + "secondTabText": "Send Back", + "secondTabOffset": 80, + "secondTabWidth": 65 + }, + "widgets_values": [ + "scale dimensions", + 640, + 360, + "center", + "nearest-exact" + ] + }, + { + "id": 215, + "type": "PrimitiveInt", 
+ "pos": [ + 760, + 3340 + ], + "size": [ + 230, + 110 + ], + "flags": {}, + "order": 20, + "mode": 0, + "inputs": [ + { + "localized_name": "value", + "name": "value", + "type": "INT", + "widget": { + "name": "value" + }, + "link": 266 + } + ], + "outputs": [ + { + "localized_name": "INT", + "name": "INT", + "type": "INT", + "links": [ + 201, + 208 + ] + } + ], + "title": "Width", + "properties": { + "ue_properties": { + "widget_ue_connectable": {}, + "version": "7.5.2", + "input_ue_unconnectable": {} + }, + "cnr_id": "comfy-core", + "ver": "0.7.0", + "Node name for S&R": "PrimitiveInt", + "enableTabs": false, + "tabWidth": 65, + "tabXOffset": 10, + "hasSecondTab": false, + "secondTabText": "Send Back", + "secondTabOffset": 80, + "secondTabWidth": 65 + }, + "widgets_values": [ + 1280, + "fixed" + ] + }, + { + "id": 216, + "type": "PrimitiveInt", + "pos": [ + 760, + 3490 + ], + "size": [ + 230, + 110 + ], + "flags": {}, + "order": 21, + "mode": 0, + "inputs": [ + { + "localized_name": "value", + "name": "value", + "type": "INT", + "widget": { + "name": "value" + }, + "link": 267 + } + ], + "outputs": [ + { + "localized_name": "INT", + "name": "INT", + "type": "INT", + "links": [ + 202, + 209 + ] + } + ], + "title": "height", + "properties": { + "ue_properties": { + "widget_ue_connectable": {}, + "version": "7.5.2", + "input_ue_unconnectable": {} + }, + "cnr_id": "comfy-core", + "ver": "0.7.0", + "Node name for S&R": "PrimitiveInt", + "enableTabs": false, + "tabWidth": 65, + "tabXOffset": 10, + "hasSecondTab": false, + "secondTabText": "Send Back", + "secondTabOffset": 80, + "secondTabWidth": 65 + }, + "widgets_values": [ + 720, + "fixed" + ] + }, + { + "id": 217, + "type": "CLIPTextEncode", + "pos": [ + 1320, + 2870 + ], + "size": [ + 590, + 200 + ], + "flags": { + "collapsed": false + }, + "order": 22, + "mode": 0, + "inputs": [ + { + "localized_name": "clip", + "name": "clip", + "type": "CLIP", + "link": 230 + }, + { + "localized_name": "text", + "name": "text", + "type": "STRING", + "widget": { + "name": "text" + }, + "link": null + } + ], + "outputs": [ + { + "localized_name": "CONDITIONING", + "name": "CONDITIONING", + "type": "CONDITIONING", + "links": [ + 222 + ] + } + ], + "properties": { + "ue_properties": { + "widget_ue_connectable": {}, + "version": "7.5.2", + "input_ue_unconnectable": {} + }, + "cnr_id": "comfy-core", + "ver": "0.3.56", + "Node name for S&R": "CLIPTextEncode", + "enableTabs": false, + "tabWidth": 65, + "tabXOffset": 10, + "hasSecondTab": false, + "secondTabText": "Send Back", + "secondTabOffset": 80, + "secondTabWidth": 65 + }, + "widgets_values": [ + "blurry, out of focus, overexposed, underexposed, low contrast, washed out colors, excessive noise, grainy texture, poor lighting, flickering, motion blur, distorted proportions, unnatural skin tones, deformed facial features, asymmetrical face, missing facial features, extra limbs, disfigured hands, wrong hand count, artifacts around text, unreadable text on shirt or hat, incorrect lettering on cap (“PNTR”), incorrect t-shirt slogan (“JUST DO IT”), missing microphone, misplaced microphone, inconsistent perspective, camera shake, incorrect depth of field, background too sharp, background clutter, distracting reflections, harsh shadows, inconsistent lighting direction, color banding, cartoonish rendering, 3D CGI look, unrealistic materials, uncanny valley effect, incorrect ethnicity, wrong gender, exaggerated expressions, smiling, laughing, exaggerated sadness, wrong gaze direction, eyes looking at camera, 
mismatched lip sync, silent or muted audio, distorted voice, robotic voice, echo, background noise, off-sync audio, missing sniff sounds, incorrect dialogue, added dialogue, repetitive speech, jittery movement, awkward pauses, incorrect timing, unnatural transitions, inconsistent framing, tilted camera, missing door or shelves, missing shallow depth of field, flat lighting, inconsistent tone, cinematic oversaturation, stylized filters, or AI artifacts." + ], + "color": "#323", + "bgcolor": "#535" + }, + { + "id": 218, + "type": "CreateVideo", + "pos": [ + 3280, + 2320 + ], + "size": [ + 280, + 130 + ], + "flags": {}, + "order": 23, + "mode": 0, + "inputs": [ + { + "localized_name": "images", + "name": "images", + "type": "IMAGE", + "link": 232 + }, + { + "localized_name": "audio", + "name": "audio", + "shape": 7, + "type": "AUDIO", + "link": 233 + }, + { + "localized_name": "fps", + "name": "fps", + "type": "FLOAT", + "widget": { + "name": "fps" + }, + "link": 234 + } + ], + "outputs": [ + { + "localized_name": "VIDEO", + "name": "VIDEO", + "type": "VIDEO", + "links": [ + 252 + ] + } + ], + "properties": { + "ue_properties": { + "widget_ue_connectable": {}, + "version": "7.7", + "input_ue_unconnectable": {} + }, + "cnr_id": "comfy-core", + "ver": "0.14.1", + "Node name for S&R": "CreateVideo", + "enableTabs": false, + "tabWidth": 65, + "tabXOffset": 10, + "hasSecondTab": false, + "secondTabText": "Send Back", + "secondTabOffset": 80, + "secondTabWidth": 65 + }, + "widgets_values": [ + 24 + ] + }, + { + "id": 219, + "type": "VAEDecodeTiled", + "pos": [ + 2820, + 2630 + ], + "size": [ + 280, + 200 + ], + "flags": {}, + "order": 24, + "mode": 0, + "inputs": [ + { + "localized_name": "samples", + "name": "samples", + "type": "LATENT", + "link": 211 + }, + { + "localized_name": "vae", + "name": "vae", + "type": "VAE", + "link": 212 + }, + { + "localized_name": "tile_size", + "name": "tile_size", + "type": "INT", + "widget": { + "name": "tile_size" + }, + "link": null + }, + { + "localized_name": "overlap", + "name": "overlap", + "type": "INT", + "widget": { + "name": "overlap" + }, + "link": null + }, + { + "localized_name": "temporal_size", + "name": "temporal_size", + "type": "INT", + "widget": { + "name": "temporal_size" + }, + "link": null + }, + { + "localized_name": "temporal_overlap", + "name": "temporal_overlap", + "type": "INT", + "widget": { + "name": "temporal_overlap" + }, + "link": null + } + ], + "outputs": [ + { + "localized_name": "IMAGE", + "name": "IMAGE", + "type": "IMAGE", + "links": [ + 232 + ] + } + ], + "properties": { + "ue_properties": { + "widget_ue_connectable": {}, + "version": "7.5.2", + "input_ue_unconnectable": {} + }, + "cnr_id": "comfy-core", + "ver": "0.7.0", + "Node name for S&R": "VAEDecodeTiled", + "enableTabs": false, + "tabWidth": 65, + "tabXOffset": 10, + "hasSecondTab": false, + "secondTabText": "Send Back", + "secondTabOffset": 80, + "secondTabWidth": 65 + }, + "widgets_values": [ + 768, + 64, + 4096, + 64 + ] + }, + { + "id": 220, + "type": "LTXVAudioVAEDecode", + "pos": [ + 2820, + 2920 + ], + "size": [ + 280, + 100 + ], + "flags": {}, + "order": 25, + "mode": 0, + "inputs": [ + { + "localized_name": "samples", + "name": "samples", + "type": "LATENT", + "link": 216 + }, + { + "label": "Audio VAE", + "localized_name": "audio_vae", + "name": "audio_vae", + "type": "VAE", + "link": 217 + } + ], + "outputs": [ + { + "localized_name": "Audio", + "name": "Audio", + "type": "AUDIO", + "links": [ + 233 + ] + } + ], + "properties": { + "ue_properties": { + 
"widget_ue_connectable": {}, + "version": "7.5.2", + "input_ue_unconnectable": {} + }, + "cnr_id": "comfy-core", + "ver": "0.7.0", + "Node name for S&R": "LTXVAudioVAEDecode", + "enableTabs": false, + "tabWidth": 65, + "tabXOffset": 10, + "hasSecondTab": false, + "secondTabText": "Send Back", + "secondTabOffset": 80, + "secondTabWidth": 65 + } + }, + { + "id": 221, + "type": "LTXVSeparateAVLatent", + "pos": [ + 2460, + 2580 + ], + "size": [ + 250, + 100 + ], + "flags": {}, + "order": 26, + "mode": 0, + "inputs": [ + { + "localized_name": "av_latent", + "name": "av_latent", + "type": "LATENT", + "link": 204 + } + ], + "outputs": [ + { + "localized_name": "video_latent", + "name": "video_latent", + "type": "LATENT", + "links": [ + 215 + ] + }, + { + "localized_name": "audio_latent", + "name": "audio_latent", + "type": "LATENT", + "links": [ + 216 + ] + } + ], + "properties": { + "ue_properties": { + "widget_ue_connectable": {}, + "version": "7.5.2", + "input_ue_unconnectable": {} + }, + "cnr_id": "comfy-core", + "ver": "0.5.1", + "Node name for S&R": "LTXVSeparateAVLatent", + "enableTabs": false, + "tabWidth": 65, + "tabXOffset": 10, + "hasSecondTab": false, + "secondTabText": "Send Back", + "secondTabOffset": 80, + "secondTabWidth": 65 + } + }, + { + "id": 222, + "type": "CLIPTextEncode", + "pos": [ + 1310, + 2380 + ], + "size": [ + 620, + 420 + ], + "flags": {}, + "order": 27, + "mode": 0, + "inputs": [ + { + "localized_name": "clip", + "name": "clip", + "type": "CLIP", + "link": 231 + }, + { + "localized_name": "text", + "name": "text", + "type": "STRING", + "widget": { + "name": "text" + }, + "link": 265 + } + ], + "outputs": [ + { + "localized_name": "CONDITIONING", + "name": "CONDITIONING", + "type": "CONDITIONING", + "links": [ + 221 + ] + } + ], + "properties": { + "ue_properties": { + "widget_ue_connectable": {}, + "version": "7.5.2", + "input_ue_unconnectable": {} + }, + "cnr_id": "comfy-core", + "ver": "0.3.56", + "Node name for S&R": "CLIPTextEncode", + "enableTabs": false, + "tabWidth": 65, + "tabXOffset": 10, + "hasSecondTab": false, + "secondTabText": "Send Back", + "secondTabOffset": 80, + "secondTabWidth": 65 + }, + "widgets_values": [ + "" + ], + "color": "#232", + "bgcolor": "#353" + }, + { + "id": 223, + "type": "CheckpointLoaderSimple", + "pos": [ + 770, + 2380 + ], + "size": [ + 420, + 160 + ], + "flags": {}, + "order": 28, + "mode": 0, + "inputs": [ + { + "localized_name": "ckpt_name", + "name": "ckpt_name", + "type": "COMBO", + "widget": { + "name": "ckpt_name" + }, + "link": 276 + } + ], + "outputs": [ + { + "localized_name": "MODEL", + "name": "MODEL", + "type": "MODEL", + "links": [ + 241 + ] + }, + { + "localized_name": "CLIP", + "name": "CLIP", + "type": "CLIP", + "links": [] + }, + { + "localized_name": "VAE", + "name": "VAE", + "type": "VAE", + "links": [ + 212, + 227, + 238 + ] + } + ], + "properties": { + "ue_properties": { + "widget_ue_connectable": {}, + "input_ue_unconnectable": {}, + "version": "7.5.2" + }, + "cnr_id": "comfy-core", + "ver": "0.10.0", + "Node name for S&R": "CheckpointLoaderSimple", + "enableTabs": false, + "tabWidth": 65, + "tabXOffset": 10, + "hasSecondTab": false, + "secondTabText": "Send Back", + "secondTabOffset": 80, + "secondTabWidth": 65, + "models": [ + { + "name": "ltx-2.3-22b-distilled-fp8.safetensors", + "url": "https://huggingface.co/Lightricks/LTX-2.3-fp8/resolve/main/ltx-2.3-22b-distilled-fp8.safetensors", + "directory": "checkpoints" + } + ] + }, + "widgets_values": [ + "ltx-2.3-22b-distilled-fp8.safetensors" + ] + }, + { 
+ "id": 224, + "type": "LTXVAudioVAELoader", + "pos": [ + 770, + 2660 + ], + "size": [ + 420, + 110 + ], + "flags": {}, + "order": 29, + "mode": 0, + "inputs": [ + { + "localized_name": "ckpt_name", + "name": "ckpt_name", + "type": "COMBO", + "widget": { + "name": "ckpt_name" + }, + "link": 272 + } + ], + "outputs": [ + { + "localized_name": "Audio VAE", + "name": "Audio VAE", + "type": "VAE", + "links": [ + 205, + 217 + ] + } + ], + "properties": { + "ue_properties": { + "widget_ue_connectable": {}, + "input_ue_unconnectable": {}, + "version": "7.5.2" + }, + "cnr_id": "comfy-core", + "ver": "0.10.0", + "Node name for S&R": "LTXVAudioVAELoader", + "enableTabs": false, + "tabWidth": 65, + "tabXOffset": 10, + "hasSecondTab": false, + "secondTabText": "Send Back", + "secondTabOffset": 80, + "secondTabWidth": 65, + "models": [ + { + "name": "ltx-2.3-22b-distilled-fp8.safetensors", + "url": "https://huggingface.co/Lightricks/LTX-2.3-fp8/resolve/main/ltx-2.3-22b-distilled-fp8.safetensors", + "directory": "checkpoints" + } + ] + }, + "widgets_values": [ + "ltx-2.3-22b-distilled-fp8.safetensors" + ] + }, + { + "id": 225, + "type": "LTXAVTextEncoderLoader", + "pos": [ + 770, + 2890 + ], + "size": [ + 410, + 160 + ], + "flags": {}, + "order": 30, + "mode": 0, + "inputs": [ + { + "localized_name": "text_encoder", + "name": "text_encoder", + "type": "COMBO", + "widget": { + "name": "text_encoder" + }, + "link": 273 + }, + { + "localized_name": "ckpt_name", + "name": "ckpt_name", + "type": "COMBO", + "widget": { + "name": "ckpt_name" + }, + "link": 275 + }, + { + "localized_name": "device", + "name": "device", + "type": "COMBO", + "widget": { + "name": "device" + }, + "link": null + } + ], + "outputs": [ + { + "localized_name": "CLIP", + "name": "CLIP", + "type": "CLIP", + "links": [ + 230, + 231 + ] + } + ], + "properties": { + "ue_properties": { + "widget_ue_connectable": {}, + "input_ue_unconnectable": {}, + "version": "7.5.2" + }, + "cnr_id": "comfy-core", + "ver": "0.10.0", + "Node name for S&R": "LTXAVTextEncoderLoader", + "enableTabs": false, + "tabWidth": 65, + "tabXOffset": 10, + "hasSecondTab": false, + "secondTabText": "Send Back", + "secondTabOffset": 80, + "secondTabWidth": 65, + "models": [ + { + "name": "gemma_3_12B_it_fp4_mixed.safetensors", + "url": "https://huggingface.co/Comfy-Org/ltx-2/resolve/main/split_files/text_encoders/gemma_3_12B_it_fp4_mixed.safetensors", + "directory": "text_encoders" + }, + { + "name": "ltx-2.3-22b-distilled-fp8.safetensors", + "url": "https://huggingface.co/Lightricks/LTX-2.3-fp8/resolve/main/ltx-2.3-22b-distilled-fp8.safetensors", + "directory": "checkpoints" + } + ] + }, + "widgets_values": [ + "gemma_3_12B_it_fp4_mixed.safetensors", + "ltx-2.3-22b-distilled-fp8.safetensors", + "default" + ] + }, + { + "id": 226, + "type": "ComfyMathExpression", + "pos": [ + 760, + 4020 + ], + "size": [ + 400, + 200 + ], + "flags": { + "collapsed": true + }, + "order": 31, + "mode": 0, + "inputs": [ + { + "label": "a", + "localized_name": "values.a", + "name": "values.a", + "type": "FLOAT,INT", + "link": 260 + }, + { + "label": "b", + "localized_name": "values.b", + "name": "values.b", + "shape": 7, + "type": "FLOAT,INT", + "link": 261 + }, + { + "label": "c", + "localized_name": "values.c", + "name": "values.c", + "shape": 7, + "type": "FLOAT,INT", + "link": null + }, + { + "localized_name": "expression", + "name": "expression", + "type": "STRING", + "widget": { + "name": "expression" + }, + "link": null + } + ], + "outputs": [ + { + "localized_name": "FLOAT", + "name": 
"FLOAT", + "type": "FLOAT", + "links": null + }, + { + "localized_name": "INT", + "name": "INT", + "type": "INT", + "links": [ + 262, + 263 + ] + } + ], + "properties": { + "ue_properties": { + "widget_ue_connectable": {}, + "input_ue_unconnectable": {}, + "version": "7.7" + }, + "cnr_id": "comfy-core", + "ver": "0.18.1", + "Node name for S&R": "ComfyMathExpression" + }, + "widgets_values": [ + "a * b + 1" + ] + } + ], + "groups": [ + { + "id": 1, + "title": "Conditioning", + "bounding": [ + 1850, + 3250, + 1370, + 800 + ], + "color": "#3f789e", + "font_size": 24, + "flags": {} + }, + { + "id": 2, + "title": "Settings", + "bounding": [ + 730, + 3250, + 290, + 800 + ], + "color": "#3f789e", + "font_size": 24, + "flags": {} + }, + { + "id": 3, + "title": "FIrst Frame", + "bounding": [ + 1050, + 3250, + 770, + 400 + ], + "color": "#3f789e", + "font_size": 24, + "flags": {} + }, + { + "id": 4, + "title": "Last Frame", + "bounding": [ + 1050, + 3680, + 770, + 370 + ], + "color": "#3f789e", + "font_size": 24, + "flags": {} + }, + { + "id": 5, + "title": "Model", + "bounding": [ + 730, + 2240, + 500, + 980 + ], + "color": "#3f789e", + "font_size": 24, + "flags": {} + }, + { + "id": 6, + "title": "Prompt", + "bounding": [ + 1260, + 2240, + 680, + 980 + ], + "color": "#3f789e", + "font_size": 24, + "flags": {} + }, + { + "id": 7, + "title": "Sampling", + "bounding": [ + 1970, + 2240, + 770, + 980 + ], + "color": "#3f789e", + "font_size": 24, + "flags": {} + }, + { + "id": 8, + "title": "Decoding", + "bounding": [ + 2770, + 2240, + 450, + 980 + ], + "color": "#3f789e", + "font_size": 24, + "flags": {} + } + ], + "links": [ + { + "id": 203, + "origin_id": 214, + "origin_slot": 0, + "target_id": 195, + "target_slot": 0, + "type": "IMAGE" + }, + { + "id": 205, + "origin_id": 224, + "origin_slot": 0, + "target_id": 197, + "target_slot": 0, + "type": "VAE" + }, + { + "id": 207, + "origin_id": 205, + "origin_slot": 0, + "target_id": 197, + "target_slot": 2, + "type": "INT" + }, + { + "id": 210, + "origin_id": 213, + "origin_slot": 0, + "target_id": 199, + "target_slot": 0, + "type": "IMAGE" + }, + { + "id": 213, + "origin_id": 204, + "origin_slot": 0, + "target_id": 200, + "target_slot": 0, + "type": "CONDITIONING" + }, + { + "id": 214, + "origin_id": 204, + "origin_slot": 1, + "target_id": 200, + "target_slot": 1, + "type": "CONDITIONING" + }, + { + "id": 215, + "origin_id": 221, + "origin_slot": 0, + "target_id": 200, + "target_slot": 2, + "type": "LATENT" + }, + { + "id": 218, + "origin_id": 203, + "origin_slot": 0, + "target_id": 201, + "target_slot": 0, + "type": "INT" + }, + { + "id": 219, + "origin_id": 203, + "origin_slot": 1, + "target_id": 201, + "target_slot": 1, + "type": "INT" + }, + { + "id": 221, + "origin_id": 222, + "origin_slot": 0, + "target_id": 202, + "target_slot": 0, + "type": "CONDITIONING" + }, + { + "id": 222, + "origin_id": 217, + "origin_slot": 0, + "target_id": 202, + "target_slot": 1, + "type": "CONDITIONING" + }, + { + "id": 223, + "origin_id": 212, + "origin_slot": 0, + "target_id": 202, + "target_slot": 2, + "type": "FLOAT" + }, + { + "id": 224, + "origin_id": 213, + "origin_slot": 0, + "target_id": 203, + "target_slot": 0, + "type": "IMAGE" + }, + { + "id": 225, + "origin_id": 206, + "origin_slot": 0, + "target_id": 204, + "target_slot": 0, + "type": "CONDITIONING" + }, + { + "id": 226, + "origin_id": 206, + "origin_slot": 1, + "target_id": 204, + "target_slot": 1, + "type": "CONDITIONING" + }, + { + "id": 227, + "origin_id": 223, + "origin_slot": 2, + "target_id": 204, + 
"target_slot": 2, + "type": "VAE" + }, + { + "id": 228, + "origin_id": 206, + "origin_slot": 2, + "target_id": 204, + "target_slot": 3, + "type": "LATENT" + }, + { + "id": 229, + "origin_id": 195, + "origin_slot": 0, + "target_id": 204, + "target_slot": 4, + "type": "IMAGE" + }, + { + "id": 236, + "origin_id": 202, + "origin_slot": 0, + "target_id": 206, + "target_slot": 0, + "type": "CONDITIONING" + }, + { + "id": 237, + "origin_id": 202, + "origin_slot": 1, + "target_id": 206, + "target_slot": 1, + "type": "CONDITIONING" + }, + { + "id": 238, + "origin_id": 223, + "origin_slot": 2, + "target_id": 206, + "target_slot": 2, + "type": "VAE" + }, + { + "id": 239, + "origin_id": 201, + "origin_slot": 0, + "target_id": 206, + "target_slot": 3, + "type": "LATENT" + }, + { + "id": 240, + "origin_id": 199, + "origin_slot": 0, + "target_id": 206, + "target_slot": 4, + "type": "IMAGE" + }, + { + "id": 241, + "origin_id": 223, + "origin_slot": 0, + "target_id": 207, + "target_slot": 0, + "type": "MODEL" + }, + { + "id": 242, + "origin_id": 204, + "origin_slot": 0, + "target_id": 207, + "target_slot": 1, + "type": "CONDITIONING" + }, + { + "id": 243, + "origin_id": 204, + "origin_slot": 1, + "target_id": 207, + "target_slot": 2, + "type": "CONDITIONING" + }, + { + "id": 244, + "origin_id": 204, + "origin_slot": 2, + "target_id": 210, + "target_slot": 0, + "type": "LATENT" + }, + { + "id": 245, + "origin_id": 197, + "origin_slot": 0, + "target_id": 210, + "target_slot": 1, + "type": "LATENT" + }, + { + "id": 246, + "origin_id": 196, + "origin_slot": 0, + "target_id": 211, + "target_slot": 0, + "type": "NOISE" + }, + { + "id": 247, + "origin_id": 207, + "origin_slot": 0, + "target_id": 211, + "target_slot": 1, + "type": "GUIDER" + }, + { + "id": 248, + "origin_id": 208, + "origin_slot": 0, + "target_id": 211, + "target_slot": 2, + "type": "SAMPLER" + }, + { + "id": 249, + "origin_id": 209, + "origin_slot": 0, + "target_id": 211, + "target_slot": 3, + "type": "SIGMAS" + }, + { + "id": 250, + "origin_id": 210, + "origin_slot": 0, + "target_id": 211, + "target_slot": 4, + "type": "LATENT" + }, + { + "id": 235, + "origin_id": 205, + "origin_slot": 0, + "target_id": 212, + "target_slot": 0, + "type": "INT" + }, + { + "id": 208, + "origin_id": 215, + "origin_slot": 0, + "target_id": 213, + "target_slot": 2, + "type": "INT" + }, + { + "id": 209, + "origin_id": 216, + "origin_slot": 0, + "target_id": 213, + "target_slot": 3, + "type": "INT" + }, + { + "id": 201, + "origin_id": 215, + "origin_slot": 0, + "target_id": 214, + "target_slot": 2, + "type": "INT" + }, + { + "id": 202, + "origin_id": 216, + "origin_slot": 0, + "target_id": 214, + "target_slot": 3, + "type": "INT" + }, + { + "id": 230, + "origin_id": 225, + "origin_slot": 0, + "target_id": 217, + "target_slot": 0, + "type": "CLIP" + }, + { + "id": 232, + "origin_id": 219, + "origin_slot": 0, + "target_id": 218, + "target_slot": 0, + "type": "IMAGE" + }, + { + "id": 233, + "origin_id": 220, + "origin_slot": 0, + "target_id": 218, + "target_slot": 1, + "type": "AUDIO" + }, + { + "id": 234, + "origin_id": 212, + "origin_slot": 0, + "target_id": 218, + "target_slot": 2, + "type": "FLOAT" + }, + { + "id": 211, + "origin_id": 200, + "origin_slot": 2, + "target_id": 219, + "target_slot": 0, + "type": "LATENT" + }, + { + "id": 212, + "origin_id": 223, + "origin_slot": 2, + "target_id": 219, + "target_slot": 1, + "type": "VAE" + }, + { + "id": 216, + "origin_id": 221, + "origin_slot": 1, + "target_id": 220, + "target_slot": 0, + "type": "LATENT" + }, + { + "id": 
217, + "origin_id": 224, + "origin_slot": 0, + "target_id": 220, + "target_slot": 1, + "type": "VAE" + }, + { + "id": 204, + "origin_id": 211, + "origin_slot": 1, + "target_id": 221, + "target_slot": 0, + "type": "LATENT" + }, + { + "id": 231, + "origin_id": 225, + "origin_slot": 0, + "target_id": 222, + "target_slot": 0, + "type": "CLIP" + }, + { + "id": 251, + "origin_id": -10, + "origin_slot": 0, + "target_id": 213, + "target_slot": 0, + "type": "IMAGE,MASK" + }, + { + "id": 253, + "origin_id": -10, + "origin_slot": 1, + "target_id": 214, + "target_slot": 0, + "type": "IMAGE,MASK" + }, + { + "id": 252, + "origin_id": 218, + "origin_slot": 0, + "target_id": -20, + "target_slot": 0, + "type": "VIDEO" + }, + { + "id": 260, + "origin_id": 198, + "origin_slot": 0, + "target_id": 226, + "target_slot": 0, + "type": "INT" + }, + { + "id": 261, + "origin_id": 205, + "origin_slot": 0, + "target_id": 226, + "target_slot": 1, + "type": "INT" + }, + { + "id": 262, + "origin_id": 226, + "origin_slot": 1, + "target_id": 197, + "target_slot": 1, + "type": "INT" + }, + { + "id": 263, + "origin_id": 226, + "origin_slot": 1, + "target_id": 201, + "target_slot": 2, + "type": "INT" + }, + { + "id": 265, + "origin_id": -10, + "origin_slot": 2, + "target_id": 222, + "target_slot": 1, + "type": "STRING" + }, + { + "id": 266, + "origin_id": -10, + "origin_slot": 3, + "target_id": 215, + "target_slot": 0, + "type": "INT" + }, + { + "id": 267, + "origin_id": -10, + "origin_slot": 4, + "target_id": 216, + "target_slot": 0, + "type": "INT" + }, + { + "id": 268, + "origin_id": -10, + "origin_slot": 5, + "target_id": 198, + "target_slot": 0, + "type": "INT" + }, + { + "id": 269, + "origin_id": -10, + "origin_slot": 6, + "target_id": 205, + "target_slot": 0, + "type": "INT" + }, + { + "id": 270, + "origin_id": -10, + "origin_slot": 7, + "target_id": 196, + "target_slot": 0, + "type": "INT" + }, + { + "id": 272, + "origin_id": -10, + "origin_slot": 8, + "target_id": 224, + "target_slot": 0, + "type": "COMBO" + }, + { + "id": 273, + "origin_id": -10, + "origin_slot": 9, + "target_id": 225, + "target_slot": 0, + "type": "COMBO" + }, + { + "id": 275, + "origin_id": -10, + "origin_slot": 8, + "target_id": 225, + "target_slot": 1, + "type": "COMBO" + }, + { + "id": 276, + "origin_id": -10, + "origin_slot": 8, + "target_id": 223, + "target_slot": 0, + "type": "COMBO" + } + ], + "extra": {}, + "category": "Video generation and editing/First-Last-Frame to Video" + } + ] + }, + "extra": { + "ue_links": [] + } +} \ No newline at end of file diff --git a/blueprints/Image Edit (FireRed Image Edit 1.1).json b/blueprints/Image Edit (FireRed Image Edit 1.1).json new file mode 100644 index 000000000..c34246ce6 --- /dev/null +++ b/blueprints/Image Edit (FireRed Image Edit 1.1).json @@ -0,0 +1,2148 @@ +{ + "revision": 0, + "last_node_id": 172, + "last_link_id": 0, + "nodes": [ + { + "id": 172, + "type": "edf73971-14ee-4d39-b58e-46ce2a89d3d0", + "pos": [ + 30, + 200 + ], + "size": [ + 500, + 570 + ], + "flags": {}, + "order": 2, + "mode": 0, + "inputs": [ + { + "localized_name": "image", + "name": "image", + "type": "IMAGE", + "link": null + }, + { + "label": "image2 (optional)", + "name": "image2_1", + "type": "IMAGE", + "link": null + }, + { + "label": "image3 (optional)", + "name": "image3_1", + "type": "IMAGE", + "link": null + }, + { + "name": "prompt", + "type": "STRING", + "widget": { + "name": "prompt" + }, + "link": null + }, + { + "label": "enable_turbo_mode", + "name": "value", + "type": "BOOLEAN", + "widget": { + "name": 
"value" + }, + "link": null + }, + { + "name": "seed", + "type": "INT", + "widget": { + "name": "seed" + }, + "link": null + }, + { + "name": "unet_name", + "type": "COMBO", + "widget": { + "name": "unet_name" + }, + "link": null + }, + { + "name": "clip_name", + "type": "COMBO", + "widget": { + "name": "clip_name" + }, + "link": null + }, + { + "name": "vae_name", + "type": "COMBO", + "widget": { + "name": "vae_name" + }, + "link": null + }, + { + "name": "lora_name", + "type": "COMBO", + "widget": { + "name": "lora_name" + }, + "link": null + } + ], + "outputs": [ + { + "localized_name": "IMAGE", + "name": "IMAGE", + "type": "IMAGE", + "links": [] + } + ], + "properties": { + "proxyWidgets": [ + [ + "118", + "prompt" + ], + [ + "153", + "value" + ], + [ + "130", + "seed" + ], + [ + "128", + "unet_name" + ], + [ + "115", + "clip_name" + ], + [ + "116", + "vae_name" + ], + [ + "151", + "lora_name" + ], + [ + "130", + "control_after_generate" + ] + ], + "ue_properties": { + "widget_ue_connectable": {}, + "input_ue_unconnectable": {} + }, + "cnr_id": "comfy-core", + "ver": "0.15.1", + "enableTabs": false, + "tabWidth": 65, + "tabXOffset": 10, + "hasSecondTab": false, + "secondTabText": "Send Back", + "secondTabOffset": 80, + "secondTabWidth": 65 + }, + "widgets_values": [], + "title": "Image Edit (FireRed Image Edit 1.1)" + } + ], + "links": [], + "version": 0.4, + "definitions": { + "subgraphs": [ + { + "id": "edf73971-14ee-4d39-b58e-46ce2a89d3d0", + "version": 1, + "state": { + "lastGroupId": 8, + "lastNodeId": 174, + "lastLinkId": 376, + "lastRerouteId": 0 + }, + "revision": 0, + "config": {}, + "name": "Image Edit (FireRed Image Edit 1.1)", + "inputNode": { + "id": -10, + "bounding": [ + -1670, + -1370, + 151.744140625, + 240 + ] + }, + "outputNode": { + "id": -20, + "bounding": [ + 1147.5, + -1215, + 120, + 60 + ] + }, + "inputs": [ + { + "id": "1d810e30-f1fb-4d10-95f8-3c5f7db2c8b7", + "name": "image", + "type": "IMAGE", + "linkIds": [ + 371 + ], + "localized_name": "image", + "pos": [ + -1538.255859375, + -1350 + ] + }, + { + "id": "a8decf32-2262-4cdd-9e6b-c0ca7d4cdebe", + "name": "image2_1", + "type": "IMAGE", + "linkIds": [ + 355, + 356 + ], + "label": "image2 (optional)", + "pos": [ + -1538.255859375, + -1330 + ] + }, + { + "id": "3ff7a4ed-8e3d-45d4-b1d8-40ed88a6def6", + "name": "image3_1", + "type": "IMAGE", + "linkIds": [ + 357, + 358 + ], + "label": "image3 (optional)", + "pos": [ + -1538.255859375, + -1310 + ] + }, + { + "id": "01d9e68c-c664-4584-9cde-66f60e54eb3c", + "name": "prompt", + "type": "STRING", + "linkIds": [ + 359 + ], + "pos": [ + -1538.255859375, + -1290 + ] + }, + { + "id": "97d24b10-6540-48c4-81eb-a432832f5729", + "name": "value", + "type": "BOOLEAN", + "linkIds": [ + 364 + ], + "label": "enable_turbo_mode", + "pos": [ + -1538.255859375, + -1270 + ] + }, + { + "id": "15890efb-ba15-41cd-91ef-5adad7a52167", + "name": "seed", + "type": "INT", + "linkIds": [ + 372 + ], + "pos": [ + -1538.255859375, + -1250 + ] + }, + { + "id": "43f22fe2-6836-4f75-8146-04c84fbba75d", + "name": "unet_name", + "type": "COMBO", + "linkIds": [ + 373 + ], + "pos": [ + -1538.255859375, + -1230 + ] + }, + { + "id": "cd5e4502-2aca-4645-9e2e-ca8719f05bf6", + "name": "clip_name", + "type": "COMBO", + "linkIds": [ + 374 + ], + "pos": [ + -1538.255859375, + -1210 + ] + }, + { + "id": "f6ae73dc-39e8-44b2-958d-705ae159ea86", + "name": "vae_name", + "type": "COMBO", + "linkIds": [ + 375 + ], + "pos": [ + -1538.255859375, + -1190 + ] + }, + { + "id": "66dc179d-e6c9-4485-a2db-a47d25b44363", + "name": 
"lora_name", + "type": "COMBO", + "linkIds": [ + 376 + ], + "pos": [ + -1538.255859375, + -1170 + ] + } + ], + "outputs": [ + { + "id": "712c5c76-8620-44e1-9c9d-0798b6cdb77a", + "name": "IMAGE", + "type": "IMAGE", + "linkIds": [ + 292 + ], + "localized_name": "IMAGE", + "pos": [ + 1167.5, + -1195 + ] + } + ], + "widgets": [], + "nodes": [ + { + "id": 120, + "type": "ModelSamplingAuraFlow", + "pos": [ + 1060, + -1760 + ], + "size": [ + 290, + 110 + ], + "flags": {}, + "order": 8, + "mode": 0, + "inputs": [ + { + "localized_name": "model", + "name": "model", + "type": "MODEL", + "link": 326 + }, + { + "localized_name": "shift", + "name": "shift", + "type": "FLOAT", + "widget": { + "name": "shift" + }, + "link": null + } + ], + "outputs": [ + { + "localized_name": "MODEL", + "name": "MODEL", + "type": "MODEL", + "links": [ + 294 + ] + } + ], + "properties": { + "ue_properties": { + "widget_ue_connectable": {}, + "input_ue_unconnectable": {} + }, + "cnr_id": "comfy-core", + "ver": "0.5.1", + "Node name for S&R": "ModelSamplingAuraFlow", + "enableTabs": false, + "tabWidth": 65, + "tabXOffset": 10, + "hasSecondTab": false, + "secondTabText": "Send Back", + "secondTabOffset": 80, + "secondTabWidth": 65 + }, + "widgets_values": [ + 3.1 + ] + }, + { + "id": 154, + "type": "ComfySwitchNode", + "pos": [ + 680, + -1690 + ], + "size": [ + 260, + 140 + ], + "flags": {}, + "order": 16, + "mode": 0, + "inputs": [ + { + "localized_name": "on_false", + "name": "on_false", + "type": "*", + "link": 324 + }, + { + "localized_name": "on_true", + "name": "on_true", + "type": "*", + "link": 325 + }, + { + "localized_name": "switch", + "name": "switch", + "type": "BOOLEAN", + "widget": { + "name": "switch" + }, + "link": 323 + } + ], + "outputs": [ + { + "localized_name": "output", + "name": "output", + "type": "*", + "links": [ + 326 + ] + } + ], + "title": "Switch (Model)", + "properties": { + "ue_properties": { + "widget_ue_connectable": {}, + "input_ue_unconnectable": {} + }, + "cnr_id": "comfy-core", + "ver": "0.15.1", + "Node name for S&R": "ComfySwitchNode", + "enableTabs": false, + "tabWidth": 65, + "tabXOffset": 10, + "hasSecondTab": false, + "secondTabText": "Send Back", + "secondTabOffset": 80, + "secondTabWidth": 65 + }, + "widgets_values": [ + false + ] + }, + { + "id": 155, + "type": "PrimitiveInt", + "pos": [ + 190, + -1680 + ], + "size": [ + 230, + 110 + ], + "flags": {}, + "order": 0, + "mode": 0, + "inputs": [ + { + "localized_name": "value", + "name": "value", + "type": "INT", + "widget": { + "name": "value" + }, + "link": null + } + ], + "outputs": [ + { + "localized_name": "INT", + "name": "INT", + "type": "INT", + "links": [ + 329 + ] + } + ], + "title": "Int (Steps)", + "properties": { + "ue_properties": { + "widget_ue_connectable": {}, + "input_ue_unconnectable": {} + }, + "cnr_id": "comfy-core", + "ver": "0.15.1", + "Node name for S&R": "PrimitiveInt", + "enableTabs": false, + "tabWidth": 65, + "tabXOffset": 10, + "hasSecondTab": false, + "secondTabText": "Send Back", + "secondTabOffset": 80, + "secondTabWidth": 65 + }, + "widgets_values": [ + 40, + "fixed" + ] + }, + { + "id": 123, + "type": "CFGNorm", + "pos": [ + 1060, + -1590 + ], + "size": [ + 290, + 110 + ], + "flags": {}, + "order": 9, + "mode": 0, + "inputs": [ + { + "localized_name": "model", + "name": "model", + "type": "MODEL", + "link": 294 + }, + { + "localized_name": "strength", + "name": "strength", + "type": "FLOAT", + "widget": { + "name": "strength" + }, + "link": null + } + ], + "outputs": [ + { + "localized_name": 
"patched_model", + "name": "patched_model", + "type": "MODEL", + "links": [ + 295 + ] + } + ], + "properties": { + "ue_properties": { + "widget_ue_connectable": {}, + "input_ue_unconnectable": {} + }, + "cnr_id": "comfy-core", + "ver": "0.5.1", + "Node name for S&R": "CFGNorm", + "enableTabs": false, + "tabWidth": 65, + "tabXOffset": 10, + "hasSecondTab": false, + "secondTabText": "Send Back", + "secondTabOffset": 80, + "secondTabWidth": 65 + }, + "widgets_values": [ + 1 + ] + }, + { + "id": 164, + "type": "ComfySwitchNode", + "pos": [ + 680, + -1250 + ], + "size": [ + 230, + 130 + ], + "flags": {}, + "order": 18, + "mode": 0, + "inputs": [ + { + "localized_name": "on_false", + "name": "on_false", + "type": "*", + "link": 333 + }, + { + "localized_name": "on_true", + "name": "on_true", + "type": "*", + "link": 334 + }, + { + "localized_name": "switch", + "name": "switch", + "type": "BOOLEAN", + "widget": { + "name": "switch" + }, + "link": 336 + } + ], + "outputs": [ + { + "localized_name": "output", + "name": "output", + "type": "*", + "links": [ + 335 + ] + } + ], + "title": "Switch (CFG)", + "properties": { + "ue_properties": { + "widget_ue_connectable": {}, + "input_ue_unconnectable": {} + }, + "cnr_id": "comfy-core", + "ver": "0.15.1", + "Node name for S&R": "ComfySwitchNode", + "enableTabs": false, + "tabWidth": 65, + "tabXOffset": 10, + "hasSecondTab": false, + "secondTabText": "Send Back", + "secondTabOffset": 80, + "secondTabWidth": 65 + }, + "widgets_values": [ + false + ] + }, + { + "id": 156, + "type": "PrimitiveInt", + "pos": [ + 190, + -1060 + ], + "size": [ + 230, + 110 + ], + "flags": {}, + "order": 1, + "mode": 0, + "inputs": [ + { + "localized_name": "value", + "name": "value", + "type": "INT", + "widget": { + "name": "value" + }, + "link": null + } + ], + "outputs": [ + { + "localized_name": "INT", + "name": "INT", + "type": "INT", + "links": [ + 337 + ] + } + ], + "title": "Float (Steps)", + "properties": { + "ue_properties": { + "widget_ue_connectable": {}, + "input_ue_unconnectable": {} + }, + "cnr_id": "comfy-core", + "ver": "0.15.1", + "Node name for S&R": "PrimitiveInt", + "enableTabs": false, + "tabWidth": 65, + "tabXOffset": 10, + "hasSecondTab": false, + "secondTabText": "Send Back", + "secondTabOffset": 80, + "secondTabWidth": 65 + }, + "widgets_values": [ + 8, + "fixed" + ] + }, + { + "id": 162, + "type": "PrimitiveFloat", + "pos": [ + 190, + -1500 + ], + "size": [ + 230, + 110 + ], + "flags": {}, + "order": 2, + "mode": 0, + "inputs": [ + { + "localized_name": "value", + "name": "value", + "type": "FLOAT", + "widget": { + "name": "value" + }, + "link": null + } + ], + "outputs": [ + { + "localized_name": "FLOAT", + "name": "FLOAT", + "type": "FLOAT", + "links": [ + 333 + ] + } + ], + "title": "Float (CFG)", + "properties": { + "ue_properties": { + "widget_ue_connectable": {}, + "input_ue_unconnectable": {} + }, + "cnr_id": "comfy-core", + "ver": "0.15.1", + "Node name for S&R": "PrimitiveFloat", + "enableTabs": false, + "tabWidth": 65, + "tabXOffset": 10, + "hasSecondTab": false, + "secondTabText": "Send Back", + "secondTabOffset": 80, + "secondTabWidth": 65 + }, + "widgets_values": [ + 4 + ] + }, + { + "id": 163, + "type": "PrimitiveFloat", + "pos": [ + 190, + -1230 + ], + "size": [ + 230, + 110 + ], + "flags": {}, + "order": 3, + "mode": 0, + "inputs": [ + { + "localized_name": "value", + "name": "value", + "type": "FLOAT", + "widget": { + "name": "value" + }, + "link": null + } + ], + "outputs": [ + { + "localized_name": "FLOAT", + "name": "FLOAT", + 
"type": "FLOAT", + "links": [ + 334 + ] + } + ], + "title": "Float (CFG)", + "properties": { + "ue_properties": { + "widget_ue_connectable": {}, + "input_ue_unconnectable": {} + }, + "cnr_id": "comfy-core", + "ver": "0.15.1", + "Node name for S&R": "PrimitiveFloat", + "enableTabs": false, + "tabWidth": 65, + "tabXOffset": 10, + "hasSecondTab": false, + "secondTabText": "Send Back", + "secondTabOffset": 80, + "secondTabWidth": 65 + }, + "widgets_values": [ + 1 + ] + }, + { + "id": 157, + "type": "ComfySwitchNode", + "pos": [ + 680, + -1470 + ], + "size": [ + 230, + 130 + ], + "flags": {}, + "order": 17, + "mode": 0, + "inputs": [ + { + "localized_name": "on_false", + "name": "on_false", + "type": "*", + "link": 329 + }, + { + "localized_name": "on_true", + "name": "on_true", + "type": "*", + "link": 337 + }, + { + "localized_name": "switch", + "name": "switch", + "type": "BOOLEAN", + "widget": { + "name": "switch" + }, + "link": 330 + } + ], + "outputs": [ + { + "localized_name": "output", + "name": "output", + "type": "*", + "links": [ + 345 + ] + } + ], + "title": "Switch (Steps)", + "properties": { + "ue_properties": { + "widget_ue_connectable": {}, + "input_ue_unconnectable": {} + }, + "cnr_id": "comfy-core", + "ver": "0.15.1", + "Node name for S&R": "ComfySwitchNode", + "enableTabs": false, + "tabWidth": 65, + "tabXOffset": 10, + "hasSecondTab": false, + "secondTabText": "Send Back", + "secondTabOffset": 80, + "secondTabWidth": 65 + }, + "widgets_values": [ + false + ] + }, + { + "id": 116, + "type": "VAELoader", + "pos": [ + -950, + -1040 + ], + "size": [ + 400, + 110 + ], + "flags": { + "collapsed": false + }, + "order": 5, + "mode": 0, + "inputs": [ + { + "localized_name": "vae_name", + "name": "vae_name", + "type": "COMBO", + "widget": { + "name": "vae_name" + }, + "link": 375 + } + ], + "outputs": [ + { + "localized_name": "VAE", + "name": "VAE", + "type": "VAE", + "slot_index": 0, + "links": [ + 298, + 299, + 300, + 314 + ] + } + ], + "properties": { + "ue_properties": { + "widget_ue_connectable": {}, + "input_ue_unconnectable": {} + }, + "cnr_id": "comfy-core", + "ver": "0.5.1", + "Node name for S&R": "VAELoader", + "enableTabs": false, + "tabWidth": 65, + "tabXOffset": 10, + "hasSecondTab": false, + "secondTabText": "Send Back", + "secondTabOffset": 80, + "secondTabWidth": 65, + "models": [ + { + "name": "qwen_image_vae.safetensors", + "url": "https://huggingface.co/FireRedTeam/FireRed-Image-Edit-1.0-ComfyUI/resolve/main/qwen_image_vae.safetensors", + "directory": "vae" + } + ] + }, + "widgets_values": [ + "qwen_image_vae.safetensors" + ] + }, + { + "id": 115, + "type": "CLIPLoader", + "pos": [ + -960, + -1370 + ], + "size": [ + 400, + 150 + ], + "flags": {}, + "order": 4, + "mode": 0, + "inputs": [ + { + "localized_name": "clip_name", + "name": "clip_name", + "type": "COMBO", + "widget": { + "name": "clip_name" + }, + "link": 374 + }, + { + "localized_name": "type", + "name": "type", + "type": "COMBO", + "widget": { + "name": "type" + }, + "link": null + }, + { + "localized_name": "device", + "name": "device", + "shape": 7, + "type": "COMBO", + "widget": { + "name": "device" + }, + "link": null + } + ], + "outputs": [ + { + "localized_name": "CLIP", + "name": "CLIP", + "type": "CLIP", + "links": [ + 296, + 297 + ] + } + ], + "properties": { + "ue_properties": { + "widget_ue_connectable": {}, + "input_ue_unconnectable": {} + }, + "cnr_id": "comfy-core", + "ver": "0.5.1", + "Node name for S&R": "CLIPLoader", + "enableTabs": false, + "tabWidth": 65, + "tabXOffset": 10, + 
"hasSecondTab": false, + "secondTabText": "Send Back", + "secondTabOffset": 80, + "secondTabWidth": 65, + "models": [ + { + "name": "qwen_2.5_vl_7b_fp8_scaled.safetensors", + "url": "https://huggingface.co/Comfy-Org/HunyuanVideo_1.5_repackaged/resolve/main/split_files/text_encoders/qwen_2.5_vl_7b_fp8_scaled.safetensors", + "directory": "text_encoders" + } + ] + }, + "widgets_values": [ + "qwen_2.5_vl_7b_fp8_scaled.safetensors", + "qwen_image", + "default" + ] + }, + { + "id": 151, + "type": "LoraLoaderModelOnly", + "pos": [ + 100, + -900 + ], + "size": [ + 400, + 140 + ], + "flags": {}, + "order": 14, + "mode": 0, + "inputs": [ + { + "localized_name": "model", + "name": "model", + "type": "MODEL", + "link": 316 + }, + { + "localized_name": "lora_name", + "name": "lora_name", + "type": "COMBO", + "widget": { + "name": "lora_name" + }, + "link": 376 + }, + { + "localized_name": "strength_model", + "name": "strength_model", + "type": "FLOAT", + "widget": { + "name": "strength_model" + }, + "link": null + } + ], + "outputs": [ + { + "localized_name": "MODEL", + "name": "MODEL", + "type": "MODEL", + "links": [ + 325 + ] + } + ], + "properties": { + "ue_properties": { + "widget_ue_connectable": {}, + "input_ue_unconnectable": {} + }, + "cnr_id": "comfy-core", + "ver": "0.15.1", + "Node name for S&R": "LoraLoaderModelOnly", + "enableTabs": false, + "tabWidth": 65, + "tabXOffset": 10, + "hasSecondTab": false, + "secondTabText": "Send Back", + "secondTabOffset": 80, + "secondTabWidth": 65, + "models": [ + { + "name": "FireRed-Image-Edit-1.0-Lightning-8steps-v1.0.safetensors", + "url": "https://huggingface.co/FireRedTeam/FireRed-Image-Edit-1.0-ComfyUI/resolve/main/FireRed-Image-Edit-1.0-Lightning-8steps-v1.0.safetensors", + "directory": "loras" + } + ] + }, + "widgets_values": [ + "FireRed-Image-Edit-1.0-Lightning-8steps-v1.0.safetensors", + 1 + ] + }, + { + "id": 128, + "type": "UNETLoader", + "pos": [ + -960, + -1670 + ], + "size": [ + 400, + 110 + ], + "flags": {}, + "order": 12, + "mode": 0, + "inputs": [ + { + "localized_name": "unet_name", + "name": "unet_name", + "type": "COMBO", + "widget": { + "name": "unet_name" + }, + "link": 373 + }, + { + "localized_name": "weight_dtype", + "name": "weight_dtype", + "type": "COMBO", + "widget": { + "name": "weight_dtype" + }, + "link": null + } + ], + "outputs": [ + { + "localized_name": "MODEL", + "name": "MODEL", + "type": "MODEL", + "slot_index": 0, + "links": [ + 316, + 324 + ] + } + ], + "properties": { + "ue_properties": { + "widget_ue_connectable": {}, + "input_ue_unconnectable": {} + }, + "cnr_id": "comfy-core", + "ver": "0.5.1", + "Node name for S&R": "UNETLoader", + "enableTabs": false, + "tabWidth": 65, + "tabXOffset": 10, + "hasSecondTab": false, + "secondTabText": "Send Back", + "secondTabOffset": 80, + "secondTabWidth": 65, + "models": [ + { + "name": "FireRed-Image-Edit-1.1-transformer.safetensors", + "url": "https://huggingface.co/FireRedTeam/FireRed-Image-Edit-1.1-ComfyUI/resolve/main/FireRed-Image-Edit-1.1-transformer.safetensors", + "directory": "diffusion_models" + } + ] + }, + "widgets_values": [ + "FireRed-Image-Edit-1.1-transformer.safetensors", + "default" + ] + }, + { + "id": 125, + "type": "VAEEncode", + "pos": [ + -390, + -810 + ], + "size": [ + 390, + 100 + ], + "flags": {}, + "order": 10, + "mode": 0, + "inputs": [ + { + "localized_name": "pixels", + "name": "pixels", + "type": "IMAGE", + "link": 368 + }, + { + "localized_name": "vae", + "name": "vae", + "type": "VAE", + "link": 300 + } + ], + "outputs": [ + { + 
"localized_name": "LATENT", + "name": "LATENT", + "type": "LATENT", + "links": [ + 303 + ] + } + ], + "properties": { + "ue_properties": { + "widget_ue_connectable": {}, + "input_ue_unconnectable": {} + }, + "cnr_id": "comfy-core", + "ver": "0.5.1", + "Node name for S&R": "VAEEncode", + "enableTabs": false, + "tabWidth": 65, + "tabXOffset": 10, + "hasSecondTab": false, + "secondTabText": "Send Back", + "secondTabOffset": 80, + "secondTabWidth": 65 + } + }, + { + "id": 153, + "type": "PrimitiveBoolean", + "pos": [ + 160, + -650 + ], + "size": [ + 400, + 100 + ], + "flags": {}, + "order": 15, + "mode": 0, + "inputs": [ + { + "localized_name": "value", + "name": "value", + "type": "BOOLEAN", + "widget": { + "name": "value" + }, + "link": 364 + } + ], + "outputs": [ + { + "localized_name": "BOOLEAN", + "name": "BOOLEAN", + "type": "BOOLEAN", + "links": [ + 323, + 330, + 336 + ] + } + ], + "title": "Enable Lightning LoRA?", + "properties": { + "ue_properties": { + "widget_ue_connectable": {}, + "input_ue_unconnectable": {} + }, + "cnr_id": "comfy-core", + "ver": "0.15.1", + "Node name for S&R": "PrimitiveBoolean", + "enableTabs": false, + "tabWidth": 65, + "tabXOffset": 10, + "hasSecondTab": false, + "secondTabText": "Send Back", + "secondTabOffset": 80, + "secondTabWidth": 65 + }, + "widgets_values": [ + false + ] + }, + { + "id": 118, + "type": "TextEncodeQwenImageEditPlus", + "pos": [ + -480, + -1690 + ], + "size": [ + 470, + 370 + ], + "flags": {}, + "order": 7, + "mode": 0, + "inputs": [ + { + "localized_name": "clip", + "name": "clip", + "type": "CLIP", + "link": 296 + }, + { + "localized_name": "vae", + "name": "vae", + "shape": 7, + "type": "VAE", + "link": 298 + }, + { + "localized_name": "image1", + "name": "image1", + "shape": 7, + "type": "IMAGE", + "link": 369 + }, + { + "localized_name": "image2", + "name": "image2", + "shape": 7, + "type": "IMAGE", + "link": 355 + }, + { + "localized_name": "image3", + "name": "image3", + "shape": 7, + "type": "IMAGE", + "link": 357 + }, + { + "localized_name": "prompt", + "name": "prompt", + "type": "STRING", + "widget": { + "name": "prompt" + }, + "link": 359 + } + ], + "outputs": [ + { + "localized_name": "CONDITIONING", + "name": "CONDITIONING", + "type": "CONDITIONING", + "links": [ + 312 + ] + } + ], + "title": "TextEncodeQwenImageEditPlus (Positive)", + "properties": { + "ue_properties": { + "widget_ue_connectable": {}, + "input_ue_unconnectable": {} + }, + "cnr_id": "comfy-core", + "ver": "0.5.1", + "Node name for S&R": "TextEncodeQwenImageEditPlus", + "enableTabs": false, + "tabWidth": 65, + "tabXOffset": 10, + "hasSecondTab": false, + "secondTabText": "Send Back", + "secondTabOffset": 80, + "secondTabWidth": 65 + }, + "widgets_values": [ + "" + ], + "color": "#232", + "bgcolor": "#353" + }, + { + "id": 117, + "type": "TextEncodeQwenImageEditPlus", + "pos": [ + -470, + -1240 + ], + "size": [ + 460, + 290 + ], + "flags": {}, + "order": 6, + "mode": 0, + "inputs": [ + { + "localized_name": "clip", + "name": "clip", + "type": "CLIP", + "link": 297 + }, + { + "localized_name": "vae", + "name": "vae", + "shape": 7, + "type": "VAE", + "link": 299 + }, + { + "localized_name": "image1", + "name": "image1", + "shape": 7, + "type": "IMAGE", + "link": 370 + }, + { + "localized_name": "image2", + "name": "image2", + "shape": 7, + "type": "IMAGE", + "link": 356 + }, + { + "localized_name": "image3", + "name": "image3", + "shape": 7, + "type": "IMAGE", + "link": 358 + }, + { + "localized_name": "prompt", + "name": "prompt", + "type": "STRING", + 
"widget": { + "name": "prompt" + }, + "link": null + } + ], + "outputs": [ + { + "localized_name": "CONDITIONING", + "name": "CONDITIONING", + "type": "CONDITIONING", + "links": [ + 313 + ] + } + ], + "properties": { + "ue_properties": { + "widget_ue_connectable": {}, + "input_ue_unconnectable": {} + }, + "cnr_id": "comfy-core", + "ver": "0.5.1", + "Node name for S&R": "TextEncodeQwenImageEditPlus", + "enableTabs": false, + "tabWidth": 65, + "tabXOffset": 10, + "hasSecondTab": false, + "secondTabText": "Send Back", + "secondTabOffset": 80, + "secondTabWidth": 65 + }, + "widgets_values": [ + "" + ], + "color": "#323", + "bgcolor": "#535" + }, + { + "id": 130, + "type": "KSampler", + "pos": [ + 1060, + -1420 + ], + "size": [ + 270, + 480 + ], + "flags": {}, + "order": 13, + "mode": 0, + "inputs": [ + { + "localized_name": "model", + "name": "model", + "type": "MODEL", + "link": 295 + }, + { + "localized_name": "positive", + "name": "positive", + "type": "CONDITIONING", + "link": 312 + }, + { + "localized_name": "negative", + "name": "negative", + "type": "CONDITIONING", + "link": 313 + }, + { + "localized_name": "latent_image", + "name": "latent_image", + "type": "LATENT", + "link": 303 + }, + { + "localized_name": "seed", + "name": "seed", + "type": "INT", + "widget": { + "name": "seed" + }, + "link": 372 + }, + { + "localized_name": "steps", + "name": "steps", + "type": "INT", + "widget": { + "name": "steps" + }, + "link": 345 + }, + { + "localized_name": "cfg", + "name": "cfg", + "type": "FLOAT", + "widget": { + "name": "cfg" + }, + "link": 335 + }, + { + "localized_name": "sampler_name", + "name": "sampler_name", + "type": "COMBO", + "widget": { + "name": "sampler_name" + }, + "link": null + }, + { + "localized_name": "scheduler", + "name": "scheduler", + "type": "COMBO", + "widget": { + "name": "scheduler" + }, + "link": null + }, + { + "localized_name": "denoise", + "name": "denoise", + "type": "FLOAT", + "widget": { + "name": "denoise" + }, + "link": null + } + ], + "outputs": [ + { + "localized_name": "LATENT", + "name": "LATENT", + "type": "LATENT", + "links": [ + 273 + ] + } + ], + "properties": { + "ue_properties": { + "widget_ue_connectable": {}, + "input_ue_unconnectable": {} + }, + "cnr_id": "comfy-core", + "ver": "0.5.1", + "Node name for S&R": "KSampler", + "enableTabs": false, + "tabWidth": 65, + "tabXOffset": 10, + "hasSecondTab": false, + "secondTabText": "Send Back", + "secondTabOffset": 80, + "secondTabWidth": 65 + }, + "widgets_values": [ + 43, + "fixed", + 40, + 4, + "euler", + "simple", + 1 + ] + }, + { + "id": 126, + "type": "VAEDecode", + "pos": [ + 1360, + -1420 + ], + "size": [ + 230, + 100 + ], + "flags": { + "collapsed": false + }, + "order": 11, + "mode": 0, + "inputs": [ + { + "localized_name": "samples", + "name": "samples", + "type": "LATENT", + "link": 273 + }, + { + "localized_name": "vae", + "name": "vae", + "type": "VAE", + "link": 314 + } + ], + "outputs": [ + { + "localized_name": "IMAGE", + "name": "IMAGE", + "type": "IMAGE", + "slot_index": 0, + "links": [ + 292 + ] + } + ], + "properties": { + "ue_properties": { + "widget_ue_connectable": {}, + "input_ue_unconnectable": {} + }, + "cnr_id": "comfy-core", + "ver": "0.5.1", + "Node name for S&R": "VAEDecode", + "enableTabs": false, + "tabWidth": 65, + "tabXOffset": 10, + "hasSecondTab": false, + "secondTabText": "Send Back", + "secondTabOffset": 80, + "secondTabWidth": 65 + } + }, + { + "id": 174, + "type": "ResizeImageMaskNode", + "pos": [ + -900, + -810 + ], + "size": [ + 280, + 110 + ], + "flags": 
{}, + "order": 19, + "mode": 0, + "inputs": [ + { + "localized_name": "input", + "name": "input", + "type": "IMAGE,MASK", + "link": 371 + }, + { + "localized_name": "resize_type", + "name": "resize_type", + "type": "COMFY_DYNAMICCOMBO_V3", + "widget": { + "name": "resize_type" + }, + "link": null + }, + { + "localized_name": "resize_type.megapixels", + "name": "resize_type.megapixels", + "type": "FLOAT", + "widget": { + "name": "resize_type.megapixels" + }, + "link": null + }, + { + "localized_name": "scale_method", + "name": "scale_method", + "type": "COMBO", + "widget": { + "name": "scale_method" + }, + "link": null + } + ], + "outputs": [ + { + "localized_name": "resized", + "name": "resized", + "type": "*", + "links": [ + 368, + 369, + 370 + ] + } + ], + "properties": { + "ue_properties": { + "widget_ue_connectable": {}, + "input_ue_unconnectable": {} + }, + "cnr_id": "comfy-core", + "ver": "0.18.1", + "Node name for S&R": "ResizeImageMaskNode" + }, + "widgets_values": [ + "scale total pixels", + 1, + "area" + ] + } + ], + "groups": [ + { + "id": 1, + "title": "Model", + "bounding": [ + -990, + -1770, + 460, + 870 + ], + "color": "#3f789e", + "font_size": 24, + "flags": {} + }, + { + "id": 2, + "title": "Prompt", + "bounding": [ + -500, + -1770, + 510, + 870 + ], + "color": "#3f789e", + "font_size": 24, + "flags": {} + }, + { + "id": 7, + "title": "Original", + "bounding": [ + 40, + -1770, + 530, + 410 + ], + "color": "#3f789e", + "font_size": 24, + "flags": {} + }, + { + "id": 8, + "title": "Lightning LoRA", + "bounding": [ + 40, + -1330, + 560, + 610 + ], + "color": "#3f789e", + "font_size": 24, + "flags": {} + } + ], + "links": [ + { + "id": 326, + "origin_id": 154, + "origin_slot": 0, + "target_id": 120, + "target_slot": 0, + "type": "MODEL" + }, + { + "id": 324, + "origin_id": 128, + "origin_slot": 0, + "target_id": 154, + "target_slot": 0, + "type": "MODEL" + }, + { + "id": 325, + "origin_id": 151, + "origin_slot": 0, + "target_id": 154, + "target_slot": 1, + "type": "MODEL" + }, + { + "id": 323, + "origin_id": 153, + "origin_slot": 0, + "target_id": 154, + "target_slot": 2, + "type": "BOOLEAN" + }, + { + "id": 294, + "origin_id": 120, + "origin_slot": 0, + "target_id": 123, + "target_slot": 0, + "type": "MODEL" + }, + { + "id": 333, + "origin_id": 162, + "origin_slot": 0, + "target_id": 164, + "target_slot": 0, + "type": "FLOAT" + }, + { + "id": 334, + "origin_id": 163, + "origin_slot": 0, + "target_id": 164, + "target_slot": 1, + "type": "FLOAT" + }, + { + "id": 336, + "origin_id": 153, + "origin_slot": 0, + "target_id": 164, + "target_slot": 2, + "type": "BOOLEAN" + }, + { + "id": 329, + "origin_id": 155, + "origin_slot": 0, + "target_id": 157, + "target_slot": 0, + "type": "INT" + }, + { + "id": 337, + "origin_id": 156, + "origin_slot": 0, + "target_id": 157, + "target_slot": 1, + "type": "INT" + }, + { + "id": 330, + "origin_id": 153, + "origin_slot": 0, + "target_id": 157, + "target_slot": 2, + "type": "BOOLEAN" + }, + { + "id": 297, + "origin_id": 115, + "origin_slot": 0, + "target_id": 117, + "target_slot": 0, + "type": "CLIP" + }, + { + "id": 299, + "origin_id": 116, + "origin_slot": 0, + "target_id": 117, + "target_slot": 1, + "type": "VAE" + }, + { + "id": 316, + "origin_id": 128, + "origin_slot": 0, + "target_id": 151, + "target_slot": 0, + "type": "MODEL" + }, + { + "id": 296, + "origin_id": 115, + "origin_slot": 0, + "target_id": 118, + "target_slot": 0, + "type": "CLIP" + }, + { + "id": 298, + "origin_id": 116, + "origin_slot": 0, + "target_id": 118, + 
"target_slot": 1, + "type": "VAE" + }, + { + "id": 300, + "origin_id": 116, + "origin_slot": 0, + "target_id": 125, + "target_slot": 1, + "type": "VAE" + }, + { + "id": 295, + "origin_id": 123, + "origin_slot": 0, + "target_id": 130, + "target_slot": 0, + "type": "MODEL" + }, + { + "id": 312, + "origin_id": 118, + "origin_slot": 0, + "target_id": 130, + "target_slot": 1, + "type": "CONDITIONING" + }, + { + "id": 313, + "origin_id": 117, + "origin_slot": 0, + "target_id": 130, + "target_slot": 2, + "type": "CONDITIONING" + }, + { + "id": 303, + "origin_id": 125, + "origin_slot": 0, + "target_id": 130, + "target_slot": 3, + "type": "LATENT" + }, + { + "id": 345, + "origin_id": 157, + "origin_slot": 0, + "target_id": 130, + "target_slot": 5, + "type": "INT" + }, + { + "id": 335, + "origin_id": 164, + "origin_slot": 0, + "target_id": 130, + "target_slot": 6, + "type": "FLOAT" + }, + { + "id": 273, + "origin_id": 130, + "origin_slot": 0, + "target_id": 126, + "target_slot": 0, + "type": "LATENT" + }, + { + "id": 314, + "origin_id": 116, + "origin_slot": 0, + "target_id": 126, + "target_slot": 1, + "type": "VAE" + }, + { + "id": 292, + "origin_id": 126, + "origin_slot": 0, + "target_id": -20, + "target_slot": 0, + "type": "IMAGE" + }, + { + "id": 355, + "origin_id": -10, + "origin_slot": 1, + "target_id": 118, + "target_slot": 3, + "type": "IMAGE" + }, + { + "id": 356, + "origin_id": -10, + "origin_slot": 1, + "target_id": 117, + "target_slot": 3, + "type": "IMAGE" + }, + { + "id": 357, + "origin_id": -10, + "origin_slot": 2, + "target_id": 118, + "target_slot": 4, + "type": "IMAGE" + }, + { + "id": 358, + "origin_id": -10, + "origin_slot": 2, + "target_id": 117, + "target_slot": 4, + "type": "IMAGE" + }, + { + "id": 359, + "origin_id": -10, + "origin_slot": 3, + "target_id": 118, + "target_slot": 5, + "type": "STRING" + }, + { + "id": 364, + "origin_id": -10, + "origin_slot": 4, + "target_id": 153, + "target_slot": 0, + "type": "BOOLEAN" + }, + { + "id": 368, + "origin_id": 174, + "origin_slot": 0, + "target_id": 125, + "target_slot": 0, + "type": "IMAGE" + }, + { + "id": 369, + "origin_id": 174, + "origin_slot": 0, + "target_id": 118, + "target_slot": 2, + "type": "IMAGE" + }, + { + "id": 370, + "origin_id": 174, + "origin_slot": 0, + "target_id": 117, + "target_slot": 2, + "type": "IMAGE" + }, + { + "id": 371, + "origin_id": -10, + "origin_slot": 0, + "target_id": 174, + "target_slot": 0, + "type": "IMAGE" + }, + { + "id": 372, + "origin_id": -10, + "origin_slot": 5, + "target_id": 130, + "target_slot": 4, + "type": "INT" + }, + { + "id": 373, + "origin_id": -10, + "origin_slot": 6, + "target_id": 128, + "target_slot": 0, + "type": "COMBO" + }, + { + "id": 374, + "origin_id": -10, + "origin_slot": 7, + "target_id": 115, + "target_slot": 0, + "type": "COMBO" + }, + { + "id": 375, + "origin_id": -10, + "origin_slot": 8, + "target_id": 116, + "target_slot": 0, + "type": "COMBO" + }, + { + "id": 376, + "origin_id": -10, + "origin_slot": 9, + "target_id": 151, + "target_slot": 1, + "type": "COMBO" + } + ], + "extra": { + "workflowRendererVersion": "LG" + }, + "category": "Image generation and editing/Edit image" + } + ] + }, + "extra": { + "ue_links": [] + } +} \ No newline at end of file diff --git a/blueprints/Image Edit (Flux.2 Klein 4B).json b/blueprints/Image Edit (Flux.2 Klein 4B).json index 78bbb7414..6f2f7dc01 100644 --- a/blueprints/Image Edit (Flux.2 Klein 4B).json +++ b/blueprints/Image Edit (Flux.2 Klein 4B).json @@ -128,7 +128,7 @@ }, "revision": 0, "config": {}, - "name": 
"local-Image Edit (Flux.2 Klein 4B)", + "name": "Image Edit (Flux.2 Klein 4B)", "inputNode": { "id": -10, "bounding": [ @@ -1837,4 +1837,4 @@ } }, "version": 0.4 -} +} \ No newline at end of file diff --git a/blueprints/Image Edit (LongCat Image Edit).json b/blueprints/Image Edit (LongCat Image Edit).json new file mode 100644 index 000000000..5b4eb18f0 --- /dev/null +++ b/blueprints/Image Edit (LongCat Image Edit).json @@ -0,0 +1,1427 @@ +{ + "revision": 0, + "last_node_id": 176, + "last_link_id": 0, + "nodes": [ + { + "id": 176, + "type": "372a02a0-a79c-40b4-84a9-34f246fe0e9c", + "pos": [ + 967.0861152473078, + 4977.534165136897 + ], + "size": [ + 330, + 380 + ], + "flags": {}, + "order": 3, + "mode": 0, + "inputs": [ + { + "localized_name": "image", + "name": "image", + "type": "IMAGE", + "link": null + }, + { + "name": "prompt", + "type": "STRING", + "widget": { + "name": "prompt" + }, + "link": null + }, + { + "name": "steps", + "type": "INT", + "widget": { + "name": "steps" + }, + "link": null + }, + { + "name": "cfg", + "type": "FLOAT", + "widget": { + "name": "cfg" + }, + "link": null + }, + { + "name": "seed", + "type": "INT", + "widget": { + "name": "seed" + }, + "link": null + }, + { + "name": "unet_name", + "type": "COMBO", + "widget": { + "name": "unet_name" + }, + "link": null + }, + { + "name": "clip_name", + "type": "COMBO", + "widget": { + "name": "clip_name" + }, + "link": null + }, + { + "name": "vae_name", + "type": "COMBO", + "widget": { + "name": "vae_name" + }, + "link": null + } + ], + "outputs": [ + { + "localized_name": "IMAGE", + "name": "IMAGE", + "type": "IMAGE", + "links": [] + } + ], + "properties": { + "proxyWidgets": [ + [ + "27", + "prompt" + ], + [ + "33", + "steps" + ], + [ + "33", + "cfg" + ], + [ + "33", + "seed" + ], + [ + "34", + "unet_name" + ], + [ + "38", + "clip_name" + ], + [ + "26", + "vae_name" + ] + ], + "cnr_id": "comfy-core", + "ver": "0.18.1", + "ue_properties": { + "widget_ue_connectable": {}, + "version": "7.7", + "input_ue_unconnectable": {} + } + }, + "widgets_values": [], + "title": "Image Edit (LongCat Image Edit)" + } + ], + "links": [], + "version": 0.4, + "definitions": { + "subgraphs": [ + { + "id": "372a02a0-a79c-40b4-84a9-34f246fe0e9c", + "version": 1, + "state": { + "lastGroupId": 8, + "lastNodeId": 176, + "lastLinkId": 376, + "lastRerouteId": 0 + }, + "revision": 0, + "config": {}, + "name": "Image Edit (LongCat Image Edit)", + "inputNode": { + "id": -10, + "bounding": [ + -750, + 380, + 120, + 200 + ] + }, + "outputNode": { + "id": -20, + "bounding": [ + 1680, + 340, + 120, + 60 + ] + }, + "inputs": [ + { + "id": "616c4f3e-8b64-4711-bee2-5ecbe1814fe4", + "name": "image", + "type": "IMAGE", + "linkIds": [ + 14 + ], + "localized_name": "image", + "pos": [ + -650, + 400 + ] + }, + { + "id": "d39759fc-a5a9-4b82-a88f-df9b953f1d98", + "name": "prompt", + "type": "STRING", + "linkIds": [ + 36 + ], + "pos": [ + -650, + 420 + ] + }, + { + "id": "48627f43-cdf1-4ea9-9e11-ec13451a7323", + "name": "steps", + "type": "INT", + "linkIds": [ + 37 + ], + "pos": [ + -650, + 440 + ] + }, + { + "id": "2213f872-d40f-4fc3-be01-b8fc73f1d92c", + "name": "cfg", + "type": "FLOAT", + "linkIds": [ + 42 + ], + "pos": [ + -650, + 460 + ] + }, + { + "id": "2c7b3e65-e71e-4a9b-a9f8-d2e814ccb6af", + "name": "seed", + "type": "INT", + "linkIds": [ + 43 + ], + "pos": [ + -650, + 480 + ] + }, + { + "id": "bddb2317-7210-48d5-81fd-6b2d6fac33f4", + "name": "unet_name", + "type": "COMBO", + "linkIds": [ + 44 + ], + "pos": [ + -650, + 500 + ] + }, + { + "id": 
"a283167b-6d7f-4d19-ad86-1fff2335c08d", + "name": "clip_name", + "type": "COMBO", + "linkIds": [ + 45 + ], + "pos": [ + -650, + 520 + ] + }, + { + "id": "e033047f-cc37-4043-b4a0-25d7bab661af", + "name": "vae_name", + "type": "COMBO", + "linkIds": [ + 46 + ], + "pos": [ + -650, + 540 + ] + } + ], + "outputs": [ + { + "id": "0a288e93-c03f-4805-80f3-4e320a6a492e", + "name": "IMAGE", + "type": "IMAGE", + "linkIds": [ + 20 + ], + "localized_name": "IMAGE", + "pos": [ + 1700, + 360 + ] + } + ], + "widgets": [], + "nodes": [ + { + "id": 26, + "type": "VAELoader", + "pos": [ + -360, + 590 + ], + "size": [ + 270, + 110 + ], + "flags": {}, + "order": 0, + "mode": 0, + "inputs": [ + { + "localized_name": "vae_name", + "name": "vae_name", + "type": "COMBO", + "widget": { + "name": "vae_name" + }, + "link": 46 + } + ], + "outputs": [ + { + "localized_name": "VAE", + "name": "VAE", + "type": "VAE", + "slot_index": 0, + "links": [ + 4, + 5, + 6, + 7 + ] + } + ], + "properties": { + "cnr_id": "comfy-core", + "ver": "0.3.73", + "ue_properties": { + "widget_ue_connectable": {}, + "version": "7.7", + "input_ue_unconnectable": {} + }, + "Node name for S&R": "VAELoader", + "models": [ + { + "name": "ae.safetensors", + "url": "https://huggingface.co/Comfy-Org/z_image_turbo/resolve/main/split_files/vae/ae.safetensors", + "directory": "vae" + } + ] + }, + "widgets_values": [ + "ae.safetensors" + ] + }, + { + "id": 27, + "type": "TextEncodeQwenImageEdit", + "pos": [ + 10, + 200 + ], + "size": [ + 280, + 190 + ], + "flags": {}, + "order": 1, + "mode": 0, + "inputs": [ + { + "localized_name": "clip", + "name": "clip", + "type": "CLIP", + "link": 2 + }, + { + "localized_name": "vae", + "name": "vae", + "shape": 7, + "type": "VAE", + "link": 4 + }, + { + "localized_name": "image", + "name": "image", + "shape": 7, + "type": "IMAGE", + "link": 15 + }, + { + "localized_name": "prompt", + "name": "prompt", + "type": "STRING", + "widget": { + "name": "prompt" + }, + "link": 36 + } + ], + "outputs": [ + { + "localized_name": "CONDITIONING", + "name": "CONDITIONING", + "type": "CONDITIONING", + "slot_index": 0, + "links": [ + 8 + ] + } + ], + "properties": { + "cnr_id": "comfy-core", + "ver": "0.3.73", + "ue_properties": { + "widget_ue_connectable": {}, + "version": "7.7", + "input_ue_unconnectable": {} + }, + "Node name for S&R": "TextEncodeQwenImageEdit" + }, + "widgets_values": [ + "" + ] + }, + { + "id": 28, + "type": "TextEncodeQwenImageEdit", + "pos": [ + 10, + 440 + ], + "size": [ + 280, + 190 + ], + "flags": {}, + "order": 2, + "mode": 0, + "inputs": [ + { + "localized_name": "clip", + "name": "clip", + "type": "CLIP", + "link": 3 + }, + { + "localized_name": "vae", + "name": "vae", + "shape": 7, + "type": "VAE", + "link": 5 + }, + { + "localized_name": "image", + "name": "image", + "shape": 7, + "type": "IMAGE", + "link": 16 + }, + { + "localized_name": "prompt", + "name": "prompt", + "type": "STRING", + "widget": { + "name": "prompt" + }, + "link": null + } + ], + "outputs": [ + { + "localized_name": "CONDITIONING", + "name": "CONDITIONING", + "type": "CONDITIONING", + "slot_index": 0, + "links": [ + 9 + ] + } + ], + "properties": { + "cnr_id": "comfy-core", + "ver": "0.3.73", + "ue_properties": { + "widget_ue_connectable": {}, + "version": "7.7", + "input_ue_unconnectable": {} + }, + "Node name for S&R": "TextEncodeQwenImageEdit" + }, + "widgets_values": [ + "" + ] + }, + { + "id": 29, + "type": "FluxKontextMultiReferenceLatentMethod", + "pos": [ + 660, + 200 + ], + "size": [ + 270, + 80 + ], + "flags": {}, + 
"order": 3, + "mode": 0, + "showAdvanced": false, + "inputs": [ + { + "localized_name": "conditioning", + "name": "conditioning", + "type": "CONDITIONING", + "link": 10 + }, + { + "localized_name": "reference_latents_method", + "name": "reference_latents_method", + "type": "COMBO", + "widget": { + "name": "reference_latents_method" + }, + "link": null + } + ], + "outputs": [ + { + "localized_name": "CONDITIONING", + "name": "CONDITIONING", + "type": "CONDITIONING", + "slot_index": 0, + "links": [ + 12 + ] + } + ], + "properties": { + "cnr_id": "comfy-core", + "ver": "0.3.73", + "ue_properties": { + "widget_ue_connectable": {}, + "version": "7.7", + "input_ue_unconnectable": {} + }, + "Node name for S&R": "FluxKontextMultiReferenceLatentMethod" + }, + "widgets_values": [ + "index" + ] + }, + { + "id": 30, + "type": "FluxGuidance", + "pos": [ + 330, + 440 + ], + "size": [ + 230, + 110 + ], + "flags": {}, + "order": 4, + "mode": 0, + "inputs": [ + { + "localized_name": "conditioning", + "name": "conditioning", + "type": "CONDITIONING", + "link": 9 + }, + { + "localized_name": "guidance", + "name": "guidance", + "type": "FLOAT", + "widget": { + "name": "guidance" + }, + "link": null + } + ], + "outputs": [ + { + "localized_name": "CONDITIONING", + "name": "CONDITIONING", + "type": "CONDITIONING", + "slot_index": 0, + "links": [ + 11 + ] + } + ], + "properties": { + "cnr_id": "comfy-core", + "ver": "0.3.73", + "ue_properties": { + "widget_ue_connectable": {}, + "version": "7.7", + "input_ue_unconnectable": {} + }, + "Node name for S&R": "FluxGuidance" + }, + "widgets_values": [ + 4.5 + ] + }, + { + "id": 31, + "type": "FluxGuidance", + "pos": [ + 330, + 200 + ], + "size": [ + 230, + 110 + ], + "flags": {}, + "order": 5, + "mode": 0, + "inputs": [ + { + "localized_name": "conditioning", + "name": "conditioning", + "type": "CONDITIONING", + "link": 8 + }, + { + "localized_name": "guidance", + "name": "guidance", + "type": "FLOAT", + "widget": { + "name": "guidance" + }, + "link": null + } + ], + "outputs": [ + { + "localized_name": "CONDITIONING", + "name": "CONDITIONING", + "type": "CONDITIONING", + "slot_index": 0, + "links": [ + 10 + ] + } + ], + "properties": { + "cnr_id": "comfy-core", + "ver": "0.3.73", + "ue_properties": { + "widget_ue_connectable": {}, + "version": "7.7", + "input_ue_unconnectable": {} + }, + "Node name for S&R": "FluxGuidance" + }, + "widgets_values": [ + 4.5 + ] + }, + { + "id": 32, + "type": "FluxKontextMultiReferenceLatentMethod", + "pos": [ + 660, + 440 + ], + "size": [ + 270, + 80 + ], + "flags": {}, + "order": 6, + "mode": 0, + "inputs": [ + { + "localized_name": "conditioning", + "name": "conditioning", + "type": "CONDITIONING", + "link": 11 + }, + { + "localized_name": "reference_latents_method", + "name": "reference_latents_method", + "type": "COMBO", + "widget": { + "name": "reference_latents_method" + }, + "link": null + } + ], + "outputs": [ + { + "localized_name": "CONDITIONING", + "name": "CONDITIONING", + "type": "CONDITIONING", + "slot_index": 0, + "links": [ + 13 + ] + } + ], + "properties": { + "cnr_id": "comfy-core", + "ver": "0.3.73", + "ue_properties": { + "widget_ue_connectable": {}, + "version": "7.7", + "input_ue_unconnectable": {} + }, + "Node name for S&R": "FluxKontextMultiReferenceLatentMethod" + }, + "widgets_values": [ + "index" + ] + }, + { + "id": 33, + "type": "KSampler", + "pos": [ + 1080, + 210 + ], + "size": [ + 270, + 460 + ], + "flags": {}, + "order": 7, + "mode": 0, + "inputs": [ + { + "localized_name": "model", + "name": "model", + 
"type": "MODEL", + "link": 1 + }, + { + "localized_name": "positive", + "name": "positive", + "type": "CONDITIONING", + "link": 12 + }, + { + "localized_name": "negative", + "name": "negative", + "type": "CONDITIONING", + "link": 13 + }, + { + "localized_name": "latent_image", + "name": "latent_image", + "type": "LATENT", + "link": 18 + }, + { + "localized_name": "seed", + "name": "seed", + "type": "INT", + "widget": { + "name": "seed" + }, + "link": 43 + }, + { + "localized_name": "steps", + "name": "steps", + "type": "INT", + "widget": { + "name": "steps" + }, + "link": 37 + }, + { + "localized_name": "cfg", + "name": "cfg", + "type": "FLOAT", + "widget": { + "name": "cfg" + }, + "link": 42 + }, + { + "localized_name": "sampler_name", + "name": "sampler_name", + "type": "COMBO", + "widget": { + "name": "sampler_name" + }, + "link": null + }, + { + "localized_name": "scheduler", + "name": "scheduler", + "type": "COMBO", + "widget": { + "name": "scheduler" + }, + "link": null + }, + { + "localized_name": "denoise", + "name": "denoise", + "type": "FLOAT", + "widget": { + "name": "denoise" + }, + "link": null + } + ], + "outputs": [ + { + "localized_name": "LATENT", + "name": "LATENT", + "type": "LATENT", + "slot_index": 0, + "links": [ + 19 + ] + } + ], + "properties": { + "cnr_id": "comfy-core", + "ver": "0.3.73", + "ue_properties": { + "widget_ue_connectable": {}, + "version": "7.7", + "input_ue_unconnectable": {} + }, + "Node name for S&R": "KSampler" + }, + "widgets_values": [ + 43, + "fixed", + 50, + 4.5, + "euler", + "simple", + 1 + ] + }, + { + "id": 34, + "type": "UNETLoader", + "pos": [ + -360, + 170 + ], + "size": [ + 270, + 110 + ], + "flags": {}, + "order": 8, + "mode": 0, + "inputs": [ + { + "localized_name": "unet_name", + "name": "unet_name", + "type": "COMBO", + "widget": { + "name": "unet_name" + }, + "link": 44 + }, + { + "localized_name": "weight_dtype", + "name": "weight_dtype", + "type": "COMBO", + "widget": { + "name": "weight_dtype" + }, + "link": null + } + ], + "outputs": [ + { + "localized_name": "MODEL", + "name": "MODEL", + "type": "MODEL", + "slot_index": 0, + "links": [ + 1 + ] + } + ], + "properties": { + "cnr_id": "comfy-core", + "ver": "0.3.73", + "ue_properties": { + "widget_ue_connectable": {}, + "version": "7.7", + "input_ue_unconnectable": {} + }, + "Node name for S&R": "UNETLoader", + "models": [ + { + "name": "longcat_image_edit_bf16.safetensors", + "url": "https://huggingface.co/TalmajM/LongCat-Image-Edit_ComfyUI_repackaged/resolve/main/split_files/diffusion_models/longcat_image_edit_bf16.safetensors", + "directory": "diffusion_models" + } + ] + }, + "widgets_values": [ + "longcat_image_edit_bf16.safetensors", + "default" + ] + }, + { + "id": 35, + "type": "VAEEncode", + "pos": [ + 710, + 790 + ], + "size": [ + 260, + 100 + ], + "flags": {}, + "order": 9, + "mode": 0, + "inputs": [ + { + "localized_name": "pixels", + "name": "pixels", + "type": "IMAGE", + "link": 17 + }, + { + "localized_name": "vae", + "name": "vae", + "type": "VAE", + "link": 6 + } + ], + "outputs": [ + { + "localized_name": "LATENT", + "name": "LATENT", + "type": "LATENT", + "slot_index": 0, + "links": [ + 18 + ] + } + ], + "properties": { + "cnr_id": "comfy-core", + "ver": "0.3.73", + "ue_properties": { + "widget_ue_connectable": {}, + "version": "7.7", + "input_ue_unconnectable": {} + }, + "Node name for S&R": "VAEEncode" + } + }, + { + "id": 36, + "type": "VAEDecode", + "pos": [ + 1100, + 800 + ], + "size": [ + 230, + 100 + ], + "flags": {}, + "order": 10, + "mode": 0, + 
"inputs": [ + { + "localized_name": "samples", + "name": "samples", + "type": "LATENT", + "link": 19 + }, + { + "localized_name": "vae", + "name": "vae", + "type": "VAE", + "link": 7 + } + ], + "outputs": [ + { + "localized_name": "IMAGE", + "name": "IMAGE", + "type": "IMAGE", + "slot_index": 0, + "links": [ + 20 + ] + } + ], + "properties": { + "cnr_id": "comfy-core", + "ver": "0.3.73", + "ue_properties": { + "widget_ue_connectable": {}, + "version": "7.7", + "input_ue_unconnectable": {} + }, + "Node name for S&R": "VAEDecode" + } + }, + { + "id": 37, + "type": "ImageScaleToTotalPixels", + "pos": [ + -370, + 790 + ], + "size": [ + 270, + 140 + ], + "flags": {}, + "order": 11, + "mode": 0, + "inputs": [ + { + "localized_name": "image", + "name": "image", + "type": "IMAGE", + "link": 14 + }, + { + "localized_name": "upscale_method", + "name": "upscale_method", + "type": "COMBO", + "widget": { + "name": "upscale_method" + }, + "link": null + }, + { + "localized_name": "megapixels", + "name": "megapixels", + "type": "FLOAT", + "widget": { + "name": "megapixels" + }, + "link": null + }, + { + "localized_name": "resolution_steps", + "name": "resolution_steps", + "type": "INT", + "widget": { + "name": "resolution_steps" + }, + "link": null + } + ], + "outputs": [ + { + "localized_name": "IMAGE", + "name": "IMAGE", + "type": "IMAGE", + "slot_index": 0, + "links": [ + 15, + 16, + 17 + ] + } + ], + "properties": { + "cnr_id": "comfy-core", + "ver": "0.3.73", + "ue_properties": { + "widget_ue_connectable": {}, + "version": "7.7", + "input_ue_unconnectable": {} + }, + "Node name for S&R": "ImageScaleToTotalPixels" + }, + "widgets_values": [ + "lanczos", + 1, + 16 + ] + }, + { + "id": 38, + "type": "CLIPLoader", + "pos": [ + -360, + 360 + ], + "size": [ + 270, + 150 + ], + "flags": {}, + "order": 12, + "mode": 0, + "inputs": [ + { + "localized_name": "clip_name", + "name": "clip_name", + "type": "COMBO", + "widget": { + "name": "clip_name" + }, + "link": 45 + }, + { + "localized_name": "type", + "name": "type", + "type": "COMBO", + "widget": { + "name": "type" + }, + "link": null + }, + { + "localized_name": "device", + "name": "device", + "shape": 7, + "type": "COMBO", + "widget": { + "name": "device" + }, + "link": null + } + ], + "outputs": [ + { + "localized_name": "CLIP", + "name": "CLIP", + "type": "CLIP", + "slot_index": 0, + "links": [ + 2, + 3 + ] + } + ], + "properties": { + "cnr_id": "comfy-core", + "ver": "0.3.73", + "ue_properties": { + "widget_ue_connectable": {}, + "version": "7.7", + "input_ue_unconnectable": {} + }, + "Node name for S&R": "CLIPLoader", + "models": [ + { + "name": "qwen_2.5_vl_7b_fp8_scaled.safetensors", + "url": "https://huggingface.co/Comfy-Org/Qwen-Image_ComfyUI/resolve/main/split_files/text_encoders/qwen_2.5_vl_7b_fp8_scaled.safetensors", + "directory": "text_encoders" + } + ] + }, + "widgets_values": [ + "qwen_2.5_vl_7b_fp8_scaled.safetensors", + "longcat_image", + "default" + ] + } + ], + "groups": [ + { + "id": 1, + "title": "Models", + "bounding": [ + -380, + 100, + 320, + 630 + ], + "color": "#3f789e", + "font_size": 24, + "flags": {} + }, + { + "id": 3, + "title": "Conditioning", + "bounding": [ + -30, + 100, + 1030, + 630 + ], + "color": "#3f789e", + "font_size": 24, + "flags": {} + }, + { + "id": 4, + "title": "Sample", + "bounding": [ + 1030, + 100, + 360, + 630 + ], + "color": "#3f789e", + "font_size": 24, + "flags": {} + } + ], + "links": [ + { + "id": 2, + "origin_id": 38, + "origin_slot": 0, + "target_id": 27, + "target_slot": 0, + "type": "CLIP" + }, 
+ { + "id": 4, + "origin_id": 26, + "origin_slot": 0, + "target_id": 27, + "target_slot": 1, + "type": "VAE" + }, + { + "id": 15, + "origin_id": 37, + "origin_slot": 0, + "target_id": 27, + "target_slot": 2, + "type": "IMAGE" + }, + { + "id": 3, + "origin_id": 38, + "origin_slot": 0, + "target_id": 28, + "target_slot": 0, + "type": "CLIP" + }, + { + "id": 5, + "origin_id": 26, + "origin_slot": 0, + "target_id": 28, + "target_slot": 1, + "type": "VAE" + }, + { + "id": 16, + "origin_id": 37, + "origin_slot": 0, + "target_id": 28, + "target_slot": 2, + "type": "IMAGE" + }, + { + "id": 10, + "origin_id": 31, + "origin_slot": 0, + "target_id": 29, + "target_slot": 0, + "type": "CONDITIONING" + }, + { + "id": 9, + "origin_id": 28, + "origin_slot": 0, + "target_id": 30, + "target_slot": 0, + "type": "CONDITIONING" + }, + { + "id": 8, + "origin_id": 27, + "origin_slot": 0, + "target_id": 31, + "target_slot": 0, + "type": "CONDITIONING" + }, + { + "id": 11, + "origin_id": 30, + "origin_slot": 0, + "target_id": 32, + "target_slot": 0, + "type": "CONDITIONING" + }, + { + "id": 1, + "origin_id": 34, + "origin_slot": 0, + "target_id": 33, + "target_slot": 0, + "type": "MODEL" + }, + { + "id": 12, + "origin_id": 29, + "origin_slot": 0, + "target_id": 33, + "target_slot": 1, + "type": "CONDITIONING" + }, + { + "id": 13, + "origin_id": 32, + "origin_slot": 0, + "target_id": 33, + "target_slot": 2, + "type": "CONDITIONING" + }, + { + "id": 18, + "origin_id": 35, + "origin_slot": 0, + "target_id": 33, + "target_slot": 3, + "type": "LATENT" + }, + { + "id": 17, + "origin_id": 37, + "origin_slot": 0, + "target_id": 35, + "target_slot": 0, + "type": "IMAGE" + }, + { + "id": 6, + "origin_id": 26, + "origin_slot": 0, + "target_id": 35, + "target_slot": 1, + "type": "VAE" + }, + { + "id": 19, + "origin_id": 33, + "origin_slot": 0, + "target_id": 36, + "target_slot": 0, + "type": "LATENT" + }, + { + "id": 7, + "origin_id": 26, + "origin_slot": 0, + "target_id": 36, + "target_slot": 1, + "type": "VAE" + }, + { + "id": 14, + "origin_id": -10, + "origin_slot": 0, + "target_id": 37, + "target_slot": 0, + "type": "IMAGE" + }, + { + "id": 20, + "origin_id": 36, + "origin_slot": 0, + "target_id": -20, + "target_slot": 0, + "type": "IMAGE" + }, + { + "id": 36, + "origin_id": -10, + "origin_slot": 1, + "target_id": 27, + "target_slot": 3, + "type": "STRING" + }, + { + "id": 37, + "origin_id": -10, + "origin_slot": 2, + "target_id": 33, + "target_slot": 5, + "type": "INT" + }, + { + "id": 42, + "origin_id": -10, + "origin_slot": 3, + "target_id": 33, + "target_slot": 6, + "type": "FLOAT" + }, + { + "id": 43, + "origin_id": -10, + "origin_slot": 4, + "target_id": 33, + "target_slot": 4, + "type": "INT" + }, + { + "id": 44, + "origin_id": -10, + "origin_slot": 5, + "target_id": 34, + "target_slot": 0, + "type": "COMBO" + }, + { + "id": 45, + "origin_id": -10, + "origin_slot": 6, + "target_id": 38, + "target_slot": 0, + "type": "COMBO" + }, + { + "id": 46, + "origin_id": -10, + "origin_slot": 7, + "target_id": 26, + "target_slot": 0, + "type": "COMBO" + } + ], + "extra": {}, + "category": "Image generation and editing/Edit image" + } + ] + }, + "extra": { + "ue_links": [] + } +} \ No newline at end of file diff --git a/blueprints/Image Inpainting (Flux.1 Fill Dev).json b/blueprints/Image Inpainting (Flux.1 Fill Dev).json new file mode 100644 index 000000000..d40d63594 --- /dev/null +++ b/blueprints/Image Inpainting (Flux.1 Fill Dev).json @@ -0,0 +1,1205 @@ +{ + "revision": 0, + "last_node_id": 232, + "last_link_id": 0, + 
"nodes": [ + { + "id": 232, + "type": "6e8d6e38-bdc3-436c-be85-ef9e67e70e07", + "pos": [ + 1270, + 4640 + ], + "size": [ + 400, + 470 + ], + "flags": {}, + "order": 0, + "mode": 0, + "inputs": [ + { + "label": "image", + "localized_name": "pixels", + "name": "pixels", + "type": "IMAGE", + "link": null + }, + { + "localized_name": "mask", + "name": "mask", + "type": "MASK", + "link": null + }, + { + "label": "prompt", + "name": "text", + "type": "STRING", + "widget": { + "name": "text" + }, + "link": null + }, + { + "name": "seed", + "type": "INT", + "widget": { + "name": "seed" + }, + "link": null + }, + { + "name": "unet_name", + "type": "COMBO", + "widget": { + "name": "unet_name" + }, + "link": null + }, + { + "name": "clip_name1", + "type": "COMBO", + "widget": { + "name": "clip_name1" + }, + "link": null + }, + { + "name": "clip_name2", + "type": "COMBO", + "widget": { + "name": "clip_name2" + }, + "link": null + }, + { + "name": "vae_name", + "type": "COMBO", + "widget": { + "name": "vae_name" + }, + "link": null + } + ], + "outputs": [ + { + "localized_name": "IMAGE", + "name": "IMAGE", + "type": "IMAGE", + "links": [] + } + ], + "properties": { + "proxyWidgets": [ + [ + "23", + "text" + ], + [ + "3", + "seed" + ], + [ + "31", + "unet_name" + ], + [ + "34", + "clip_name1" + ], + [ + "34", + "clip_name2" + ], + [ + "230", + "vae_name" + ] + ], + "ue_properties": { + "widget_ue_connectable": {}, + "input_ue_unconnectable": {} + }, + "cnr_id": "comfy-core", + "ver": "0.18.1" + }, + "widgets_values": [], + "title": "Image Inpainting (Flux.1 Fill Dev)" + } + ], + "links": [], + "version": 0.4, + "definitions": { + "subgraphs": [ + { + "id": "6e8d6e38-bdc3-436c-be85-ef9e67e70e07", + "version": 1, + "state": { + "lastGroupId": 22, + "lastNodeId": 232, + "lastLinkId": 286, + "lastRerouteId": 0 + }, + "revision": 0, + "config": {}, + "name": "Image Inpainting (Flux.1 Fill Dev)", + "inputNode": { + "id": -10, + "bounding": [ + -850, + 164, + 120, + 200 + ] + }, + "outputNode": { + "id": -20, + "bounding": [ + 1230, + 140, + 120, + 60 + ] + }, + "inputs": [ + { + "id": "65727ee9-09d0-40c9-bd86-11e0823eb676", + "name": "pixels", + "type": "IMAGE", + "linkIds": [ + 99 + ], + "localized_name": "pixels", + "label": "image", + "pos": [ + -750, + 184 + ] + }, + { + "id": "28424f77-56c5-49c1-ba41-6bd78287c186", + "name": "mask", + "type": "MASK", + "linkIds": [ + 100 + ], + "localized_name": "mask", + "pos": [ + -750, + 204 + ] + }, + { + "id": "2339e5e0-8f8d-4600-b158-7d7dae5f0535", + "name": "text", + "type": "STRING", + "linkIds": [ + 277 + ], + "label": "prompt", + "pos": [ + -750, + 224 + ] + }, + { + "id": "5f433d9b-b97e-4bac-bb88-eb668de2d5a7", + "name": "seed", + "type": "INT", + "linkIds": [ + 282 + ], + "pos": [ + -750, + 244 + ] + }, + { + "id": "35a8b6c1-c92c-4c1a-9b24-2e9bae7808f6", + "name": "unet_name", + "type": "COMBO", + "linkIds": [ + 283 + ], + "pos": [ + -750, + 264 + ] + }, + { + "id": "3af8f8be-bce8-4ba0-aea0-ccf6b377d5f6", + "name": "clip_name1", + "type": "COMBO", + "linkIds": [ + 284 + ], + "pos": [ + -750, + 284 + ] + }, + { + "id": "d9a4af80-4fa1-4792-b955-78bdaef4596e", + "name": "clip_name2", + "type": "COMBO", + "linkIds": [ + 285 + ], + "pos": [ + -750, + 304 + ] + }, + { + "id": "d59398cf-7e9c-4dae-8c5a-08c4756f256a", + "name": "vae_name", + "type": "COMBO", + "linkIds": [ + 286 + ], + "pos": [ + -750, + 324 + ] + } + ], + "outputs": [ + { + "id": "1dee24ec-54a8-41be-aa30-a8fb797d3d23", + "name": "IMAGE", + "type": "IMAGE", + "linkIds": [ + 95 + ], + "localized_name": 
"IMAGE", + "pos": [ + 1250, + 160 + ] + } + ], + "widgets": [], + "nodes": [ + { + "id": 34, + "type": "DualCLIPLoader", + "pos": [ + -590, + 150 + ], + "size": [ + 320, + 180 + ], + "flags": {}, + "order": 3, + "mode": 0, + "inputs": [ + { + "localized_name": "clip_name1", + "name": "clip_name1", + "type": "COMBO", + "widget": { + "name": "clip_name1" + }, + "link": 284 + }, + { + "localized_name": "clip_name2", + "name": "clip_name2", + "type": "COMBO", + "widget": { + "name": "clip_name2" + }, + "link": 285 + }, + { + "localized_name": "type", + "name": "type", + "type": "COMBO", + "widget": { + "name": "type" + }, + "link": null + }, + { + "localized_name": "device", + "name": "device", + "shape": 7, + "type": "COMBO", + "widget": { + "name": "device" + }, + "link": null + } + ], + "outputs": [ + { + "localized_name": "CLIP", + "name": "CLIP", + "type": "CLIP", + "links": [ + 62 + ] + } + ], + "properties": { + "ue_properties": { + "widget_ue_connectable": {}, + "input_ue_unconnectable": {} + }, + "cnr_id": "comfy-core", + "ver": "0.3.64", + "Node name for S&R": "DualCLIPLoader", + "models": [ + { + "name": "clip_l.safetensors", + "url": "https://huggingface.co/comfyanonymous/flux_text_encoders/resolve/main/clip_l.safetensors", + "directory": "text_encoders" + }, + { + "name": "t5xxl_fp16.safetensors", + "url": "https://huggingface.co/comfyanonymous/flux_text_encoders/resolve/main/t5xxl_fp16.safetensors", + "directory": "text_encoders" + } + ] + }, + "widgets_values": [ + "clip_l.safetensors", + "t5xxl_fp16.safetensors", + "flux", + "default" + ] + }, + { + "id": 229, + "type": "FluxGuidance", + "pos": [ + 410, + -40 + ], + "size": [ + 320, + 110 + ], + "flags": { + "collapsed": false + }, + "order": 7, + "mode": 0, + "inputs": [ + { + "localized_name": "conditioning", + "name": "conditioning", + "type": "CONDITIONING", + "link": 41 + }, + { + "localized_name": "guidance", + "name": "guidance", + "type": "FLOAT", + "widget": { + "name": "guidance" + }, + "link": null + } + ], + "outputs": [ + { + "localized_name": "CONDITIONING", + "name": "CONDITIONING", + "type": "CONDITIONING", + "slot_index": 0, + "links": [ + 80 + ] + } + ], + "properties": { + "ue_properties": { + "widget_ue_connectable": {}, + "input_ue_unconnectable": {} + }, + "cnr_id": "comfy-core", + "ver": "0.3.64", + "Node name for S&R": "FluxGuidance" + }, + "widgets_values": [ + 30 + ] + }, + { + "id": 230, + "type": "VAELoader", + "pos": [ + -590, + 450 + ], + "size": [ + 320, + 110 + ], + "flags": {}, + "order": 8, + "mode": 0, + "inputs": [ + { + "localized_name": "vae_name", + "name": "vae_name", + "type": "COMBO", + "widget": { + "name": "vae_name" + }, + "link": 286 + } + ], + "outputs": [ + { + "localized_name": "VAE", + "name": "VAE", + "type": "VAE", + "slot_index": 0, + "links": [ + 60, + 82 + ] + } + ], + "properties": { + "ue_properties": { + "widget_ue_connectable": {}, + "input_ue_unconnectable": {} + }, + "cnr_id": "comfy-core", + "ver": "0.3.64", + "Node name for S&R": "VAELoader", + "models": [ + { + "name": "ae.safetensors", + "url": "https://huggingface.co/Comfy-Org/Lumina_Image_2.0_Repackaged/resolve/main/split_files/vae/ae.safetensors", + "directory": "vae" + } + ] + }, + "widgets_values": [ + "ae.safetensors" + ] + }, + { + "id": 31, + "type": "UNETLoader", + "pos": [ + -590, + -90 + ], + "size": [ + 320, + 110 + ], + "flags": {}, + "order": 2, + "mode": 0, + "inputs": [ + { + "localized_name": "unet_name", + "name": "unet_name", + "type": "COMBO", + "widget": { + "name": "unet_name" + }, + "link": 
283 + }, + { + "localized_name": "weight_dtype", + "name": "weight_dtype", + "type": "COMBO", + "widget": { + "name": "weight_dtype" + }, + "link": null + } + ], + "outputs": [ + { + "localized_name": "MODEL", + "name": "MODEL", + "type": "MODEL", + "slot_index": 0, + "links": [ + 85 + ] + } + ], + "properties": { + "ue_properties": { + "widget_ue_connectable": {}, + "input_ue_unconnectable": {} + }, + "cnr_id": "comfy-core", + "ver": "0.3.64", + "Node name for S&R": "UNETLoader", + "models": [ + { + "name": "flux1-fill-dev.safetensors", + "url": "https://huggingface.co/Comfy-Org/flux1-dev/resolve/main/split_files/diffusion_models/flux1-fill-dev.safetensors", + "directory": "diffusion_models" + } + ] + }, + "widgets_values": [ + "flux1-fill-dev.safetensors", + "default" + ] + }, + { + "id": 46, + "type": "ConditioningZeroOut", + "pos": [ + 90, + 420 + ], + "size": [ + 230, + 80 + ], + "flags": {}, + "order": 6, + "mode": 0, + "inputs": [ + { + "localized_name": "conditioning", + "name": "conditioning", + "type": "CONDITIONING", + "link": 101 + } + ], + "outputs": [ + { + "localized_name": "CONDITIONING", + "name": "CONDITIONING", + "type": "CONDITIONING", + "links": [ + 102 + ] + } + ], + "properties": { + "ue_properties": { + "widget_ue_connectable": {}, + "input_ue_unconnectable": {} + }, + "cnr_id": "comfy-core", + "ver": "0.3.64", + "Node name for S&R": "ConditioningZeroOut" + } + }, + { + "id": 23, + "type": "CLIPTextEncode", + "pos": [ + -160, + -70 + ], + "size": [ + 480, + 410 + ], + "flags": {}, + "order": 1, + "mode": 0, + "inputs": [ + { + "localized_name": "clip", + "name": "clip", + "type": "CLIP", + "link": 62 + }, + { + "localized_name": "text", + "name": "text", + "type": "STRING", + "widget": { + "name": "text" + }, + "link": 277 + } + ], + "outputs": [ + { + "localized_name": "CONDITIONING", + "name": "CONDITIONING", + "type": "CONDITIONING", + "slot_index": 0, + "links": [ + 41, + 101 + ] + } + ], + "title": "CLIP Text Encode (Positive Prompt)", + "properties": { + "ue_properties": { + "widget_ue_connectable": {}, + "input_ue_unconnectable": {} + }, + "cnr_id": "comfy-core", + "ver": "0.3.64", + "Node name for S&R": "CLIPTextEncode" + }, + "widgets_values": [ + "" + ], + "color": "#232", + "bgcolor": "#353" + }, + { + "id": 39, + "type": "DifferentialDiffusion", + "pos": [ + 780, + -110 + ], + "size": [ + 280, + 110 + ], + "flags": {}, + "order": 5, + "mode": 0, + "inputs": [ + { + "localized_name": "model", + "name": "model", + "type": "MODEL", + "link": 85 + }, + { + "localized_name": "strength", + "name": "strength", + "shape": 7, + "type": "FLOAT", + "widget": { + "name": "strength" + }, + "link": null + } + ], + "outputs": [ + { + "localized_name": "MODEL", + "name": "MODEL", + "type": "MODEL", + "slot_index": 0, + "links": [ + 86 + ] + } + ], + "properties": { + "ue_properties": { + "widget_ue_connectable": {}, + "input_ue_unconnectable": {} + }, + "cnr_id": "comfy-core", + "ver": "0.3.64", + "Node name for S&R": "DifferentialDiffusion" + }, + "widgets_values": [ + 1 + ] + }, + { + "id": 231, + "type": "VAEDecode", + "pos": [ + 780, + 590 + ], + "size": [ + 230, + 100 + ], + "flags": {}, + "order": 9, + "mode": 0, + "inputs": [ + { + "localized_name": "samples", + "name": "samples", + "type": "LATENT", + "link": 7 + }, + { + "localized_name": "vae", + "name": "vae", + "type": "VAE", + "link": 60 + } + ], + "outputs": [ + { + "localized_name": "IMAGE", + "name": "IMAGE", + "type": "IMAGE", + "slot_index": 0, + "links": [ + 95 + ] + } + ], + "properties": { + 
"ue_properties": { + "widget_ue_connectable": {}, + "input_ue_unconnectable": {} + }, + "cnr_id": "comfy-core", + "ver": "0.3.64", + "Node name for S&R": "VAEDecode" + } + }, + { + "id": 38, + "type": "InpaintModelConditioning", + "pos": [ + 420, + 120 + ], + "size": [ + 310, + 200 + ], + "flags": {}, + "order": 4, + "mode": 0, + "inputs": [ + { + "localized_name": "positive", + "name": "positive", + "type": "CONDITIONING", + "link": 80 + }, + { + "localized_name": "negative", + "name": "negative", + "type": "CONDITIONING", + "link": 102 + }, + { + "localized_name": "vae", + "name": "vae", + "type": "VAE", + "link": 82 + }, + { + "localized_name": "pixels", + "name": "pixels", + "type": "IMAGE", + "link": 99 + }, + { + "localized_name": "mask", + "name": "mask", + "type": "MASK", + "link": 100 + }, + { + "localized_name": "noise_mask", + "name": "noise_mask", + "type": "BOOLEAN", + "widget": { + "name": "noise_mask" + }, + "link": null + } + ], + "outputs": [ + { + "localized_name": "positive", + "name": "positive", + "type": "CONDITIONING", + "slot_index": 0, + "links": [ + 77 + ] + }, + { + "localized_name": "negative", + "name": "negative", + "type": "CONDITIONING", + "slot_index": 1, + "links": [ + 78 + ] + }, + { + "localized_name": "latent", + "name": "latent", + "type": "LATENT", + "slot_index": 2, + "links": [ + 88 + ] + } + ], + "properties": { + "ue_properties": { + "widget_ue_connectable": {}, + "input_ue_unconnectable": {} + }, + "cnr_id": "comfy-core", + "ver": "0.3.64", + "Node name for S&R": "InpaintModelConditioning" + }, + "widgets_values": [ + true + ] + }, + { + "id": 3, + "type": "KSampler", + "pos": [ + 770, + 40 + ], + "size": [ + 290, + 470 + ], + "flags": {}, + "order": 0, + "mode": 0, + "inputs": [ + { + "localized_name": "model", + "name": "model", + "type": "MODEL", + "link": 86 + }, + { + "localized_name": "positive", + "name": "positive", + "type": "CONDITIONING", + "link": 77 + }, + { + "localized_name": "negative", + "name": "negative", + "type": "CONDITIONING", + "link": 78 + }, + { + "localized_name": "latent_image", + "name": "latent_image", + "type": "LATENT", + "link": 88 + }, + { + "localized_name": "seed", + "name": "seed", + "type": "INT", + "widget": { + "name": "seed" + }, + "link": 282 + }, + { + "localized_name": "steps", + "name": "steps", + "type": "INT", + "widget": { + "name": "steps" + }, + "link": null + }, + { + "localized_name": "cfg", + "name": "cfg", + "type": "FLOAT", + "widget": { + "name": "cfg" + }, + "link": null + }, + { + "localized_name": "sampler_name", + "name": "sampler_name", + "type": "COMBO", + "widget": { + "name": "sampler_name" + }, + "link": null + }, + { + "localized_name": "scheduler", + "name": "scheduler", + "type": "COMBO", + "widget": { + "name": "scheduler" + }, + "link": null + }, + { + "localized_name": "denoise", + "name": "denoise", + "type": "FLOAT", + "widget": { + "name": "denoise" + }, + "link": null + } + ], + "outputs": [ + { + "localized_name": "LATENT", + "name": "LATENT", + "type": "LATENT", + "slot_index": 0, + "links": [ + 7 + ] + } + ], + "properties": { + "ue_properties": { + "widget_ue_connectable": {}, + "input_ue_unconnectable": {} + }, + "cnr_id": "comfy-core", + "ver": "0.3.64", + "Node name for S&R": "KSampler" + }, + "widgets_values": [ + 0, + "randomize", + 20, + 1, + "euler", + "normal", + 1 + ] + } + ], + "groups": [ + { + "id": 1, + "title": "Load models", + "bounding": [ + -620, + -160, + 410, + 790 + ], + "color": "#3f789e", + "font_size": 24, + "flags": {} + }, + { + "id": 3, + 
"title": "Prompt", + "bounding": [ + -180, + -160, + 520, + 670 + ], + "color": "#3f789e", + "font_size": 24, + "flags": {} + } + ], + "links": [ + { + "id": 41, + "origin_id": 23, + "origin_slot": 0, + "target_id": 229, + "target_slot": 0, + "type": "CONDITIONING" + }, + { + "id": 101, + "origin_id": 23, + "origin_slot": 0, + "target_id": 46, + "target_slot": 0, + "type": "CONDITIONING" + }, + { + "id": 62, + "origin_id": 34, + "origin_slot": 0, + "target_id": 23, + "target_slot": 0, + "type": "CLIP" + }, + { + "id": 85, + "origin_id": 31, + "origin_slot": 0, + "target_id": 39, + "target_slot": 0, + "type": "MODEL" + }, + { + "id": 86, + "origin_id": 39, + "origin_slot": 0, + "target_id": 3, + "target_slot": 0, + "type": "MODEL" + }, + { + "id": 77, + "origin_id": 38, + "origin_slot": 0, + "target_id": 3, + "target_slot": 1, + "type": "CONDITIONING" + }, + { + "id": 78, + "origin_id": 38, + "origin_slot": 1, + "target_id": 3, + "target_slot": 2, + "type": "CONDITIONING" + }, + { + "id": 88, + "origin_id": 38, + "origin_slot": 2, + "target_id": 3, + "target_slot": 3, + "type": "LATENT" + }, + { + "id": 7, + "origin_id": 3, + "origin_slot": 0, + "target_id": 231, + "target_slot": 0, + "type": "LATENT" + }, + { + "id": 60, + "origin_id": 230, + "origin_slot": 0, + "target_id": 231, + "target_slot": 1, + "type": "VAE" + }, + { + "id": 80, + "origin_id": 229, + "origin_slot": 0, + "target_id": 38, + "target_slot": 0, + "type": "CONDITIONING" + }, + { + "id": 102, + "origin_id": 46, + "origin_slot": 0, + "target_id": 38, + "target_slot": 1, + "type": "CONDITIONING" + }, + { + "id": 82, + "origin_id": 230, + "origin_slot": 0, + "target_id": 38, + "target_slot": 2, + "type": "VAE" + }, + { + "id": 99, + "origin_id": -10, + "origin_slot": 0, + "target_id": 38, + "target_slot": 3, + "type": "IMAGE" + }, + { + "id": 100, + "origin_id": -10, + "origin_slot": 1, + "target_id": 38, + "target_slot": 4, + "type": "MASK" + }, + { + "id": 95, + "origin_id": 231, + "origin_slot": 0, + "target_id": -20, + "target_slot": 0, + "type": "IMAGE" + }, + { + "id": 277, + "origin_id": -10, + "origin_slot": 2, + "target_id": 23, + "target_slot": 1, + "type": "STRING" + }, + { + "id": 282, + "origin_id": -10, + "origin_slot": 3, + "target_id": 3, + "target_slot": 4, + "type": "INT" + }, + { + "id": 283, + "origin_id": -10, + "origin_slot": 4, + "target_id": 31, + "target_slot": 0, + "type": "COMBO" + }, + { + "id": 284, + "origin_id": -10, + "origin_slot": 5, + "target_id": 34, + "target_slot": 0, + "type": "COMBO" + }, + { + "id": 285, + "origin_id": -10, + "origin_slot": 6, + "target_id": 34, + "target_slot": 1, + "type": "COMBO" + }, + { + "id": 286, + "origin_id": -10, + "origin_slot": 7, + "target_id": 230, + "target_slot": 0, + "type": "COMBO" + } + ], + "extra": { + "workflowRendererVersion": "LG" + }, + "category": "Image generation and editing/Inpaint image" + } + ] + }, + "extra": { + "ds": { + "scale": 0.8480949417360862, + "offset": [ + 833.9510730024642, + 210.32152847588895 + ] + }, + "ue_links": [] + } +} \ No newline at end of file diff --git a/blueprints/Image Inpainting (Qwen-image).json b/blueprints/Image Inpainting (Qwen-image).json index d06f31dd2..95b2909fa 100644 --- a/blueprints/Image Inpainting (Qwen-image).json +++ b/blueprints/Image Inpainting (Qwen-image).json @@ -124,7 +124,7 @@ }, "revision": 0, "config": {}, - "name": "local-Image Inpainting (Qwen-image)", + "name": "Image Inpainting (Qwen-image)", "inputNode": { "id": -10, "bounding": [ @@ -1923,4 +1923,4 @@ "workflowRendererVersion": 
"LG" }, "version": 0.4 -} +} \ No newline at end of file diff --git a/blueprints/Image Outpainting (Qwen-Image).json b/blueprints/Image Outpainting (Qwen-Image).json index bf2c4241a..218fdc775 100644 --- a/blueprints/Image Outpainting (Qwen-Image).json +++ b/blueprints/Image Outpainting (Qwen-Image).json @@ -204,7 +204,7 @@ }, "revision": 0, "config": {}, - "name": "local-Image Outpainting (Qwen-Image)", + "name": "Image Outpainting (Qwen-Image)", "inputNode": { "id": -10, "bounding": [ @@ -2749,4 +2749,4 @@ } }, "version": 0.4 -} +} \ No newline at end of file diff --git a/blueprints/Image to Layers(Qwen-Image Layered).json b/blueprints/Image to Layers(Qwen-Image-Layered).json similarity index 83% rename from blueprints/Image to Layers(Qwen-Image Layered).json rename to blueprints/Image to Layers(Qwen-Image-Layered).json index 164ffbd8d..8a525e7a5 100644 --- a/blueprints/Image to Layers(Qwen-Image Layered).json +++ b/blueprints/Image to Layers(Qwen-Image-Layered).json @@ -1,15 +1,14 @@ { - "id": "1a761372-7c82-4016-b9bf-fa285967e1e9", "revision": 0, - "last_node_id": 83, + "last_node_id": 176, "last_link_id": 0, "nodes": [ { - "id": 83, - "type": "f754a936-daaf-4b6e-9658-41fdc54d301d", + "id": 176, + "type": "2d2e3c8e-53b3-4618-be52-6d1d99382f0e", "pos": [ - 61.999827823554256, - 153.3332507624185 + -1150, + 200 ], "size": [ 400, @@ -56,6 +55,38 @@ "name": "layers" }, "link": null + }, + { + "name": "seed", + "type": "INT", + "widget": { + "name": "seed" + }, + "link": null + }, + { + "name": "unet_name", + "type": "COMBO", + "widget": { + "name": "unet_name" + }, + "link": null + }, + { + "name": "clip_name", + "type": "COMBO", + "widget": { + "name": "clip_name" + }, + "link": null + }, + { + "name": "vae_name", + "type": "COMBO", + "widget": { + "name": "vae_name" + }, + "link": null } ], "outputs": [ @@ -66,28 +97,41 @@ "links": [] } ], + "title": "Image to Layers (Qwen-Image-Layered)", "properties": { "proxyWidgets": [ [ - "-1", + "6", "text" ], [ - "-1", + "3", "steps" ], [ - "-1", + "3", "cfg" ], [ - "-1", + "83", "layers" ], [ "3", "seed" ], + [ + "37", + "unet_name" + ], + [ + "38", + "clip_name" + ], + [ + "39", + "vae_name" + ], [ "3", "control_after_generate" @@ -95,6 +139,11 @@ ], "cnr_id": "comfy-core", "ver": "0.5.1", + "ue_properties": { + "widget_ue_connectable": {}, + "input_ue_unconnectable": {}, + "version": "7.7" + }, "enableTabs": false, "tabWidth": 65, "tabXOffset": 10, @@ -103,25 +152,20 @@ "secondTabOffset": 80, "secondTabWidth": 65 }, - "widgets_values": [ - "", - 20, - 2.5, - 2 - ] + "widgets_values": [] } ], "links": [], - "groups": [], + "version": 0.4, "definitions": { "subgraphs": [ { - "id": "f754a936-daaf-4b6e-9658-41fdc54d301d", + "id": "2d2e3c8e-53b3-4618-be52-6d1d99382f0e", "version": 1, "state": { - "lastGroupId": 3, - "lastNodeId": 83, - "lastLinkId": 159, + "lastGroupId": 8, + "lastNodeId": 176, + "lastLinkId": 380, "lastRerouteId": 0 }, "revision": 0, @@ -130,10 +174,10 @@ "inputNode": { "id": -10, "bounding": [ - -510, - 523, + -720, + 720, 120, - 140 + 220 ] }, "outputNode": { @@ -156,8 +200,8 @@ ], "localized_name": "image", "pos": [ - -410, - 543 + -620, + 740 ] }, { @@ -168,8 +212,8 @@ 150 ], "pos": [ - -410, - 563 + -620, + 760 ] }, { @@ -180,8 +224,8 @@ 153 ], "pos": [ - -410, - 583 + -620, + 780 ] }, { @@ -192,8 +236,8 @@ 154 ], "pos": [ - -410, - 603 + -620, + 800 ] }, { @@ -204,8 +248,56 @@ 159 ], "pos": [ - -410, - 623 + -620, + 820 + ] + }, + { + "id": "9f76338b-f4ca-4bb3-b61a-57b3f233061e", + "name": "seed", + "type": "INT", + 
"linkIds": [ + 377 + ], + "pos": [ + -620, + 840 + ] + }, + { + "id": "8d0422d5-5eee-4f7e-9817-dc613cc62eca", + "name": "unet_name", + "type": "COMBO", + "linkIds": [ + 378 + ], + "pos": [ + -620, + 860 + ] + }, + { + "id": "552eece2-a735-4d00-ae78-ded454622bc1", + "name": "clip_name", + "type": "COMBO", + "linkIds": [ + 379 + ], + "pos": [ + -620, + 880 + ] + }, + { + "id": "1e6d141c-d0f9-4a2b-895c-b6780e57cfa0", + "name": "vae_name", + "type": "COMBO", + "linkIds": [ + 380 + ], + "pos": [ + -620, + 900 ] } ], @@ -231,14 +323,14 @@ "type": "CLIPLoader", "pos": [ -320, - 310 + 360 ], "size": [ - 346.7470703125, - 106 + 350, + 150 ], "flags": {}, - "order": 0, + "order": 5, "mode": 0, "inputs": [ { @@ -248,7 +340,7 @@ "widget": { "name": "clip_name" }, - "link": null + "link": 379 }, { "localized_name": "type", @@ -283,9 +375,14 @@ } ], "properties": { - "Node name for S&R": "CLIPLoader", "cnr_id": "comfy-core", "ver": "0.5.1", + "ue_properties": { + "widget_ue_connectable": {}, + "input_ue_unconnectable": {}, + "version": "7.7" + }, + "Node name for S&R": "CLIPLoader", "models": [ { "name": "qwen_2.5_vl_7b_fp8_scaled.safetensors", @@ -312,14 +409,14 @@ "type": "VAELoader", "pos": [ -320, - 460 + 580 ], "size": [ - 346.7470703125, - 58 + 350, + 110 ], "flags": {}, - "order": 1, + "order": 6, "mode": 0, "inputs": [ { @@ -329,7 +426,7 @@ "widget": { "name": "vae_name" }, - "link": null + "link": 380 } ], "outputs": [ @@ -345,9 +442,14 @@ } ], "properties": { - "Node name for S&R": "VAELoader", "cnr_id": "comfy-core", "ver": "0.5.1", + "ue_properties": { + "widget_ue_connectable": {}, + "input_ue_unconnectable": {}, + "version": "7.7" + }, + "Node name for S&R": "VAELoader", "models": [ { "name": "qwen_image_layered_vae.safetensors", @@ -375,11 +477,11 @@ 420 ], "size": [ - 425.27801513671875, - 180.6060791015625 + 430, + 190 ], "flags": {}, - "order": 3, + "order": 2, "mode": 0, "inputs": [ { @@ -411,9 +513,14 @@ ], "title": "CLIP Text Encode (Negative Prompt)", "properties": { - "Node name for S&R": "CLIPTextEncode", "cnr_id": "comfy-core", "ver": "0.5.1", + "ue_properties": { + "widget_ue_connectable": {}, + "input_ue_unconnectable": {}, + "version": "7.7" + }, + "Node name for S&R": "CLIPTextEncode", "enableTabs": false, "tabWidth": 65, "tabXOffset": 10, @@ -432,12 +539,12 @@ "id": 70, "type": "ReferenceLatent", "pos": [ - 330, - 670 + 140, + 700 ], "size": [ - 204.1666717529297, - 46 + 210, + 50 ], "flags": { "collapsed": true @@ -470,9 +577,14 @@ } ], "properties": { - "Node name for S&R": "ReferenceLatent", "cnr_id": "comfy-core", "ver": "0.5.1", + "ue_properties": { + "widget_ue_connectable": {}, + "input_ue_unconnectable": {}, + "version": "7.7" + }, + "Node name for S&R": "ReferenceLatent", "enableTabs": false, "tabWidth": 65, "tabXOffset": 10, @@ -480,19 +592,18 @@ "secondTabText": "Send Back", "secondTabOffset": 80, "secondTabWidth": 65 - }, - "widgets_values": [] + } }, { "id": 69, "type": "ReferenceLatent", "pos": [ - 330, - 710 + 160, + 820 ], "size": [ - 204.1666717529297, - 46 + 210, + 50 ], "flags": { "collapsed": true @@ -525,9 +636,14 @@ } ], "properties": { - "Node name for S&R": "ReferenceLatent", "cnr_id": "comfy-core", "ver": "0.5.1", + "ue_properties": { + "widget_ue_connectable": {}, + "input_ue_unconnectable": {}, + "version": "7.7" + }, + "Node name for S&R": "ReferenceLatent", "enableTabs": false, "tabWidth": 65, "tabXOffset": 10, @@ -535,8 +651,7 @@ "secondTabText": "Send Back", "secondTabOffset": 80, "secondTabWidth": 65 - }, - "widgets_values": [] + } }, { "id": 
66, @@ -547,10 +662,10 @@ ], "size": [ 270, - 58 + 110 ], "flags": {}, - "order": 4, + "order": 7, "mode": 0, "inputs": [ { @@ -580,9 +695,14 @@ } ], "properties": { - "Node name for S&R": "ModelSamplingAuraFlow", "cnr_id": "comfy-core", "ver": "0.5.1", + "ue_properties": { + "widget_ue_connectable": {}, + "input_ue_unconnectable": {}, + "version": "7.7" + }, + "Node name for S&R": "ModelSamplingAuraFlow", "enableTabs": false, "tabWidth": 65, "tabXOffset": 10, @@ -600,11 +720,11 @@ "type": "LatentCutToBatch", "pos": [ 830, - 160 + 140 ], "size": [ 270, - 82 + 140 ], "flags": {}, "order": 11, @@ -646,9 +766,14 @@ } ], "properties": { - "Node name for S&R": "LatentCutToBatch", "cnr_id": "comfy-core", "ver": "0.5.1", + "ue_properties": { + "widget_ue_connectable": {}, + "input_ue_unconnectable": {}, + "version": "7.7" + }, + "Node name for S&R": "LatentCutToBatch", "enableTabs": false, "tabWidth": 65, "tabXOffset": 10, @@ -666,12 +791,12 @@ "id": 71, "type": "VAEEncode", "pos": [ - 100, - 690 + -280, + 780 ], "size": [ - 140, - 46 + 230, + 100 ], "flags": { "collapsed": false @@ -704,9 +829,14 @@ } ], "properties": { - "Node name for S&R": "VAEEncode", "cnr_id": "comfy-core", "ver": "0.5.1", + "ue_properties": { + "widget_ue_connectable": {}, + "input_ue_unconnectable": {}, + "version": "7.7" + }, + "Node name for S&R": "VAEEncode", "enableTabs": false, "tabWidth": 65, "tabXOffset": 10, @@ -714,24 +844,23 @@ "secondTabText": "Send Back", "secondTabOffset": 80, "secondTabWidth": 65 - }, - "widgets_values": [] + } }, { "id": 8, "type": "VAEDecode", "pos": [ 850, - 310 + 370 ], "size": [ 210, - 46 + 50 ], "flags": { "collapsed": true }, - "order": 7, + "order": 3, "mode": 0, "inputs": [ { @@ -759,9 +888,14 @@ } ], "properties": { - "Node name for S&R": "VAEDecode", "cnr_id": "comfy-core", "ver": "0.5.1", + "ue_properties": { + "widget_ue_connectable": {}, + "input_ue_unconnectable": {}, + "version": "7.7" + }, + "Node name for S&R": "VAEDecode", "enableTabs": false, "tabWidth": 65, "tabXOffset": 10, @@ -769,8 +903,7 @@ "secondTabText": "Send Back", "secondTabOffset": 80, "secondTabWidth": 65 - }, - "widgets_values": [] + } }, { "id": 6, @@ -780,11 +913,11 @@ 180 ], "size": [ - 422.84503173828125, - 164.31304931640625 + 430, + 170 ], "flags": {}, - "order": 6, + "order": 1, "mode": 0, "inputs": [ { @@ -816,9 +949,14 @@ ], "title": "CLIP Text Encode (Positive Prompt)", "properties": { - "Node name for S&R": "CLIPTextEncode", "cnr_id": "comfy-core", "ver": "0.5.1", + "ue_properties": { + "widget_ue_connectable": {}, + "input_ue_unconnectable": {}, + "version": "7.7" + }, + "Node name for S&R": "CLIPTextEncode", "enableTabs": false, "tabWidth": 65, "tabXOffset": 10, @@ -838,14 +976,14 @@ "type": "KSampler", "pos": [ 530, - 280 + 340 ], "size": [ 270, 400 ], "flags": {}, - "order": 5, + "order": 0, "mode": 0, "inputs": [ { @@ -879,7 +1017,7 @@ "widget": { "name": "seed" }, - "link": null + "link": 377 }, { "localized_name": "steps", @@ -939,9 +1077,14 @@ } ], "properties": { - "Node name for S&R": "KSampler", "cnr_id": "comfy-core", "ver": "0.5.1", + "ue_properties": { + "widget_ue_connectable": {}, + "input_ue_unconnectable": {}, + "version": "7.7" + }, + "Node name for S&R": "KSampler", "enableTabs": false, "tabWidth": 65, "tabXOffset": 10, @@ -964,12 +1107,12 @@ "id": 78, "type": "GetImageSize", "pos": [ - 80, - 790 + -280, + 930 ], "size": [ - 210, - 136 + 230, + 140 ], "flags": {}, "order": 12, @@ -1007,9 +1150,14 @@ } ], "properties": { - "Node name for S&R": "GetImageSize", "cnr_id": 
"comfy-core", "ver": "0.5.1", + "ue_properties": { + "widget_ue_connectable": {}, + "input_ue_unconnectable": {}, + "version": "7.7" + }, + "Node name for S&R": "GetImageSize", "enableTabs": false, "tabWidth": 65, "tabXOffset": 10, @@ -1017,23 +1165,23 @@ "secondTabText": "Send Back", "secondTabOffset": 80, "secondTabWidth": 65 - }, - "widgets_values": [] + } }, { "id": 83, "type": "EmptyQwenImageLayeredLatentImage", "pos": [ - 320, - 790 + -280, + 1120 ], "size": [ - 330.9341796875, - 130 + 340, + 200 ], "flags": {}, "order": 13, "mode": 0, + "showAdvanced": true, "inputs": [ { "localized_name": "width", @@ -1083,9 +1231,14 @@ } ], "properties": { - "Node name for S&R": "EmptyQwenImageLayeredLatentImage", "cnr_id": "comfy-core", "ver": "0.5.1", + "ue_properties": { + "widget_ue_connectable": {}, + "input_ue_unconnectable": {}, + "version": "7.7" + }, + "Node name for S&R": "EmptyQwenImageLayeredLatentImage", "enableTabs": false, "tabWidth": 65, "tabXOffset": 10, @@ -1109,11 +1262,11 @@ 180 ], "size": [ - 346.7470703125, - 82 + 350, + 110 ], "flags": {}, - "order": 2, + "order": 4, "mode": 0, "inputs": [ { @@ -1123,7 +1276,7 @@ "widget": { "name": "unet_name" }, - "link": null + "link": 378 }, { "localized_name": "weight_dtype", @@ -1147,9 +1300,14 @@ } ], "properties": { - "Node name for S&R": "UNETLoader", "cnr_id": "comfy-core", "ver": "0.5.1", + "ue_properties": { + "widget_ue_connectable": {}, + "input_ue_unconnectable": {}, + "version": "7.7" + }, + "Node name for S&R": "UNETLoader", "models": [ { "name": "qwen_image_layered_bf16.safetensors", @@ -1191,8 +1349,8 @@ "bounding": [ -330, 110, - 366.7470703125, - 421.6 + 370, + 610 ], "color": "#3f789e", "font_size": 24, @@ -1391,6 +1549,38 @@ "target_id": 83, "target_slot": 2, "type": "INT" + }, + { + "id": 377, + "origin_id": -10, + "origin_slot": 5, + "target_id": 3, + "target_slot": 4, + "type": "INT" + }, + { + "id": 378, + "origin_id": -10, + "origin_slot": 6, + "target_id": 37, + "target_slot": 0, + "type": "COMBO" + }, + { + "id": 379, + "origin_id": -10, + "origin_slot": 7, + "target_id": 38, + "target_slot": 0, + "type": "COMBO" + }, + { + "id": 380, + "origin_id": -10, + "origin_slot": 8, + "target_id": 39, + "target_slot": 0, + "type": "COMBO" } ], "extra": { @@ -1400,7 +1590,6 @@ } ] }, - "config": {}, "extra": { "ds": { "scale": 1.14, @@ -1409,7 +1598,6 @@ 6.855893974423647 ] }, - "workflowRendererVersion": "LG" - }, - "version": 0.4 -} + "ue_links": [] + } +} \ No newline at end of file diff --git a/blueprints/Image to Video (LTX-2.3).json b/blueprints/Image to Video (LTX-2.3).json new file mode 100644 index 000000000..86a601130 --- /dev/null +++ b/blueprints/Image to Video (LTX-2.3).json @@ -0,0 +1,4233 @@ +{ + "revision": 0, + "last_node_id": 320, + "last_link_id": 0, + "nodes": [ + { + "id": 320, + "type": "2454ad83-157c-40dd-9f19-5daaf4041ce0", + "pos": [ + 30, + 4150 + ], + "size": [ + 390, + 466.625 + ], + "flags": {}, + "order": 2, + "mode": 0, + "inputs": [ + { + "label": "first_frame", + "localized_name": "input", + "name": "input", + "type": "IMAGE,MASK", + "link": null + }, + { + "name": "value", + "type": "STRING", + "widget": { + "name": "value" + }, + "link": null + }, + { + "label": "width", + "name": "value_2", + "type": "INT", + "widget": { + "name": "value_2" + }, + "link": null + }, + { + "label": "height", + "name": "value_3", + "type": "INT", + "widget": { + "name": "value_3" + }, + "link": null + }, + { + "label": "duration", + "name": "value_4", + "type": "INT", + "widget": { + "name": "value_4" + }, 
+ "link": null + }, + { + "name": "ckpt_name", + "type": "COMBO", + "widget": { + "name": "ckpt_name" + }, + "link": null + }, + { + "label": "distilled_lora", + "name": "lora_name", + "type": "COMBO", + "widget": { + "name": "lora_name" + }, + "link": null + }, + { + "name": "text_encoder", + "type": "COMBO", + "widget": { + "name": "text_encoder" + }, + "link": null + }, + { + "label": "latent_upscale_model", + "name": "model_name", + "type": "COMBO", + "widget": { + "name": "model_name" + }, + "link": null + }, + { + "label": "fps", + "name": "value_5", + "type": "INT", + "widget": { + "name": "value_5" + }, + "link": null + } + ], + "outputs": [ + { + "localized_name": "VIDEO", + "name": "VIDEO", + "type": "VIDEO", + "links": [] + } + ], + "title": "Image to Video (LTX-2.3)", + "properties": { + "proxyWidgets": [ + [ + "319", + "value" + ], + [ + "312", + "value" + ], + [ + "299", + "value" + ], + [ + "301", + "value" + ], + [ + "300", + "value" + ], + [ + "316", + "ckpt_name" + ], + [ + "277", + "control_after_generate" + ], + [ + "277", + "noise_seed" + ], + [ + "285", + "lora_name" + ], + [ + "317", + "text_encoder" + ], + [ + "311", + "model_name" + ] + ], + "ue_properties": { + "widget_ue_connectable": { + "value_1": true, + "value_2": true, + "value_3": true, + "value_4": true, + "lora_name": true, + "model_name": true, + "value_5": true + }, + "version": "7.7", + "input_ue_unconnectable": {} + }, + "cnr_id": "comfy-core", + "ver": "0.16.3", + "enableTabs": false, + "tabWidth": 65, + "tabXOffset": 10, + "hasSecondTab": false, + "secondTabText": "Send Back", + "secondTabOffset": 80, + "secondTabWidth": 65 + }, + "widgets_values": [] + } + ], + "links": [], + "version": 0.4, + "definitions": { + "subgraphs": [ + { + "id": "2454ad83-157c-40dd-9f19-5daaf4041ce0", + "version": 1, + "state": { + "lastGroupId": 25, + "lastNodeId": 323, + "lastLinkId": 631, + "lastRerouteId": 0 + }, + "revision": 0, + "config": {}, + "name": "Image to Video (LTX-2.3)", + "inputNode": { + "id": -10, + "bounding": [ + 730, + 4110, + 162.162109375, + 240 + ] + }, + "outputNode": { + "id": -20, + "bounding": [ + 6590, + 4360, + 120, + 60 + ] + }, + "inputs": [ + { + "id": "7afd6ea8-c738-4fd9-97b8-66fa905cd381", + "name": "input", + "type": "IMAGE,MASK", + "linkIds": [ + 535 + ], + "localized_name": "input", + "label": "first_frame", + "pos": [ + 872.162109375, + 4130 + ] + }, + { + "id": "9494c550-4172-49c6-930e-5b508f775e77", + "name": "value", + "type": "STRING", + "linkIds": [ + 595 + ], + "pos": [ + 872.162109375, + 4150 + ] + }, + { + "id": "58dbb3f6-f924-4548-96ef-e0e34610bd4e", + "name": "value_2", + "type": "INT", + "linkIds": [ + 597 + ], + "label": "width", + "pos": [ + 872.162109375, + 4170 + ] + }, + { + "id": "6086d5b8-2586-448c-a641-dd14d76dd102", + "name": "value_3", + "type": "INT", + "linkIds": [ + 598 + ], + "label": "height", + "pos": [ + 872.162109375, + 4190 + ] + }, + { + "id": "feb8c2eb-ae48-4fa8-bc24-929552d656c3", + "name": "value_4", + "type": "INT", + "linkIds": [ + 599 + ], + "label": "duration", + "pos": [ + 872.162109375, + 4210 + ] + }, + { + "id": "d7255058-319a-4880-8f9a-7e542c8f3c3c", + "name": "ckpt_name", + "type": "COMBO", + "linkIds": [ + 601, + 604, + 605 + ], + "pos": [ + 872.162109375, + 4230 + ] + }, + { + "id": "4afce68d-8f65-4342-9d6d-ae0a7688c3e3", + "name": "lora_name", + "type": "COMBO", + "linkIds": [ + 602 + ], + "label": "distilled_lora", + "pos": [ + 872.162109375, + 4250 + ] + }, + { + "id": "ab842b4b-c977-4679-b421-424722785b57", + "name": "text_encoder", 
+ "type": "COMBO", + "linkIds": [ + 606 + ], + "pos": [ + 872.162109375, + 4270 + ] + }, + { + "id": "9e47372d-28d9-4311-91e9-e90d03f4eb43", + "name": "model_name", + "type": "COMBO", + "linkIds": [ + 607 + ], + "label": "latent_upscale_model", + "pos": [ + 872.162109375, + 4290 + ] + }, + { + "id": "3e32ce15-0ae7-4cd0-909f-a354e8e9c4c9", + "name": "value_5", + "type": "INT", + "linkIds": [ + 624 + ], + "label": "fps", + "pos": [ + 872.162109375, + 4310 + ] + } + ], + "outputs": [ + { + "id": "954ef307-c897-4eea-8b5c-5c6ce15a5357", + "name": "VIDEO", + "type": "VIDEO", + "linkIds": [ + 536 + ], + "localized_name": "VIDEO", + "pos": [ + 6610, + 4380 + ] + } + ], + "widgets": [], + "nodes": [ + { + "id": 276, + "type": "RandomNoise", + "pos": [ + 4700, + 3650 + ], + "size": [ + 280, + 110 + ], + "flags": {}, + "order": 0, + "mode": 0, + "inputs": [ + { + "localized_name": "noise_seed", + "name": "noise_seed", + "type": "INT", + "widget": { + "name": "noise_seed" + }, + "link": null + } + ], + "outputs": [ + { + "localized_name": "NOISE", + "name": "NOISE", + "type": "NOISE", + "links": [ + 490 + ] + } + ], + "properties": { + "ue_properties": { + "widget_ue_connectable": {}, + "version": "7.7", + "input_ue_unconnectable": {} + }, + "cnr_id": "comfy-core", + "ver": "0.3.75", + "Node name for S&R": "RandomNoise", + "enableTabs": false, + "tabWidth": 65, + "tabXOffset": 10, + "hasSecondTab": false, + "secondTabText": "Send Back", + "secondTabOffset": 80, + "secondTabWidth": 65 + }, + "widgets_values": [ + 42, + "fixed" + ] + }, + { + "id": 277, + "type": "RandomNoise", + "pos": [ + 3160, + 3630 + ], + "size": [ + 280, + 110 + ], + "flags": {}, + "order": 1, + "mode": 0, + "inputs": [ + { + "localized_name": "noise_seed", + "name": "noise_seed", + "type": "INT", + "widget": { + "name": "noise_seed" + }, + "link": null + } + ], + "outputs": [ + { + "localized_name": "NOISE", + "name": "NOISE", + "type": "NOISE", + "links": [ + 483 + ] + } + ], + "properties": { + "ue_properties": { + "widget_ue_connectable": {}, + "version": "7.7", + "input_ue_unconnectable": {} + }, + "cnr_id": "comfy-core", + "ver": "0.3.56", + "Node name for S&R": "RandomNoise", + "enableTabs": false, + "tabWidth": 65, + "tabXOffset": 10, + "hasSecondTab": false, + "secondTabText": "Send Back", + "secondTabOffset": 80, + "secondTabWidth": 65 + }, + "widgets_values": [ + 519681071352364, + "randomize" + ] + }, + { + "id": 278, + "type": "LTXVConcatAVLatent", + "pos": [ + 4710, + 4490 + ], + "size": [ + 280, + 100 + ], + "flags": {}, + "order": 7, + "mode": 0, + "inputs": [ + { + "localized_name": "video_latent", + "name": "video_latent", + "type": "LATENT", + "link": 512 + }, + { + "localized_name": "audio_latent", + "name": "audio_latent", + "type": "LATENT", + "link": 513 + } + ], + "outputs": [ + { + "localized_name": "latent", + "name": "latent", + "type": "LATENT", + "links": [ + 494 + ] + } + ], + "properties": { + "ue_properties": { + "widget_ue_connectable": {}, + "version": "7.7", + "input_ue_unconnectable": {} + }, + "cnr_id": "comfy-core", + "ver": "0.5.1", + "Node name for S&R": "LTXVConcatAVLatent", + "enableTabs": false, + "tabWidth": 65, + "tabXOffset": 10, + "hasSecondTab": false, + "secondTabText": "Send Back", + "secondTabOffset": 80, + "secondTabWidth": 65 + } + }, + { + "id": 279, + "type": "LTXVAudioVAELoader", + "pos": [ + 1660, + 4100 + ], + "size": [ + 430, + 110 + ], + "flags": {}, + "order": 8, + "mode": 0, + "inputs": [ + { + "localized_name": "ckpt_name", + "name": "ckpt_name", + "type": "COMBO", + 
"widget": { + "name": "ckpt_name" + }, + "link": 604 + } + ], + "outputs": [ + { + "localized_name": "Audio VAE", + "name": "Audio VAE", + "type": "VAE", + "links": [ + 481, + 496 + ] + } + ], + "properties": { + "ue_properties": { + "widget_ue_connectable": {}, + "version": "7.7", + "input_ue_unconnectable": {} + }, + "cnr_id": "comfy-core", + "ver": "0.3.68", + "Node name for S&R": "LTXVAudioVAELoader", + "enableTabs": false, + "tabWidth": 65, + "tabXOffset": 10, + "hasSecondTab": false, + "secondTabText": "Send Back", + "secondTabOffset": 80, + "secondTabWidth": 65, + "models": [ + { + "name": "ltx-2.3-22b-dev-fp8.safetensors", + "url": "https://huggingface.co/Lightricks/LTX-2.3-fp8/resolve/main/ltx-2.3-22b-dev-fp8.safetensors", + "directory": "checkpoints" + } + ] + }, + "widgets_values": [ + "ltx-2.3-22b-dev-fp8.safetensors" + ] + }, + { + "id": 280, + "type": "KSamplerSelect", + "pos": [ + 4700, + 4100 + ], + "size": [ + 280, + 110 + ], + "flags": {}, + "order": 2, + "mode": 0, + "inputs": [ + { + "localized_name": "sampler_name", + "name": "sampler_name", + "type": "COMBO", + "widget": { + "name": "sampler_name" + }, + "link": null + } + ], + "outputs": [ + { + "localized_name": "SAMPLER", + "name": "SAMPLER", + "type": "SAMPLER", + "links": [ + 492 + ] + } + ], + "properties": { + "ue_properties": { + "widget_ue_connectable": {}, + "version": "7.7", + "input_ue_unconnectable": {} + }, + "cnr_id": "comfy-core", + "ver": "0.3.75", + "Node name for S&R": "KSamplerSelect", + "enableTabs": false, + "tabWidth": 65, + "tabXOffset": 10, + "hasSecondTab": false, + "secondTabText": "Send Back", + "secondTabOffset": 80, + "secondTabWidth": 65 + }, + "widgets_values": [ + "euler_cfg_pp" + ] + }, + { + "id": 281, + "type": "ManualSigmas", + "pos": [ + 4700, + 4290 + ], + "size": [ + 280, + 110 + ], + "flags": {}, + "order": 3, + "mode": 0, + "inputs": [ + { + "localized_name": "sigmas", + "name": "sigmas", + "type": "STRING", + "widget": { + "name": "sigmas" + }, + "link": null + } + ], + "outputs": [ + { + "localized_name": "SIGMAS", + "name": "SIGMAS", + "type": "SIGMAS", + "links": [ + 493 + ] + } + ], + "properties": { + "ue_properties": { + "widget_ue_connectable": {}, + "version": "7.7", + "input_ue_unconnectable": {} + }, + "cnr_id": "comfy-core", + "ver": "0.5.1", + "Node name for S&R": "ManualSigmas", + "enableTabs": false, + "tabWidth": 65, + "tabXOffset": 10, + "hasSecondTab": false, + "secondTabText": "Send Back", + "secondTabOffset": 80, + "secondTabWidth": 65 + }, + "widgets_values": [ + "0.85, 0.7250, 0.4219, 0.0" + ] + }, + { + "id": 282, + "type": "CFGGuider", + "pos": [ + 4700, + 3850 + ], + "size": [ + 280, + 160 + ], + "flags": {}, + "order": 9, + "mode": 0, + "inputs": [ + { + "localized_name": "model", + "name": "model", + "type": "MODEL", + "link": 478 + }, + { + "localized_name": "positive", + "name": "positive", + "type": "CONDITIONING", + "link": 479 + }, + { + "localized_name": "negative", + "name": "negative", + "type": "CONDITIONING", + "link": 480 + }, + { + "localized_name": "cfg", + "name": "cfg", + "type": "FLOAT", + "widget": { + "name": "cfg" + }, + "link": null + } + ], + "outputs": [ + { + "localized_name": "GUIDER", + "name": "GUIDER", + "type": "GUIDER", + "links": [ + 491 + ] + } + ], + "properties": { + "ue_properties": { + "widget_ue_connectable": {}, + "version": "7.7", + "input_ue_unconnectable": {} + }, + "cnr_id": "comfy-core", + "ver": "0.3.71", + "Node name for S&R": "CFGGuider", + "enableTabs": false, + "tabWidth": 65, + "tabXOffset": 10, + 
"hasSecondTab": false, + "secondTabText": "Send Back", + "secondTabOffset": 80, + "secondTabWidth": 65 + }, + "widgets_values": [ + 1 + ] + }, + { + "id": 283, + "type": "SamplerCustomAdvanced", + "pos": [ + 3550, + 3630 + ], + "size": [ + 230, + 170 + ], + "flags": {}, + "order": 10, + "mode": 0, + "inputs": [ + { + "localized_name": "noise", + "name": "noise", + "type": "NOISE", + "link": 483 + }, + { + "localized_name": "guider", + "name": "guider", + "type": "GUIDER", + "link": 484 + }, + { + "localized_name": "sampler", + "name": "sampler", + "type": "SAMPLER", + "link": 485 + }, + { + "localized_name": "sigmas", + "name": "sigmas", + "type": "SIGMAS", + "link": 544 + }, + { + "localized_name": "latent_image", + "name": "latent_image", + "type": "LATENT", + "link": 487 + } + ], + "outputs": [ + { + "localized_name": "output", + "name": "output", + "type": "LATENT", + "links": [ + 488 + ] + }, + { + "localized_name": "denoised_output", + "name": "denoised_output", + "type": "LATENT", + "links": [] + } + ], + "properties": { + "ue_properties": { + "widget_ue_connectable": {}, + "version": "7.7", + "input_ue_unconnectable": {} + }, + "cnr_id": "comfy-core", + "ver": "0.3.60", + "Node name for S&R": "SamplerCustomAdvanced", + "enableTabs": false, + "tabWidth": 65, + "tabXOffset": 10, + "hasSecondTab": false, + "secondTabText": "Send Back", + "secondTabOffset": 80, + "secondTabWidth": 65 + } + }, + { + "id": 284, + "type": "LTXVCropGuides", + "pos": [ + 3830, + 3810 + ], + "size": [ + 250, + 120 + ], + "flags": {}, + "order": 11, + "mode": 0, + "inputs": [ + { + "localized_name": "positive", + "name": "positive", + "type": "CONDITIONING", + "link": 475 + }, + { + "localized_name": "negative", + "name": "negative", + "type": "CONDITIONING", + "link": 476 + }, + { + "localized_name": "latent", + "name": "latent", + "type": "LATENT", + "link": 477 + } + ], + "outputs": [ + { + "localized_name": "positive", + "name": "positive", + "type": "CONDITIONING", + "links": [ + 479 + ] + }, + { + "localized_name": "negative", + "name": "negative", + "type": "CONDITIONING", + "links": [ + 480 + ] + }, + { + "localized_name": "latent", + "name": "latent", + "type": "LATENT", + "slot_index": 2, + "links": [] + } + ], + "properties": { + "ue_properties": { + "widget_ue_connectable": {}, + "version": "7.7", + "input_ue_unconnectable": {} + }, + "cnr_id": "comfy-core", + "ver": "0.3.68", + "Node name for S&R": "LTXVCropGuides", + "enableTabs": false, + "tabWidth": 65, + "tabXOffset": 10, + "hasSecondTab": false, + "secondTabText": "Send Back", + "secondTabOffset": 80, + "secondTabWidth": 65 + } + }, + { + "id": 285, + "type": "LoraLoaderModelOnly", + "pos": [ + 1660, + 3890 + ], + "size": [ + 430, + 140 + ], + "flags": {}, + "order": 12, + "mode": 0, + "inputs": [ + { + "localized_name": "model", + "name": "model", + "type": "MODEL", + "link": 520 + }, + { + "localized_name": "lora_name", + "name": "lora_name", + "type": "COMBO", + "widget": { + "name": "lora_name" + }, + "link": 602 + }, + { + "localized_name": "strength_model", + "name": "strength_model", + "type": "FLOAT", + "widget": { + "name": "strength_model" + }, + "link": null + } + ], + "outputs": [ + { + "localized_name": "MODEL", + "name": "MODEL", + "type": "MODEL", + "links": [ + 478, + 541 + ] + } + ], + "properties": { + "ue_properties": { + "widget_ue_connectable": {}, + "version": "7.7", + "input_ue_unconnectable": {} + }, + "cnr_id": "comfy-core", + "ver": "0.3.75", + "Node name for S&R": "LoraLoaderModelOnly", + "enableTabs": false, + 
"tabWidth": 65, + "tabXOffset": 10, + "hasSecondTab": false, + "secondTabText": "Send Back", + "secondTabOffset": 80, + "secondTabWidth": 65, + "models": [ + { + "name": "ltx-2.3-22b-distilled-lora-384.safetensors", + "url": "https://huggingface.co/Lightricks/LTX-2.3/resolve/main/ltx-2.3-22b-distilled-lora-384.safetensors", + "directory": "loras" + } + ] + }, + "widgets_values": [ + "ltx-2.3-22b-distilled-lora-384.safetensors", + 0.5 + ] + }, + { + "id": 286, + "type": "ResizeImagesByLongerEdge", + "pos": [ + 2070, + 4810 + ], + "size": [ + 310, + 110 + ], + "flags": { + "collapsed": false + }, + "order": 13, + "mode": 0, + "inputs": [ + { + "localized_name": "images", + "name": "images", + "type": "IMAGE", + "link": 523 + }, + { + "localized_name": "longer_edge", + "name": "longer_edge", + "type": "INT", + "widget": { + "name": "longer_edge" + }, + "link": null + } + ], + "outputs": [ + { + "localized_name": "images", + "name": "images", + "type": "IMAGE", + "links": [ + 505 + ] + } + ], + "properties": { + "ue_properties": { + "widget_ue_connectable": {}, + "version": "7.7", + "input_ue_unconnectable": {} + }, + "cnr_id": "comfy-core", + "ver": "0.5.1", + "Node name for S&R": "ResizeImagesByLongerEdge", + "enableTabs": false, + "tabWidth": 65, + "tabXOffset": 10, + "hasSecondTab": false, + "secondTabText": "Send Back", + "secondTabOffset": 80, + "secondTabWidth": 65 + }, + "widgets_values": [ + 1536 + ] + }, + { + "id": 287, + "type": "LTXVLatentUpsampler", + "pos": [ + 4250, + 3760 + ], + "size": [ + 330, + 120 + ], + "flags": {}, + "order": 14, + "mode": 0, + "inputs": [ + { + "localized_name": "samples", + "name": "samples", + "type": "LATENT", + "link": 547 + }, + { + "localized_name": "upscale_model", + "name": "upscale_model", + "type": "LATENT_UPSCALE_MODEL", + "link": 545 + }, + { + "localized_name": "vae", + "name": "vae", + "type": "VAE", + "link": 554 + } + ], + "outputs": [ + { + "localized_name": "LATENT", + "name": "LATENT", + "type": "LATENT", + "links": [ + 548 + ] + } + ], + "properties": { + "ue_properties": { + "widget_ue_connectable": {}, + "version": "7.7", + "input_ue_unconnectable": {} + }, + "cnr_id": "comfy-core", + "ver": "0.14.1", + "Node name for S&R": "LTXVLatentUpsampler", + "enableTabs": false, + "tabWidth": 65, + "tabXOffset": 10, + "hasSecondTab": false, + "secondTabText": "Send Back", + "secondTabOffset": 80, + "secondTabWidth": 65 + } + }, + { + "id": 288, + "type": "LTXVImgToVideoInplace", + "pos": [ + 4230, + 4100 + ], + "size": [ + 300, + 180 + ], + "flags": {}, + "order": 15, + "mode": 0, + "inputs": [ + { + "localized_name": "vae", + "name": "vae", + "type": "VAE", + "link": 552 + }, + { + "localized_name": "image", + "name": "image", + "type": "IMAGE", + "link": 515 + }, + { + "localized_name": "latent", + "name": "latent", + "type": "LATENT", + "link": 548 + }, + { + "localized_name": "strength", + "name": "strength", + "type": "FLOAT", + "widget": { + "name": "strength" + }, + "link": null + }, + { + "localized_name": "bypass", + "name": "bypass", + "type": "BOOLEAN", + "widget": { + "name": "bypass" + }, + "link": 543 + } + ], + "outputs": [ + { + "localized_name": "latent", + "name": "latent", + "type": "LATENT", + "links": [ + 512 + ] + } + ], + "properties": { + "ue_properties": { + "widget_ue_connectable": {}, + "version": "7.7", + "input_ue_unconnectable": {} + }, + "cnr_id": "comfy-core", + "ver": "0.7.0", + "Node name for S&R": "LTXVImgToVideoInplace", + "enableTabs": false, + "tabWidth": 65, + "tabXOffset": 10, + "hasSecondTab": false, 
+ "secondTabText": "Send Back", + "secondTabOffset": 80, + "secondTabWidth": 65 + }, + "widgets_values": [ + 1, + false + ] + }, + { + "id": 289, + "type": "LTXVPreprocess", + "pos": [ + 2100, + 5010 + ], + "size": [ + 290, + 110 + ], + "flags": {}, + "order": 16, + "mode": 0, + "inputs": [ + { + "localized_name": "image", + "name": "image", + "type": "IMAGE", + "link": 505 + }, + { + "localized_name": "img_compression", + "name": "img_compression", + "type": "INT", + "widget": { + "name": "img_compression" + }, + "link": null + } + ], + "outputs": [ + { + "localized_name": "output_image", + "name": "output_image", + "type": "IMAGE", + "links": [ + 510, + 515 + ] + } + ], + "properties": { + "ue_properties": { + "widget_ue_connectable": {}, + "version": "7.7", + "input_ue_unconnectable": {} + }, + "cnr_id": "comfy-core", + "ver": "0.7.0", + "Node name for S&R": "LTXVPreprocess", + "enableTabs": false, + "tabWidth": 65, + "tabXOffset": 10, + "hasSecondTab": false, + "secondTabText": "Send Back", + "secondTabOffset": 80, + "secondTabWidth": 65 + }, + "widgets_values": [ + 18 + ] + }, + { + "id": 290, + "type": "ResizeImageMaskNode", + "pos": [ + 1660, + 4810 + ], + "size": [ + 300, + 160 + ], + "flags": {}, + "order": 17, + "mode": 0, + "inputs": [ + { + "localized_name": "input", + "name": "input", + "type": "IMAGE,MASK", + "link": 535 + }, + { + "localized_name": "resize_type", + "name": "resize_type", + "type": "COMFY_DYNAMICCOMBO_V3", + "widget": { + "name": "resize_type" + }, + "link": null + }, + { + "localized_name": "width", + "name": "resize_type.width", + "type": "INT", + "widget": { + "name": "resize_type.width" + }, + "link": 558 + }, + { + "localized_name": "height", + "name": "resize_type.height", + "type": "INT", + "widget": { + "name": "resize_type.height" + }, + "link": 559 + }, + { + "localized_name": "crop", + "name": "resize_type.crop", + "type": "COMBO", + "widget": { + "name": "resize_type.crop" + }, + "link": null + }, + { + "localized_name": "scale_method", + "name": "scale_method", + "type": "COMBO", + "widget": { + "name": "scale_method" + }, + "link": null + } + ], + "outputs": [ + { + "localized_name": "resized", + "name": "resized", + "type": "*", + "links": [ + 523 + ] + } + ], + "properties": { + "ue_properties": { + "widget_ue_connectable": {}, + "version": "7.7", + "input_ue_unconnectable": {} + }, + "cnr_id": "comfy-core", + "ver": "0.7.0", + "Node name for S&R": "ResizeImageMaskNode", + "enableTabs": false, + "tabWidth": 65, + "tabXOffset": 10, + "hasSecondTab": false, + "secondTabText": "Send Back", + "secondTabOffset": 80, + "secondTabWidth": 65 + }, + "widgets_values": [ + "scale dimensions", + 1920, + 1088, + "center", + "lanczos" + ] + }, + { + "id": 291, + "type": "KSamplerSelect", + "pos": [ + 3160, + 4040 + ], + "size": [ + 280, + 110 + ], + "flags": {}, + "order": 4, + "mode": 0, + "inputs": [ + { + "localized_name": "sampler_name", + "name": "sampler_name", + "type": "COMBO", + "widget": { + "name": "sampler_name" + }, + "link": null + } + ], + "outputs": [ + { + "localized_name": "SAMPLER", + "name": "SAMPLER", + "type": "SAMPLER", + "links": [ + 485 + ] + } + ], + "properties": { + "ue_properties": { + "widget_ue_connectable": {}, + "version": "7.7", + "input_ue_unconnectable": {} + }, + "cnr_id": "comfy-core", + "ver": "0.3.56", + "Node name for S&R": "KSamplerSelect", + "enableTabs": false, + "tabWidth": 65, + "tabXOffset": 10, + "hasSecondTab": false, + "secondTabText": "Send Back", + "secondTabOffset": 80, + "secondTabWidth": 65 + }, + 
"widgets_values": [ + "euler_ancestral_cfg_pp" + ] + }, + { + "id": 292, + "type": "ComfyMathExpression", + "pos": [ + 2540, + 4830 + ], + "size": [ + 210, + 80 + ], + "flags": { + "collapsed": true + }, + "order": 18, + "mode": 0, + "inputs": [ + { + "label": "a", + "localized_name": "values.a", + "name": "values.a", + "type": "FLOAT,INT", + "link": 560 + }, + { + "label": "b", + "localized_name": "values.b", + "name": "values.b", + "shape": 7, + "type": "FLOAT,INT", + "link": null + }, + { + "localized_name": "expression", + "name": "expression", + "type": "STRING", + "widget": { + "name": "expression" + }, + "link": null + } + ], + "outputs": [ + { + "localized_name": "FLOAT", + "name": "FLOAT", + "type": "FLOAT", + "links": null + }, + { + "localized_name": "INT", + "name": "INT", + "type": "INT", + "links": [ + 561 + ] + } + ], + "properties": { + "ue_properties": { + "widget_ue_connectable": {}, + "version": "7.7", + "input_ue_unconnectable": {} + }, + "cnr_id": "comfy-core", + "ver": "0.16.3", + "Node name for S&R": "ComfyMathExpression", + "enableTabs": false, + "tabWidth": 65, + "tabXOffset": 10, + "hasSecondTab": false, + "secondTabText": "Send Back", + "secondTabOffset": 80, + "secondTabWidth": 65 + }, + "widgets_values": [ + "a/2" + ] + }, + { + "id": 293, + "type": "Reroute", + "pos": [ + 3850, + 4050 + ], + "size": [ + 230, + 40 + ], + "flags": {}, + "order": 19, + "mode": 0, + "inputs": [ + { + "name": "", + "type": "*", + "link": 557 + } + ], + "outputs": [ + { + "name": "", + "type": "VAE", + "links": [ + 552, + 553, + 554 + ] + } + ], + "properties": { + "showOutputText": false, + "horizontal": false, + "ue_properties": { + "widget_ue_connectable": {}, + "version": "7.7", + "input_ue_unconnectable": {} + } + } + }, + { + "id": 294, + "type": "ComfyMathExpression", + "pos": [ + 2550, + 4890 + ], + "size": [ + 210, + 80 + ], + "flags": { + "collapsed": true + }, + "order": 20, + "mode": 0, + "inputs": [ + { + "label": "a", + "localized_name": "values.a", + "name": "values.a", + "type": "FLOAT,INT", + "link": 562 + }, + { + "label": "b", + "localized_name": "values.b", + "name": "values.b", + "shape": 7, + "type": "FLOAT,INT", + "link": null + }, + { + "localized_name": "expression", + "name": "expression", + "type": "STRING", + "widget": { + "name": "expression" + }, + "link": null + } + ], + "outputs": [ + { + "localized_name": "FLOAT", + "name": "FLOAT", + "type": "FLOAT", + "links": null + }, + { + "localized_name": "INT", + "name": "INT", + "type": "INT", + "links": [ + 563 + ] + } + ], + "properties": { + "ue_properties": { + "widget_ue_connectable": {}, + "version": "7.7", + "input_ue_unconnectable": {} + }, + "cnr_id": "comfy-core", + "ver": "0.16.3", + "Node name for S&R": "ComfyMathExpression", + "enableTabs": false, + "tabWidth": 65, + "tabXOffset": 10, + "hasSecondTab": false, + "secondTabText": "Send Back", + "secondTabOffset": 80, + "secondTabWidth": 65 + }, + "widgets_values": [ + "a/2" + ] + }, + { + "id": 295, + "type": "EmptyLTXVLatentVideo", + "pos": [ + 2870, + 4940 + ], + "size": [ + 280, + 200 + ], + "flags": {}, + "order": 21, + "mode": 0, + "inputs": [ + { + "localized_name": "width", + "name": "width", + "type": "INT", + "widget": { + "name": "width" + }, + "link": 561 + }, + { + "localized_name": "height", + "name": "height", + "type": "INT", + "widget": { + "name": "height" + }, + "link": 563 + }, + { + "localized_name": "length", + "name": "length", + "type": "INT", + "widget": { + "name": "length" + }, + "link": 631 + }, + { + "localized_name": 
"batch_size", + "name": "batch_size", + "type": "INT", + "widget": { + "name": "batch_size" + }, + "link": null + } + ], + "outputs": [ + { + "localized_name": "LATENT", + "name": "LATENT", + "type": "LATENT", + "links": [ + 511 + ] + } + ], + "properties": { + "ue_properties": { + "widget_ue_connectable": {}, + "version": "7.7", + "input_ue_unconnectable": {} + }, + "cnr_id": "comfy-core", + "ver": "0.3.60", + "Node name for S&R": "EmptyLTXVLatentVideo", + "enableTabs": false, + "tabWidth": 65, + "tabXOffset": 10, + "hasSecondTab": false, + "secondTabText": "Send Back", + "secondTabOffset": 80, + "secondTabWidth": 65 + }, + "widgets_values": [ + 768, + 512, + 97, + 1 + ] + }, + { + "id": 296, + "type": "LTXVImgToVideoInplace", + "pos": [ + 3230, + 4810 + ], + "size": [ + 280, + 180 + ], + "flags": {}, + "order": 22, + "mode": 0, + "inputs": [ + { + "localized_name": "vae", + "name": "vae", + "type": "VAE", + "link": 556 + }, + { + "localized_name": "image", + "name": "image", + "type": "IMAGE", + "link": 510 + }, + { + "localized_name": "latent", + "name": "latent", + "type": "LATENT", + "link": 511 + }, + { + "localized_name": "strength", + "name": "strength", + "type": "FLOAT", + "widget": { + "name": "strength" + }, + "link": null + }, + { + "localized_name": "bypass", + "name": "bypass", + "type": "BOOLEAN", + "widget": { + "name": "bypass" + }, + "link": 542 + } + ], + "outputs": [ + { + "localized_name": "latent", + "name": "latent", + "type": "LATENT", + "links": [ + 497 + ] + } + ], + "properties": { + "ue_properties": { + "widget_ue_connectable": {}, + "version": "7.7", + "input_ue_unconnectable": {} + }, + "cnr_id": "comfy-core", + "ver": "0.7.0", + "Node name for S&R": "LTXVImgToVideoInplace", + "enableTabs": false, + "tabWidth": 65, + "tabXOffset": 10, + "hasSecondTab": false, + "secondTabText": "Send Back", + "secondTabOffset": 80, + "secondTabWidth": 65 + }, + "widgets_values": [ + 0.7, + false + ] + }, + { + "id": 297, + "type": "LTXVAudioVAEDecode", + "pos": [ + 5760, + 3970 + ], + "size": [ + 270, + 100 + ], + "flags": {}, + "order": 23, + "mode": 0, + "inputs": [ + { + "localized_name": "samples", + "name": "samples", + "type": "LATENT", + "link": 495 + }, + { + "label": "Audio VAE", + "localized_name": "audio_vae", + "name": "audio_vae", + "type": "VAE", + "link": 496 + } + ], + "outputs": [ + { + "localized_name": "Audio", + "name": "Audio", + "type": "AUDIO", + "links": [ + 534 + ] + } + ], + "properties": { + "ue_properties": { + "widget_ue_connectable": {}, + "version": "7.7", + "input_ue_unconnectable": {} + }, + "cnr_id": "comfy-core", + "ver": "0.7.0", + "Node name for S&R": "LTXVAudioVAEDecode", + "enableTabs": false, + "tabWidth": 65, + "tabXOffset": 10, + "hasSecondTab": false, + "secondTabText": "Send Back", + "secondTabOffset": 80, + "secondTabWidth": 65 + } + }, + { + "id": 298, + "type": "ComfyMathExpression", + "pos": [ + 2540, + 5030 + ], + "size": [ + 210, + 80 + ], + "flags": { + "collapsed": true + }, + "order": 24, + "mode": 0, + "inputs": [ + { + "label": "a", + "localized_name": "values.a", + "name": "values.a", + "type": "FLOAT,INT", + "link": 564 + }, + { + "label": "b", + "localized_name": "values.b", + "name": "values.b", + "shape": 7, + "type": "FLOAT,INT", + "link": null + }, + { + "localized_name": "expression", + "name": "expression", + "type": "STRING", + "widget": { + "name": "expression" + }, + "link": null + } + ], + "outputs": [ + { + "localized_name": "FLOAT", + "name": "FLOAT", + "type": "FLOAT", + "links": [ + 566, + 591 + ] + }, + 
{ + "localized_name": "INT", + "name": "INT", + "type": "INT", + "links": [ + 565 + ] + } + ], + "properties": { + "ue_properties": { + "widget_ue_connectable": {}, + "version": "7.7", + "input_ue_unconnectable": {} + }, + "cnr_id": "comfy-core", + "ver": "0.16.3", + "Node name for S&R": "ComfyMathExpression", + "enableTabs": false, + "tabWidth": 65, + "tabXOffset": 10, + "hasSecondTab": false, + "secondTabText": "Send Back", + "secondTabOffset": 80, + "secondTabWidth": 65 + }, + "widgets_values": [ + "a" + ] + }, + { + "id": 299, + "type": "PrimitiveInt", + "pos": [ + 1190, + 4650 + ], + "size": [ + 370, + 110 + ], + "flags": {}, + "order": 25, + "mode": 0, + "inputs": [ + { + "localized_name": "value", + "name": "value", + "type": "INT", + "widget": { + "name": "value" + }, + "link": 598 + } + ], + "outputs": [ + { + "localized_name": "INT", + "name": "INT", + "type": "INT", + "links": [ + 559, + 562 + ] + } + ], + "title": "Height", + "properties": { + "ue_properties": { + "widget_ue_connectable": {}, + "version": "7.7", + "input_ue_unconnectable": {} + }, + "cnr_id": "comfy-core", + "ver": "0.16.3", + "Node name for S&R": "PrimitiveInt", + "enableTabs": false, + "tabWidth": 65, + "tabXOffset": 10, + "hasSecondTab": false, + "secondTabText": "Send Back", + "secondTabOffset": 80, + "secondTabWidth": 65 + }, + "widgets_values": [ + 720, + "fixed" + ] + }, + { + "id": 300, + "type": "PrimitiveInt", + "pos": [ + 1190, + 4840 + ], + "size": [ + 370, + 110 + ], + "flags": {}, + "order": 26, + "mode": 0, + "inputs": [ + { + "localized_name": "value", + "name": "value", + "type": "INT", + "widget": { + "name": "value" + }, + "link": 624 + } + ], + "outputs": [ + { + "localized_name": "INT", + "name": "INT", + "type": "INT", + "links": [ + 564, + 629 + ] + } + ], + "title": "Frame Rate", + "properties": { + "ue_properties": { + "widget_ue_connectable": {}, + "version": "7.7", + "input_ue_unconnectable": {} + }, + "cnr_id": "comfy-core", + "ver": "0.16.3", + "Node name for S&R": "PrimitiveInt", + "enableTabs": false, + "tabWidth": 65, + "tabXOffset": 10, + "hasSecondTab": false, + "secondTabText": "Send Back", + "secondTabOffset": 80, + "secondTabWidth": 65 + }, + "widgets_values": [ + 25, + "fixed" + ] + }, + { + "id": 301, + "type": "PrimitiveInt", + "pos": [ + 1190, + 4280 + ], + "size": [ + 370, + 110 + ], + "flags": {}, + "order": 27, + "mode": 0, + "inputs": [ + { + "localized_name": "value", + "name": "value", + "type": "INT", + "widget": { + "name": "value" + }, + "link": 599 + } + ], + "outputs": [ + { + "localized_name": "INT", + "name": "INT", + "type": "INT", + "links": [ + 628 + ] + } + ], + "title": "Duration", + "properties": { + "ue_properties": { + "widget_ue_connectable": {}, + "version": "7.7", + "input_ue_unconnectable": {} + }, + "cnr_id": "comfy-core", + "ver": "0.7.0", + "Node name for S&R": "PrimitiveInt", + "enableTabs": false, + "tabWidth": 65, + "tabXOffset": 10, + "hasSecondTab": false, + "secondTabText": "Send Back", + "secondTabOffset": 80, + "secondTabWidth": 65 + }, + "widgets_values": [ + 5, + "fixed" + ] + }, + { + "id": 302, + "type": "PrimitiveBoolean", + "pos": [ + 1190, + 4110 + ], + "size": [ + 370, + 100 + ], + "flags": {}, + "order": 5, + "mode": 0, + "inputs": [ + { + "localized_name": "value", + "name": "value", + "type": "BOOLEAN", + "widget": { + "name": "value" + }, + "link": null + } + ], + "outputs": [ + { + "localized_name": "BOOLEAN", + "name": "BOOLEAN", + "type": "BOOLEAN", + "links": [ + 542, + 543 + ] + } + ], + "title": "Switch to Text to 
Video?", + "properties": { + "ue_properties": { + "widget_ue_connectable": {}, + "version": "7.7", + "input_ue_unconnectable": {} + }, + "cnr_id": "comfy-core", + "ver": "0.16.0", + "Node name for S&R": "PrimitiveBoolean", + "enableTabs": false, + "tabWidth": 65, + "tabXOffset": 10, + "hasSecondTab": false, + "secondTabText": "Send Back", + "secondTabOffset": 80, + "secondTabWidth": 65 + }, + "widgets_values": [ + false + ] + }, + { + "id": 303, + "type": "CLIPTextEncode", + "pos": [ + 2170, + 3640 + ], + "size": [ + 600, + 390 + ], + "flags": {}, + "order": 28, + "mode": 0, + "inputs": [ + { + "localized_name": "clip", + "name": "clip", + "type": "CLIP", + "link": 615 + }, + { + "localized_name": "text", + "name": "text", + "type": "STRING", + "widget": { + "name": "text" + }, + "link": 625 + } + ], + "outputs": [ + { + "localized_name": "CONDITIONING", + "name": "CONDITIONING", + "type": "CONDITIONING", + "links": [ + 526 + ] + } + ], + "properties": { + "ue_properties": { + "widget_ue_connectable": {}, + "version": "7.7", + "input_ue_unconnectable": {} + }, + "cnr_id": "comfy-core", + "ver": "0.3.56", + "Node name for S&R": "CLIPTextEncode", + "enableTabs": false, + "tabWidth": 65, + "tabXOffset": 10, + "hasSecondTab": false, + "secondTabText": "Send Back", + "secondTabOffset": 80, + "secondTabWidth": 65 + }, + "widgets_values": [ + "" + ], + "color": "#232", + "bgcolor": "#353" + }, + { + "id": 304, + "type": "LTXVConditioning", + "pos": [ + 2800, + 3810 + ], + "size": [ + 280, + 130 + ], + "flags": {}, + "order": 29, + "mode": 0, + "inputs": [ + { + "localized_name": "positive", + "name": "positive", + "type": "CONDITIONING", + "link": 526 + }, + { + "localized_name": "negative", + "name": "negative", + "type": "CONDITIONING", + "link": 527 + }, + { + "localized_name": "frame_rate", + "name": "frame_rate", + "type": "FLOAT", + "widget": { + "name": "frame_rate" + }, + "link": 566 + } + ], + "outputs": [ + { + "localized_name": "positive", + "name": "positive", + "type": "CONDITIONING", + "links": [ + 475, + 518 + ] + }, + { + "localized_name": "negative", + "name": "negative", + "type": "CONDITIONING", + "links": [ + 476, + 519 + ] + } + ], + "properties": { + "ue_properties": { + "widget_ue_connectable": {}, + "version": "7.7", + "input_ue_unconnectable": {} + }, + "cnr_id": "comfy-core", + "ver": "0.3.56", + "Node name for S&R": "LTXVConditioning", + "enableTabs": false, + "tabWidth": 65, + "tabXOffset": 10, + "hasSecondTab": false, + "secondTabText": "Send Back", + "secondTabOffset": 80, + "secondTabWidth": 65 + }, + "widgets_values": [ + 24 + ] + }, + { + "id": 305, + "type": "LTXVEmptyLatentAudio", + "pos": [ + 3540, + 4960 + ], + "size": [ + 280, + 170 + ], + "flags": {}, + "order": 30, + "mode": 0, + "inputs": [ + { + "localized_name": "audio_vae", + "name": "audio_vae", + "type": "VAE", + "link": 481 + }, + { + "localized_name": "frames_number", + "name": "frames_number", + "type": "INT", + "widget": { + "name": "frames_number" + }, + "link": 630 + }, + { + "localized_name": "frame_rate", + "name": "frame_rate", + "type": "INT", + "widget": { + "name": "frame_rate" + }, + "link": 565 + }, + { + "localized_name": "batch_size", + "name": "batch_size", + "type": "INT", + "widget": { + "name": "batch_size" + }, + "link": null + } + ], + "outputs": [ + { + "localized_name": "Latent", + "name": "Latent", + "type": "LATENT", + "links": [ + 498 + ] + } + ], + "properties": { + "ue_properties": { + "widget_ue_connectable": {}, + "version": "7.7", + "input_ue_unconnectable": {} + }, + 
"cnr_id": "comfy-core", + "ver": "0.3.68", + "Node name for S&R": "LTXVEmptyLatentAudio", + "enableTabs": false, + "tabWidth": 65, + "tabXOffset": 10, + "hasSecondTab": false, + "secondTabText": "Send Back", + "secondTabOffset": 80, + "secondTabWidth": 65 + }, + "widgets_values": [ + 97, + 25, + 1 + ] + }, + { + "id": 306, + "type": "ManualSigmas", + "pos": [ + 3160, + 4220 + ], + "size": [ + 500, + 110 + ], + "flags": {}, + "order": 6, + "mode": 0, + "inputs": [ + { + "localized_name": "sigmas", + "name": "sigmas", + "type": "STRING", + "widget": { + "name": "sigmas" + }, + "link": null + } + ], + "outputs": [ + { + "localized_name": "SIGMAS", + "name": "SIGMAS", + "type": "SIGMAS", + "links": [ + 544 + ] + } + ], + "properties": { + "ue_properties": { + "widget_ue_connectable": {}, + "version": "7.7", + "input_ue_unconnectable": {} + }, + "cnr_id": "comfy-core", + "ver": "0.14.1", + "Node name for S&R": "ManualSigmas", + "enableTabs": false, + "tabWidth": 65, + "tabXOffset": 10, + "hasSecondTab": false, + "secondTabText": "Send Back", + "secondTabOffset": 80, + "secondTabWidth": 65 + }, + "widgets_values": [ + "1.0, 0.99375, 0.9875, 0.98125, 0.975, 0.909375, 0.725, 0.421875, 0.0" + ] + }, + { + "id": 307, + "type": "LTXVSeparateAVLatent", + "pos": [ + 3820, + 3630 + ], + "size": [ + 250, + 100 + ], + "flags": {}, + "order": 31, + "mode": 0, + "inputs": [ + { + "localized_name": "av_latent", + "name": "av_latent", + "type": "LATENT", + "link": 488 + } + ], + "outputs": [ + { + "localized_name": "video_latent", + "name": "video_latent", + "type": "LATENT", + "links": [ + 477, + 547 + ] + }, + { + "localized_name": "audio_latent", + "name": "audio_latent", + "type": "LATENT", + "links": [ + 513 + ] + } + ], + "properties": { + "ue_properties": { + "widget_ue_connectable": {}, + "version": "7.7", + "input_ue_unconnectable": {} + }, + "cnr_id": "comfy-core", + "ver": "0.5.1", + "Node name for S&R": "LTXVSeparateAVLatent", + "enableTabs": false, + "tabWidth": 65, + "tabXOffset": 10, + "hasSecondTab": false, + "secondTabText": "Send Back", + "secondTabOffset": 80, + "secondTabWidth": 65 + } + }, + { + "id": 308, + "type": "SamplerCustomAdvanced", + "pos": [ + 5050, + 3650 + ], + "size": [ + 230, + 170 + ], + "flags": {}, + "order": 32, + "mode": 0, + "inputs": [ + { + "localized_name": "noise", + "name": "noise", + "type": "NOISE", + "link": 490 + }, + { + "localized_name": "guider", + "name": "guider", + "type": "GUIDER", + "link": 491 + }, + { + "localized_name": "sampler", + "name": "sampler", + "type": "SAMPLER", + "link": 492 + }, + { + "localized_name": "sigmas", + "name": "sigmas", + "type": "SIGMAS", + "link": 493 + }, + { + "localized_name": "latent_image", + "name": "latent_image", + "type": "LATENT", + "link": 494 + } + ], + "outputs": [ + { + "localized_name": "output", + "name": "output", + "type": "LATENT", + "links": [ + 578 + ] + }, + { + "localized_name": "denoised_output", + "name": "denoised_output", + "type": "LATENT", + "links": [] + } + ], + "properties": { + "ue_properties": { + "widget_ue_connectable": {}, + "version": "7.7", + "input_ue_unconnectable": {} + }, + "cnr_id": "comfy-core", + "ver": "0.3.75", + "Node name for S&R": "SamplerCustomAdvanced", + "enableTabs": false, + "tabWidth": 65, + "tabXOffset": 10, + "hasSecondTab": false, + "secondTabText": "Send Back", + "secondTabOffset": 80, + "secondTabWidth": 65 + } + }, + { + "id": 309, + "type": "LTXVSeparateAVLatent", + "pos": [ + 5390, + 3650 + ], + "size": [ + 230, + 100 + ], + "flags": {}, + "order": 33, + 
"mode": 0, + "inputs": [ + { + "localized_name": "av_latent", + "name": "av_latent", + "type": "LATENT", + "link": 578 + } + ], + "outputs": [ + { + "localized_name": "video_latent", + "name": "video_latent", + "type": "LATENT", + "links": [ + 539 + ] + }, + { + "localized_name": "audio_latent", + "name": "audio_latent", + "type": "LATENT", + "links": [ + 495 + ] + } + ], + "properties": { + "ue_properties": { + "widget_ue_connectable": {}, + "version": "7.7", + "input_ue_unconnectable": {} + }, + "cnr_id": "comfy-core", + "ver": "0.5.1", + "Node name for S&R": "LTXVSeparateAVLatent", + "enableTabs": false, + "tabWidth": 65, + "tabXOffset": 10, + "hasSecondTab": false, + "secondTabText": "Send Back", + "secondTabOffset": 80, + "secondTabWidth": 65 + } + }, + { + "id": 310, + "type": "CreateVideo", + "pos": [ + 6050, + 4490 + ], + "size": [ + 280, + 130 + ], + "flags": {}, + "order": 34, + "mode": 0, + "inputs": [ + { + "localized_name": "images", + "name": "images", + "type": "IMAGE", + "link": 538 + }, + { + "localized_name": "audio", + "name": "audio", + "shape": 7, + "type": "AUDIO", + "link": 534 + }, + { + "localized_name": "fps", + "name": "fps", + "type": "FLOAT", + "widget": { + "name": "fps" + }, + "link": 591 + } + ], + "outputs": [ + { + "localized_name": "VIDEO", + "name": "VIDEO", + "type": "VIDEO", + "links": [ + 536 + ] + } + ], + "properties": { + "ue_properties": { + "widget_ue_connectable": {}, + "version": "7.7", + "input_ue_unconnectable": {} + }, + "cnr_id": "comfy-core", + "ver": "0.5.1", + "Node name for S&R": "CreateVideo", + "enableTabs": false, + "tabWidth": 65, + "tabXOffset": 10, + "hasSecondTab": false, + "secondTabText": "Send Back", + "secondTabOffset": 80, + "secondTabWidth": 65 + }, + "widgets_values": [ + 24 + ] + }, + { + "id": 311, + "type": "LatentUpscaleModelLoader", + "pos": [ + 1670, + 4550 + ], + "size": [ + 400, + 110 + ], + "flags": {}, + "order": 35, + "mode": 0, + "inputs": [ + { + "localized_name": "model_name", + "name": "model_name", + "type": "COMBO", + "widget": { + "name": "model_name" + }, + "link": 607 + } + ], + "outputs": [ + { + "localized_name": "LATENT_UPSCALE_MODEL", + "name": "LATENT_UPSCALE_MODEL", + "type": "LATENT_UPSCALE_MODEL", + "links": [ + 545 + ] + } + ], + "properties": { + "ue_properties": { + "widget_ue_connectable": {}, + "version": "7.7", + "input_ue_unconnectable": {} + }, + "cnr_id": "comfy-core", + "ver": "0.7.0", + "Node name for S&R": "LatentUpscaleModelLoader", + "enableTabs": false, + "tabWidth": 65, + "tabXOffset": 10, + "hasSecondTab": false, + "secondTabText": "Send Back", + "secondTabOffset": 80, + "secondTabWidth": 65, + "models": [ + { + "name": "ltx-2.3-spatial-upscaler-x2-1.1.safetensors", + "url": "https://huggingface.co/Lightricks/LTX-2.3/resolve/main/ltx-2.3-spatial-upscaler-x2-1.1.safetensors", + "directory": "latent_upscale_models" + } + ] + }, + "widgets_values": [ + "ltx-2.3-spatial-upscaler-x2-1.1.safetensors" + ] + }, + { + "id": 312, + "type": "PrimitiveInt", + "pos": [ + 1190, + 4470 + ], + "size": [ + 370, + 110 + ], + "flags": {}, + "order": 36, + "mode": 0, + "inputs": [ + { + "localized_name": "value", + "name": "value", + "type": "INT", + "widget": { + "name": "value" + }, + "link": 597 + } + ], + "outputs": [ + { + "localized_name": "INT", + "name": "INT", + "type": "INT", + "links": [ + 558, + 560 + ] + } + ], + "title": "Width", + "properties": { + "ue_properties": { + "widget_ue_connectable": {}, + "version": "7.7", + "input_ue_unconnectable": {} + }, + "cnr_id": "comfy-core", + 
"ver": "0.16.3", + "Node name for S&R": "PrimitiveInt", + "enableTabs": false, + "tabWidth": 65, + "tabXOffset": 10, + "hasSecondTab": false, + "secondTabText": "Send Back", + "secondTabOffset": 80, + "secondTabWidth": 65 + }, + "widgets_values": [ + 1280, + "fixed" + ] + }, + { + "id": 313, + "type": "CLIPTextEncode", + "pos": [ + 2180, + 4120 + ], + "size": [ + 600, + 170 + ], + "flags": {}, + "order": 37, + "mode": 0, + "inputs": [ + { + "localized_name": "clip", + "name": "clip", + "type": "CLIP", + "link": 627 + }, + { + "localized_name": "text", + "name": "text", + "type": "STRING", + "widget": { + "name": "text" + }, + "link": null + } + ], + "outputs": [ + { + "localized_name": "CONDITIONING", + "name": "CONDITIONING", + "type": "CONDITIONING", + "links": [ + 527 + ] + } + ], + "properties": { + "ue_properties": { + "widget_ue_connectable": {}, + "version": "7.7", + "input_ue_unconnectable": {} + }, + "cnr_id": "comfy-core", + "ver": "0.3.56", + "Node name for S&R": "CLIPTextEncode", + "enableTabs": false, + "tabWidth": 65, + "tabXOffset": 10, + "hasSecondTab": false, + "secondTabText": "Send Back", + "secondTabOffset": 80, + "secondTabWidth": 65 + }, + "widgets_values": [ + "pc game, console game, video game, cartoon, childish, ugly" + ], + "color": "#323", + "bgcolor": "#535" + }, + { + "id": 314, + "type": "CFGGuider", + "pos": [ + 3160, + 3810 + ], + "size": [ + 280, + 160 + ], + "flags": {}, + "order": 38, + "mode": 0, + "inputs": [ + { + "localized_name": "model", + "name": "model", + "type": "MODEL", + "link": 541 + }, + { + "localized_name": "positive", + "name": "positive", + "type": "CONDITIONING", + "link": 518 + }, + { + "localized_name": "negative", + "name": "negative", + "type": "CONDITIONING", + "link": 519 + }, + { + "localized_name": "cfg", + "name": "cfg", + "type": "FLOAT", + "widget": { + "name": "cfg" + }, + "link": null + } + ], + "outputs": [ + { + "localized_name": "GUIDER", + "name": "GUIDER", + "type": "GUIDER", + "links": [ + 484 + ] + } + ], + "properties": { + "ue_properties": { + "widget_ue_connectable": {}, + "version": "7.7", + "input_ue_unconnectable": {} + }, + "cnr_id": "comfy-core", + "ver": "0.3.64", + "Node name for S&R": "CFGGuider", + "enableTabs": false, + "tabWidth": 65, + "tabXOffset": 10, + "hasSecondTab": false, + "secondTabText": "Send Back", + "secondTabOffset": 80, + "secondTabWidth": 65 + }, + "widgets_values": [ + 1 + ] + }, + { + "id": 315, + "type": "VAEDecodeTiled", + "pos": [ + 5750, + 3610 + ], + "size": [ + 280, + 200 + ], + "flags": {}, + "order": 39, + "mode": 0, + "inputs": [ + { + "localized_name": "samples", + "name": "samples", + "type": "LATENT", + "link": 539 + }, + { + "localized_name": "vae", + "name": "vae", + "type": "VAE", + "link": 553 + }, + { + "localized_name": "tile_size", + "name": "tile_size", + "type": "INT", + "widget": { + "name": "tile_size" + }, + "link": null + }, + { + "localized_name": "overlap", + "name": "overlap", + "type": "INT", + "widget": { + "name": "overlap" + }, + "link": null + }, + { + "localized_name": "temporal_size", + "name": "temporal_size", + "type": "INT", + "widget": { + "name": "temporal_size" + }, + "link": null + }, + { + "localized_name": "temporal_overlap", + "name": "temporal_overlap", + "type": "INT", + "widget": { + "name": "temporal_overlap" + }, + "link": null + } + ], + "outputs": [ + { + "localized_name": "IMAGE", + "name": "IMAGE", + "type": "IMAGE", + "links": [ + 538 + ] + } + ], + "properties": { + "ue_properties": { + "widget_ue_connectable": {}, + "version": 
"7.7", + "input_ue_unconnectable": {} + }, + "cnr_id": "comfy-core", + "ver": "0.14.1", + "Node name for S&R": "VAEDecodeTiled", + "enableTabs": false, + "tabWidth": 65, + "tabXOffset": 10, + "hasSecondTab": false, + "secondTabText": "Send Back", + "secondTabOffset": 80, + "secondTabWidth": 65 + }, + "widgets_values": [ + 768, + 64, + 4096, + 4 + ] + }, + { + "id": 316, + "type": "CheckpointLoaderSimple", + "pos": [ + 1660, + 3660 + ], + "size": [ + 430, + 160 + ], + "flags": {}, + "order": 40, + "mode": 0, + "inputs": [ + { + "localized_name": "ckpt_name", + "name": "ckpt_name", + "type": "COMBO", + "widget": { + "name": "ckpt_name" + }, + "link": 601 + } + ], + "outputs": [ + { + "localized_name": "MODEL", + "name": "MODEL", + "type": "MODEL", + "links": [ + 520 + ] + }, + { + "localized_name": "CLIP", + "name": "CLIP", + "type": "CLIP", + "links": [] + }, + { + "localized_name": "VAE", + "name": "VAE", + "type": "VAE", + "links": [ + 556, + 557 + ] + } + ], + "properties": { + "ue_properties": { + "widget_ue_connectable": {}, + "version": "7.7", + "input_ue_unconnectable": {} + }, + "cnr_id": "comfy-core", + "ver": "0.3.56", + "Node name for S&R": "CheckpointLoaderSimple", + "enableTabs": false, + "tabWidth": 65, + "tabXOffset": 10, + "hasSecondTab": false, + "secondTabText": "Send Back", + "secondTabOffset": 80, + "secondTabWidth": 65, + "models": [ + { + "name": "ltx-2.3-22b-dev-fp8.safetensors", + "url": "https://huggingface.co/Lightricks/LTX-2.3-fp8/resolve/main/ltx-2.3-22b-dev-fp8.safetensors", + "directory": "checkpoints" + } + ] + }, + "widgets_values": [ + "ltx-2.3-22b-dev-fp8.safetensors" + ] + }, + { + "id": 317, + "type": "LTXAVTextEncoderLoader", + "pos": [ + 1660, + 4280 + ], + "size": [ + 430, + 170 + ], + "flags": {}, + "order": 41, + "mode": 0, + "showAdvanced": false, + "inputs": [ + { + "localized_name": "text_encoder", + "name": "text_encoder", + "type": "COMBO", + "widget": { + "name": "text_encoder" + }, + "link": 606 + }, + { + "localized_name": "ckpt_name", + "name": "ckpt_name", + "type": "COMBO", + "widget": { + "name": "ckpt_name" + }, + "link": 605 + }, + { + "localized_name": "device", + "name": "device", + "type": "COMBO", + "widget": { + "name": "device" + }, + "link": null + } + ], + "outputs": [ + { + "localized_name": "CLIP", + "name": "CLIP", + "type": "CLIP", + "links": [ + 615, + 627 + ] + } + ], + "properties": { + "ue_properties": { + "widget_ue_connectable": {}, + "version": "7.7", + "input_ue_unconnectable": {} + }, + "cnr_id": "comfy-core", + "ver": "0.7.0", + "Node name for S&R": "LTXAVTextEncoderLoader", + "enableTabs": false, + "tabWidth": 65, + "tabXOffset": 10, + "hasSecondTab": false, + "secondTabText": "Send Back", + "secondTabOffset": 80, + "secondTabWidth": 65, + "models": [ + { + "name": "ltx-2.3-22b-dev-fp8.safetensors", + "url": "https://huggingface.co/Lightricks/LTX-2.3-fp8/resolve/main/ltx-2.3-22b-dev-fp8.safetensors", + "directory": "checkpoints" + }, + { + "name": "gemma_3_12B_it_fp4_mixed.safetensors", + "url": "https://huggingface.co/Comfy-Org/ltx-2/resolve/main/split_files/text_encoders/gemma_3_12B_it_fp4_mixed.safetensors", + "directory": "text_encoders" + } + ] + }, + "widgets_values": [ + "gemma_3_12B_it_fp4_mixed.safetensors", + "ltx-2.3-22b-dev-fp8.safetensors", + "default" + ] + }, + { + "id": 318, + "type": "LTXVConcatAVLatent", + "pos": [ + 3860, + 4830 + ], + "size": [ + 240, + 100 + ], + "flags": {}, + "order": 42, + "mode": 0, + "inputs": [ + { + "localized_name": "video_latent", + "name": "video_latent", + "type": 
"LATENT", + "link": 497 + }, + { + "localized_name": "audio_latent", + "name": "audio_latent", + "type": "LATENT", + "link": 498 + } + ], + "outputs": [ + { + "localized_name": "latent", + "name": "latent", + "type": "LATENT", + "links": [ + 487 + ] + } + ], + "properties": { + "ue_properties": { + "widget_ue_connectable": {}, + "version": "7.7", + "input_ue_unconnectable": {} + }, + "cnr_id": "comfy-core", + "ver": "0.7.0", + "Node name for S&R": "LTXVConcatAVLatent", + "enableTabs": false, + "tabWidth": 65, + "tabXOffset": 10, + "hasSecondTab": false, + "secondTabText": "Send Back", + "secondTabOffset": 80, + "secondTabWidth": 65 + } + }, + { + "id": 319, + "type": "PrimitiveStringMultiline", + "pos": [ + 1190, + 3680 + ], + "size": [ + 370, + 350 + ], + "flags": {}, + "order": 43, + "mode": 0, + "inputs": [ + { + "localized_name": "value", + "name": "value", + "type": "STRING", + "widget": { + "name": "value" + }, + "link": 595 + } + ], + "outputs": [ + { + "localized_name": "STRING", + "name": "STRING", + "type": "STRING", + "links": [ + 625 + ] + } + ], + "title": "Prompt", + "properties": { + "ue_properties": { + "widget_ue_connectable": {}, + "version": "7.7", + "input_ue_unconnectable": {} + }, + "cnr_id": "comfy-core", + "ver": "0.16.3", + "Node name for S&R": "PrimitiveStringMultiline", + "enableTabs": false, + "tabWidth": 65, + "tabXOffset": 10, + "hasSecondTab": false, + "secondTabText": "Send Back", + "secondTabOffset": 80, + "secondTabWidth": 65 + }, + "widgets_values": [ + "" + ] + }, + { + "id": 323, + "type": "ComfyMathExpression", + "pos": [ + 1210, + 5040 + ], + "size": [ + 360, + 210 + ], + "flags": { + "collapsed": true + }, + "order": 44, + "mode": 0, + "inputs": [ + { + "label": "a", + "localized_name": "values.a", + "name": "values.a", + "type": "FLOAT,INT", + "link": 628 + }, + { + "label": "b", + "localized_name": "values.b", + "name": "values.b", + "shape": 7, + "type": "FLOAT,INT", + "link": 629 + }, + { + "label": "c", + "localized_name": "values.c", + "name": "values.c", + "shape": 7, + "type": "FLOAT,INT", + "link": null + }, + { + "localized_name": "expression", + "name": "expression", + "type": "STRING", + "widget": { + "name": "expression" + }, + "link": null + } + ], + "outputs": [ + { + "localized_name": "FLOAT", + "name": "FLOAT", + "type": "FLOAT", + "links": null + }, + { + "localized_name": "INT", + "name": "INT", + "type": "INT", + "links": [ + 630, + 631 + ] + } + ], + "title": "Math Expression (length)", + "properties": { + "ue_properties": { + "widget_ue_connectable": {}, + "input_ue_unconnectable": {} + }, + "cnr_id": "comfy-core", + "ver": "0.18.1", + "Node name for S&R": "ComfyMathExpression" + }, + "widgets_values": [ + "a * b + 1" + ] + } + ], + "groups": [ + { + "id": 1, + "title": "Model", + "bounding": [ + 1630, + 3550, + 480, + 1140 + ], + "color": "#3f789e", + "font_size": 24, + "flags": {} + }, + { + "id": 2, + "title": "Generate Low Resolution", + "bounding": [ + 3130, + 3550, + 1000, + 1140 + ], + "color": "#3f789e", + "font_size": 24, + "flags": {} + }, + { + "id": 3, + "title": "Prompt", + "bounding": [ + 2140, + 3550, + 960, + 1140 + ], + "color": "#3f789e", + "font_size": 24, + "flags": {} + }, + { + "id": 6, + "title": "Generate High Resolution", + "bounding": [ + 4670, + 3550, + 990, + 1130 + ], + "color": "#3f789e", + "font_size": 24, + "flags": {} + }, + { + "id": 7, + "title": "Lantent Upscale", + "bounding": [ + 4160, + 3550, + 480, + 1130 + ], + "color": "#3f789e", + "font_size": 24, + "flags": {} + }, + { + "id": 19, + 
"title": "Video Settings", + "bounding": [ + 1150, + 3550, + 460, + 1610 + ], + "color": "#3f789e", + "font_size": 24, + "flags": {} + }, + { + "id": 20, + "title": "Image Preprocess", + "bounding": [ + 1630, + 4720, + 830, + 440 + ], + "color": "#3f789e", + "font_size": 24, + "flags": {} + }, + { + "id": 21, + "title": "Empty Latent", + "bounding": [ + 2820, + 4720, + 1310, + 450 + ], + "color": "#3f789e", + "font_size": 24, + "flags": {} + }, + { + "id": 22, + "title": "Number conversion", + "bounding": [ + 2480, + 4720, + 310, + 440 + ], + "color": "#3f789e", + "font_size": 24, + "flags": {} + } + ], + "links": [ + { + "id": 512, + "origin_id": 288, + "origin_slot": 0, + "target_id": 278, + "target_slot": 0, + "type": "LATENT" + }, + { + "id": 513, + "origin_id": 307, + "origin_slot": 1, + "target_id": 278, + "target_slot": 1, + "type": "LATENT" + }, + { + "id": 478, + "origin_id": 285, + "origin_slot": 0, + "target_id": 282, + "target_slot": 0, + "type": "MODEL" + }, + { + "id": 479, + "origin_id": 284, + "origin_slot": 0, + "target_id": 282, + "target_slot": 1, + "type": "CONDITIONING" + }, + { + "id": 480, + "origin_id": 284, + "origin_slot": 1, + "target_id": 282, + "target_slot": 2, + "type": "CONDITIONING" + }, + { + "id": 541, + "origin_id": 285, + "origin_slot": 0, + "target_id": 314, + "target_slot": 0, + "type": "MODEL" + }, + { + "id": 518, + "origin_id": 304, + "origin_slot": 0, + "target_id": 314, + "target_slot": 1, + "type": "CONDITIONING" + }, + { + "id": 519, + "origin_id": 304, + "origin_slot": 1, + "target_id": 314, + "target_slot": 2, + "type": "CONDITIONING" + }, + { + "id": 483, + "origin_id": 277, + "origin_slot": 0, + "target_id": 283, + "target_slot": 0, + "type": "NOISE" + }, + { + "id": 484, + "origin_id": 314, + "origin_slot": 0, + "target_id": 283, + "target_slot": 1, + "type": "GUIDER" + }, + { + "id": 485, + "origin_id": 291, + "origin_slot": 0, + "target_id": 283, + "target_slot": 2, + "type": "SAMPLER" + }, + { + "id": 544, + "origin_id": 306, + "origin_slot": 0, + "target_id": 283, + "target_slot": 3, + "type": "SIGMAS" + }, + { + "id": 487, + "origin_id": 318, + "origin_slot": 0, + "target_id": 283, + "target_slot": 4, + "type": "LATENT" + }, + { + "id": 475, + "origin_id": 304, + "origin_slot": 0, + "target_id": 284, + "target_slot": 0, + "type": "CONDITIONING" + }, + { + "id": 476, + "origin_id": 304, + "origin_slot": 1, + "target_id": 284, + "target_slot": 1, + "type": "CONDITIONING" + }, + { + "id": 477, + "origin_id": 307, + "origin_slot": 0, + "target_id": 284, + "target_slot": 2, + "type": "LATENT" + }, + { + "id": 520, + "origin_id": 316, + "origin_slot": 0, + "target_id": 285, + "target_slot": 0, + "type": "MODEL" + }, + { + "id": 523, + "origin_id": 290, + "origin_slot": 0, + "target_id": 286, + "target_slot": 0, + "type": "IMAGE" + }, + { + "id": 547, + "origin_id": 307, + "origin_slot": 0, + "target_id": 287, + "target_slot": 0, + "type": "LATENT" + }, + { + "id": 545, + "origin_id": 311, + "origin_slot": 0, + "target_id": 287, + "target_slot": 1, + "type": "LATENT_UPSCALE_MODEL" + }, + { + "id": 554, + "origin_id": 293, + "origin_slot": 0, + "target_id": 287, + "target_slot": 2, + "type": "VAE" + }, + { + "id": 552, + "origin_id": 293, + "origin_slot": 0, + "target_id": 288, + "target_slot": 0, + "type": "VAE" + }, + { + "id": 515, + "origin_id": 289, + "origin_slot": 0, + "target_id": 288, + "target_slot": 1, + "type": "IMAGE" + }, + { + "id": 548, + "origin_id": 287, + "origin_slot": 0, + "target_id": 288, + "target_slot": 2, + "type": 
"LATENT" + }, + { + "id": 543, + "origin_id": 302, + "origin_slot": 0, + "target_id": 288, + "target_slot": 4, + "type": "BOOLEAN" + }, + { + "id": 505, + "origin_id": 286, + "origin_slot": 0, + "target_id": 289, + "target_slot": 0, + "type": "IMAGE" + }, + { + "id": 558, + "origin_id": 312, + "origin_slot": 0, + "target_id": 290, + "target_slot": 2, + "type": "INT" + }, + { + "id": 559, + "origin_id": 299, + "origin_slot": 0, + "target_id": 290, + "target_slot": 3, + "type": "INT" + }, + { + "id": 560, + "origin_id": 312, + "origin_slot": 0, + "target_id": 292, + "target_slot": 0, + "type": "INT" + }, + { + "id": 557, + "origin_id": 316, + "origin_slot": 2, + "target_id": 293, + "target_slot": 0, + "type": "VAE" + }, + { + "id": 562, + "origin_id": 299, + "origin_slot": 0, + "target_id": 294, + "target_slot": 0, + "type": "INT" + }, + { + "id": 561, + "origin_id": 292, + "origin_slot": 1, + "target_id": 295, + "target_slot": 0, + "type": "INT" + }, + { + "id": 563, + "origin_id": 294, + "origin_slot": 1, + "target_id": 295, + "target_slot": 1, + "type": "INT" + }, + { + "id": 556, + "origin_id": 316, + "origin_slot": 2, + "target_id": 296, + "target_slot": 0, + "type": "VAE" + }, + { + "id": 510, + "origin_id": 289, + "origin_slot": 0, + "target_id": 296, + "target_slot": 1, + "type": "IMAGE" + }, + { + "id": 511, + "origin_id": 295, + "origin_slot": 0, + "target_id": 296, + "target_slot": 2, + "type": "LATENT" + }, + { + "id": 542, + "origin_id": 302, + "origin_slot": 0, + "target_id": 296, + "target_slot": 4, + "type": "BOOLEAN" + }, + { + "id": 495, + "origin_id": 309, + "origin_slot": 1, + "target_id": 297, + "target_slot": 0, + "type": "LATENT" + }, + { + "id": 496, + "origin_id": 279, + "origin_slot": 0, + "target_id": 297, + "target_slot": 1, + "type": "VAE" + }, + { + "id": 564, + "origin_id": 300, + "origin_slot": 0, + "target_id": 298, + "target_slot": 0, + "type": "INT" + }, + { + "id": 526, + "origin_id": 303, + "origin_slot": 0, + "target_id": 304, + "target_slot": 0, + "type": "CONDITIONING" + }, + { + "id": 527, + "origin_id": 313, + "origin_slot": 0, + "target_id": 304, + "target_slot": 1, + "type": "CONDITIONING" + }, + { + "id": 566, + "origin_id": 298, + "origin_slot": 0, + "target_id": 304, + "target_slot": 2, + "type": "FLOAT" + }, + { + "id": 497, + "origin_id": 296, + "origin_slot": 0, + "target_id": 318, + "target_slot": 0, + "type": "LATENT" + }, + { + "id": 498, + "origin_id": 305, + "origin_slot": 0, + "target_id": 318, + "target_slot": 1, + "type": "LATENT" + }, + { + "id": 481, + "origin_id": 279, + "origin_slot": 0, + "target_id": 305, + "target_slot": 0, + "type": "VAE" + }, + { + "id": 565, + "origin_id": 298, + "origin_slot": 1, + "target_id": 305, + "target_slot": 2, + "type": "INT" + }, + { + "id": 488, + "origin_id": 283, + "origin_slot": 0, + "target_id": 307, + "target_slot": 0, + "type": "LATENT" + }, + { + "id": 490, + "origin_id": 276, + "origin_slot": 0, + "target_id": 308, + "target_slot": 0, + "type": "NOISE" + }, + { + "id": 491, + "origin_id": 282, + "origin_slot": 0, + "target_id": 308, + "target_slot": 1, + "type": "GUIDER" + }, + { + "id": 492, + "origin_id": 280, + "origin_slot": 0, + "target_id": 308, + "target_slot": 2, + "type": "SAMPLER" + }, + { + "id": 493, + "origin_id": 281, + "origin_slot": 0, + "target_id": 308, + "target_slot": 3, + "type": "SIGMAS" + }, + { + "id": 494, + "origin_id": 278, + "origin_slot": 0, + "target_id": 308, + "target_slot": 4, + "type": "LATENT" + }, + { + "id": 578, + "origin_id": 308, + "origin_slot": 0, 
+ "target_id": 309, + "target_slot": 0, + "type": "LATENT" + }, + { + "id": 539, + "origin_id": 309, + "origin_slot": 0, + "target_id": 315, + "target_slot": 0, + "type": "LATENT" + }, + { + "id": 553, + "origin_id": 293, + "origin_slot": 0, + "target_id": 315, + "target_slot": 1, + "type": "VAE" + }, + { + "id": 538, + "origin_id": 315, + "origin_slot": 0, + "target_id": 310, + "target_slot": 0, + "type": "IMAGE" + }, + { + "id": 534, + "origin_id": 297, + "origin_slot": 0, + "target_id": 310, + "target_slot": 1, + "type": "AUDIO" + }, + { + "id": 591, + "origin_id": 298, + "origin_slot": 0, + "target_id": 310, + "target_slot": 2, + "type": "FLOAT" + }, + { + "id": 535, + "origin_id": -10, + "origin_slot": 0, + "target_id": 290, + "target_slot": 0, + "type": "IMAGE,MASK" + }, + { + "id": 536, + "origin_id": 310, + "origin_slot": 0, + "target_id": -20, + "target_slot": 0, + "type": "VIDEO" + }, + { + "id": 595, + "origin_id": -10, + "origin_slot": 1, + "target_id": 319, + "target_slot": 0, + "type": "STRING" + }, + { + "id": 597, + "origin_id": -10, + "origin_slot": 2, + "target_id": 312, + "target_slot": 0, + "type": "INT" + }, + { + "id": 598, + "origin_id": -10, + "origin_slot": 3, + "target_id": 299, + "target_slot": 0, + "type": "INT" + }, + { + "id": 599, + "origin_id": -10, + "origin_slot": 4, + "target_id": 301, + "target_slot": 0, + "type": "INT" + }, + { + "id": 601, + "origin_id": -10, + "origin_slot": 5, + "target_id": 316, + "target_slot": 0, + "type": "COMBO" + }, + { + "id": 602, + "origin_id": -10, + "origin_slot": 6, + "target_id": 285, + "target_slot": 1, + "type": "COMBO" + }, + { + "id": 604, + "origin_id": -10, + "origin_slot": 5, + "target_id": 279, + "target_slot": 0, + "type": "COMBO" + }, + { + "id": 605, + "origin_id": -10, + "origin_slot": 5, + "target_id": 317, + "target_slot": 1, + "type": "COMBO" + }, + { + "id": 606, + "origin_id": -10, + "origin_slot": 7, + "target_id": 317, + "target_slot": 0, + "type": "COMBO" + }, + { + "id": 607, + "origin_id": -10, + "origin_slot": 8, + "target_id": 311, + "target_slot": 0, + "type": "COMBO" + }, + { + "id": 615, + "origin_id": 317, + "origin_slot": 0, + "target_id": 303, + "target_slot": 0, + "type": "CLIP" + }, + { + "id": 624, + "origin_id": -10, + "origin_slot": 9, + "target_id": 300, + "target_slot": 0, + "type": "INT" + }, + { + "id": 625, + "origin_id": 319, + "origin_slot": 0, + "target_id": 303, + "target_slot": 1, + "type": "STRING" + }, + { + "id": 627, + "origin_id": 317, + "origin_slot": 0, + "target_id": 313, + "target_slot": 0, + "type": "CLIP" + }, + { + "id": 628, + "origin_id": 301, + "origin_slot": 0, + "target_id": 323, + "target_slot": 0, + "type": "INT" + }, + { + "id": 629, + "origin_id": 300, + "origin_slot": 0, + "target_id": 323, + "target_slot": 1, + "type": "INT" + }, + { + "id": 630, + "origin_id": 323, + "origin_slot": 1, + "target_id": 305, + "target_slot": 1, + "type": "INT" + }, + { + "id": 631, + "origin_id": 323, + "origin_slot": 1, + "target_id": 295, + "target_slot": 2, + "type": "INT" + } + ], + "extra": { + "workflowRendererVersion": "Vue-corrected" + }, + "category": "Video generation and editing/Image to video" + } + ] + }, + "extra": { + "ue_links": [] + } +} \ No newline at end of file diff --git a/blueprints/Pose to Video (LTX 2.0).json b/blueprints/Pose to Video (LTX 2.0).json index ae369941c..580900bc0 100644 --- a/blueprints/Pose to Video (LTX 2.0).json +++ b/blueprints/Pose to Video (LTX 2.0).json @@ -1,28 +1,26 @@ { - "id": "01cd475b-52df-43bf-aafa-484a5976d2d2", 
"revision": 0, - "last_node_id": 160, - "last_link_id": 410, + "last_node_id": 143, + "last_link_id": 0, "nodes": [ { - "id": 1, - "type": "f0e58a6b-7246-4103-9fec-73b423634b1f", + "id": 143, + "type": "68857357-cbc2-4c3a-a786-c3a58d43f9b1", "pos": [ - 210, - 3830 + 290, + 3960 ], "size": [ - 420, + 400, 500 ], "flags": { "collapsed": false }, - "order": 0, + "order": 13, "mode": 0, "inputs": [ { - "label": "prompt", "name": "text", "type": "STRING", "widget": { @@ -31,33 +29,32 @@ "link": null }, { - "label": "first_frame_strength", - "name": "strength", - "type": "FLOAT", - "widget": { - "name": "strength" - }, - "link": null - }, - { - "label": "disable_first_frame", - "name": "bypass", - "type": "BOOLEAN", - "widget": { - "name": "bypass" - }, - "link": null - }, - { - "label": "first frame", + "label": "control_images", "name": "image", "type": "IMAGE", "link": null }, { - "label": "control image", - "name": "input", - "type": "IMAGE,MASK", + "label": "first_frame", + "name": "image_1", + "type": "IMAGE", + "link": null + }, + { + "label": "image_strength", + "name": "strength_1", + "type": "FLOAT", + "widget": { + "name": "strength_1" + }, + "link": null + }, + { + "name": "noise_seed", + "type": "INT", + "widget": { + "name": "noise_seed" + }, "link": null }, { @@ -69,6 +66,7 @@ "link": null }, { + "label": "control_lora", "name": "lora_name", "type": "COMBO", "widget": { @@ -77,7 +75,15 @@ "link": null }, { - "label": "distll_lora", + "name": "text_encoder", + "type": "COMBO", + "widget": { + "name": "text_encoder" + }, + "link": null + }, + { + "label": "distill_lora", "name": "lora_name_1", "type": "COMBO", "widget": { @@ -93,30 +99,6 @@ "name": "model_name" }, "link": null - }, - { - "name": "resize_type.width", - "type": "INT", - "widget": { - "name": "resize_type.width" - }, - "link": null - }, - { - "name": "resize_type.height", - "type": "INT", - "widget": { - "name": "resize_type.height" - }, - "link": null - }, - { - "name": "length", - "type": "INT", - "widget": { - "name": "length" - }, - "link": null } ], "outputs": [ @@ -130,56 +112,49 @@ "properties": { "proxyWidgets": [ [ - "-1", + "124", "text" ], [ - "-1", - "resize_type.width" - ], - [ - "-1", - "resize_type.height" - ], - [ - "-1", - "length" - ], - [ - "-1", + "149", "strength" ], - [ - "-1", - "bypass" - ], [ "126", "noise_seed" ], [ - "126", - "control_after_generate" - ], - [ - "-1", + "103", "ckpt_name" ], [ - "-1", + "134", "lora_name" ], [ - "-1", - "model_name" + "97", + "text_encoder" ], [ - "-1", - "lora_name_1" + "105", + "lora_name" + ], + [ + "100", + "model_name" ] ], "cnr_id": "comfy-core", "ver": "0.7.0", + "ue_properties": { + "widget_ue_connectable": { + "lora_name": true, + "strength": true, + "bypass": true + }, + "version": "7.7", + "input_ue_unconnectable": {} + }, "enableTabs": false, "tabWidth": 65, "tabXOffset": 10, @@ -188,52 +163,40 @@ "secondTabOffset": 80, "secondTabWidth": 65 }, - "widgets_values": [ - "", - 1280, - 720, - 97, - 1, - false, - null, - null, - "ltx-2-19b-dev-fp8.safetensors", - "ltx-2-19b-ic-lora-pose-control.safetensors", - "ltx-2-spatial-upscaler-x2-1.0.safetensors", - "ltx-2-19b-distilled-lora-384.safetensors" - ] + "widgets_values": [], + "title": "Pose to Video (LTX 2.0)" } ], "links": [], - "groups": [], + "version": 0.4, "definitions": { "subgraphs": [ { - "id": "f0e58a6b-7246-4103-9fec-73b423634b1f", + "id": "68857357-cbc2-4c3a-a786-c3a58d43f9b1", "version": 1, "state": { - "lastGroupId": 11, - "lastNodeId": 160, - "lastLinkId": 410, + "lastGroupId": 14, + 
"lastNodeId": 701, + "lastLinkId": 1774, "lastRerouteId": 0 }, "revision": 0, "config": {}, - "name": "local-Pose to Video (LTX 2.0)", + "name": "Pose to Video (LTX 2.0)", "inputNode": { "id": -10, "bounding": [ - -2220, - 4180, - 153.3203125, - 280 + -2050, + 4100, + 127.029296875, + 240 ] }, "outputNode": { "id": -20, "bounding": [ - 1750.2777777777776, - 4091.1111111111113, + 1750, + 4090, 120, 60 ] @@ -246,154 +209,128 @@ "linkIds": [ 345 ], - "label": "prompt", "pos": [ - -2086.6796875, + -1942.970703125, + 4120 + ] + }, + { + "id": "35a07084-3ecf-482a-a330-b40278770ca3", + "name": "image", + "type": "IMAGE", + "linkIds": [ + 348, + 380 + ], + "label": "control_images", + "pos": [ + -1942.970703125, + 4140 + ] + }, + { + "id": "bea20802-d654-4287-a8ef-0f834314bcf9", + "name": "image_1", + "type": "IMAGE", + "linkIds": [ + 364, + 379 + ], + "label": "first_frame", + "pos": [ + -1942.970703125, + 4160 + ] + }, + { + "id": "b9b4151d-df88-40c0-a2bd-6e35b94557fe", + "name": "strength_1", + "type": "FLOAT", + "linkIds": [ + 1758, + 1759 + ], + "label": "image_strength", + "pos": [ + -1942.970703125, + 4180 + ] + }, + { + "id": "b51f6a12-9152-4526-b115-443cfd23003f", + "name": "noise_seed", + "type": "INT", + "linkIds": [ + 1767 + ], + "pos": [ + -1942.970703125, 4200 ] }, { - "id": "59430efe-1090-4e36-8afe-b21ce7f4268b", - "name": "strength", - "type": "FLOAT", + "id": "47248f12-f174-4e35-854c-fa5eebea2903", + "name": "ckpt_name", + "type": "COMBO", "linkIds": [ - 370, - 371 + 1768, + 1770, + 1771 ], - "label": "first_frame_strength", "pos": [ - -2086.6796875, + -1942.970703125, 4220 ] }, { - "id": "6145a9b9-68ed-4956-89f7-7a5ebdd5c99e", - "name": "bypass", - "type": "BOOLEAN", + "id": "6feb34cf-7972-4d3a-91fc-11070a84dc5f", + "name": "lora_name", + "type": "COMBO", "linkIds": [ - 363, - 368 + 1769 ], - "label": "disable_first_frame", + "label": "control_lora", "pos": [ - -2086.6796875, + -1942.970703125, 4240 ] }, { - "id": "f7aa8c12-bdba-4bbd-84cf-b49cfc32a1dd", - "name": "image", - "type": "IMAGE", + "id": "6b423a3e-6c0e-445d-93c0-2cc3945400d1", + "name": "text_encoder", + "type": "COMBO", "linkIds": [ - 398, - 399 + 1772 ], - "label": "first frame", "pos": [ - -2086.6796875, + -1942.970703125, 4260 ] }, { - "id": "da40a4c0-cd19-46c6-8eb3-62d0026fbe85", - "name": "input", - "type": "IMAGE,MASK", + "id": "ffd38c52-cc57-4e68-b140-94e7b03499b1", + "name": "lora_name_1", + "type": "COMBO", "linkIds": [ - 400 + 1773 ], - "label": "control image", + "label": "distill_lora", "pos": [ - -2086.6796875, + -1942.970703125, 4280 ] }, { - "id": "8005344b-99d6-4829-a619-c4e8ef640eb9", - "name": "ckpt_name", - "type": "COMBO", - "linkIds": [ - 401, - 402, - 403 - ], - "pos": [ - -2086.6796875, - 4300 - ] - }, - { - "id": "25e7c4e8-850c-4f37-bc14-e3f4b5f228c0", - "name": "lora_name", - "type": "COMBO", - "linkIds": [ - 404, - 405 - ], - "pos": [ - -2086.6796875, - 4320 - ] - }, - { - "id": "f16a18dd-947e-400a-8889-02cf998f760a", - "name": "lora_name_1", - "type": "COMBO", - "linkIds": [ - 406 - ], - "label": "distll_lora", - "pos": [ - -2086.6796875, - 4340 - ] - }, - { - "id": "1abf156c-4c85-4ee5-8671-62df3177d835", + "id": "6d8b9605-acf0-4dd7-8d45-f824c2fd5895", "name": "model_name", "type": "COMBO", "linkIds": [ - 407 + 1774 ], "label": "upscale_model", "pos": [ - -2086.6796875, - 4360 - ] - }, - { - "id": "203402cf-4253-4daf-bf78-5def9496e0af", - "name": "resize_type.width", - "type": "INT", - "linkIds": [ - 408 - ], - "pos": [ - -2086.6796875, - 4380 - ] - }, - { - "id": 
"e6d8ac4a-34d4-46c6-bcb2-4e66a696438c", - "name": "resize_type.height", - "type": "INT", - "linkIds": [ - 409 - ], - "pos": [ - -2086.6796875, - 4400 - ] - }, - { - "id": "6aa6cf2c-bc4f-4f8b-be62-aa15793375dc", - "name": "length", - "type": "INT", - "linkIds": [ - 410 - ], - "pos": [ - -2086.6796875, - 4420 + -1942.970703125, + 4300 ] } ], @@ -407,8 +344,8 @@ ], "localized_name": "VIDEO", "pos": [ - 1770.2777777777776, - 4111.111111111111 + 1770, + 4110 ] } ], @@ -418,15 +355,15 @@ "id": 93, "type": "CFGGuider", "pos": [ - -697.721823660531, - 3671.1105325465196 + -690, + 3710 ], "size": [ - 269.97395833333337, - 98 + 270, + 160 ], "flags": {}, - "order": 16, + "order": 7, "mode": 0, "inputs": [ { @@ -470,6 +407,11 @@ "properties": { "cnr_id": "comfy-core", "ver": "0.3.64", + "ue_properties": { + "widget_ue_connectable": {}, + "version": "7.7", + "input_ue_unconnectable": {} + }, "Node name for S&R": "CFGGuider", "enableTabs": false, "tabWidth": 65, @@ -487,12 +429,12 @@ "id": 94, "type": "KSamplerSelect", "pos": [ - -697.721823660531, - 3841.1107362825187 + -690, + 3940 ], "size": [ - 269.97395833333337, - 58 + 270, + 110 ], "flags": {}, "order": 0, @@ -521,6 +463,11 @@ "properties": { "cnr_id": "comfy-core", "ver": "0.3.56", + "ue_properties": { + "widget_ue_connectable": {}, + "version": "7.7", + "input_ue_unconnectable": {} + }, "Node name for S&R": "KSamplerSelect", "enableTabs": false, "tabWidth": 65, @@ -538,12 +485,12 @@ "id": 99, "type": "ManualSigmas", "pos": [ - 410.27824286284044, - 3851.110970278795 + 450, + 3910 ], "size": [ - 269.97395833333337, - 58 + 270, + 110 ], "flags": {}, "order": 1, @@ -572,6 +519,11 @@ "properties": { "cnr_id": "comfy-core", "ver": "0.5.1", + "ue_properties": { + "widget_ue_connectable": {}, + "version": "7.7", + "input_ue_unconnectable": {} + }, "Node name for S&R": "ManualSigmas", "enableTabs": false, "tabWidth": 65, @@ -589,15 +541,15 @@ "id": 100, "type": "LatentUpscaleModelLoader", "pos": [ - -69.72208571196083, - 3701.1104657166875 + -70, + 3790 ], "size": [ - 389.97395833333337, - 58 + 390, + 110 ], "flags": {}, - "order": 2, + "order": 11, "mode": 0, "inputs": [ { @@ -607,7 +559,7 @@ "widget": { "name": "model_name" }, - "link": 407 + "link": 1774 } ], "outputs": [ @@ -623,21 +575,26 @@ "properties": { "cnr_id": "comfy-core", "ver": "0.7.0", + "ue_properties": { + "widget_ue_connectable": {}, + "version": "7.7", + "input_ue_unconnectable": {} + }, "Node name for S&R": "LatentUpscaleModelLoader", - "models": [ - { - "name": "ltx-2-spatial-upscaler-x2-1.0.safetensors", - "url": "https://huggingface.co/Lightricks/LTX-2/resolve/main/ltx-2-spatial-upscaler-x2-1.0.safetensors", - "directory": "latent_upscale_models" - } - ], "enableTabs": false, "tabWidth": 65, "tabXOffset": 10, "hasSecondTab": false, "secondTabText": "Send Back", "secondTabOffset": 80, - "secondTabWidth": 65 + "secondTabWidth": 65, + "models": [ + { + "name": "ltx-2-spatial-upscaler-x2-1.0.safetensors", + "url": "https://huggingface.co/Lightricks/LTX-2/resolve/main/ltx-2-spatial-upscaler-x2-1.0.safetensors", + "directory": "latent_upscale_models" + } + ] }, "widgets_values": [ "ltx-2-spatial-upscaler-x2-1.0.safetensors" @@ -647,15 +604,15 @@ "id": 101, "type": "LTXVConcatAVLatent", "pos": [ - 410.27824286284044, - 4101.110949206838 + 450, + 4220 ], "size": [ - 269.97395833333337, - 46 + 270, + 120 ], "flags": {}, - "order": 18, + "order": 12, "mode": 0, "inputs": [ { @@ -684,6 +641,11 @@ "properties": { "cnr_id": "comfy-core", "ver": "0.5.1", + "ue_properties": { + 
"widget_ue_connectable": {}, + "version": "7.7", + "input_ue_unconnectable": {} + }, "Node name for S&R": "LTXVConcatAVLatent", "enableTabs": false, "tabWidth": 65, @@ -692,22 +654,21 @@ "secondTabText": "Send Back", "secondTabOffset": 80, "secondTabWidth": 65 - }, - "widgets_values": [] + } }, { "id": 108, "type": "CFGGuider", "pos": [ - 410.27824286284044, - 3701.1104657166875 + 450, + 3720 ], "size": [ - 269.97395833333337, - 98 + 270, + 160 ], "flags": {}, - "order": 22, + "order": 18, "mode": 0, "inputs": [ { @@ -751,6 +712,11 @@ "properties": { "cnr_id": "comfy-core", "ver": "0.3.71", + "ue_properties": { + "widget_ue_connectable": {}, + "version": "7.7", + "input_ue_unconnectable": {} + }, "Node name for S&R": "CFGGuider", "enableTabs": false, "tabWidth": 65, @@ -764,19 +730,101 @@ 1 ] }, + { + "id": 111, + "type": "LTXVEmptyLatentAudio", + "pos": [ + -1100, + 4940 + ], + "size": [ + 270, + 170 + ], + "flags": {}, + "order": 20, + "mode": 0, + "inputs": [ + { + "localized_name": "audio_vae", + "name": "audio_vae", + "type": "VAE", + "link": 285 + }, + { + "localized_name": "frames_number", + "name": "frames_number", + "type": "INT", + "widget": { + "name": "frames_number" + }, + "link": 329 + }, + { + "localized_name": "frame_rate", + "name": "frame_rate", + "type": "INT", + "widget": { + "name": "frame_rate" + }, + "link": 354 + }, + { + "localized_name": "batch_size", + "name": "batch_size", + "type": "INT", + "widget": { + "name": "batch_size" + }, + "link": null + } + ], + "outputs": [ + { + "localized_name": "Latent", + "name": "Latent", + "type": "LATENT", + "links": [ + 300 + ] + } + ], + "properties": { + "cnr_id": "comfy-core", + "ver": "0.3.68", + "ue_properties": { + "widget_ue_connectable": {}, + "version": "7.7", + "input_ue_unconnectable": {} + }, + "Node name for S&R": "LTXVEmptyLatentAudio", + "enableTabs": false, + "tabWidth": 65, + "tabXOffset": 10, + "hasSecondTab": false, + "secondTabText": "Send Back", + "secondTabOffset": 80, + "secondTabWidth": 65 + }, + "widgets_values": [ + 97, + 25, + 1 + ] + }, { "id": 123, "type": "SamplerCustomAdvanced", "pos": [ - -387.72197839215096, - 3521.1103425011374 + -380, + 3530 ], "size": [ - 213.09895833333334, - 106 + 230, + 170 ], "flags": {}, - "order": 31, + "order": 29, "mode": 0, "inputs": [ { @@ -829,6 +877,11 @@ "properties": { "cnr_id": "comfy-core", "ver": "0.3.60", + "ue_properties": { + "widget_ue_connectable": {}, + "version": "7.7", + "input_ue_unconnectable": {} + }, "Node name for S&R": "SamplerCustomAdvanced", "enableTabs": false, "tabWidth": 65, @@ -837,22 +890,21 @@ "secondTabText": "Send Back", "secondTabOffset": 80, "secondTabWidth": 65 - }, - "widgets_values": [] + } }, { "id": 114, "type": "LTXVConditioning", "pos": [ - -1133.7215420073496, - 4141.110347554622 + -1130, + 4140 ], "size": [ - 269.97395833333337, - 78 + 270, + 130 ], "flags": {}, - "order": 27, + "order": 23, "mode": 0, "inputs": [ { @@ -898,6 +950,11 @@ "properties": { "cnr_id": "comfy-core", "ver": "0.3.56", + "ue_properties": { + "widget_ue_connectable": {}, + "version": "7.7", + "input_ue_unconnectable": {} + }, "Node name for S&R": "LTXVConditioning", "enableTabs": false, "tabWidth": 65, @@ -915,15 +972,15 @@ "id": 119, "type": "CLIPTextEncode", "pos": [ - -1163.7218246405453, - 3881.1109034489627 + -1160, + 3880 ], "size": [ 400, - 88 + 200 ], "flags": {}, - "order": 12, + "order": 27, "mode": 0, "inputs": [ { @@ -955,6 +1012,11 @@ "properties": { "cnr_id": "comfy-core", "ver": "0.3.56", + "ue_properties": { + "widget_ue_connectable": 
{}, + "version": "7.7", + "input_ue_unconnectable": {} + }, "Node name for S&R": "CLIPTextEncode", "enableTabs": false, "tabWidth": 65, @@ -974,15 +1036,15 @@ "id": 116, "type": "LTXVConcatAVLatent", "pos": [ - -519.7217122979332, - 4701.110031965835 + -520, + 4830 ], "size": [ - 187.5, - 46 + 230, + 100 ], "flags": {}, - "order": 29, + "order": 25, "mode": 0, "inputs": [ { @@ -1012,6 +1074,11 @@ "properties": { "cnr_id": "comfy-core", "ver": "0.7.0", + "ue_properties": { + "widget_ue_connectable": {}, + "version": "7.7", + "input_ue_unconnectable": {} + }, "Node name for S&R": "LTXVConcatAVLatent", "enableTabs": false, "tabWidth": 65, @@ -1020,22 +1087,21 @@ "secondTabText": "Send Back", "secondTabOffset": 80, "secondTabWidth": 65 - }, - "widgets_values": [] + } }, { "id": 122, "type": "LTXVSeparateAVLatent", "pos": [ - -393.72183921949465, - 3801.1107787938904 + -380, + 3810 ], "size": [ - 239.97395833333334, - 46 + 240, + 100 ], "flags": {}, - "order": 30, + "order": 28, "mode": 0, "inputs": [ { @@ -1066,6 +1132,11 @@ "properties": { "cnr_id": "comfy-core", "ver": "0.5.1", + "ue_properties": { + "widget_ue_connectable": {}, + "version": "7.7", + "input_ue_unconnectable": {} + }, "Node name for S&R": "LTXVSeparateAVLatent", "enableTabs": false, "tabWidth": 65, @@ -1074,22 +1145,21 @@ "secondTabText": "Send Back", "secondTabOffset": 80, "secondTabWidth": 65 - }, - "widgets_values": [] + } }, { "id": 124, "type": "CLIPTextEncode", "pos": [ - -1174.7214530029996, - 3515.1112854387566 + -1170, + 3510 ], "size": [ - 409.97395833333337, - 88 + 410, + 320 ], "flags": {}, - "order": 32, + "order": 30, "mode": 0, "inputs": [ { @@ -1121,6 +1191,11 @@ "properties": { "cnr_id": "comfy-core", "ver": "0.3.56", + "ue_properties": { + "widget_ue_connectable": {}, + "version": "7.7", + "input_ue_unconnectable": {} + }, "Node name for S&R": "CLIPTextEncode", "enableTabs": false, "tabWidth": 65, @@ -1140,15 +1215,15 @@ "id": 98, "type": "KSamplerSelect", "pos": [ - 410.27824286284044, - 3981.1101681370833 + 450, + 4070 ], "size": [ - 269.97395833333337, - 58 + 270, + 110 ], "flags": {}, - "order": 3, + "order": 2, "mode": 0, "inputs": [ { @@ -1174,6 +1249,11 @@ "properties": { "cnr_id": "comfy-core", "ver": "0.3.75", + "ue_properties": { + "widget_ue_connectable": {}, + "version": "7.7", + "input_ue_unconnectable": {} + }, "Node name for S&R": "KSamplerSelect", "enableTabs": false, "tabWidth": 65, @@ -1191,12 +1271,12 @@ "id": 105, "type": "LoraLoaderModelOnly", "pos": [ - -69.72208571196083, - 3571.110499039739 + -70, + 3570 ], "size": [ - 389.97395833333337, - 82 + 390, + 140 ], "flags": {}, "order": 15, @@ -1215,7 +1295,7 @@ "widget": { "name": "lora_name" }, - "link": 406 + "link": 1773 }, { "localized_name": "strength_model", @@ -1240,21 +1320,26 @@ "properties": { "cnr_id": "comfy-core", "ver": "0.3.75", + "ue_properties": { + "widget_ue_connectable": {}, + "version": "7.7", + "input_ue_unconnectable": {} + }, "Node name for S&R": "LoraLoaderModelOnly", - "models": [ - { - "name": "ltx-2-19b-distilled-lora-384.safetensors", - "url": "https://huggingface.co/Lightricks/LTX-2/resolve/main/ltx-2-19b-distilled-lora-384.safetensors", - "directory": "loras" - } - ], "enableTabs": false, "tabWidth": 65, "tabXOffset": 10, "hasSecondTab": false, "secondTabText": "Send Back", "secondTabOffset": 80, - "secondTabWidth": 65 + "secondTabWidth": 65, + "models": [ + { + "name": "ltx-2-19b-distilled-lora-384.safetensors", + "url": 
"https://huggingface.co/Lightricks/LTX-2/resolve/main/ltx-2-19b-distilled-lora-384.safetensors", + "directory": "loras" + } + ] }, "widgets_values": [ "ltx-2-19b-distilled-lora-384.safetensors", @@ -1265,15 +1350,15 @@ "id": 95, "type": "LTXVScheduler", "pos": [ - -699.7218704597861, - 3981.1101681370833 + -690, + 4130 ], "size": [ - 269.97395833333337, - 154 + 270, + 170 ], "flags": {}, - "order": 17, + "order": 8, "mode": 0, "inputs": [ { @@ -1342,6 +1427,11 @@ "properties": { "cnr_id": "comfy-core", "ver": "0.3.56", + "ue_properties": { + "widget_ue_connectable": {}, + "version": "7.7", + "input_ue_unconnectable": {} + }, "Node name for S&R": "LTXVScheduler", "enableTabs": false, "tabWidth": 65, @@ -1363,15 +1453,15 @@ "id": 126, "type": "RandomNoise", "pos": [ - -697.721823660531, - 3521.1103425011374 + -690, + 3520 ], "size": [ - 269.97395833333337, - 82 + 270, + 110 ], "flags": {}, - "order": 4, + "order": 31, "mode": 0, "inputs": [ { @@ -1381,7 +1471,7 @@ "widget": { "name": "noise_seed" }, - "link": null + "link": 1767 } ], "outputs": [ @@ -1397,6 +1487,11 @@ "properties": { "cnr_id": "comfy-core", "ver": "0.3.56", + "ue_properties": { + "widget_ue_connectable": {}, + "version": "7.7", + "input_ue_unconnectable": {} + }, "Node name for S&R": "RandomNoise", "enableTabs": false, "tabWidth": 65, @@ -1408,22 +1503,22 @@ }, "widgets_values": [ 0, - "randomize" + "fixed" ] }, { "id": 107, "type": "SamplerCustomAdvanced", "pos": [ - 710.2782734905775, - 3571.110499039739 + 730, + 3570 ], "size": [ - 212.36979166666669, - 106 + 230, + 170 ], "flags": {}, - "order": 21, + "order": 17, "mode": 0, "inputs": [ { @@ -1476,6 +1571,11 @@ "properties": { "cnr_id": "comfy-core", "ver": "0.3.75", + "ue_properties": { + "widget_ue_connectable": {}, + "version": "7.7", + "input_ue_unconnectable": {} + }, "Node name for S&R": "SamplerCustomAdvanced", "enableTabs": false, "tabWidth": 65, @@ -1484,22 +1584,21 @@ "secondTabText": "Send Back", "secondTabOffset": 80, "secondTabWidth": 65 - }, - "widgets_values": [] + } }, { - "id": 143, + "id": 187, "type": "RandomNoise", "pos": [ - 410.27824286284044, - 3571.110499039739 + 450, + 3570 ], "size": [ - 269.97395833333337, - 82 + 270, + 110 ], "flags": {}, - "order": 5, + "order": 3, "mode": 0, "inputs": [ { @@ -1525,6 +1624,11 @@ "properties": { "cnr_id": "comfy-core", "ver": "0.3.56", + "ue_properties": { + "widget_ue_connectable": {}, + "version": "7.7", + "input_ue_unconnectable": {} + }, "Node name for S&R": "RandomNoise", "enableTabs": false, "tabWidth": 65, @@ -1543,12 +1647,12 @@ "id": 139, "type": "LTXVAudioVAEDecode", "pos": [ - 1130.2783163694094, - 3841.1107362825187 + 1130, + 3840 ], "size": [ - 239.97395833333334, - 46 + 240, + 100 ], "flags": {}, "order": 35, @@ -1565,7 +1669,7 @@ "localized_name": "audio_vae", "name": "audio_vae", "type": "VAE", - "link": 383 + "link": 340 } ], "outputs": [ @@ -1581,6 +1685,11 @@ "properties": { "cnr_id": "comfy-core", "ver": "0.7.0", + "ue_properties": { + "widget_ue_connectable": {}, + "version": "7.7", + "input_ue_unconnectable": {} + }, "Node name for S&R": "LTXVAudioVAEDecode", "enableTabs": false, "tabWidth": 65, @@ -1589,22 +1698,21 @@ "secondTabText": "Send Back", "secondTabOffset": 80, "secondTabWidth": 65 - }, - "widgets_values": [] + } }, { "id": 106, "type": "CreateVideo", "pos": [ - 1420.2783925712918, - 3761.1104019496292 + 1420, + 3760 ], "size": [ - 269.97395833333337, - 78 + 270, + 130 ], "flags": {}, - "order": 20, + "order": 16, "mode": 0, "inputs": [ { @@ -1643,6 +1751,11 @@ "properties": { 
"cnr_id": "comfy-core", "ver": "0.5.1", + "ue_properties": { + "widget_ue_connectable": {}, + "version": "7.7", + "input_ue_unconnectable": {} + }, "Node name for S&R": "CreateVideo", "enableTabs": false, "tabWidth": 65, @@ -1660,15 +1773,15 @@ "id": 134, "type": "LoraLoaderModelOnly", "pos": [ - -1649.721454901846, - 3761.1104019496292 + -1650, + 3750 ], "size": [ - 419.97395833333337, - 82 + 420, + 140 ], "flags": {}, - "order": 13, + "order": 33, "mode": 0, "inputs": [ { @@ -1684,7 +1797,7 @@ "widget": { "name": "lora_name" }, - "link": 404 + "link": 1769 }, { "localized_name": "strength_model", @@ -1710,21 +1823,26 @@ "properties": { "cnr_id": "comfy-core", "ver": "0.7.0", + "ue_properties": { + "widget_ue_connectable": {}, + "version": "7.7", + "input_ue_unconnectable": {} + }, "Node name for S&R": "LoraLoaderModelOnly", - "models": [ - { - "name": "ltx-2-19b-ic-lora-pose-control.safetensors", - "url": "https://huggingface.co/Lightricks/LTX-2-19b-IC-LoRA-Pose-Control/resolve/main/ltx-2-19b-ic-lora-pose-control.safetensors", - "directory": "loras" - } - ], "enableTabs": false, "tabWidth": 65, "tabXOffset": 10, "hasSecondTab": false, "secondTabText": "Send Back", "secondTabOffset": 80, - "secondTabWidth": 65 + "secondTabWidth": 65, + "models": [ + { + "name": "ltx-2-19b-ic-lora-pose-control.safetensors", + "url": "https://huggingface.co/Lightricks/LTX-2-19b-IC-LoRA-Pose-Control/resolve/main/ltx-2-19b-ic-lora-pose-control.safetensors", + "directory": "loras" + } + ] }, "widgets_values": [ "ltx-2-19b-ic-lora-pose-control.safetensors", @@ -1737,12 +1855,12 @@ "id": 138, "type": "LTXVSeparateAVLatent", "pos": [ - 730.2784619127078, - 3731.1109580277 + 740, + 3810 ], "size": [ - 193.2916015625, - 46 + 230, + 100 ], "flags": {}, "order": 34, @@ -1777,6 +1895,11 @@ "properties": { "cnr_id": "comfy-core", "ver": "0.5.1", + "ue_properties": { + "widget_ue_connectable": {}, + "version": "7.7", + "input_ue_unconnectable": {} + }, "Node name for S&R": "LTXVSeparateAVLatent", "enableTabs": false, "tabWidth": 65, @@ -1785,22 +1908,21 @@ "secondTabText": "Send Back", "secondTabOffset": 80, "secondTabWidth": 65 - }, - "widgets_values": [] + } }, { - "id": 144, + "id": 188, "type": "VAEDecodeTiled", "pos": [ - 1120.2783619435547, - 3641.110599376351 + 1120, + 3640 ], "size": [ - 269.97395833333337, + 270, 150 ], "flags": {}, - "order": 36, + "order": 38, "mode": 0, "inputs": [ { @@ -1865,6 +1987,11 @@ "properties": { "cnr_id": "comfy-core", "ver": "0.7.0", + "ue_properties": { + "widget_ue_connectable": {}, + "version": "7.7", + "input_ue_unconnectable": {} + }, "Node name for S&R": "VAEDecodeTiled", "enableTabs": false, "tabWidth": 65, @@ -1885,15 +2012,15 @@ "id": 113, "type": "VAEDecode", "pos": [ - 1130.2783163694094, - 3531.1113453160738 + 1130, + 3530 ], "size": [ - 239.97395833333334, - 46 + 240, + 100 ], "flags": {}, - "order": 26, + "order": 22, "mode": 0, "inputs": [ { @@ -1920,6 +2047,11 @@ "properties": { "cnr_id": "comfy-core", "ver": "0.3.75", + "ue_properties": { + "widget_ue_connectable": {}, + "version": "7.7", + "input_ue_unconnectable": {} + }, "Node name for S&R": "VAEDecode", "enableTabs": false, "tabWidth": 65, @@ -1928,22 +2060,21 @@ "secondTabText": "Send Back", "secondTabOffset": 80, "secondTabWidth": 65 - }, - "widgets_values": [] + } }, { "id": 145, "type": "PrimitiveInt", "pos": [ - -1600, - 4940 + -1610, + 4800 ], "size": [ - 269.97395833333337, - 82 + 270, + 110 ], "flags": {}, - "order": 6, + "order": 4, "mode": 0, "inputs": [ { @@ -1969,6 +2100,11 @@ "properties": { 
"cnr_id": "comfy-core", "ver": "0.7.0", + "ue_properties": { + "widget_ue_connectable": {}, + "version": "7.7", + "input_ue_unconnectable": {} + }, "Node name for S&R": "PrimitiveInt", "enableTabs": false, "tabWidth": 65, @@ -1987,15 +2123,15 @@ "id": 148, "type": "PrimitiveFloat", "pos": [ - -1600, - 5070 + -1610, + 4930 ], "size": [ - 269.97395833333337, - 58 + 270, + 110 ], "flags": {}, - "order": 7, + "order": 5, "mode": 0, "inputs": [ { @@ -2022,6 +2158,11 @@ "properties": { "cnr_id": "comfy-core", "ver": "0.7.0", + "ue_properties": { + "widget_ue_connectable": {}, + "version": "7.7", + "input_ue_unconnectable": {} + }, "Node name for S&R": "PrimitiveFloat", "enableTabs": false, "tabWidth": 65, @@ -2035,19 +2176,105 @@ 24 ] }, + { + "id": 115, + "type": "EmptyLTXVLatentVideo", + "pos": [ + -1100, + 4740 + ], + "size": [ + 270, + 200 + ], + "flags": {}, + "order": 24, + "mode": 0, + "inputs": [ + { + "localized_name": "width", + "name": "width", + "type": "INT", + "widget": { + "name": "width" + }, + "link": 296 + }, + { + "localized_name": "height", + "name": "height", + "type": "INT", + "widget": { + "name": "height" + }, + "link": 297 + }, + { + "localized_name": "length", + "name": "length", + "type": "INT", + "widget": { + "name": "length" + }, + "link": 330 + }, + { + "localized_name": "batch_size", + "name": "batch_size", + "type": "INT", + "widget": { + "name": "batch_size" + }, + "link": null + } + ], + "outputs": [ + { + "localized_name": "LATENT", + "name": "LATENT", + "type": "LATENT", + "links": [ + 360 + ] + } + ], + "properties": { + "cnr_id": "comfy-core", + "ver": "0.3.60", + "ue_properties": { + "widget_ue_connectable": {}, + "version": "7.7", + "input_ue_unconnectable": {} + }, + "Node name for S&R": "EmptyLTXVLatentVideo", + "enableTabs": false, + "tabWidth": 65, + "tabXOffset": 10, + "hasSecondTab": false, + "secondTabText": "Send Back", + "secondTabOffset": 80, + "secondTabWidth": 65 + }, + "widgets_values": [ + 768, + 512, + 97, + 1 + ] + }, { "id": 118, "type": "Reroute", "pos": [ - -229.7217758812614, - 4211.111007032079 + -350, + 3980 ], "size": [ - 75, - 26 + 230, + 40 ], "flags": {}, - "order": 14, + "order": 26, "mode": 0, "inputs": [ { @@ -2069,22 +2296,29 @@ ], "properties": { "showOutputText": false, - "horizontal": false + "horizontal": false, + "ue_properties": { + "widget_ue_connectable": {}, + "version": "7.7", + "input_ue_unconnectable": {} + } } }, { - "id": 151, + "id": 189, "type": "LTXVImgToVideoInplace", "pos": [ - -19.72161465663438, - 4071.1107364662485 + 180, + 4040 ], "size": [ - 269.97395833333337, - 122 + 260, + 190 ], - "flags": {}, - "order": 38, + "flags": { + "collapsed": false + }, + "order": 39, "mode": 0, "inputs": [ { @@ -2097,7 +2331,7 @@ "localized_name": "image", "name": "image", "type": "IMAGE", - "link": 398 + "link": 379 }, { "localized_name": "latent", @@ -2112,7 +2346,7 @@ "widget": { "name": "strength" }, - "link": 371 + "link": 1759 }, { "localized_name": "bypass", @@ -2137,6 +2371,11 @@ "properties": { "cnr_id": "comfy-core", "ver": "0.7.0", + "ue_properties": { + "widget_ue_connectable": {}, + "version": "7.7", + "input_ue_unconnectable": {} + }, "Node name for S&R": "LTXVImgToVideoInplace", "enableTabs": false, "tabWidth": 65, @@ -2155,15 +2394,15 @@ "id": 104, "type": "LTXVCropGuides", "pos": [ - -9.721939801202097, - 3841.1107362825187 + -90, + 4210 ], "size": [ - 239.97395833333334, - 66 + 240, + 120 ], "flags": {}, - "order": 19, + "order": 14, "mode": 0, "inputs": [ { @@ -2215,6 +2454,11 @@ "properties": { 
"cnr_id": "comfy-core", "ver": "0.3.68", + "ue_properties": { + "widget_ue_connectable": {}, + "version": "7.7", + "input_ue_unconnectable": {} + }, "Node name for S&R": "LTXVCropGuides", "enableTabs": false, "tabWidth": 65, @@ -2223,22 +2467,21 @@ "secondTabText": "Send Back", "secondTabOffset": 80, "secondTabWidth": 65 - }, - "widgets_values": [] + } }, { "id": 112, "type": "LTXVLatentUpsampler", "pos": [ - -9.721939801202097, - 3961.111517352274 + -90, + 4030 ], "size": [ - 259.97395833333337, - 66 + 260, + 120 ], "flags": {}, - "order": 25, + "order": 21, "mode": 0, "inputs": [ { @@ -2274,6 +2517,11 @@ "properties": { "cnr_id": "comfy-core", "ver": "0.7.0", + "ue_properties": { + "widget_ue_connectable": {}, + "version": "7.7", + "input_ue_unconnectable": {} + }, "Node name for S&R": "LTXVLatentUpsampler", "enableTabs": false, "tabWidth": 65, @@ -2282,22 +2530,117 @@ "secondTabText": "Send Back", "secondTabOffset": 80, "secondTabWidth": 65 + } + }, + { + "id": 154, + "type": "MarkdownNote", + "pos": [ + -1640, + 5050 + ], + "size": [ + 350, + 170 + ], + "flags": { + "collapsed": false }, - "widgets_values": [] + "order": 6, + "mode": 0, + "inputs": [], + "outputs": [], + "title": "Frame Rate Note", + "properties": { + "ue_properties": { + "widget_ue_connectable": {}, + "version": "7.7", + "input_ue_unconnectable": {} + } + }, + "widgets_values": [ + "Please make sure the frame rate value is the same in both boxes" + ], + "color": "#432", + "bgcolor": "#653" + }, + { + "id": 96, + "type": "LTXVAudioVAELoader", + "pos": [ + -1650, + 3970 + ], + "size": [ + 420, + 110 + ], + "flags": {}, + "order": 9, + "mode": 0, + "inputs": [ + { + "localized_name": "ckpt_name", + "name": "ckpt_name", + "type": "COMBO", + "widget": { + "name": "ckpt_name" + }, + "link": 1770 + } + ], + "outputs": [ + { + "localized_name": "Audio VAE", + "name": "Audio VAE", + "type": "VAE", + "links": [ + 285, + 340 + ] + } + ], + "properties": { + "cnr_id": "comfy-core", + "ver": "0.3.68", + "ue_properties": { + "widget_ue_connectable": {}, + "version": "7.7", + "input_ue_unconnectable": {} + }, + "Node name for S&R": "LTXVAudioVAELoader", + "enableTabs": false, + "tabWidth": 65, + "tabXOffset": 10, + "hasSecondTab": false, + "secondTabText": "Send Back", + "secondTabOffset": 80, + "secondTabWidth": 65, + "models": [ + { + "name": "ltx-2-19b-dev-fp8.safetensors", + "url": "https://huggingface.co/Lightricks/LTX-2/resolve/main/ltx-2-19b-dev-fp8.safetensors", + "directory": "checkpoints" + } + ] + }, + "widgets_values": [ + "ltx-2-19b-dev-fp8.safetensors" + ] }, { "id": 97, "type": "LTXAVTextEncoderLoader", "pos": [ - -1649.721454901846, - 4041.1110828665023 + -1650, + 4160 ], "size": [ - 419.97395833333337, - 106 + 420, + 150 ], "flags": {}, - "order": 8, + "order": 10, "mode": 0, "inputs": [ { @@ -2307,7 +2650,7 @@ "widget": { "name": "text_encoder" }, - "link": 405 + "link": 1772 }, { "localized_name": "ckpt_name", @@ -2316,7 +2659,7 @@ "widget": { "name": "ckpt_name" }, - "link": 403 + "link": 1771 }, { "localized_name": "device", @@ -2342,7 +2685,19 @@ "properties": { "cnr_id": "comfy-core", "ver": "0.7.0", + "ue_properties": { + "widget_ue_connectable": {}, + "version": "7.7", + "input_ue_unconnectable": {} + }, "Node name for S&R": "LTXAVTextEncoderLoader", + "enableTabs": false, + "tabWidth": 65, + "tabXOffset": 10, + "hasSecondTab": false, + "secondTabText": "Send Back", + "secondTabOffset": 80, + "secondTabWidth": 65, "models": [ { "name": "ltx-2-19b-dev-fp8.safetensors", @@ -2354,17 +2709,10 @@ "url": 
"https://huggingface.co/Comfy-Org/ltx-2/resolve/main/split_files/text_encoders/gemma_3_12B_it_fp4_mixed.safetensors", "directory": "text_encoders" } - ], - "enableTabs": false, - "tabWidth": 65, - "tabXOffset": 10, - "hasSecondTab": false, - "secondTabText": "Send Back", - "secondTabOffset": 80, - "secondTabWidth": 65 + ] }, "widgets_values": [ - "ltx-2-19b-ic-lora-pose-control.safetensors", + "gemma_3_12B_it_fp4_mixed.safetensors", "ltx-2-19b-dev-fp8.safetensors", "default" ] @@ -2373,15 +2721,15 @@ "id": 103, "type": "CheckpointLoaderSimple", "pos": [ - -1649.721454901846, - 3591.1104777840524 + -1650, + 3520 ], "size": [ - 419.97395833333337, - 98 + 420, + 160 ], "flags": {}, - "order": 9, + "order": 13, "mode": 0, "inputs": [ { @@ -2391,7 +2739,7 @@ "widget": { "name": "ckpt_name" }, - "link": 401 + "link": 1768 } ], "outputs": [ @@ -2424,137 +2772,89 @@ "properties": { "cnr_id": "comfy-core", "ver": "0.3.56", + "ue_properties": { + "widget_ue_connectable": {}, + "version": "7.7", + "input_ue_unconnectable": {} + }, "Node name for S&R": "CheckpointLoaderSimple", + "enableTabs": false, + "tabWidth": 65, + "tabXOffset": 10, + "hasSecondTab": false, + "secondTabText": "Send Back", + "secondTabOffset": 80, + "secondTabWidth": 65, "models": [ { "name": "ltx-2-19b-dev-fp8.safetensors", "url": "https://huggingface.co/Lightricks/LTX-2/resolve/main/ltx-2-19b-dev-fp8.safetensors", "directory": "checkpoints" } - ], - "enableTabs": false, - "tabWidth": 65, - "tabXOffset": 10, - "hasSecondTab": false, - "secondTabText": "Send Back", - "secondTabOffset": 80, - "secondTabWidth": 65 + ] }, "widgets_values": [ "ltx-2-19b-dev-fp8.safetensors" ] }, { - "id": 156, - "type": "LTXVAudioVAELoader", + "id": 110, + "type": "GetImageSize", "pos": [ - -1636.9543279290153, - 3911.095334870057 + -1610, + 4630 ], "size": [ - 399.0494791666667, - 58 + 260, + 120 ], "flags": {}, - "order": 10, + "order": 19, "mode": 0, "inputs": [ - { - "localized_name": "ckpt_name", - "name": "ckpt_name", - "type": "COMBO", - "widget": { - "name": "ckpt_name" - }, - "link": 402 - } - ], - "outputs": [ - { - "localized_name": "Audio VAE", - "name": "Audio VAE", - "type": "VAE", - "links": [ - 382, - 383 - ] - } - ], - "properties": { - "cnr_id": "comfy-core", - "ver": "0.11.0", - "Node name for S&R": "LTXVAudioVAELoader" - }, - "widgets_values": [ - "ltx-2-19b-dev-fp8.safetensors" - ] - }, - { - "id": 149, - "type": "LTXVImgToVideoInplace", - "pos": [ - -1089.7215608128167, - 4401.110560478942 - ], - "size": [ - 269.97395833333337, - 122 - ], - "flags": {}, - "order": 37, - "mode": 0, - "inputs": [ - { - "localized_name": "vae", - "name": "vae", - "type": "VAE", - "link": 359 - }, { "localized_name": "image", "name": "image", "type": "IMAGE", - "link": 399 - }, - { - "localized_name": "latent", - "name": "latent", - "type": "LATENT", - "link": 360 - }, - { - "localized_name": "strength", - "name": "strength", - "type": "FLOAT", - "widget": { - "name": "strength" - }, - "link": 370 - }, - { - "localized_name": "bypass", - "name": "bypass", - "type": "BOOLEAN", - "widget": { - "name": "bypass" - }, - "link": 363 + "link": 381 } ], "outputs": [ { - "localized_name": "latent", - "name": "latent", - "type": "LATENT", + "localized_name": "width", + "name": "width", + "type": "INT", "links": [ - 357 + 296 + ] + }, + { + "localized_name": "height", + "name": "height", + "type": "INT", + "links": [ + 297 + ] + }, + { + "localized_name": "batch_size", + "name": "batch_size", + "type": "INT", + "links": [ + 329, + 330 ] } ], "properties": { 
"cnr_id": "comfy-core", "ver": "0.7.0", - "Node name for S&R": "LTXVImgToVideoInplace", + "ue_properties": { + "widget_ue_connectable": {}, + "version": "7.7", + "input_ue_unconnectable": {} + }, + "Node name for S&R": "GetImageSize", "enableTabs": false, "tabWidth": 65, "tabXOffset": 10, @@ -2562,25 +2862,21 @@ "secondTabText": "Send Back", "secondTabOffset": 80, "secondTabWidth": 65 - }, - "widgets_values": [ - 1, - false - ] + } }, { "id": 132, "type": "LTXVAddGuide", "pos": [ - -599.7217670603999, - 4421.110609115862 + -600, + 4550 ], "size": [ - 269.97395833333337, - 162 + 270, + 240 ], "flags": {}, - "order": 33, + "order": 32, "mode": 0, "inputs": [ { @@ -2611,7 +2907,7 @@ "localized_name": "image", "name": "image", "type": "IMAGE", - "link": 395 + "link": 348 }, { "localized_name": "frame_idx", @@ -2663,6 +2959,11 @@ "properties": { "cnr_id": "comfy-core", "ver": "0.3.75", + "ue_properties": { + "widget_ue_connectable": {}, + "version": "7.7", + "input_ue_unconnectable": {} + }, "Node name for S&R": "LTXVAddGuide", "enableTabs": false, "tabWidth": 65, @@ -2678,114 +2979,76 @@ ] }, { - "id": 154, - "type": "MarkdownNote", + "id": 149, + "type": "LTXVImgToVideoInplace", "pos": [ - -1630, - 5190 + -1090, + 4530 ], "size": [ - 350, - 88 - ], - "flags": { - "collapsed": false - }, - "order": 11, - "mode": 0, - "inputs": [], - "outputs": [], - "title": "Frame Rate Note", - "properties": {}, - "widgets_values": [ - "Please make sure the frame rate value is the same in both boxes" - ], - "color": "#432", - "bgcolor": "#653" - }, - { - "id": 159, - "type": "ResizeImageMaskNode", - "pos": [ - -1610, - 4580 - ], - "size": [ - 284.375, - 154 + 270, + 180 ], "flags": {}, - "order": 39, + "order": 36, "mode": 0, "inputs": [ { - "localized_name": "input", - "name": "input", - "type": "IMAGE,MASK", - "link": 400 + "localized_name": "vae", + "name": "vae", + "type": "VAE", + "link": 359 }, { - "localized_name": "resize_type", - "name": "resize_type", - "type": "COMFY_DYNAMICCOMBO_V3", + "localized_name": "image", + "name": "image", + "type": "IMAGE", + "link": 364 + }, + { + "localized_name": "latent", + "name": "latent", + "type": "LATENT", + "link": 360 + }, + { + "localized_name": "strength", + "name": "strength", + "type": "FLOAT", "widget": { - "name": "resize_type" + "name": "strength" }, - "link": null + "link": 1758 }, { - "localized_name": "width", - "name": "resize_type.width", - "type": "INT", + "localized_name": "bypass", + "name": "bypass", + "type": "BOOLEAN", "widget": { - "name": "resize_type.width" - }, - "link": 408 - }, - { - "localized_name": "height", - "name": "resize_type.height", - "type": "INT", - "widget": { - "name": "resize_type.height" - }, - "link": 409 - }, - { - "localized_name": "crop", - "name": "resize_type.crop", - "type": "COMBO", - "widget": { - "name": "resize_type.crop" - }, - "link": null - }, - { - "localized_name": "scale_method", - "name": "scale_method", - "type": "COMBO", - "widget": { - "name": "scale_method" + "name": "bypass" }, "link": null } ], "outputs": [ { - "localized_name": "resized", - "name": "resized", - "type": "IMAGE,MASK", + "localized_name": "latent", + "name": "latent", + "type": "LATENT", "links": [ - 391, - 392, - 395 + 357 ] } ], "properties": { "cnr_id": "comfy-core", "ver": "0.7.0", - "Node name for S&R": "ResizeImageMaskNode", + "ue_properties": { + "widget_ue_connectable": {}, + "version": "7.7", + "input_ue_unconnectable": {} + }, + "Node name for S&R": "LTXVImgToVideoInplace", "enableTabs": false, "tabWidth": 65, "tabXOffset": 
10, @@ -2795,139 +3058,69 @@ "secondTabWidth": 65 }, "widgets_values": [ - "scale dimensions", - 1280, - 720, - "center", - "lanczos" + 1, + false ] }, { - "id": 110, - "type": "GetImageSize", + "id": 155, + "type": "ImageScaleBy", "pos": [ - -1600, - 4780 + -1620, + 4440 ], "size": [ - 259.97395833333337, - 66 + 280, + 140 ], "flags": {}, - "order": 23, + "order": 37, "mode": 0, "inputs": [ { "localized_name": "image", "name": "image", "type": "IMAGE", - "link": 391 - } - ], - "outputs": [ - { - "localized_name": "width", - "name": "width", - "type": "INT", - "links": [ - 296 - ] + "link": 380 }, { - "localized_name": "height", - "name": "height", - "type": "INT", - "links": [ - 297 - ] - }, - { - "localized_name": "batch_size", - "name": "batch_size", - "type": "INT", - "links": [] - } - ], - "properties": { - "cnr_id": "comfy-core", - "ver": "0.7.0", - "Node name for S&R": "GetImageSize", - "enableTabs": false, - "tabWidth": 65, - "tabXOffset": 10, - "hasSecondTab": false, - "secondTabText": "Send Back", - "secondTabOffset": 80, - "secondTabWidth": 65 - }, - "widgets_values": [] - }, - { - "id": 115, - "type": "EmptyLTXVLatentVideo", - "pos": [ - -1099.721794809093, - 4611.11072170357 - ], - "size": [ - 269.97395833333337, - 130 - ], - "flags": {}, - "order": 28, - "mode": 0, - "inputs": [ - { - "localized_name": "width", - "name": "width", - "type": "INT", + "localized_name": "upscale_method", + "name": "upscale_method", + "type": "COMBO", "widget": { - "name": "width" + "name": "upscale_method" }, - "link": 296 + "link": null }, { - "localized_name": "height", - "name": "height", - "type": "INT", + "localized_name": "scale_by", + "name": "scale_by", + "type": "FLOAT", "widget": { - "name": "height" - }, - "link": 297 - }, - { - "localized_name": "length", - "name": "length", - "type": "INT", - "widget": { - "name": "length" - }, - "link": 410 - }, - { - "localized_name": "batch_size", - "name": "batch_size", - "type": "INT", - "widget": { - "name": "batch_size" + "name": "scale_by" }, "link": null } ], "outputs": [ { - "localized_name": "LATENT", - "name": "LATENT", - "type": "LATENT", + "localized_name": "IMAGE", + "name": "IMAGE", + "type": "IMAGE", "links": [ - 360 + 381 ] } ], "properties": { "cnr_id": "comfy-core", - "ver": "0.3.60", - "Node name for S&R": "EmptyLTXVLatentVideo", + "ver": "0.5.1", + "ue_properties": { + "widget_ue_connectable": {}, + "version": "7.7", + "input_ue_unconnectable": {} + }, + "Node name for S&R": "ImageScaleBy", "enableTabs": false, "tabWidth": 65, "tabXOffset": 10, @@ -2937,87 +3130,8 @@ "secondTabWidth": 65 }, "widgets_values": [ - 768, - 512, - 97, - 1 - ] - }, - { - "id": 111, - "type": "LTXVEmptyLatentAudio", - "pos": [ - -1099.721794809093, - 4811.110229576288 - ], - "size": [ - 269.97395833333337, - 106 - ], - "flags": {}, - "order": 24, - "mode": 0, - "inputs": [ - { - "localized_name": "audio_vae", - "name": "audio_vae", - "type": "VAE", - "link": 382 - }, - { - "localized_name": "frames_number", - "name": "frames_number", - "type": "INT", - "widget": { - "name": "frames_number" - }, - "link": null - }, - { - "localized_name": "frame_rate", - "name": "frame_rate", - "type": "INT", - "widget": { - "name": "frame_rate" - }, - "link": 354 - }, - { - "localized_name": "batch_size", - "name": "batch_size", - "type": "INT", - "widget": { - "name": "batch_size" - }, - "link": null - } - ], - "outputs": [ - { - "localized_name": "Latent", - "name": "Latent", - "type": "LATENT", - "links": [ - 300 - ] - } - ], - "properties": { - "cnr_id": 
"comfy-core", - "ver": "0.3.68", - "Node name for S&R": "LTXVEmptyLatentAudio", - "enableTabs": false, - "tabWidth": 65, - "tabXOffset": 10, - "hasSecondTab": false, - "secondTabText": "Send Back", - "secondTabOffset": 80, - "secondTabWidth": 65 - }, - "widgets_values": [ - 97, - 25, - 1 + "lanczos", + 0.5 ] } ], @@ -3028,8 +3142,8 @@ "bounding": [ -1660, 3440, - 440, - 820 + 450, + 940 ], "color": "#3f789e", "font_size": 24, @@ -3041,8 +3155,8 @@ "bounding": [ -700, 3440, - 570, - 820 + 580, + 940 ], "color": "#3f789e", "font_size": 24, @@ -3054,8 +3168,8 @@ "bounding": [ -1180, 3440, - 440, - 820 + 450, + 940 ], "color": "#3f789e", "font_size": 24, @@ -3066,7 +3180,7 @@ "title": "Latent", "bounding": [ -1180, - 4290, + 4420, 1050, 680 ], @@ -3080,8 +3194,8 @@ "bounding": [ -100, 3440, - 1090, - 820 + 1110, + 940 ], "color": "#3f789e", "font_size": 24, @@ -3091,10 +3205,10 @@ "id": 6, "title": "Sampler", "bounding": [ - 350, + 410, 3480, - 620, - 750 + 590, + 880 ], "color": "#3f789e", "font_size": 24, @@ -3106,8 +3220,8 @@ "bounding": [ -90, 3480, - 430, - 310 + 450, + 480 ], "color": "#3f789e", "font_size": 24, @@ -3117,8 +3231,8 @@ "id": 11, "title": "Frame rate", "bounding": [ - -1610, - 4860, + -1620, + 4730, 290, 271.6 ], @@ -3184,6 +3298,22 @@ "target_slot": 2, "type": "CONDITIONING" }, + { + "id": 285, + "origin_id": 96, + "origin_slot": 0, + "target_id": 111, + "target_slot": 0, + "type": "VAE" + }, + { + "id": 329, + "origin_id": 110, + "origin_slot": 2, + "target_id": 111, + "target_slot": 1, + "type": "INT" + }, { "id": 260, "origin_id": 126, @@ -3240,6 +3370,14 @@ "target_slot": 1, "type": "INT" }, + { + "id": 330, + "origin_id": 110, + "origin_slot": 2, + "target_id": 115, + "target_slot": 2, + "type": "INT" + }, { "id": 325, "origin_id": 103, @@ -3360,6 +3498,14 @@ "target_slot": 0, "type": "LATENT" }, + { + "id": 340, + "origin_id": 96, + "origin_slot": 0, + "target_id": 139, + "target_slot": 1, + "type": "VAE" + }, { "id": 337, "origin_id": 138, @@ -3490,23 +3636,31 @@ }, { "id": 347, - "origin_id": 143, + "origin_id": 187, "origin_slot": 0, "target_id": 107, "target_slot": 0, "type": "NOISE" }, + { + "id": 348, + "origin_id": -10, + "origin_slot": 1, + "target_id": 132, + "target_slot": 4, + "type": "IMAGE" + }, { "id": 351, "origin_id": 138, "origin_slot": 0, - "target_id": 144, + "target_id": 188, "target_slot": 0, "type": "LATENT" }, { "id": 352, - "origin_id": 144, + "origin_id": 188, "origin_slot": 0, "target_id": 106, "target_slot": 0, @@ -3516,7 +3670,7 @@ "id": 353, "origin_id": 103, "origin_slot": 2, - "target_id": 144, + "target_id": 188, "target_slot": 1, "type": "VAE" }, @@ -3569,16 +3723,16 @@ "type": "LATENT" }, { - "id": 363, + "id": 364, "origin_id": -10, "origin_slot": 2, "target_id": 149, - "target_slot": 4, - "type": "BOOLEAN" + "target_slot": 1, + "type": "IMAGE" }, { "id": 365, - "origin_id": 151, + "origin_id": 189, "origin_slot": 0, "target_id": 101, "target_slot": 0, @@ -3588,7 +3742,7 @@ "id": 366, "origin_id": 112, "origin_slot": 0, - "target_id": 151, + "target_id": 189, "target_slot": 2, "type": "LATENT" }, @@ -3596,92 +3750,68 @@ "id": 367, "origin_id": 118, "origin_slot": 0, - "target_id": 151, + "target_id": 189, "target_slot": 0, "type": "VAE" }, { "id": 368, "origin_id": -10, - "origin_slot": 2, - "target_id": 151, + "origin_slot": 4, + "target_id": 189, "target_slot": 4, "type": "BOOLEAN" }, { - "id": 370, + "id": 379, "origin_id": -10, - "origin_slot": 1, - "target_id": 149, - "target_slot": 3, - "type": "FLOAT" - }, - { - "id": 371, - 
"origin_id": -10, - "origin_slot": 1, - "target_id": 151, - "target_slot": 3, - "type": "FLOAT" - }, - { - "id": 382, - "origin_id": 156, - "origin_slot": 0, - "target_id": 111, - "target_slot": 0, - "type": "VAE" - }, - { - "id": 383, - "origin_id": 156, - "origin_slot": 0, - "target_id": 139, + "origin_slot": 2, + "target_id": 189, "target_slot": 1, - "type": "VAE" + "type": "IMAGE" }, { - "id": 391, - "origin_id": 159, + "id": 380, + "origin_id": -10, + "origin_slot": 1, + "target_id": 155, + "target_slot": 0, + "type": "IMAGE" + }, + { + "id": 381, + "origin_id": 155, "origin_slot": 0, "target_id": 110, "target_slot": 0, "type": "IMAGE" }, { - "id": 395, - "origin_id": 159, - "origin_slot": 0, - "target_id": 132, - "target_slot": 4, - "type": "IMAGE" - }, - { - "id": 398, - "origin_id": -10, - "origin_slot": 3, - "target_id": 151, - "target_slot": 1, - "type": "IMAGE" - }, - { - "id": 399, + "id": 1758, "origin_id": -10, "origin_slot": 3, "target_id": 149, - "target_slot": 1, - "type": "IMAGE" + "target_slot": 3, + "type": "FLOAT" }, { - "id": 400, + "id": 1759, + "origin_id": -10, + "origin_slot": 3, + "target_id": 189, + "target_slot": 3, + "type": "FLOAT" + }, + { + "id": 1767, "origin_id": -10, "origin_slot": 4, - "target_id": 159, + "target_id": 126, "target_slot": 0, - "type": "IMAGE,MASK" + "type": "INT" }, { - "id": 401, + "id": 1768, "origin_id": -10, "origin_slot": 5, "target_id": 103, @@ -3689,23 +3819,7 @@ "type": "COMBO" }, { - "id": 402, - "origin_id": -10, - "origin_slot": 5, - "target_id": 156, - "target_slot": 0, - "type": "COMBO" - }, - { - "id": 403, - "origin_id": -10, - "origin_slot": 5, - "target_id": 97, - "target_slot": 1, - "type": "COMBO" - }, - { - "id": 404, + "id": 1769, "origin_id": -10, "origin_slot": 6, "target_id": 134, @@ -3713,52 +3827,44 @@ "type": "COMBO" }, { - "id": 405, + "id": 1770, "origin_id": -10, - "origin_slot": 6, + "origin_slot": 5, + "target_id": 96, + "target_slot": 0, + "type": "COMBO" + }, + { + "id": 1771, + "origin_id": -10, + "origin_slot": 5, + "target_id": 97, + "target_slot": 1, + "type": "COMBO" + }, + { + "id": 1772, + "origin_id": -10, + "origin_slot": 7, "target_id": 97, "target_slot": 0, "type": "COMBO" }, { - "id": 406, + "id": 1773, "origin_id": -10, - "origin_slot": 7, + "origin_slot": 8, "target_id": 105, "target_slot": 1, "type": "COMBO" }, { - "id": 407, + "id": 1774, "origin_id": -10, - "origin_slot": 8, + "origin_slot": 9, "target_id": 100, "target_slot": 0, "type": "COMBO" - }, - { - "id": 408, - "origin_id": -10, - "origin_slot": 9, - "target_id": 159, - "target_slot": 2, - "type": "INT" - }, - { - "id": 409, - "origin_id": -10, - "origin_slot": 10, - "target_id": 159, - "target_slot": 3, - "type": "INT" - }, - { - "id": 410, - "origin_id": -10, - "origin_slot": 11, - "target_id": 115, - "target_slot": 2, - "type": "INT" } ], "extra": { @@ -3768,21 +3874,7 @@ } ] }, - "config": {}, "extra": { - "ds": { - "scale": 1.3889423076923078, - "offset": [ - 217.0560747663551, - -3703.3333333333335 - ] - }, - "frontendVersion": "1.37.10", - "workflowRendererVersion": "LG", - "VHS_latentpreview": false, - "VHS_latentpreviewrate": 0, - "VHS_MetadataImage": true, - "VHS_KeepIntermediate": true - }, - "version": 0.4 -} + "ue_links": [] + } +} \ No newline at end of file diff --git a/blueprints/Text to Image (Flux.1 Dev).json b/blueprints/Text to Image (Flux.1 Dev).json new file mode 100644 index 000000000..04c3cb95a --- /dev/null +++ b/blueprints/Text to Image (Flux.1 Dev).json @@ -0,0 +1,1046 @@ +{ + "revision": 0, + 
"last_node_id": 193, + "last_link_id": 0, + "nodes": [ + { + "id": 193, + "type": "1fd98b34-59ef-4d8d-afbf-58bdd7a1cd35", + "pos": [ + -1210, + -1770 + ], + "size": [ + 400, + 380 + ], + "flags": {}, + "order": 0, + "mode": 0, + "inputs": [ + { + "label": "prompt", + "name": "text", + "type": "STRING", + "widget": { + "name": "text" + }, + "link": null + }, + { + "name": "width", + "type": "INT", + "widget": { + "name": "width" + }, + "link": null + }, + { + "name": "height", + "type": "INT", + "widget": { + "name": "height" + }, + "link": null + }, + { + "name": "seed", + "type": "INT", + "widget": { + "name": "seed" + }, + "link": null + }, + { + "name": "unet_name", + "type": "COMBO", + "widget": { + "name": "unet_name" + }, + "link": null + }, + { + "name": "clip_name1", + "type": "COMBO", + "widget": { + "name": "clip_name1" + }, + "link": null + }, + { + "name": "clip_name2", + "type": "COMBO", + "widget": { + "name": "clip_name2" + }, + "link": null + }, + { + "name": "vae_name", + "type": "COMBO", + "widget": { + "name": "vae_name" + }, + "link": null + } + ], + "outputs": [ + { + "localized_name": "IMAGE", + "name": "IMAGE", + "type": "IMAGE", + "links": [] + } + ], + "properties": { + "proxyWidgets": [ + [ + "45", + "text" + ], + [ + "27", + "width" + ], + [ + "27", + "height" + ], + [ + "31", + "seed" + ], + [ + "38", + "unet_name" + ], + [ + "40", + "clip_name1" + ], + [ + "40", + "clip_name2" + ], + [ + "39", + "vae_name" + ], + [ + "31", + "control_after_generate" + ] + ], + "ue_properties": { + "widget_ue_connectable": {}, + "input_ue_unconnectable": {} + }, + "cnr_id": "comfy-core", + "ver": "0.18.1" + }, + "widgets_values": [], + "title": "Text to Image (Flux.1 Dev)" + } + ], + "links": [], + "version": 0.4, + "definitions": { + "subgraphs": [ + { + "id": "1fd98b34-59ef-4d8d-afbf-58bdd7a1cd35", + "version": 1, + "state": { + "lastGroupId": 8, + "lastNodeId": 193, + "lastLinkId": 388, + "lastRerouteId": 0 + }, + "revision": 0, + "config": {}, + "name": "Text to Image (Flux.1 Dev)", + "inputNode": { + "id": -10, + "bounding": [ + -1090, + 411, + 120, + 200 + ] + }, + "outputNode": { + "id": -20, + "bounding": [ + 540, + 100, + 120, + 60 + ] + }, + "inputs": [ + { + "id": "669e384e-5e26-4291-9bac-e1d1f04b4a16", + "name": "text", + "type": "STRING", + "linkIds": [ + 68 + ], + "label": "prompt", + "pos": [ + -990, + 431 + ] + }, + { + "id": "5a5c0b01-5836-4ca6-a24f-68c0a4fb9802", + "name": "width", + "type": "INT", + "linkIds": [ + 69 + ], + "pos": [ + -990, + 451 + ] + }, + { + "id": "5e01104a-ed7f-457b-aaee-934e8ecc088d", + "name": "height", + "type": "INT", + "linkIds": [ + 70 + ], + "pos": [ + -990, + 471 + ] + }, + { + "id": "ea5ea317-a484-4605-8138-8628a4b8e502", + "name": "seed", + "type": "INT", + "linkIds": [ + 382 + ], + "pos": [ + -990, + 491 + ] + }, + { + "id": "ea2332f5-bd49-4e2e-8c7a-95817dc56ed6", + "name": "unet_name", + "type": "COMBO", + "linkIds": [ + 385 + ], + "pos": [ + -990, + 511 + ] + }, + { + "id": "4fca3f43-c05f-4337-bf84-2afe67e43739", + "name": "clip_name1", + "type": "COMBO", + "linkIds": [ + 386 + ], + "pos": [ + -990, + 531 + ] + }, + { + "id": "357a679f-1370-4cd5-9269-0d5ae1986b49", + "name": "clip_name2", + "type": "COMBO", + "linkIds": [ + 387 + ], + "pos": [ + -990, + 551 + ] + }, + { + "id": "924ffec5-81f8-4585-8761-5a80d5d775bc", + "name": "vae_name", + "type": "COMBO", + "linkIds": [ + 388 + ], + "pos": [ + -990, + 571 + ] + } + ], + "outputs": [ + { + "id": "2185cb4d-8689-4cf8-b345-75319fb46a8e", + "name": "IMAGE", + "type": "IMAGE", + 
"linkIds": [ + 9 + ], + "localized_name": "IMAGE", + "pos": [ + 560, + 120 + ] + } + ], + "widgets": [], + "nodes": [ + { + "id": 39, + "type": "VAELoader", + "pos": [ + -800, + 670 + ], + "size": [ + 270, + 110 + ], + "flags": {}, + "order": 4, + "mode": 0, + "inputs": [ + { + "localized_name": "vae_name", + "name": "vae_name", + "type": "COMBO", + "widget": { + "name": "vae_name" + }, + "link": 388 + } + ], + "outputs": [ + { + "localized_name": "VAE", + "name": "VAE", + "type": "VAE", + "links": [ + 58 + ] + } + ], + "properties": { + "ue_properties": { + "widget_ue_connectable": {}, + "input_ue_unconnectable": {} + }, + "cnr_id": "comfy-core", + "ver": "0.3.40", + "Node name for S&R": "VAELoader", + "models": [ + { + "name": "ae.safetensors", + "url": "https://huggingface.co/Comfy-Org/Lumina_Image_2.0_Repackaged/resolve/main/split_files/vae/ae.safetensors", + "directory": "vae" + } + ] + }, + "widgets_values": [ + "ae.safetensors" + ] + }, + { + "id": 38, + "type": "UNETLoader", + "pos": [ + -800, + 160 + ], + "size": [ + 270, + 110 + ], + "flags": {}, + "order": 3, + "mode": 0, + "inputs": [ + { + "localized_name": "unet_name", + "name": "unet_name", + "type": "COMBO", + "widget": { + "name": "unet_name" + }, + "link": 385 + }, + { + "localized_name": "weight_dtype", + "name": "weight_dtype", + "type": "COMBO", + "widget": { + "name": "weight_dtype" + }, + "link": null + } + ], + "outputs": [ + { + "localized_name": "MODEL", + "name": "MODEL", + "type": "MODEL", + "links": [ + 61 + ] + } + ], + "properties": { + "ue_properties": { + "widget_ue_connectable": {}, + "input_ue_unconnectable": {} + }, + "cnr_id": "comfy-core", + "ver": "0.3.40", + "Node name for S&R": "UNETLoader", + "models": [ + { + "name": "flux1-dev.safetensors", + "url": "https://huggingface.co/Comfy-Org/flux1-dev/resolve/main/flux1-dev.safetensors", + "directory": "diffusion_models" + } + ] + }, + "widgets_values": [ + "flux1-dev.safetensors", + "default" + ] + }, + { + "id": 40, + "type": "DualCLIPLoader", + "pos": [ + -800, + 380 + ], + "size": [ + 270, + 180 + ], + "flags": {}, + "order": 5, + "mode": 0, + "inputs": [ + { + "localized_name": "clip_name1", + "name": "clip_name1", + "type": "COMBO", + "widget": { + "name": "clip_name1" + }, + "link": 386 + }, + { + "localized_name": "clip_name2", + "name": "clip_name2", + "type": "COMBO", + "widget": { + "name": "clip_name2" + }, + "link": 387 + }, + { + "localized_name": "type", + "name": "type", + "type": "COMBO", + "widget": { + "name": "type" + }, + "link": null + }, + { + "localized_name": "device", + "name": "device", + "shape": 7, + "type": "COMBO", + "widget": { + "name": "device" + }, + "link": null + } + ], + "outputs": [ + { + "localized_name": "CLIP", + "name": "CLIP", + "type": "CLIP", + "links": [ + 64 + ] + } + ], + "properties": { + "ue_properties": { + "widget_ue_connectable": {}, + "input_ue_unconnectable": {} + }, + "cnr_id": "comfy-core", + "ver": "0.3.40", + "Node name for S&R": "DualCLIPLoader", + "models": [ + { + "name": "clip_l.safetensors", + "url": "https://huggingface.co/comfyanonymous/flux_text_encoders/resolve/main/clip_l.safetensors", + "directory": "text_encoders" + }, + { + "name": "t5xxl_fp16.safetensors", + "url": "https://huggingface.co/comfyanonymous/flux_text_encoders/resolve/main/t5xxl_fp16.safetensors", + "directory": "text_encoders" + } + ] + }, + "widgets_values": [ + "clip_l.safetensors", + "t5xxl_fp16.safetensors", + "flux", + "default" + ] + }, + { + "id": 27, + "type": "EmptySD3LatentImage", + "pos": [ + -420, + 640 + ], 
+ "size": [ + 270, + 170 + ], + "flags": {}, + "order": 1, + "mode": 0, + "inputs": [ + { + "localized_name": "width", + "name": "width", + "type": "INT", + "widget": { + "name": "width" + }, + "link": 69 + }, + { + "localized_name": "height", + "name": "height", + "type": "INT", + "widget": { + "name": "height" + }, + "link": 70 + }, + { + "localized_name": "batch_size", + "name": "batch_size", + "type": "INT", + "widget": { + "name": "batch_size" + }, + "link": null + } + ], + "outputs": [ + { + "localized_name": "LATENT", + "name": "LATENT", + "type": "LATENT", + "slot_index": 0, + "links": [ + 51 + ] + } + ], + "properties": { + "ue_properties": { + "widget_ue_connectable": {}, + "input_ue_unconnectable": {} + }, + "cnr_id": "comfy-core", + "ver": "0.3.40", + "Node name for S&R": "EmptySD3LatentImage" + }, + "widgets_values": [ + 1024, + 1024, + 1 + ] + }, + { + "id": 45, + "type": "CLIPTextEncode", + "pos": [ + -460, + 150 + ], + "size": [ + 330, + 220 + ], + "flags": {}, + "order": 7, + "mode": 0, + "inputs": [ + { + "localized_name": "clip", + "name": "clip", + "type": "CLIP", + "link": 64 + }, + { + "localized_name": "text", + "name": "text", + "type": "STRING", + "widget": { + "name": "text" + }, + "link": 68 + } + ], + "outputs": [ + { + "localized_name": "CONDITIONING", + "name": "CONDITIONING", + "type": "CONDITIONING", + "links": [ + 65, + 66 + ] + } + ], + "properties": { + "ue_properties": { + "widget_ue_connectable": {}, + "input_ue_unconnectable": {} + }, + "cnr_id": "comfy-core", + "ver": "0.18.1", + "Node name for S&R": "CLIPTextEncode" + }, + "widgets_values": [ + "" + ] + }, + { + "id": 31, + "type": "KSampler", + "pos": [ + -50, + 260 + ], + "size": [ + 320, + 350 + ], + "flags": {}, + "order": 2, + "mode": 0, + "inputs": [ + { + "localized_name": "model", + "name": "model", + "type": "MODEL", + "link": 61 + }, + { + "localized_name": "positive", + "name": "positive", + "type": "CONDITIONING", + "link": 65 + }, + { + "localized_name": "negative", + "name": "negative", + "type": "CONDITIONING", + "link": 63 + }, + { + "localized_name": "latent_image", + "name": "latent_image", + "type": "LATENT", + "link": 51 + }, + { + "localized_name": "seed", + "name": "seed", + "type": "INT", + "widget": { + "name": "seed" + }, + "link": 382 + }, + { + "localized_name": "steps", + "name": "steps", + "type": "INT", + "widget": { + "name": "steps" + }, + "link": null + }, + { + "localized_name": "cfg", + "name": "cfg", + "type": "FLOAT", + "widget": { + "name": "cfg" + }, + "link": null + }, + { + "localized_name": "sampler_name", + "name": "sampler_name", + "type": "COMBO", + "widget": { + "name": "sampler_name" + }, + "link": null + }, + { + "localized_name": "scheduler", + "name": "scheduler", + "type": "COMBO", + "widget": { + "name": "scheduler" + }, + "link": null + }, + { + "localized_name": "denoise", + "name": "denoise", + "type": "FLOAT", + "widget": { + "name": "denoise" + }, + "link": null + } + ], + "outputs": [ + { + "localized_name": "LATENT", + "name": "LATENT", + "type": "LATENT", + "slot_index": 0, + "links": [ + 52 + ] + } + ], + "properties": { + "ue_properties": { + "widget_ue_connectable": {}, + "input_ue_unconnectable": {} + }, + "cnr_id": "comfy-core", + "ver": "0.3.40", + "Node name for S&R": "KSampler" + }, + "widgets_values": [ + 0, + "randomize", + 20, + 1, + "euler", + "simple", + 1 + ] + }, + { + "id": 8, + "type": "VAEDecode", + "pos": [ + 20, + 120 + ], + "size": [ + 230, + 100 + ], + "flags": { + "collapsed": false + }, + "order": 0, + "mode": 0, + 
"inputs": [ + { + "localized_name": "samples", + "name": "samples", + "type": "LATENT", + "link": 52 + }, + { + "localized_name": "vae", + "name": "vae", + "type": "VAE", + "link": 58 + } + ], + "outputs": [ + { + "localized_name": "IMAGE", + "name": "IMAGE", + "type": "IMAGE", + "slot_index": 0, + "links": [ + 9 + ] + } + ], + "properties": { + "ue_properties": { + "widget_ue_connectable": {}, + "input_ue_unconnectable": {} + }, + "cnr_id": "comfy-core", + "ver": "0.3.40", + "Node name for S&R": "VAEDecode" + } + }, + { + "id": 42, + "type": "ConditioningZeroOut", + "pos": [ + -350, + 420 + ], + "size": [ + 230, + 80 + ], + "flags": { + "collapsed": false + }, + "order": 6, + "mode": 0, + "inputs": [ + { + "localized_name": "conditioning", + "name": "conditioning", + "type": "CONDITIONING", + "link": 66 + } + ], + "outputs": [ + { + "localized_name": "CONDITIONING", + "name": "CONDITIONING", + "type": "CONDITIONING", + "links": [ + 63 + ] + } + ], + "properties": { + "ue_properties": { + "widget_ue_connectable": {}, + "input_ue_unconnectable": {} + }, + "cnr_id": "comfy-core", + "ver": "0.3.40", + "Node name for S&R": "ConditioningZeroOut" + } + } + ], + "groups": [ + { + "id": 1, + "title": "Model", + "bounding": [ + -820, + 70, + 320, + 750 + ], + "color": "#3f789e", + "font_size": 24, + "flags": {} + }, + { + "id": 2, + "title": "Image Size", + "bounding": [ + -470, + 570, + 380, + 250 + ], + "color": "#3f789e", + "font_size": 24, + "flags": {} + }, + { + "id": 3, + "title": "Prompt", + "bounding": [ + -470, + 70, + 380, + 470 + ], + "color": "#3f789e", + "font_size": 24, + "flags": {} + } + ], + "links": [ + { + "id": 52, + "origin_id": 31, + "origin_slot": 0, + "target_id": 8, + "target_slot": 0, + "type": "LATENT" + }, + { + "id": 58, + "origin_id": 39, + "origin_slot": 0, + "target_id": 8, + "target_slot": 1, + "type": "VAE" + }, + { + "id": 61, + "origin_id": 38, + "origin_slot": 0, + "target_id": 31, + "target_slot": 0, + "type": "MODEL" + }, + { + "id": 63, + "origin_id": 42, + "origin_slot": 0, + "target_id": 31, + "target_slot": 2, + "type": "CONDITIONING" + }, + { + "id": 51, + "origin_id": 27, + "origin_slot": 0, + "target_id": 31, + "target_slot": 3, + "type": "LATENT" + }, + { + "id": 9, + "origin_id": 8, + "origin_slot": 0, + "target_id": -20, + "target_slot": 0, + "type": "IMAGE" + }, + { + "id": 64, + "origin_id": 40, + "origin_slot": 0, + "target_id": 45, + "target_slot": 0, + "type": "CLIP" + }, + { + "id": 65, + "origin_id": 45, + "origin_slot": 0, + "target_id": 31, + "target_slot": 1, + "type": "CONDITIONING" + }, + { + "id": 66, + "origin_id": 45, + "origin_slot": 0, + "target_id": 42, + "target_slot": 0, + "type": "CONDITIONING" + }, + { + "id": 68, + "origin_id": -10, + "origin_slot": 0, + "target_id": 45, + "target_slot": 1, + "type": "STRING" + }, + { + "id": 69, + "origin_id": -10, + "origin_slot": 1, + "target_id": 27, + "target_slot": 0, + "type": "INT" + }, + { + "id": 70, + "origin_id": -10, + "origin_slot": 2, + "target_id": 27, + "target_slot": 1, + "type": "INT" + }, + { + "id": 382, + "origin_id": -10, + "origin_slot": 3, + "target_id": 31, + "target_slot": 4, + "type": "INT" + }, + { + "id": 385, + "origin_id": -10, + "origin_slot": 4, + "target_id": 38, + "target_slot": 0, + "type": "COMBO" + }, + { + "id": 386, + "origin_id": -10, + "origin_slot": 5, + "target_id": 40, + "target_slot": 0, + "type": "COMBO" + }, + { + "id": 387, + "origin_id": -10, + "origin_slot": 6, + "target_id": 40, + "target_slot": 1, + "type": "COMBO" + }, + { + "id": 388, + 
"origin_id": -10, + "origin_slot": 7, + "target_id": 39, + "target_slot": 0, + "type": "COMBO" + } + ], + "extra": { + "workflowRendererVersion": "LG" + }, + "category": "Image generation and editing/Text to image" + } + ] + }, + "extra": { + "ds": { + "scale": 0.7513148009015777, + "offset": [ + 1726.1426909346173, + 146.66925047394233 + ] + }, + "ue_links": [] + } +} \ No newline at end of file diff --git a/blueprints/Text to Image (Flux.1 Krea Dev).json b/blueprints/Text to Image (Flux.1 Krea Dev).json new file mode 100644 index 000000000..fe4db1cfc --- /dev/null +++ b/blueprints/Text to Image (Flux.1 Krea Dev).json @@ -0,0 +1,1040 @@ +{ + "revision": 0, + "last_node_id": 196, + "last_link_id": 0, + "nodes": [ + { + "id": 196, + "type": "aa0a207e-bf0e-477c-a87f-f58fcf5f7749", + "pos": [ + 1010, + 130 + ], + "size": [ + 410, + 460 + ], + "flags": {}, + "order": 0, + "mode": 0, + "inputs": [ + { + "name": "text", + "type": "STRING", + "widget": { + "name": "text" + }, + "link": null + }, + { + "name": "width", + "type": "INT", + "widget": { + "name": "width" + }, + "link": null + }, + { + "name": "height", + "type": "INT", + "widget": { + "name": "height" + }, + "link": null + }, + { + "name": "seed", + "type": "INT", + "widget": { + "name": "seed" + }, + "link": null + }, + { + "name": "unet_name", + "type": "COMBO", + "widget": { + "name": "unet_name" + }, + "link": null + }, + { + "name": "clip_name1", + "type": "COMBO", + "widget": { + "name": "clip_name1" + }, + "link": null + }, + { + "name": "clip_name2", + "type": "COMBO", + "widget": { + "name": "clip_name2" + }, + "link": null + }, + { + "name": "vae_name", + "type": "COMBO", + "widget": { + "name": "vae_name" + }, + "link": null + } + ], + "outputs": [ + { + "localized_name": "IMAGE", + "name": "IMAGE", + "type": "IMAGE", + "links": [] + } + ], + "properties": { + "proxyWidgets": [ + [ + "195", + "text" + ], + [ + "27", + "width" + ], + [ + "27", + "height" + ], + [ + "31", + "seed" + ], + [ + "38", + "unet_name" + ], + [ + "40", + "clip_name1" + ], + [ + "40", + "clip_name2" + ], + [ + "39", + "vae_name" + ] + ], + "ue_properties": { + "widget_ue_connectable": {}, + "input_ue_unconnectable": {} + }, + "cnr_id": "comfy-core", + "ver": "0.18.1" + }, + "widgets_values": [], + "title": "Text to Image (Flux.1 Krea Dev)" + } + ], + "links": [], + "version": 0.4, + "definitions": { + "subgraphs": [ + { + "id": "aa0a207e-bf0e-477c-a87f-f58fcf5f7749", + "version": 1, + "state": { + "lastGroupId": 8, + "lastNodeId": 196, + "lastLinkId": 395, + "lastRerouteId": 0 + }, + "revision": 0, + "config": {}, + "name": "Text to Image (Flux.1 Krea Dev)", + "inputNode": { + "id": -10, + "bounding": [ + -1050, + 426, + 120, + 200 + ] + }, + "outputNode": { + "id": -20, + "bounding": [ + 620, + 140, + 120, + 60 + ] + }, + "inputs": [ + { + "id": "c2515318-6e10-4ad9-9466-e6aa855bc849", + "name": "text", + "type": "STRING", + "linkIds": [ + 71 + ], + "pos": [ + -950, + 446 + ] + }, + { + "id": "09f20672-c8a3-4180-823a-5a6af0113e4f", + "name": "width", + "type": "INT", + "linkIds": [ + 72 + ], + "pos": [ + -950, + 466 + ] + }, + { + "id": "7f54c952-896e-4356-bfb2-970e1c8f2eb7", + "name": "height", + "type": "INT", + "linkIds": [ + 73 + ], + "pos": [ + -950, + 486 + ] + }, + { + "id": "e2dc1c86-2fb4-4b80-b560-f30560af1897", + "name": "seed", + "type": "INT", + "linkIds": [ + 391 + ], + "pos": [ + -950, + 506 + ] + }, + { + "id": "34b172e7-85b2-444a-9a4d-1221f272c46e", + "name": "unet_name", + "type": "COMBO", + "linkIds": [ + 392 + ], + "pos": [ + -950, 
+ 526 + ] + }, + { + "id": "073b7440-d943-4a2f-a3a1-fbdb8fcda9f9", + "name": "clip_name1", + "type": "COMBO", + "linkIds": [ + 393 + ], + "pos": [ + -950, + 546 + ] + }, + { + "id": "55c1286a-4aca-41fc-b967-ae3d3fa7bc85", + "name": "clip_name2", + "type": "COMBO", + "linkIds": [ + 394 + ], + "pos": [ + -950, + 566 + ] + }, + { + "id": "2241e4fc-9219-4be7-bf6d-3493b579ab5a", + "name": "vae_name", + "type": "COMBO", + "linkIds": [ + 395 + ], + "pos": [ + -950, + 586 + ] + } + ], + "outputs": [ + { + "id": "5310184a-f0a2-405f-9917-dd2a352a4fac", + "name": "IMAGE", + "type": "IMAGE", + "linkIds": [ + 9 + ], + "localized_name": "IMAGE", + "pos": [ + 640, + 160 + ] + } + ], + "widgets": [], + "nodes": [ + { + "id": 40, + "type": "DualCLIPLoader", + "pos": [ + -780, + 360 + ], + "size": [ + 270, + 180 + ], + "flags": {}, + "order": 5, + "mode": 0, + "inputs": [ + { + "localized_name": "clip_name1", + "name": "clip_name1", + "type": "COMBO", + "widget": { + "name": "clip_name1" + }, + "link": 393 + }, + { + "localized_name": "clip_name2", + "name": "clip_name2", + "type": "COMBO", + "widget": { + "name": "clip_name2" + }, + "link": 394 + }, + { + "localized_name": "type", + "name": "type", + "type": "COMBO", + "widget": { + "name": "type" + }, + "link": null + }, + { + "localized_name": "device", + "name": "device", + "shape": 7, + "type": "COMBO", + "widget": { + "name": "device" + }, + "link": null + } + ], + "outputs": [ + { + "localized_name": "CLIP", + "name": "CLIP", + "type": "CLIP", + "links": [ + 64 + ] + } + ], + "properties": { + "ue_properties": { + "widget_ue_connectable": {}, + "input_ue_unconnectable": {} + }, + "cnr_id": "comfy-core", + "ver": "0.3.40", + "Node name for S&R": "DualCLIPLoader", + "models": [ + { + "name": "clip_l.safetensors", + "url": "https://huggingface.co/comfyanonymous/flux_text_encoders/resolve/main/clip_l.safetensors", + "directory": "text_encoders" + }, + { + "name": "t5xxl_fp16.safetensors", + "url": "https://huggingface.co/comfyanonymous/flux_text_encoders/resolve/main/t5xxl_fp16.safetensors", + "directory": "text_encoders" + } + ] + }, + "widgets_values": [ + "clip_l.safetensors", + "t5xxl_fp16.safetensors", + "flux", + "default" + ] + }, + { + "id": 39, + "type": "VAELoader", + "pos": [ + -770, + 630 + ], + "size": [ + 240, + 110 + ], + "flags": {}, + "order": 4, + "mode": 0, + "inputs": [ + { + "localized_name": "vae_name", + "name": "vae_name", + "type": "COMBO", + "widget": { + "name": "vae_name" + }, + "link": 395 + } + ], + "outputs": [ + { + "localized_name": "VAE", + "name": "VAE", + "type": "VAE", + "links": [ + 58 + ] + } + ], + "properties": { + "ue_properties": { + "widget_ue_connectable": {}, + "input_ue_unconnectable": {} + }, + "cnr_id": "comfy-core", + "ver": "0.3.40", + "Node name for S&R": "VAELoader", + "models": [ + { + "name": "ae.safetensors", + "url": "https://huggingface.co/Comfy-Org/Lumina_Image_2.0_Repackaged/resolve/main/split_files/vae/ae.safetensors", + "directory": "vae" + } + ] + }, + "widgets_values": [ + "ae.safetensors" + ] + }, + { + "id": 38, + "type": "UNETLoader", + "pos": [ + -780, + 170 + ], + "size": [ + 270, + 110 + ], + "flags": {}, + "order": 3, + "mode": 0, + "inputs": [ + { + "localized_name": "unet_name", + "name": "unet_name", + "type": "COMBO", + "widget": { + "name": "unet_name" + }, + "link": 392 + }, + { + "localized_name": "weight_dtype", + "name": "weight_dtype", + "type": "COMBO", + "widget": { + "name": "weight_dtype" + }, + "link": null + } + ], + "outputs": [ + { + "localized_name": "MODEL", + 
"name": "MODEL", + "type": "MODEL", + "links": [ + 61 + ] + } + ], + "properties": { + "ue_properties": { + "widget_ue_connectable": {}, + "input_ue_unconnectable": {} + }, + "cnr_id": "comfy-core", + "ver": "0.3.40", + "Node name for S&R": "UNETLoader", + "models": [ + { + "name": "flux1-krea-dev_fp8_scaled.safetensors", + "url": "https://huggingface.co/Comfy-Org/FLUX.1-Krea-dev_ComfyUI/resolve/main/split_files/diffusion_models/flux1-krea-dev_fp8_scaled.safetensors", + "directory": "diffusion_models" + } + ] + }, + "widgets_values": [ + "flux1-krea-dev_fp8_scaled.safetensors", + "default" + ] + }, + { + "id": 195, + "type": "CLIPTextEncode", + "pos": [ + -440, + 180 + ], + "size": [ + 330, + 210 + ], + "flags": {}, + "order": 7, + "mode": 0, + "inputs": [ + { + "localized_name": "clip", + "name": "clip", + "type": "CLIP", + "link": 64 + }, + { + "localized_name": "text", + "name": "text", + "type": "STRING", + "widget": { + "name": "text" + }, + "link": 71 + } + ], + "outputs": [ + { + "localized_name": "CONDITIONING", + "name": "CONDITIONING", + "type": "CONDITIONING", + "links": [ + 65, + 66 + ] + } + ], + "properties": { + "ue_properties": { + "widget_ue_connectable": {}, + "input_ue_unconnectable": {} + }, + "cnr_id": "comfy-core", + "ver": "0.3.47", + "Node name for S&R": "CLIPTextEncode" + }, + "widgets_values": [ + "" + ] + }, + { + "id": 27, + "type": "EmptySD3LatentImage", + "pos": [ + -390, + 650 + ], + "size": [ + 270, + 170 + ], + "flags": {}, + "order": 1, + "mode": 0, + "inputs": [ + { + "localized_name": "width", + "name": "width", + "type": "INT", + "widget": { + "name": "width" + }, + "link": 72 + }, + { + "localized_name": "height", + "name": "height", + "type": "INT", + "widget": { + "name": "height" + }, + "link": 73 + }, + { + "localized_name": "batch_size", + "name": "batch_size", + "type": "INT", + "widget": { + "name": "batch_size" + }, + "link": null + } + ], + "outputs": [ + { + "localized_name": "LATENT", + "name": "LATENT", + "type": "LATENT", + "slot_index": 0, + "links": [ + 51 + ] + } + ], + "properties": { + "ue_properties": { + "widget_ue_connectable": {}, + "input_ue_unconnectable": {} + }, + "cnr_id": "comfy-core", + "ver": "0.3.40", + "Node name for S&R": "EmptySD3LatentImage" + }, + "widgets_values": [ + 1024, + 1024, + 1 + ] + }, + { + "id": 31, + "type": "KSampler", + "pos": [ + 0, + 130 + ], + "size": [ + 320, + 350 + ], + "flags": {}, + "order": 2, + "mode": 0, + "inputs": [ + { + "localized_name": "model", + "name": "model", + "type": "MODEL", + "link": 61 + }, + { + "localized_name": "positive", + "name": "positive", + "type": "CONDITIONING", + "link": 65 + }, + { + "localized_name": "negative", + "name": "negative", + "type": "CONDITIONING", + "link": 63 + }, + { + "localized_name": "latent_image", + "name": "latent_image", + "type": "LATENT", + "link": 51 + }, + { + "localized_name": "seed", + "name": "seed", + "type": "INT", + "widget": { + "name": "seed" + }, + "link": 391 + }, + { + "localized_name": "steps", + "name": "steps", + "type": "INT", + "widget": { + "name": "steps" + }, + "link": null + }, + { + "localized_name": "cfg", + "name": "cfg", + "type": "FLOAT", + "widget": { + "name": "cfg" + }, + "link": null + }, + { + "localized_name": "sampler_name", + "name": "sampler_name", + "type": "COMBO", + "widget": { + "name": "sampler_name" + }, + "link": null + }, + { + "localized_name": "scheduler", + "name": "scheduler", + "type": "COMBO", + "widget": { + "name": "scheduler" + }, + "link": null + }, + { + "localized_name": "denoise", + 
"name": "denoise", + "type": "FLOAT", + "widget": { + "name": "denoise" + }, + "link": null + } + ], + "outputs": [ + { + "localized_name": "LATENT", + "name": "LATENT", + "type": "LATENT", + "slot_index": 0, + "links": [ + 52 + ] + } + ], + "properties": { + "ue_properties": { + "widget_ue_connectable": {}, + "input_ue_unconnectable": {} + }, + "cnr_id": "comfy-core", + "ver": "0.3.40", + "Node name for S&R": "KSampler" + }, + "widgets_values": [ + 0, + "randomize", + 20, + 1, + "euler", + "simple", + 1 + ] + }, + { + "id": 8, + "type": "VAEDecode", + "pos": [ + 340, + 140 + ], + "size": [ + 230, + 100 + ], + "flags": { + "collapsed": false + }, + "order": 0, + "mode": 0, + "inputs": [ + { + "localized_name": "samples", + "name": "samples", + "type": "LATENT", + "link": 52 + }, + { + "localized_name": "vae", + "name": "vae", + "type": "VAE", + "link": 58 + } + ], + "outputs": [ + { + "localized_name": "IMAGE", + "name": "IMAGE", + "type": "IMAGE", + "slot_index": 0, + "links": [ + 9 + ] + } + ], + "properties": { + "ue_properties": { + "widget_ue_connectable": {}, + "input_ue_unconnectable": {} + }, + "cnr_id": "comfy-core", + "ver": "0.3.40", + "Node name for S&R": "VAEDecode" + } + }, + { + "id": 42, + "type": "ConditioningZeroOut", + "pos": [ + -340, + 430 + ], + "size": [ + 230, + 80 + ], + "flags": { + "collapsed": false + }, + "order": 6, + "mode": 0, + "inputs": [ + { + "localized_name": "conditioning", + "name": "conditioning", + "type": "CONDITIONING", + "link": 66 + } + ], + "outputs": [ + { + "localized_name": "CONDITIONING", + "name": "CONDITIONING", + "type": "CONDITIONING", + "links": [ + 63 + ] + } + ], + "properties": { + "ue_properties": { + "widget_ue_connectable": {}, + "input_ue_unconnectable": {} + }, + "cnr_id": "comfy-core", + "ver": "0.3.40", + "Node name for S&R": "ConditioningZeroOut" + } + } + ], + "groups": [ + { + "id": 1, + "title": "Model", + "bounding": [ + -800, + 90, + 310, + 750 + ], + "color": "#3f789e", + "font_size": 24, + "flags": {} + }, + { + "id": 2, + "title": "Image Size", + "bounding": [ + -460, + 560, + 400, + 280 + ], + "color": "#3f789e", + "font_size": 24, + "flags": {} + }, + { + "id": 3, + "title": "Prompt", + "bounding": [ + -460, + 90, + 400, + 440 + ], + "color": "#3f789e", + "font_size": 24, + "flags": {} + } + ], + "links": [ + { + "id": 66, + "origin_id": 195, + "origin_slot": 0, + "target_id": 42, + "target_slot": 0, + "type": "CONDITIONING" + }, + { + "id": 52, + "origin_id": 31, + "origin_slot": 0, + "target_id": 8, + "target_slot": 0, + "type": "LATENT" + }, + { + "id": 58, + "origin_id": 39, + "origin_slot": 0, + "target_id": 8, + "target_slot": 1, + "type": "VAE" + }, + { + "id": 61, + "origin_id": 38, + "origin_slot": 0, + "target_id": 31, + "target_slot": 0, + "type": "MODEL" + }, + { + "id": 65, + "origin_id": 195, + "origin_slot": 0, + "target_id": 31, + "target_slot": 1, + "type": "CONDITIONING" + }, + { + "id": 63, + "origin_id": 42, + "origin_slot": 0, + "target_id": 31, + "target_slot": 2, + "type": "CONDITIONING" + }, + { + "id": 51, + "origin_id": 27, + "origin_slot": 0, + "target_id": 31, + "target_slot": 3, + "type": "LATENT" + }, + { + "id": 64, + "origin_id": 40, + "origin_slot": 0, + "target_id": 195, + "target_slot": 0, + "type": "CLIP" + }, + { + "id": 9, + "origin_id": 8, + "origin_slot": 0, + "target_id": -20, + "target_slot": 0, + "type": "IMAGE" + }, + { + "id": 71, + "origin_id": -10, + "origin_slot": 0, + "target_id": 195, + "target_slot": 1, + "type": "STRING" + }, + { + "id": 72, + "origin_id": -10, + 
"origin_slot": 1, + "target_id": 27, + "target_slot": 0, + "type": "INT" + }, + { + "id": 73, + "origin_id": -10, + "origin_slot": 2, + "target_id": 27, + "target_slot": 1, + "type": "INT" + }, + { + "id": 391, + "origin_id": -10, + "origin_slot": 3, + "target_id": 31, + "target_slot": 4, + "type": "INT" + }, + { + "id": 392, + "origin_id": -10, + "origin_slot": 4, + "target_id": 38, + "target_slot": 0, + "type": "COMBO" + }, + { + "id": 393, + "origin_id": -10, + "origin_slot": 5, + "target_id": 40, + "target_slot": 0, + "type": "COMBO" + }, + { + "id": 394, + "origin_id": -10, + "origin_slot": 6, + "target_id": 40, + "target_slot": 1, + "type": "COMBO" + }, + { + "id": 395, + "origin_id": -10, + "origin_slot": 7, + "target_id": 39, + "target_slot": 0, + "type": "COMBO" + } + ], + "extra": { + "workflowRendererVersion": "LG" + }, + "category": "Image generation and editing/Text to image" + } + ] + }, + "extra": { + "ds": { + "scale": 0.735584459955559, + "offset": [ + 1936.5815687336737, + 303.78330847702625 + ] + }, + "ue_links": [] + } +} \ No newline at end of file diff --git a/blueprints/Text to Image (NetaYume Lumina).json b/blueprints/Text to Image (NetaYume Lumina).json new file mode 100644 index 000000000..394ad1608 --- /dev/null +++ b/blueprints/Text to Image (NetaYume Lumina).json @@ -0,0 +1,1468 @@ +{ + "revision": 0, + "last_node_id": 219, + "last_link_id": 0, + "nodes": [ + { + "id": 219, + "type": "fc9485c9-2acd-482e-94f1-b5fa702f2536", + "pos": [ + -1900, + 2330 + ], + "size": [ + 400, + 540 + ], + "flags": {}, + "order": 3, + "mode": 0, + "inputs": [ + { + "name": "value", + "type": "STRING", + "widget": { + "name": "value" + }, + "link": null + }, + { + "name": "width", + "type": "INT", + "widget": { + "name": "width" + }, + "link": null + }, + { + "name": "height", + "type": "INT", + "widget": { + "name": "height" + }, + "link": null + }, + { + "name": "seed", + "type": "INT", + "widget": { + "name": "seed" + }, + "link": null + }, + { + "name": "ckpt_name", + "type": "COMBO", + "widget": { + "name": "ckpt_name" + }, + "link": null + } + ], + "outputs": [ + { + "localized_name": "IMAGE", + "name": "IMAGE", + "type": "IMAGE", + "links": [] + } + ], + "properties": { + "proxyWidgets": [ + [ + "62", + "value" + ], + [ + "53", + "width" + ], + [ + "53", + "height" + ], + [ + "55", + "seed" + ], + [ + "56", + "ckpt_name" + ], + [ + "55", + "control_after_generate" + ] + ], + "cnr_id": "comfy-core", + "ver": "0.18.1", + "ue_properties": { + "widget_ue_connectable": {}, + "input_ue_unconnectable": {} + } + }, + "widgets_values": [], + "title": "Text to Image (NetaYume Lumina)" + } + ], + "links": [], + "version": 0.4, + "definitions": { + "subgraphs": [ + { + "id": "fc9485c9-2acd-482e-94f1-b5fa702f2536", + "version": 1, + "state": { + "lastGroupId": 8, + "lastNodeId": 219, + "lastLinkId": 395, + "lastRerouteId": 0 + }, + "revision": 0, + "config": {}, + "name": "Text to Image (NetaYume Lumina)", + "inputNode": { + "id": -10, + "bounding": [ + -600, + 90, + 120, + 140 + ] + }, + "outputNode": { + "id": -20, + "bounding": [ + 1740.333330193419, + 286.3333328495138, + 120, + 60 + ] + }, + "inputs": [ + { + "id": "b80a1e0c-e8a6-4c4f-8eb1-825cb7e4fdcf", + "name": "value", + "type": "STRING", + "linkIds": [ + 36 + ], + "pos": [ + -500, + 110 + ] + }, + { + "id": "6583bb32-7cff-4921-a771-1f0dcdf779e6", + "name": "width", + "type": "INT", + "linkIds": [ + 39 + ], + "pos": [ + -500, + 130 + ] + }, + { + "id": "c486937a-46c0-431b-8775-057897843cbd", + "name": "height", + "type": "INT", + 
"linkIds": [ + 40 + ], + "pos": [ + -500, + 150 + ] + }, + { + "id": "9c85c0cc-c906-405a-a4d9-43b93c47cb53", + "name": "seed", + "type": "INT", + "linkIds": [ + 42 + ], + "pos": [ + -500, + 170 + ] + }, + { + "id": "f7e288ec-fa1f-4a1d-b721-6b605de9cb51", + "name": "ckpt_name", + "type": "COMBO", + "linkIds": [ + 43 + ], + "pos": [ + -500, + 190 + ] + } + ], + "outputs": [ + { + "id": "ea4b872b-a294-4cbf-99a9-70e55c0f8b3e", + "name": "IMAGE", + "type": "IMAGE", + "linkIds": [ + 16 + ], + "localized_name": "IMAGE", + "pos": [ + 1760.333330193419, + 306.3333328495138 + ] + } + ], + "widgets": [], + "nodes": [ + { + "id": 53, + "type": "EmptySD3LatentImage", + "pos": [ + -220, + 370 + ], + "size": [ + 320, + 170 + ], + "flags": {}, + "order": 2, + "mode": 0, + "inputs": [ + { + "localized_name": "width", + "name": "width", + "type": "INT", + "widget": { + "name": "width" + }, + "link": 39 + }, + { + "localized_name": "height", + "name": "height", + "type": "INT", + "widget": { + "name": "height" + }, + "link": 40 + }, + { + "localized_name": "batch_size", + "name": "batch_size", + "type": "INT", + "widget": { + "name": "batch_size" + }, + "link": null + } + ], + "outputs": [ + { + "localized_name": "LATENT", + "name": "LATENT", + "type": "LATENT", + "slot_index": 0, + "links": [ + 17 + ] + } + ], + "properties": { + "cnr_id": "comfy-core", + "ver": "0.3.64", + "ue_properties": { + "widget_ue_connectable": {}, + "input_ue_unconnectable": {} + }, + "Node name for S&R": "EmptySD3LatentImage" + }, + "widgets_values": [ + 1024, + 1024, + 1 + ] + }, + { + "id": 54, + "type": "ModelSamplingAuraFlow", + "pos": [ + 650, + 40 + ], + "size": [ + 310, + 110 + ], + "flags": {}, + "order": 3, + "mode": 0, + "inputs": [ + { + "localized_name": "model", + "name": "model", + "type": "MODEL", + "link": 12 + }, + { + "localized_name": "shift", + "name": "shift", + "type": "FLOAT", + "widget": { + "name": "shift" + }, + "link": null + } + ], + "outputs": [ + { + "localized_name": "MODEL", + "name": "MODEL", + "type": "MODEL", + "slot_index": 0, + "links": [ + 13 + ] + } + ], + "properties": { + "cnr_id": "comfy-core", + "ver": "0.3.64", + "ue_properties": { + "widget_ue_connectable": {}, + "input_ue_unconnectable": {} + }, + "Node name for S&R": "ModelSamplingAuraFlow" + }, + "widgets_values": [ + 4 + ] + }, + { + "id": 55, + "type": "KSampler", + "pos": [ + 650, + 200 + ], + "size": [ + 320, + 350 + ], + "flags": {}, + "order": 4, + "mode": 0, + "inputs": [ + { + "localized_name": "model", + "name": "model", + "type": "MODEL", + "link": 13 + }, + { + "localized_name": "positive", + "name": "positive", + "type": "CONDITIONING", + "link": 32 + }, + { + "localized_name": "negative", + "name": "negative", + "type": "CONDITIONING", + "link": 23 + }, + { + "localized_name": "latent_image", + "name": "latent_image", + "type": "LATENT", + "link": 17 + }, + { + "localized_name": "seed", + "name": "seed", + "type": "INT", + "widget": { + "name": "seed" + }, + "link": 42 + }, + { + "localized_name": "steps", + "name": "steps", + "type": "INT", + "widget": { + "name": "steps" + }, + "link": null + }, + { + "localized_name": "cfg", + "name": "cfg", + "type": "FLOAT", + "widget": { + "name": "cfg" + }, + "link": null + }, + { + "localized_name": "sampler_name", + "name": "sampler_name", + "type": "COMBO", + "widget": { + "name": "sampler_name" + }, + "link": null + }, + { + "localized_name": "scheduler", + "name": "scheduler", + "type": "COMBO", + "widget": { + "name": "scheduler" + }, + "link": null + }, + { + 
"localized_name": "denoise", + "name": "denoise", + "type": "FLOAT", + "widget": { + "name": "denoise" + }, + "link": null + } + ], + "outputs": [ + { + "localized_name": "LATENT", + "name": "LATENT", + "type": "LATENT", + "slot_index": 0, + "links": [ + 14 + ] + } + ], + "properties": { + "cnr_id": "comfy-core", + "ver": "0.3.64", + "ue_properties": { + "widget_ue_connectable": {}, + "input_ue_unconnectable": {} + }, + "Node name for S&R": "KSampler" + }, + "widgets_values": [ + 0, + "randomize", + 30, + 4, + "res_multistep", + "simple", + 1 + ] + }, + { + "id": 56, + "type": "CheckpointLoaderSimple", + "pos": [ + -220, + 70 + ], + "size": [ + 320, + 160 + ], + "flags": {}, + "order": 5, + "mode": 0, + "inputs": [ + { + "localized_name": "ckpt_name", + "name": "ckpt_name", + "type": "COMBO", + "widget": { + "name": "ckpt_name" + }, + "link": 43 + } + ], + "outputs": [ + { + "localized_name": "MODEL", + "name": "MODEL", + "type": "MODEL", + "slot_index": 0, + "links": [ + 12 + ] + }, + { + "localized_name": "CLIP", + "name": "CLIP", + "type": "CLIP", + "slot_index": 1, + "links": [ + 22, + 35 + ] + }, + { + "localized_name": "VAE", + "name": "VAE", + "type": "VAE", + "slot_index": 2, + "links": [ + 8 + ] + } + ], + "properties": { + "cnr_id": "comfy-core", + "ver": "0.3.64", + "ue_properties": { + "widget_ue_connectable": {}, + "input_ue_unconnectable": {} + }, + "Node name for S&R": "CheckpointLoaderSimple", + "models": [ + { + "name": "NetaYumev35_pretrained_all_in_one.safetensors", + "url": "https://huggingface.co/duongve/NetaYume-Lumina-Image-2.0/resolve/main/NetaYumev35_pretrained_all_in_one.safetensors", + "directory": "checkpoints" + } + ] + }, + "widgets_values": [ + "NetaYumev35_pretrained_all_in_one.safetensors" + ] + }, + { + "id": 57, + "type": "a07fdf06-1bda-4dac-bdbd-63ee8ebca1c9", + "pos": [ + 180, + 360 + ], + "size": [ + 400, + 140 + ], + "flags": {}, + "order": 6, + "mode": 0, + "inputs": [ + { + "localized_name": "clip", + "name": "clip", + "type": "CLIP", + "link": 22 + }, + { + "name": "value", + "type": "STRING", + "widget": { + "name": "value" + }, + "link": null + } + ], + "outputs": [ + { + "localized_name": "CONDITIONING", + "name": "CONDITIONING", + "type": "CONDITIONING", + "links": [ + 23 + ] + } + ], + "properties": { + "proxyWidgets": [ + [ + "218", + "value" + ] + ], + "cnr_id": "comfy-core", + "ver": "0.18.1", + "ue_properties": { + "widget_ue_connectable": {}, + "input_ue_unconnectable": {} + } + }, + "widgets_values": [], + "color": "#223", + "bgcolor": "#335" + }, + { + "id": 217, + "type": "VAEDecode", + "pos": [ + 1040, + 210 + ], + "size": [ + 230, + 100 + ], + "flags": { + "collapsed": false + }, + "order": 10, + "mode": 0, + "inputs": [ + { + "localized_name": "samples", + "name": "samples", + "type": "LATENT", + "link": 14 + }, + { + "localized_name": "vae", + "name": "vae", + "type": "VAE", + "link": 8 + } + ], + "outputs": [ + { + "localized_name": "IMAGE", + "name": "IMAGE", + "type": "IMAGE", + "slot_index": 0, + "links": [ + 16 + ] + } + ], + "properties": { + "cnr_id": "comfy-core", + "ver": "0.3.64", + "ue_properties": { + "widget_ue_connectable": {}, + "input_ue_unconnectable": {} + }, + "Node name for S&R": "VAEDecode" + } + }, + { + "id": 59, + "type": "MarkdownNote", + "pos": [ + 640, + -390 + ], + "size": [ + 370, + 280 + ], + "flags": {}, + "order": 0, + "mode": 0, + "inputs": [], + "outputs": [], + "title": "Note: Prompt", + "properties": {}, + "widgets_values": [ + "Check the prompt book 
[here](https://nieta-art.feishu.cn/wiki/RY3GwpT59icIQlkWXEfcCqIMnQd)\n\nYou should keep the prefix part fixed until the **Prompt Start** tag\n\n@whatever in the prompt is for artist tags, such as @comfyanonymous\n\nYou can find more artist tags [here](https://gumgum10.github.io/gumgum.github.io/)\n" + ], + "color": "#222", + "bgcolor": "#000" + }, + { + "id": 60, + "type": "StringConcatenate", + "pos": [ + 170, + -370 + ], + "size": [ + 400, + 250 + ], + "flags": { + "collapsed": true + }, + "order": 7, + "mode": 0, + "inputs": [ + { + "localized_name": "string_a", + "name": "string_a", + "type": "STRING", + "widget": { + "name": "string_a" + }, + "link": 30 + }, + { + "localized_name": "string_b", + "name": "string_b", + "type": "STRING", + "widget": { + "name": "string_b" + }, + "link": 31 + }, + { + "localized_name": "delimiter", + "name": "delimiter", + "type": "STRING", + "widget": { + "name": "delimiter" + }, + "link": null + } + ], + "outputs": [ + { + "localized_name": "STRING", + "name": "STRING", + "type": "STRING", + "links": [ + 34 + ] + } + ], + "properties": { + "cnr_id": "comfy-core", + "ver": "0.3.70", + "ue_properties": { + "widget_ue_connectable": {}, + "input_ue_unconnectable": {} + }, + "Node name for S&R": "StringConcatenate" + }, + "widgets_values": [ + "", + "", + "" + ] + }, + { + "id": 61, + "type": "CLIPTextEncode", + "pos": [ + 170, + 60 + ], + "size": [ + 430, + 190 + ], + "flags": {}, + "order": 8, + "mode": 0, + "inputs": [ + { + "localized_name": "clip", + "name": "clip", + "type": "CLIP", + "link": 35 + }, + { + "localized_name": "text", + "name": "text", + "type": "STRING", + "widget": { + "name": "text" + }, + "link": 34 + } + ], + "outputs": [ + { + "localized_name": "CONDITIONING", + "name": "CONDITIONING", + "type": "CONDITIONING", + "slot_index": 0, + "links": [ + 32 + ] + } + ], + "title": "CLIP Text Encode (Positive Prompt)", + "properties": { + "cnr_id": "comfy-core", + "ver": "0.3.64", + "ue_properties": { + "widget_ue_connectable": {}, + "input_ue_unconnectable": {} + }, + "Node name for S&R": "CLIPTextEncode" + }, + "widgets_values": [ + "" + ], + "color": "#232", + "bgcolor": "#353" + }, + { + "id": 62, + "type": "PrimitiveStringMultiline", + "pos": [ + -240, + -210 + ], + "size": [ + 370, + 140 + ], + "flags": {}, + "order": 9, + "mode": 0, + "inputs": [ + { + "localized_name": "value", + "name": "value", + "type": "STRING", + "widget": { + "name": "value" + }, + "link": 36 + } + ], + "outputs": [ + { + "localized_name": "STRING", + "name": "STRING", + "type": "STRING", + "links": [ + 31 + ] + } + ], + "title": "Prompt", + "properties": { + "cnr_id": "comfy-core", + "ver": "0.3.70", + "ue_properties": { + "widget_ue_connectable": {}, + "input_ue_unconnectable": {} + }, + "Node name for S&R": "PrimitiveStringMultiline" + }, + "widgets_values": [ + "" + ] + }, + { + "id": 63, + "type": "PrimitiveStringMultiline", + "pos": [ + -240, + -390 + ], + "size": [ + 370, + 140 + ], + "flags": {}, + "order": 1, + "mode": 0, + "inputs": [ + { + "localized_name": "value", + "name": "value", + "type": "STRING", + "widget": { + "name": "value" + }, + "link": null + } + ], + "outputs": [ + { + "localized_name": "STRING", + "name": "STRING", + "type": "STRING", + "links": [ + 30 + ] + } + ], + "title": "System prompt", + "properties": { + "cnr_id": "comfy-core", + "ver": "0.3.70", + "ue_properties": { + "widget_ue_connectable": {}, + "input_ue_unconnectable": {} + }, + "Node name for S&R": "PrimitiveStringMultiline" + }, + "widgets_values": [ + "You are an 
assistant designed to generate high quality anime images based on textual prompts. " + ] + } + ], + "groups": [ + { + "id": 1, + "title": "Model", + "bounding": [ + -250, + -30, + 370, + 280 + ], + "color": "#3f789e", + "font_size": 24, + "flags": {} + }, + { + "id": 2, + "title": "Image Size", + "bounding": [ + -250, + 280, + 370, + 290 + ], + "color": "#3f789e", + "font_size": 24, + "flags": {} + }, + { + "id": 3, + "title": "Prompt", + "bounding": [ + 150, + -30, + 460, + 600 + ], + "color": "#3f789e", + "font_size": 24, + "flags": {} + }, + { + "id": 4, + "title": "Prompt Builder", + "bounding": [ + -250, + -460, + 840, + 400 + ], + "color": "#3f789e", + "font_size": 24, + "flags": {} + } + ], + "links": [ + { + "id": 12, + "origin_id": 56, + "origin_slot": 0, + "target_id": 54, + "target_slot": 0, + "type": "MODEL" + }, + { + "id": 13, + "origin_id": 54, + "origin_slot": 0, + "target_id": 55, + "target_slot": 0, + "type": "MODEL" + }, + { + "id": 23, + "origin_id": 57, + "origin_slot": 0, + "target_id": 55, + "target_slot": 2, + "type": "CONDITIONING" + }, + { + "id": 17, + "origin_id": 53, + "origin_slot": 0, + "target_id": 55, + "target_slot": 3, + "type": "LATENT" + }, + { + "id": 14, + "origin_id": 55, + "origin_slot": 0, + "target_id": 217, + "target_slot": 0, + "type": "LATENT" + }, + { + "id": 8, + "origin_id": 56, + "origin_slot": 2, + "target_id": 217, + "target_slot": 1, + "type": "VAE" + }, + { + "id": 22, + "origin_id": 56, + "origin_slot": 1, + "target_id": 57, + "target_slot": 0, + "type": "CLIP" + }, + { + "id": 16, + "origin_id": 217, + "origin_slot": 0, + "target_id": -20, + "target_slot": 0, + "type": "IMAGE" + }, + { + "id": 30, + "origin_id": 63, + "origin_slot": 0, + "target_id": 60, + "target_slot": 0, + "type": "STRING" + }, + { + "id": 31, + "origin_id": 62, + "origin_slot": 0, + "target_id": 60, + "target_slot": 1, + "type": "STRING" + }, + { + "id": 32, + "origin_id": 61, + "origin_slot": 0, + "target_id": 55, + "target_slot": 1, + "type": "CONDITIONING" + }, + { + "id": 34, + "origin_id": 60, + "origin_slot": 0, + "target_id": 61, + "target_slot": 1, + "type": "STRING" + }, + { + "id": 35, + "origin_id": 56, + "origin_slot": 1, + "target_id": 61, + "target_slot": 0, + "type": "CLIP" + }, + { + "id": 36, + "origin_id": -10, + "origin_slot": 0, + "target_id": 62, + "target_slot": 0, + "type": "STRING" + }, + { + "id": 39, + "origin_id": -10, + "origin_slot": 1, + "target_id": 53, + "target_slot": 0, + "type": "INT" + }, + { + "id": 40, + "origin_id": -10, + "origin_slot": 2, + "target_id": 53, + "target_slot": 1, + "type": "INT" + }, + { + "id": 42, + "origin_id": -10, + "origin_slot": 3, + "target_id": 55, + "target_slot": 4, + "type": "INT" + }, + { + "id": 43, + "origin_id": -10, + "origin_slot": 4, + "target_id": 56, + "target_slot": 0, + "type": "COMBO" + } + ], + "extra": { + "workflowRendererVersion": "LG" + }, + "category": "Image generation and editing/Text to image" + }, + { + "id": "a07fdf06-1bda-4dac-bdbd-63ee8ebca1c9", + "version": 1, + "state": { + "lastGroupId": 8, + "lastNodeId": 219, + "lastLinkId": 395, + "lastRerouteId": 0 + }, + "revision": 0, + "config": {}, + "name": "CLIP Text Encode (Negative Prompt)", + "inputNode": { + "id": -10, + "bounding": [ + -150, + 675, + 120, + 80 + ] + }, + "outputNode": { + "id": -20, + "bounding": [ + 905.2780151367188, + 675, + 128.6640625, + 60 + ] + }, + "inputs": [ + { + "id": "47264a97-6fc9-454d-920f-b8a43fee0489", + "name": "clip", + "type": "CLIP", + "linkIds": [ + 5 + ], + "localized_name": "clip", 
+ "pos": [ + -50, + 695 + ] + }, + { + "id": "7cdb7919-1dad-4bd2-928d-c543c3fd712e", + "name": "value", + "type": "STRING", + "linkIds": [ + 22 + ], + "pos": [ + -50, + 715 + ] + } + ], + "outputs": [ + { + "id": "c3f17ad9-6954-4333-bf8e-e1cf886c351b", + "name": "CONDITIONING", + "type": "CONDITIONING", + "linkIds": [ + 6 + ], + "localized_name": "CONDITIONING", + "pos": [ + 925.2780151367188, + 695 + ] + } + ], + "widgets": [], + "nodes": [ + { + "id": 64, + "type": "StringConcatenate", + "pos": [ + 420, + 720 + ], + "size": [ + 400, + 200 + ], + "flags": {}, + "order": 1, + "mode": 0, + "inputs": [ + { + "localized_name": "string_a", + "name": "string_a", + "type": "STRING", + "widget": { + "name": "string_a" + }, + "link": 19 + }, + { + "localized_name": "string_b", + "name": "string_b", + "type": "STRING", + "widget": { + "name": "string_b" + }, + "link": 20 + }, + { + "localized_name": "delimiter", + "name": "delimiter", + "type": "STRING", + "widget": { + "name": "delimiter" + }, + "link": null + } + ], + "outputs": [ + { + "localized_name": "STRING", + "name": "STRING", + "type": "STRING", + "links": [ + 21 + ] + } + ], + "properties": { + "cnr_id": "comfy-core", + "ver": "0.3.70", + "ue_properties": { + "widget_ue_connectable": {}, + "input_ue_unconnectable": {} + }, + "Node name for S&R": "StringConcatenate" + }, + "widgets_values": [ + "", + "", + "" + ] + }, + { + "id": 65, + "type": "PrimitiveStringMultiline", + "pos": [ + 30, + 720 + ], + "size": [ + 370, + 130 + ], + "flags": {}, + "order": 0, + "mode": 0, + "inputs": [ + { + "localized_name": "value", + "name": "value", + "type": "STRING", + "widget": { + "name": "value" + }, + "link": null + } + ], + "outputs": [ + { + "localized_name": "STRING", + "name": "STRING", + "type": "STRING", + "links": [ + 19 + ] + } + ], + "title": "System prompt", + "properties": { + "cnr_id": "comfy-core", + "ver": "0.3.70", + "ue_properties": { + "widget_ue_connectable": {}, + "input_ue_unconnectable": {} + }, + "Node name for S&R": "PrimitiveStringMultiline" + }, + "widgets_values": [ + "You are an assistant designed to generate low-quality images based on textual prompts " + ] + }, + { + "id": 218, + "type": "PrimitiveStringMultiline", + "pos": [ + 30, + 900 + ], + "size": [ + 370, + 130 + ], + "flags": {}, + "order": 3, + "mode": 0, + "inputs": [ + { + "localized_name": "value", + "name": "value", + "type": "STRING", + "widget": { + "name": "value" + }, + "link": 22 + } + ], + "outputs": [ + { + "localized_name": "STRING", + "name": "STRING", + "type": "STRING", + "links": [ + 20 + ] + } + ], + "title": "System prompt", + "properties": { + "cnr_id": "comfy-core", + "ver": "0.3.70", + "ue_properties": { + "widget_ue_connectable": {}, + "input_ue_unconnectable": {} + }, + "Node name for S&R": "PrimitiveStringMultiline" + }, + "widgets_values": [ + "blurry, worst quality, low quality, jpeg artifacts, signature, watermark, username, error, deformed hands, bad anatomy, extra limbs, poorly drawn hands, poorly drawn face, mutation, deformed, extra eyes, extra arms, extra legs, malformed limbs, fused fingers, too many fingers, long neck, cross-eyed, bad proportions, missing arms, missing legs, extra digit, fewer digits, cropped" + ] + }, + { + "id": 67, + "type": "CLIPTextEncode", + "pos": [ + 420, + 410 + ], + "size": [ + 430, + 190 + ], + "flags": {}, + "order": 2, + "mode": 0, + "inputs": [ + { + "localized_name": "clip", + "name": "clip", + "type": "CLIP", + "link": 5 + }, + { + "localized_name": "text", + "name": "text", + "type": "STRING", + 
"widget": { + "name": "text" + }, + "link": 21 + } + ], + "outputs": [ + { + "localized_name": "CONDITIONING", + "name": "CONDITIONING", + "type": "CONDITIONING", + "slot_index": 0, + "links": [ + 6 + ] + } + ], + "title": "CLIP Text Encode (Negative Prompt)", + "properties": { + "cnr_id": "comfy-core", + "ver": "0.3.64", + "ue_properties": { + "widget_ue_connectable": {}, + "input_ue_unconnectable": {} + }, + "Node name for S&R": "CLIPTextEncode" + }, + "widgets_values": [ + "You are an assistant designed to generate low-quality images based on textual prompts blurry, worst quality, low quality, jpeg artifacts, signature, watermark, username, error, deformed hands, bad anatomy, extra limbs, poorly drawn hands, poorly drawn face, mutation, deformed, extra eyes, extra arms, extra legs, malformed limbs, fused fingers, too many fingers, long neck, cross-eyed, bad proportions, missing arms, missing legs, extra digit, fewer digits, cropped" + ], + "color": "#223", + "bgcolor": "#335" + } + ], + "groups": [], + "links": [ + { + "id": 19, + "origin_id": 65, + "origin_slot": 0, + "target_id": 64, + "target_slot": 0, + "type": "STRING" + }, + { + "id": 20, + "origin_id": 218, + "origin_slot": 0, + "target_id": 64, + "target_slot": 1, + "type": "STRING" + }, + { + "id": 21, + "origin_id": 64, + "origin_slot": 0, + "target_id": 67, + "target_slot": 1, + "type": "STRING" + }, + { + "id": 5, + "origin_id": -10, + "origin_slot": 0, + "target_id": 67, + "target_slot": 0, + "type": "CLIP" + }, + { + "id": 6, + "origin_id": 67, + "origin_slot": 0, + "target_id": -20, + "target_slot": 0, + "type": "CONDITIONING" + }, + { + "id": 22, + "origin_id": -10, + "origin_slot": 1, + "target_id": 218, + "target_slot": 0, + "type": "STRING" + } + ], + "extra": { + "workflowRendererVersion": "LG" + } + } + ] + }, + "extra": { + "ue_links": [] + } +} \ No newline at end of file diff --git a/blueprints/Text to Image (Qwen-Image 2512).json b/blueprints/Text to Image (Qwen-Image 2512).json new file mode 100644 index 000000000..f52ea2ef2 --- /dev/null +++ b/blueprints/Text to Image (Qwen-Image 2512).json @@ -0,0 +1,1951 @@ +{ + "revision": 0, + "last_node_id": 263, + "last_link_id": 0, + "nodes": [ + { + "id": 263, + "type": "fd6ee5f8-a0a9-487a-8b44-8cb65957532a", + "pos": [ + 750, + 760 + ], + "size": [ + 400, + 0 + ], + "flags": {}, + "order": 2, + "mode": 0, + "inputs": [ + { + "name": "text", + "type": "STRING", + "widget": { + "name": "text" + }, + "link": null + }, + { + "name": "width", + "type": "INT", + "widget": { + "name": "width" + }, + "link": null + }, + { + "name": "height", + "type": "INT", + "widget": { + "name": "height" + }, + "link": null + }, + { + "label": "enable_turbo_mode", + "name": "value", + "type": "BOOLEAN", + "widget": { + "name": "value" + }, + "link": null + }, + { + "name": "seed", + "type": "INT", + "widget": { + "name": "seed" + }, + "link": null + }, + { + "name": "unet_name", + "type": "COMBO", + "widget": { + "name": "unet_name" + }, + "link": null + }, + { + "name": "clip_name", + "type": "COMBO", + "widget": { + "name": "clip_name" + }, + "link": null + }, + { + "name": "vae_name", + "type": "COMBO", + "widget": { + "name": "vae_name" + }, + "link": null + }, + { + "name": "lora_name", + "type": "COMBO", + "widget": { + "name": "lora_name" + }, + "link": null + } + ], + "outputs": [ + { + "localized_name": "IMAGE", + "name": "IMAGE", + "type": "IMAGE", + "links": [] + } + ], + "properties": { + "proxyWidgets": [ + [ + "249", + "text" + ], + [ + "252", + "width" + ], + [ + "252", + 
"height" + ], + [ + "256", + "value" + ], + [ + "253", + "seed" + ], + [ + "248", + "unet_name" + ], + [ + "245", + "clip_name" + ], + [ + "246", + "vae_name" + ], + [ + "259", + "lora_name" + ] + ], + "ue_properties": { + "widget_ue_connectable": { + "value": true + }, + "version": "7.7", + "input_ue_unconnectable": {} + }, + "cnr_id": "comfy-core", + "ver": "0.16.4", + "enableTabs": false, + "tabWidth": 65, + "tabXOffset": 10, + "hasSecondTab": false, + "secondTabText": "Send Back", + "secondTabOffset": 80, + "secondTabWidth": 65 + }, + "widgets_values": [], + "title": "Text to Image (Qwen-Image 2512)" + } + ], + "links": [], + "version": 0.4, + "definitions": { + "subgraphs": [ + { + "id": "fd6ee5f8-a0a9-487a-8b44-8cb65957532a", + "version": 1, + "state": { + "lastGroupId": 7, + "lastNodeId": 263, + "lastLinkId": 375, + "lastRerouteId": 0 + }, + "revision": 0, + "config": {}, + "name": "Text to Image (Qwen-Image 2512)", + "inputNode": { + "id": -10, + "bounding": [ + -1080, + 1480, + 151.744140625, + 220 + ] + }, + "outputNode": { + "id": -20, + "bounding": [ + 1550, + 1460, + 120, + 60 + ] + }, + "inputs": [ + { + "id": "74d26021-a723-4a90-8e33-5d805a7e5deb", + "name": "text", + "type": "STRING", + "linkIds": [ + 360 + ], + "pos": [ + -948.255859375, + 1500 + ] + }, + { + "id": "b55f69e6-c7cb-4641-9e1f-2cb1c1942ed0", + "name": "width", + "type": "INT", + "linkIds": [ + 361 + ], + "pos": [ + -948.255859375, + 1520 + ] + }, + { + "id": "3e80284d-aba3-43cd-ab7b-ac2a619ef18c", + "name": "height", + "type": "INT", + "linkIds": [ + 362 + ], + "pos": [ + -948.255859375, + 1540 + ] + }, + { + "id": "de06e137-6cec-4cb3-a6bb-737022310a7b", + "name": "value", + "type": "BOOLEAN", + "linkIds": [ + 370 + ], + "label": "enable_turbo_mode", + "pos": [ + -948.255859375, + 1560 + ] + }, + { + "id": "9e500dee-a5b9-481b-ac46-64bab4bd3530", + "name": "seed", + "type": "INT", + "linkIds": [ + 371 + ], + "pos": [ + -948.255859375, + 1580 + ] + }, + { + "id": "33422b12-24e5-41c6-96fc-f9a8dadd5d94", + "name": "unet_name", + "type": "COMBO", + "linkIds": [ + 372 + ], + "pos": [ + -948.255859375, + 1600 + ] + }, + { + "id": "5cf753e4-236e-468e-9a06-6b8e238badc8", + "name": "clip_name", + "type": "COMBO", + "linkIds": [ + 373 + ], + "pos": [ + -948.255859375, + 1620 + ] + }, + { + "id": "790e775c-a639-4e5f-9007-e2ee6764dc5e", + "name": "vae_name", + "type": "COMBO", + "linkIds": [ + 374 + ], + "pos": [ + -948.255859375, + 1640 + ] + }, + { + "id": "3ebed521-3fe9-4922-ae26-2483e03d9305", + "name": "lora_name", + "type": "COMBO", + "linkIds": [ + 375 + ], + "pos": [ + -948.255859375, + 1660 + ] + } + ], + "outputs": [ + { + "id": "7db1f9e2-40ee-4f9f-bb24-a0db7b96d45e", + "name": "IMAGE", + "type": "IMAGE", + "linkIds": [ + 333 + ], + "localized_name": "IMAGE", + "pos": [ + 1570, + 1480 + ] + } + ], + "widgets": [], + "nodes": [ + { + "id": 245, + "type": "CLIPLoader", + "pos": [ + -590, + 1370 + ], + "size": [ + 280, + 150 + ], + "flags": {}, + "order": 4, + "mode": 0, + "inputs": [ + { + "localized_name": "clip_name", + "name": "clip_name", + "type": "COMBO", + "widget": { + "name": "clip_name" + }, + "link": 373 + }, + { + "localized_name": "type", + "name": "type", + "type": "COMBO", + "widget": { + "name": "type" + }, + "link": null + }, + { + "localized_name": "device", + "name": "device", + "shape": 7, + "type": "COMBO", + "widget": { + "name": "device" + }, + "link": null + } + ], + "outputs": [ + { + "localized_name": "CLIP", + "name": "CLIP", + "type": "CLIP", + "slot_index": 0, + "links": [ + 314, + 315 
+ ] + } + ], + "properties": { + "ue_properties": { + "version": "7.7", + "widget_ue_connectable": {}, + "input_ue_unconnectable": {} + }, + "cnr_id": "comfy-core", + "ver": "0.3.48", + "Node name for S&R": "CLIPLoader", + "enableTabs": false, + "tabWidth": 65, + "tabXOffset": 10, + "hasSecondTab": false, + "secondTabText": "Send Back", + "secondTabOffset": 80, + "secondTabWidth": 65, + "models": [ + { + "name": "qwen_2.5_vl_7b_fp8_scaled.safetensors", + "url": "https://huggingface.co/Comfy-Org/Qwen-Image_ComfyUI/resolve/main/split_files/text_encoders/qwen_2.5_vl_7b_fp8_scaled.safetensors", + "directory": "text_encoders" + }, + { + "name": "qwen_2.5_vl_7b_fp8_scaled.safetensors", + "url": "https://huggingface.co/Comfy-Org/Qwen-Image_ComfyUI/resolve/main/split_files/text_encoders/qwen_2.5_vl_7b_fp8_scaled.safetensors", + "directory": "text_encoders" + } + ] + }, + "widgets_values": [ + "qwen_2.5_vl_7b_fp8_scaled.safetensors", + "qwen_image", + "default" + ] + }, + { + "id": 246, + "type": "VAELoader", + "pos": [ + -580, + 1620 + ], + "size": [ + 280, + 110 + ], + "flags": {}, + "order": 5, + "mode": 0, + "inputs": [ + { + "localized_name": "vae_name", + "name": "vae_name", + "type": "COMBO", + "widget": { + "name": "vae_name" + }, + "link": 374 + } + ], + "outputs": [ + { + "localized_name": "VAE", + "name": "VAE", + "type": "VAE", + "slot_index": 0, + "links": [ + 323 + ] + } + ], + "properties": { + "ue_properties": { + "version": "7.7", + "widget_ue_connectable": {}, + "input_ue_unconnectable": {} + }, + "cnr_id": "comfy-core", + "ver": "0.3.48", + "Node name for S&R": "VAELoader", + "enableTabs": false, + "tabWidth": 65, + "tabXOffset": 10, + "hasSecondTab": false, + "secondTabText": "Send Back", + "secondTabOffset": 80, + "secondTabWidth": 65, + "models": [ + { + "name": "qwen_image_vae.safetensors", + "url": "https://huggingface.co/Comfy-Org/Qwen-Image_ComfyUI/resolve/main/split_files/vae/qwen_image_vae.safetensors", + "directory": "vae" + }, + { + "name": "qwen_image_vae.safetensors", + "url": "https://huggingface.co/Comfy-Org/Qwen-Image_ComfyUI/resolve/main/split_files/vae/qwen_image_vae.safetensors", + "directory": "vae" + } + ] + }, + "widgets_values": [ + "qwen_image_vae.safetensors" + ] + }, + { + "id": 247, + "type": "ModelSamplingAuraFlow", + "pos": [ + 1040, + 1110 + ], + "size": [ + 250, + 110 + ], + "flags": {}, + "order": 6, + "mode": 0, + "inputs": [ + { + "localized_name": "model", + "name": "model", + "type": "MODEL", + "link": 367 + }, + { + "localized_name": "shift", + "name": "shift", + "type": "FLOAT", + "widget": { + "name": "shift" + }, + "link": null + } + ], + "outputs": [ + { + "localized_name": "MODEL", + "name": "MODEL", + "type": "MODEL", + "links": [ + 316 + ] + } + ], + "properties": { + "ue_properties": { + "version": "7.7", + "widget_ue_connectable": {}, + "input_ue_unconnectable": {} + }, + "cnr_id": "comfy-core", + "ver": "0.3.48", + "Node name for S&R": "ModelSamplingAuraFlow", + "enableTabs": false, + "tabWidth": 65, + "tabXOffset": 10, + "hasSecondTab": false, + "secondTabText": "Send Back", + "secondTabOffset": 80, + "secondTabWidth": 65 + }, + "widgets_values": [ + 3.1000000000000005 + ] + }, + { + "id": 248, + "type": "UNETLoader", + "pos": [ + -590, + 1140 + ], + "size": [ + 280, + 130 + ], + "flags": {}, + "order": 7, + "mode": 0, + "inputs": [ + { + "localized_name": "unet_name", + "name": "unet_name", + "type": "COMBO", + "widget": { + "name": "unet_name" + }, + "link": 372 + }, + { + "localized_name": "weight_dtype", + "name": 
"weight_dtype", + "type": "COMBO", + "widget": { + "name": "weight_dtype" + }, + "link": null + } + ], + "outputs": [ + { + "localized_name": "MODEL", + "name": "MODEL", + "type": "MODEL", + "slot_index": 0, + "links": [ + 312, + 324 + ] + } + ], + "properties": { + "ue_properties": { + "version": "7.7", + "widget_ue_connectable": {}, + "input_ue_unconnectable": {} + }, + "cnr_id": "comfy-core", + "ver": "0.3.48", + "Node name for S&R": "UNETLoader", + "enableTabs": false, + "tabWidth": 65, + "tabXOffset": 10, + "hasSecondTab": false, + "secondTabText": "Send Back", + "secondTabOffset": 80, + "secondTabWidth": 65, + "models": [ + { + "name": "qwen_image_2512_fp8_e4m3fn.safetensors", + "url": "https://huggingface.co/Comfy-Org/Qwen-Image_ComfyUI/resolve/main/split_files/diffusion_models/qwen_image_2512_fp8_e4m3fn.safetensors", + "directory": "diffusion_models" + }, + { + "name": "qwen_image_2512_fp8_e4m3fn.safetensors", + "url": "https://huggingface.co/Comfy-Org/Qwen-Image_ComfyUI/resolve/main/split_files/diffusion_models/qwen_image_2512_fp8_e4m3fn.safetensors", + "directory": "diffusion_models" + } + ] + }, + "widgets_values": [ + "qwen_image_2512_fp8_e4m3fn.safetensors", + "default" + ] + }, + { + "id": 249, + "type": "CLIPTextEncode", + "pos": [ + -200, + 1140 + ], + "size": [ + 360, + 420 + ], + "flags": {}, + "order": 8, + "mode": 0, + "inputs": [ + { + "localized_name": "clip", + "name": "clip", + "type": "CLIP", + "link": 314 + }, + { + "localized_name": "text", + "name": "text", + "type": "STRING", + "widget": { + "name": "text" + }, + "link": 360 + } + ], + "outputs": [ + { + "localized_name": "CONDITIONING", + "name": "CONDITIONING", + "type": "CONDITIONING", + "slot_index": 0, + "links": [ + 317 + ] + } + ], + "title": "CLIP Text Encode (Positive Prompt)", + "properties": { + "ue_properties": { + "version": "7.7", + "widget_ue_connectable": {}, + "input_ue_unconnectable": {} + }, + "cnr_id": "comfy-core", + "ver": "0.3.48", + "Node name for S&R": "CLIPTextEncode", + "enableTabs": false, + "tabWidth": 65, + "tabXOffset": 10, + "hasSecondTab": false, + "secondTabText": "Send Back", + "secondTabOffset": 80, + "secondTabWidth": 65 + }, + "widgets_values": [ + "" + ], + "color": "#232", + "bgcolor": "#353" + }, + { + "id": 250, + "type": "CLIPTextEncode", + "pos": [ + -200, + 1610 + ], + "size": [ + 370, + 170 + ], + "flags": {}, + "order": 9, + "mode": 0, + "inputs": [ + { + "localized_name": "clip", + "name": "clip", + "type": "CLIP", + "link": 315 + }, + { + "localized_name": "text", + "name": "text", + "type": "STRING", + "widget": { + "name": "text" + }, + "link": null + } + ], + "outputs": [ + { + "localized_name": "CONDITIONING", + "name": "CONDITIONING", + "type": "CONDITIONING", + "slot_index": 0, + "links": [ + 318 + ] + } + ], + "title": "CLIP Text Encode (Negative Prompt)", + "properties": { + "ue_properties": { + "version": "7.7", + "widget_ue_connectable": {}, + "input_ue_unconnectable": {} + }, + "cnr_id": "comfy-core", + "ver": "0.3.48", + "Node name for S&R": "CLIPTextEncode", + "enableTabs": false, + "tabWidth": 65, + "tabXOffset": 10, + "hasSecondTab": false, + "secondTabText": "Send Back", + "secondTabOffset": 80, + "secondTabWidth": 65 + }, + "widgets_values": [ + "低分辨率,低画质,肢体畸形,手指畸形,画面过饱和,蜡像感,人脸无细节,过度光滑,画面具有AI感。构图混乱。文字模糊,扭曲" + ], + "color": "#322", + "bgcolor": "#533" + }, + { + "id": 251, + "type": "VAEDecode", + "pos": [ + 1320, + 1120 + ], + "size": [ + 230, + 100 + ], + "flags": { + "collapsed": false + }, + "order": 10, + "mode": 0, + "inputs": [ + { + 
"localized_name": "samples", + "name": "samples", + "type": "LATENT", + "link": 322 + }, + { + "localized_name": "vae", + "name": "vae", + "type": "VAE", + "link": 323 + } + ], + "outputs": [ + { + "localized_name": "IMAGE", + "name": "IMAGE", + "type": "IMAGE", + "slot_index": 0, + "links": [ + 333 + ] + } + ], + "properties": { + "ue_properties": { + "version": "7.7", + "widget_ue_connectable": {}, + "input_ue_unconnectable": {} + }, + "cnr_id": "comfy-core", + "ver": "0.3.48", + "Node name for S&R": "VAEDecode", + "enableTabs": false, + "tabWidth": 65, + "tabXOffset": 10, + "hasSecondTab": false, + "secondTabText": "Send Back", + "secondTabOffset": 80, + "secondTabWidth": 65 + } + }, + { + "id": 252, + "type": "EmptySD3LatentImage", + "pos": [ + -550, + 1930 + ], + "size": [ + 230, + 170 + ], + "flags": {}, + "order": 11, + "mode": 0, + "inputs": [ + { + "localized_name": "width", + "name": "width", + "type": "INT", + "widget": { + "name": "width" + }, + "link": 361 + }, + { + "localized_name": "height", + "name": "height", + "type": "INT", + "widget": { + "name": "height" + }, + "link": 362 + }, + { + "localized_name": "batch_size", + "name": "batch_size", + "type": "INT", + "widget": { + "name": "batch_size" + }, + "link": null + } + ], + "outputs": [ + { + "localized_name": "LATENT", + "name": "LATENT", + "type": "LATENT", + "links": [ + 319 + ] + } + ], + "properties": { + "ue_properties": { + "version": "7.7", + "widget_ue_connectable": {}, + "input_ue_unconnectable": {} + }, + "cnr_id": "comfy-core", + "ver": "0.3.48", + "Node name for S&R": "EmptySD3LatentImage", + "enableTabs": false, + "tabWidth": 65, + "tabXOffset": 10, + "hasSecondTab": false, + "secondTabText": "Send Back", + "secondTabOffset": 80, + "secondTabWidth": 65 + }, + "widgets_values": [ + 1328, + 1328, + 1 + ] + }, + { + "id": 253, + "type": "KSampler", + "pos": [ + 1040, + 1250 + ], + "size": [ + 250, + 350 + ], + "flags": {}, + "order": 12, + "mode": 0, + "inputs": [ + { + "localized_name": "model", + "name": "model", + "type": "MODEL", + "link": 316 + }, + { + "localized_name": "positive", + "name": "positive", + "type": "CONDITIONING", + "link": 317 + }, + { + "localized_name": "negative", + "name": "negative", + "type": "CONDITIONING", + "link": 318 + }, + { + "localized_name": "latent_image", + "name": "latent_image", + "type": "LATENT", + "link": 319 + }, + { + "localized_name": "seed", + "name": "seed", + "type": "INT", + "widget": { + "name": "seed" + }, + "link": 371 + }, + { + "localized_name": "steps", + "name": "steps", + "type": "INT", + "widget": { + "name": "steps" + }, + "link": 368 + }, + { + "localized_name": "cfg", + "name": "cfg", + "type": "FLOAT", + "widget": { + "name": "cfg" + }, + "link": 369 + }, + { + "localized_name": "sampler_name", + "name": "sampler_name", + "type": "COMBO", + "widget": { + "name": "sampler_name" + }, + "link": null + }, + { + "localized_name": "scheduler", + "name": "scheduler", + "type": "COMBO", + "widget": { + "name": "scheduler" + }, + "link": null + }, + { + "localized_name": "denoise", + "name": "denoise", + "type": "FLOAT", + "widget": { + "name": "denoise" + }, + "link": null + } + ], + "outputs": [ + { + "localized_name": "LATENT", + "name": "LATENT", + "type": "LATENT", + "slot_index": 0, + "links": [ + 322 + ] + } + ], + "properties": { + "ue_properties": { + "version": "7.7", + "widget_ue_connectable": {}, + "input_ue_unconnectable": {} + }, + "cnr_id": "comfy-core", + "ver": "0.3.48", + "Node name for S&R": "KSampler", + "enableTabs": false, + 
"tabWidth": 65, + "tabXOffset": 10, + "hasSecondTab": false, + "secondTabText": "Send Back", + "secondTabOffset": 80, + "secondTabWidth": 65 + }, + "widgets_values": [ + 464857551335368, + "randomize", + 50, + 4, + "euler", + "simple", + 1 + ] + }, + { + "id": 254, + "type": "PrimitiveInt", + "pos": [ + 300, + 1150 + ], + "size": [ + 230, + 110 + ], + "flags": {}, + "order": 0, + "mode": 0, + "inputs": [ + { + "localized_name": "value", + "name": "value", + "type": "INT", + "widget": { + "name": "value" + }, + "link": null + } + ], + "outputs": [ + { + "localized_name": "INT", + "name": "INT", + "type": "INT", + "links": [ + 355 + ] + } + ], + "title": "Int (Steps)", + "properties": { + "ue_properties": { + "widget_ue_connectable": {}, + "version": "7.7", + "input_ue_unconnectable": {} + }, + "cnr_id": "comfy-core", + "ver": "0.12.3", + "Node name for S&R": "PrimitiveInt", + "enableTabs": false, + "tabWidth": 65, + "tabXOffset": 10, + "hasSecondTab": false, + "secondTabText": "Send Back", + "secondTabOffset": 80, + "secondTabWidth": 65 + }, + "widgets_values": [ + 50, + "fixed" + ] + }, + { + "id": 255, + "type": "PrimitiveFloat", + "pos": [ + 300, + 1290 + ], + "size": [ + 230, + 110 + ], + "flags": {}, + "order": 1, + "mode": 0, + "inputs": [ + { + "localized_name": "value", + "name": "value", + "type": "FLOAT", + "widget": { + "name": "value" + }, + "link": null + } + ], + "outputs": [ + { + "localized_name": "FLOAT", + "name": "FLOAT", + "type": "FLOAT", + "links": [ + 357 + ] + } + ], + "title": "Float (CFG)", + "properties": { + "ue_properties": { + "widget_ue_connectable": {}, + "version": "7.7", + "input_ue_unconnectable": {} + }, + "cnr_id": "comfy-core", + "ver": "0.12.3", + "Node name for S&R": "PrimitiveFloat", + "enableTabs": false, + "tabWidth": 65, + "tabXOffset": 10, + "hasSecondTab": false, + "secondTabText": "Send Back", + "secondTabOffset": 80, + "secondTabWidth": 65 + }, + "widgets_values": [ + 4 + ] + }, + { + "id": 256, + "type": "PrimitiveBoolean", + "pos": [ + 300, + 2060 + ], + "size": [ + 230, + 100 + ], + "flags": {}, + "order": 13, + "mode": 0, + "inputs": [ + { + "localized_name": "value", + "name": "value", + "type": "BOOLEAN", + "widget": { + "name": "value" + }, + "link": 370 + } + ], + "outputs": [ + { + "localized_name": "BOOLEAN", + "name": "BOOLEAN", + "type": "BOOLEAN", + "links": [ + 326, + 358, + 359 + ] + } + ], + "title": "Enable 4 Steps LoRA?", + "properties": { + "ue_properties": { + "widget_ue_connectable": {}, + "version": "7.7", + "input_ue_unconnectable": {} + }, + "cnr_id": "comfy-core", + "ver": "0.12.3", + "Node name for S&R": "PrimitiveBoolean", + "enableTabs": false, + "tabWidth": 65, + "tabXOffset": 10, + "hasSecondTab": false, + "secondTabText": "Send Back", + "secondTabOffset": 80, + "secondTabWidth": 65 + }, + "widgets_values": [ + false + ] + }, + { + "id": 257, + "type": "PrimitiveInt", + "pos": [ + 290, + 1540 + ], + "size": [ + 230, + 110 + ], + "flags": {}, + "order": 2, + "mode": 0, + "inputs": [ + { + "localized_name": "value", + "name": "value", + "type": "INT", + "widget": { + "name": "value" + }, + "link": null + } + ], + "outputs": [ + { + "localized_name": "INT", + "name": "INT", + "type": "INT", + "links": [ + 347, + 354 + ] + } + ], + "title": "Int (Steps)", + "properties": { + "ue_properties": { + "widget_ue_connectable": {}, + "version": "7.7", + "input_ue_unconnectable": {} + }, + "cnr_id": "comfy-core", + "ver": "0.12.3", + "Node name for S&R": "PrimitiveInt", + "enableTabs": false, + "tabWidth": 65, + "tabXOffset": 
10, + "hasSecondTab": false, + "secondTabText": "Send Back", + "secondTabOffset": 80, + "secondTabWidth": 65 + }, + "widgets_values": [ + 4, + "fixed" + ] + }, + { + "id": 258, + "type": "PrimitiveFloat", + "pos": [ + 290, + 1670 + ], + "size": [ + 230, + 110 + ], + "flags": {}, + "order": 3, + "mode": 0, + "inputs": [ + { + "localized_name": "value", + "name": "value", + "type": "FLOAT", + "widget": { + "name": "value" + }, + "link": null + } + ], + "outputs": [ + { + "localized_name": "FLOAT", + "name": "FLOAT", + "type": "FLOAT", + "links": [ + 356 + ] + } + ], + "title": "Float (CFG)", + "properties": { + "ue_properties": { + "widget_ue_connectable": {}, + "version": "7.7", + "input_ue_unconnectable": {} + }, + "cnr_id": "comfy-core", + "ver": "0.12.3", + "Node name for S&R": "PrimitiveFloat", + "enableTabs": false, + "tabWidth": 65, + "tabXOffset": 10, + "hasSecondTab": false, + "secondTabText": "Send Back", + "secondTabOffset": 80, + "secondTabWidth": 65 + }, + "widgets_values": [ + 1 + ] + }, + { + "id": 259, + "type": "LoraLoaderModelOnly", + "pos": [ + 240, + 1820 + ], + "size": [ + 330, + 140 + ], + "flags": {}, + "order": 14, + "mode": 0, + "inputs": [ + { + "localized_name": "model", + "name": "model", + "type": "MODEL", + "link": 312 + }, + { + "localized_name": "lora_name", + "name": "lora_name", + "type": "COMBO", + "widget": { + "name": "lora_name" + }, + "link": 375 + }, + { + "localized_name": "strength_model", + "name": "strength_model", + "type": "FLOAT", + "widget": { + "name": "strength_model" + }, + "link": null + } + ], + "outputs": [ + { + "localized_name": "MODEL", + "name": "MODEL", + "type": "MODEL", + "links": [ + 325 + ] + } + ], + "properties": { + "ue_properties": { + "widget_ue_connectable": {}, + "version": "7.7", + "input_ue_unconnectable": {} + }, + "cnr_id": "comfy-core", + "ver": "0.3.49", + "Node name for S&R": "LoraLoaderModelOnly", + "enableTabs": false, + "tabWidth": 65, + "tabXOffset": 10, + "hasSecondTab": false, + "secondTabText": "Send Back", + "secondTabOffset": 80, + "secondTabWidth": 65, + "models": [ + { + "name": "Qwen-Image-2512-Lightning-4steps-V1.0-fp32.safetensors", + "url": "https://huggingface.co/lightx2v/Qwen-Image-2512-Lightning/resolve/main/Qwen-Image-2512-Lightning-4steps-V1.0-fp32.safetensors", + "directory": "loras" + } + ] + }, + "widgets_values": [ + "Qwen-Image-2512-Lightning-4steps-V1.0-fp32.safetensors", + 1 + ] + }, + { + "id": 260, + "type": "ComfySwitchNode", + "pos": [ + 710, + 1170 + ], + "size": [ + 230, + 130 + ], + "flags": {}, + "order": 15, + "mode": 0, + "inputs": [ + { + "localized_name": "on_false", + "name": "on_false", + "type": "*", + "link": 324 + }, + { + "localized_name": "on_true", + "name": "on_true", + "type": "*", + "link": 325 + }, + { + "localized_name": "switch", + "name": "switch", + "type": "BOOLEAN", + "widget": { + "name": "switch" + }, + "link": 326 + } + ], + "outputs": [ + { + "localized_name": "output", + "name": "output", + "type": "*", + "links": [ + 367 + ] + } + ], + "title": "Switch (model)", + "properties": { + "ue_properties": { + "widget_ue_connectable": {}, + "version": "7.7", + "input_ue_unconnectable": {} + }, + "cnr_id": "comfy-core", + "ver": "0.12.3", + "Node name for S&R": "ComfySwitchNode", + "enableTabs": false, + "tabWidth": 65, + "tabXOffset": 10, + "hasSecondTab": false, + "secondTabText": "Send Back", + "secondTabOffset": 80, + "secondTabWidth": 65 + }, + "widgets_values": [ + false + ] + }, + { + "id": 261, + "type": "ComfySwitchNode", + "pos": [ + 710, + 1420 + ], + 
"size": [ + 230, + 130 + ], + "flags": {}, + "order": 16, + "mode": 0, + "inputs": [ + { + "localized_name": "on_false", + "name": "on_false", + "type": "*", + "link": 355 + }, + { + "localized_name": "on_true", + "name": "on_true", + "type": "*", + "link": 354 + }, + { + "localized_name": "switch", + "name": "switch", + "type": "BOOLEAN", + "widget": { + "name": "switch" + }, + "link": 359 + } + ], + "outputs": [ + { + "localized_name": "output", + "name": "output", + "type": "*", + "links": [ + 368 + ] + } + ], + "title": "Switch (steps)", + "properties": { + "ue_properties": { + "widget_ue_connectable": {}, + "version": "7.7", + "input_ue_unconnectable": {} + }, + "cnr_id": "comfy-core", + "ver": "0.12.3", + "Node name for S&R": "ComfySwitchNode", + "enableTabs": false, + "tabWidth": 65, + "tabXOffset": 10, + "hasSecondTab": false, + "secondTabText": "Send Back", + "secondTabOffset": 80, + "secondTabWidth": 65 + }, + "widgets_values": [ + false + ] + }, + { + "id": 262, + "type": "ComfySwitchNode", + "pos": [ + 710, + 1660 + ], + "size": [ + 230, + 130 + ], + "flags": {}, + "order": 17, + "mode": 0, + "inputs": [ + { + "localized_name": "on_false", + "name": "on_false", + "type": "*", + "link": 357 + }, + { + "localized_name": "on_true", + "name": "on_true", + "type": "*", + "link": 356 + }, + { + "localized_name": "switch", + "name": "switch", + "type": "BOOLEAN", + "widget": { + "name": "switch" + }, + "link": 358 + } + ], + "outputs": [ + { + "localized_name": "output", + "name": "output", + "type": "*", + "links": [ + 369 + ] + } + ], + "title": "Switch (cfg)", + "properties": { + "ue_properties": { + "widget_ue_connectable": {}, + "version": "7.7", + "input_ue_unconnectable": {} + }, + "cnr_id": "comfy-core", + "ver": "0.12.3", + "Node name for S&R": "ComfySwitchNode", + "enableTabs": false, + "tabWidth": 65, + "tabXOffset": 10, + "hasSecondTab": false, + "secondTabText": "Send Back", + "secondTabOffset": 80, + "secondTabWidth": 65 + }, + "widgets_values": [ + false + ] + } + ], + "groups": [ + { + "id": 1, + "title": "Model", + "bounding": [ + -640, + 1060, + 390, + 740 + ], + "color": "#3f789e", + "font_size": 24, + "flags": {} + }, + { + "id": 2, + "title": "Image size", + "bounding": [ + -630, + 1830, + 380, + 290 + ], + "color": "#3f789e", + "font_size": 24, + "flags": {} + }, + { + "id": 3, + "title": "Prompt", + "bounding": [ + -220, + 1060, + 400, + 740 + ], + "color": "#3f789e", + "font_size": 24, + "flags": {} + }, + { + "id": 5, + "title": "4-steps LoRA", + "bounding": [ + 210, + 1460, + 410, + 550 + ], + "color": "#3f789e", + "font_size": 24, + "flags": {} + }, + { + "id": 6, + "title": "Original Settings", + "bounding": [ + 210, + 1060, + 420, + 370 + ], + "color": "#3f789e", + "font_size": 24, + "flags": {} + }, + { + "id": 7, + "title": "Swtich", + "bounding": [ + 660, + 1060, + 320, + 750 + ], + "color": "#3f789e", + "font_size": 24, + "flags": {} + } + ], + "links": [ + { + "id": 312, + "origin_id": 248, + "origin_slot": 0, + "target_id": 259, + "target_slot": 0, + "type": "MODEL" + }, + { + "id": 314, + "origin_id": 245, + "origin_slot": 0, + "target_id": 249, + "target_slot": 0, + "type": "CLIP" + }, + { + "id": 315, + "origin_id": 245, + "origin_slot": 0, + "target_id": 250, + "target_slot": 0, + "type": "CLIP" + }, + { + "id": 322, + "origin_id": 253, + "origin_slot": 0, + "target_id": 251, + "target_slot": 0, + "type": "LATENT" + }, + { + "id": 323, + "origin_id": 246, + "origin_slot": 0, + "target_id": 251, + "target_slot": 1, + "type": "VAE" + }, + { + "id": 
316, + "origin_id": 247, + "origin_slot": 0, + "target_id": 253, + "target_slot": 0, + "type": "MODEL" + }, + { + "id": 317, + "origin_id": 249, + "origin_slot": 0, + "target_id": 253, + "target_slot": 1, + "type": "CONDITIONING" + }, + { + "id": 318, + "origin_id": 250, + "origin_slot": 0, + "target_id": 253, + "target_slot": 2, + "type": "CONDITIONING" + }, + { + "id": 319, + "origin_id": 252, + "origin_slot": 0, + "target_id": 253, + "target_slot": 3, + "type": "LATENT" + }, + { + "id": 324, + "origin_id": 248, + "origin_slot": 0, + "target_id": 260, + "target_slot": 0, + "type": "MODEL" + }, + { + "id": 325, + "origin_id": 259, + "origin_slot": 0, + "target_id": 260, + "target_slot": 1, + "type": "MODEL" + }, + { + "id": 326, + "origin_id": 256, + "origin_slot": 0, + "target_id": 260, + "target_slot": 2, + "type": "BOOLEAN" + }, + { + "id": 333, + "origin_id": 251, + "origin_slot": 0, + "target_id": -20, + "target_slot": 0, + "type": "IMAGE" + }, + { + "id": 347, + "origin_id": 257, + "origin_slot": 0, + "target_id": 253, + "target_slot": 4, + "type": "INT" + }, + { + "id": 354, + "origin_id": 257, + "origin_slot": 0, + "target_id": 261, + "target_slot": 1, + "type": "INT" + }, + { + "id": 355, + "origin_id": 254, + "origin_slot": 0, + "target_id": 261, + "target_slot": 0, + "type": "INT" + }, + { + "id": 356, + "origin_id": 258, + "origin_slot": 0, + "target_id": 262, + "target_slot": 1, + "type": "FLOAT" + }, + { + "id": 357, + "origin_id": 255, + "origin_slot": 0, + "target_id": 262, + "target_slot": 0, + "type": "FLOAT" + }, + { + "id": 358, + "origin_id": 256, + "origin_slot": 0, + "target_id": 262, + "target_slot": 2, + "type": "BOOLEAN" + }, + { + "id": 359, + "origin_id": 256, + "origin_slot": 0, + "target_id": 261, + "target_slot": 2, + "type": "BOOLEAN" + }, + { + "id": 360, + "origin_id": -10, + "origin_slot": 0, + "target_id": 249, + "target_slot": 1, + "type": "STRING" + }, + { + "id": 361, + "origin_id": -10, + "origin_slot": 1, + "target_id": 252, + "target_slot": 0, + "type": "INT" + }, + { + "id": 362, + "origin_id": -10, + "origin_slot": 2, + "target_id": 252, + "target_slot": 1, + "type": "INT" + }, + { + "id": 367, + "origin_id": 260, + "origin_slot": 0, + "target_id": 247, + "target_slot": 0, + "type": "MODEL" + }, + { + "id": 368, + "origin_id": 261, + "origin_slot": 0, + "target_id": 253, + "target_slot": 5, + "type": "INT" + }, + { + "id": 369, + "origin_id": 262, + "origin_slot": 0, + "target_id": 253, + "target_slot": 6, + "type": "FLOAT" + }, + { + "id": 370, + "origin_id": -10, + "origin_slot": 3, + "target_id": 256, + "target_slot": 0, + "type": "BOOLEAN" + }, + { + "id": 371, + "origin_id": -10, + "origin_slot": 4, + "target_id": 253, + "target_slot": 4, + "type": "INT" + }, + { + "id": 372, + "origin_id": -10, + "origin_slot": 5, + "target_id": 248, + "target_slot": 0, + "type": "COMBO" + }, + { + "id": 373, + "origin_id": -10, + "origin_slot": 6, + "target_id": 245, + "target_slot": 0, + "type": "COMBO" + }, + { + "id": 374, + "origin_id": -10, + "origin_slot": 7, + "target_id": 246, + "target_slot": 0, + "type": "COMBO" + }, + { + "id": 375, + "origin_id": -10, + "origin_slot": 8, + "target_id": 259, + "target_slot": 1, + "type": "COMBO" + } + ], + "extra": { + "workflowRendererVersion": "Vue-corrected" + }, + "category": "Image generation and editing/Text to image" + } + ] + }, + "extra": { + "ue_links": [] + } +} \ No newline at end of file diff --git a/blueprints/Text to Image (Qwen-Image).json b/blueprints/Text to Image (Qwen-Image).json new file 
mode 100644 index 000000000..70b4b44b3 --- /dev/null +++ b/blueprints/Text to Image (Qwen-Image).json @@ -0,0 +1,1881 @@ +{ + "revision": 0, + "last_node_id": 76, + "last_link_id": 0, + "nodes": [ + { + "id": 76, + "type": "e5cfe5ba-2ae0-4bc4-869f-ab2228cb44d3", + "pos": [ + 30, + 10 + ], + "size": [ + 470, + 660 + ], + "flags": {}, + "order": 2, + "mode": 0, + "inputs": [ + { + "label": "prompt", + "name": "text", + "type": "STRING", + "widget": { + "name": "text" + }, + "link": null + }, + { + "name": "width", + "type": "INT", + "widget": { + "name": "width" + }, + "link": null + }, + { + "name": "height", + "type": "INT", + "widget": { + "name": "height" + }, + "link": null + }, + { + "name": "seed", + "type": "INT", + "widget": { + "name": "seed" + }, + "link": null + }, + { + "name": "unet_name", + "type": "COMBO", + "widget": { + "name": "unet_name" + }, + "link": null + }, + { + "name": "clip_name", + "type": "COMBO", + "widget": { + "name": "clip_name" + }, + "link": null + }, + { + "name": "vae_name", + "type": "COMBO", + "widget": { + "name": "vae_name" + }, + "link": null + }, + { + "label": "lightning_lora", + "name": "lora_name", + "type": "COMBO", + "widget": { + "name": "lora_name" + }, + "link": null + }, + { + "label": "enable_turbo_mode", + "name": "value", + "type": "BOOLEAN", + "widget": { + "name": "value" + }, + "link": null + } + ], + "outputs": [ + { + "localized_name": "IMAGE", + "name": "IMAGE", + "type": "IMAGE", + "links": [] + } + ], + "properties": { + "proxyWidgets": [ + [ + "6", + "text" + ], + [ + "58", + "width" + ], + [ + "58", + "height" + ], + [ + "3", + "seed" + ], + [ + "37", + "unet_name" + ], + [ + "38", + "clip_name" + ], + [ + "39", + "vae_name" + ], + [ + "73", + "lora_name" + ], + [ + "86", + "value" + ], + [ + "3", + "control_after_generate" + ] + ], + "cnr_id": "comfy-core", + "ver": "0.18.1", + "ue_properties": { + "widget_ue_connectable": { + "text": true, + "lora_name": true, + "value": true + }, + "version": "7.7", + "input_ue_unconnectable": {} + } + }, + "widgets_values": [], + "title": "Text to Image (Qwen-Image)" + } + ], + "links": [], + "version": 0.4, + "definitions": { + "subgraphs": [ + { + "id": "e5cfe5ba-2ae0-4bc4-869f-ab2228cb44d3", + "version": 1, + "state": { + "lastGroupId": 5, + "lastNodeId": 87, + "lastLinkId": 153, + "lastRerouteId": 0 + }, + "revision": 0, + "config": {}, + "name": "Text to Image (Qwen-Image)", + "inputNode": { + "id": -10, + "bounding": [ + -810, + 290, + 151.744140625, + 220 + ] + }, + "outputNode": { + "id": -20, + "bounding": [ + 2580, + 340, + 120, + 60 + ] + }, + "inputs": [ + { + "id": "846fd1a5-9f4a-4e83-af40-27cafe99e5c6", + "name": "text", + "type": "STRING", + "linkIds": [ + 132 + ], + "label": "prompt", + "pos": [ + -678.255859375, + 310 + ] + }, + { + "id": "e941d29f-bb7f-4001-a956-90a9b29ae9f9", + "name": "width", + "type": "INT", + "linkIds": [ + 134 + ], + "pos": [ + -678.255859375, + 330 + ] + }, + { + "id": "df798f50-87ba-481b-b847-ca8b7c7efff3", + "name": "height", + "type": "INT", + "linkIds": [ + 135 + ], + "pos": [ + -678.255859375, + 350 + ] + }, + { + "id": "3fcf7667-f697-43ee-bdee-0d3fed39e777", + "name": "seed", + "type": "INT", + "linkIds": [ + 136 + ], + "pos": [ + -678.255859375, + 370 + ] + }, + { + "id": "e8d70f26-d9f5-4633-a39e-0bf6cf93d566", + "name": "unet_name", + "type": "COMBO", + "linkIds": [ + 137 + ], + "pos": [ + -678.255859375, + 390 + ] + }, + { + "id": "8c9b537a-c6c9-4365-96ad-dbbb82d917e0", + "name": "clip_name", + "type": "COMBO", + "linkIds": [ + 138 + ], + 
"pos": [ + -678.255859375, + 410 + ] + }, + { + "id": "7cc2f92b-6e2f-4e4e-a316-b61f58ed1442", + "name": "vae_name", + "type": "COMBO", + "linkIds": [ + 139 + ], + "pos": [ + -678.255859375, + 430 + ] + }, + { + "id": "3cb1ba7c-583c-4f92-afc1-71463161e2a4", + "name": "lora_name", + "type": "COMBO", + "linkIds": [ + 140 + ], + "label": "lightning_lora", + "pos": [ + -678.255859375, + 450 + ] + }, + { + "id": "4278102d-766c-4c6b-af2e-0fb9f26bbb27", + "name": "value", + "type": "BOOLEAN", + "linkIds": [ + 153 + ], + "label": "enable_turbo_mode", + "pos": [ + -678.255859375, + 470 + ] + } + ], + "outputs": [ + { + "id": "2af20250-dc7a-4643-bc84-0a180d9ca62b", + "name": "IMAGE", + "type": "IMAGE", + "linkIds": [ + 110 + ], + "localized_name": "IMAGE", + "pos": [ + 2600, + 360 + ] + } + ], + "widgets": [], + "nodes": [ + { + "id": 39, + "type": "VAELoader", + "pos": [ + -260, + 510 + ], + "size": [ + 330, + 110 + ], + "flags": {}, + "order": 11, + "mode": 0, + "inputs": [ + { + "localized_name": "vae_name", + "name": "vae_name", + "type": "COMBO", + "widget": { + "name": "vae_name" + }, + "link": 139 + } + ], + "outputs": [ + { + "localized_name": "VAE", + "name": "VAE", + "type": "VAE", + "slot_index": 0, + "links": [ + 76 + ] + } + ], + "properties": { + "cnr_id": "comfy-core", + "ver": "0.3.48", + "ue_properties": { + "version": "7.7", + "widget_ue_connectable": {}, + "input_ue_unconnectable": {} + }, + "Node name for S&R": "VAELoader", + "models": [ + { + "name": "qwen_image_vae.safetensors", + "url": "https://huggingface.co/Comfy-Org/Qwen-Image_ComfyUI/resolve/main/split_files/vae/qwen_image_vae.safetensors", + "directory": "vae" + } + ], + "enableTabs": false, + "tabWidth": 65, + "tabXOffset": 10, + "hasSecondTab": false, + "secondTabText": "Send Back", + "secondTabOffset": 80, + "secondTabWidth": 65 + }, + "widgets_values": [ + "qwen_image_vae.safetensors" + ] + }, + { + "id": 38, + "type": "CLIPLoader", + "pos": [ + -260, + 280 + ], + "size": [ + 330, + 150 + ], + "flags": {}, + "order": 10, + "mode": 0, + "inputs": [ + { + "localized_name": "clip_name", + "name": "clip_name", + "type": "COMBO", + "widget": { + "name": "clip_name" + }, + "link": 138 + }, + { + "localized_name": "type", + "name": "type", + "type": "COMBO", + "widget": { + "name": "type" + }, + "link": null + }, + { + "localized_name": "device", + "name": "device", + "shape": 7, + "type": "COMBO", + "widget": { + "name": "device" + }, + "link": null + } + ], + "outputs": [ + { + "localized_name": "CLIP", + "name": "CLIP", + "type": "CLIP", + "slot_index": 0, + "links": [ + 74, + 75 + ] + } + ], + "properties": { + "cnr_id": "comfy-core", + "ver": "0.3.48", + "ue_properties": { + "version": "7.7", + "widget_ue_connectable": {}, + "input_ue_unconnectable": {} + }, + "Node name for S&R": "CLIPLoader", + "models": [ + { + "name": "qwen_2.5_vl_7b_fp8_scaled.safetensors", + "url": "https://huggingface.co/Comfy-Org/Qwen-Image_ComfyUI/resolve/main/split_files/text_encoders/qwen_2.5_vl_7b_fp8_scaled.safetensors", + "directory": "text_encoders" + } + ], + "enableTabs": false, + "tabWidth": 65, + "tabXOffset": 10, + "hasSecondTab": false, + "secondTabText": "Send Back", + "secondTabOffset": 80, + "secondTabWidth": 65 + }, + "widgets_values": [ + "qwen_2.5_vl_7b_fp8_scaled.safetensors", + "qwen_image", + "default" + ] + }, + { + "id": 58, + "type": "EmptySD3LatentImage", + "pos": [ + -240, + 810 + ], + "size": [ + 270, + 170 + ], + "flags": {}, + "order": 12, + "mode": 0, + "inputs": [ + { + "localized_name": "width", + "name": 
"width", + "type": "INT", + "widget": { + "name": "width" + }, + "link": 134 + }, + { + "localized_name": "height", + "name": "height", + "type": "INT", + "widget": { + "name": "height" + }, + "link": 135 + }, + { + "localized_name": "batch_size", + "name": "batch_size", + "type": "INT", + "widget": { + "name": "batch_size" + }, + "link": null + } + ], + "outputs": [ + { + "localized_name": "LATENT", + "name": "LATENT", + "type": "LATENT", + "links": [ + 107 + ] + } + ], + "properties": { + "cnr_id": "comfy-core", + "ver": "0.3.48", + "ue_properties": { + "version": "7.7", + "widget_ue_connectable": {}, + "input_ue_unconnectable": {} + }, + "Node name for S&R": "EmptySD3LatentImage", + "enableTabs": false, + "tabWidth": 65, + "tabXOffset": 10, + "hasSecondTab": false, + "secondTabText": "Send Back", + "secondTabOffset": 80, + "secondTabWidth": 65 + }, + "widgets_values": [ + 1328, + 1328, + 1 + ] + }, + { + "id": 66, + "type": "ModelSamplingAuraFlow", + "pos": [ + 1780, + 180 + ], + "size": [ + 300, + 110 + ], + "flags": {}, + "order": 13, + "mode": 0, + "inputs": [ + { + "localized_name": "model", + "name": "model", + "type": "MODEL", + "link": 147 + }, + { + "localized_name": "shift", + "name": "shift", + "type": "FLOAT", + "widget": { + "name": "shift" + }, + "link": null + } + ], + "outputs": [ + { + "localized_name": "MODEL", + "name": "MODEL", + "type": "MODEL", + "links": [ + 125 + ] + } + ], + "properties": { + "cnr_id": "comfy-core", + "ver": "0.3.48", + "ue_properties": { + "version": "7.7", + "widget_ue_connectable": {}, + "input_ue_unconnectable": {} + }, + "Node name for S&R": "ModelSamplingAuraFlow", + "enableTabs": false, + "tabWidth": 65, + "tabXOffset": 10, + "hasSecondTab": false, + "secondTabText": "Send Back", + "secondTabOffset": 80, + "secondTabWidth": 65 + }, + "widgets_values": [ + 3.1000000000000005 + ] + }, + { + "id": 37, + "type": "UNETLoader", + "pos": [ + -260, + 80 + ], + "size": [ + 330, + 110 + ], + "flags": {}, + "order": 9, + "mode": 0, + "inputs": [ + { + "localized_name": "unet_name", + "name": "unet_name", + "type": "COMBO", + "widget": { + "name": "unet_name" + }, + "link": 137 + }, + { + "localized_name": "weight_dtype", + "name": "weight_dtype", + "type": "COMBO", + "widget": { + "name": "weight_dtype" + }, + "link": null + } + ], + "outputs": [ + { + "localized_name": "MODEL", + "name": "MODEL", + "type": "MODEL", + "slot_index": 0, + "links": [ + 129, + 142 + ] + } + ], + "properties": { + "cnr_id": "comfy-core", + "ver": "0.3.48", + "ue_properties": { + "version": "7.7", + "widget_ue_connectable": {}, + "input_ue_unconnectable": {} + }, + "Node name for S&R": "UNETLoader", + "models": [ + { + "name": "qwen_image_fp8_e4m3fn.safetensors", + "url": "https://huggingface.co/Comfy-Org/Qwen-Image_ComfyUI/resolve/main/split_files/diffusion_models/qwen_image_fp8_e4m3fn.safetensors", + "directory": "diffusion_models" + } + ], + "enableTabs": false, + "tabWidth": 65, + "tabXOffset": 10, + "hasSecondTab": false, + "secondTabText": "Send Back", + "secondTabOffset": 80, + "secondTabWidth": 65 + }, + "widgets_values": [ + "qwen_image_fp8_e4m3fn.safetensors", + "default" + ] + }, + { + "id": 6, + "type": "CLIPTextEncode", + "pos": [ + 120, + 60 + ], + "size": [ + 440, + 340 + ], + "flags": {}, + "order": 6, + "mode": 0, + "inputs": [ + { + "localized_name": "clip", + "name": "clip", + "type": "CLIP", + "link": 74 + }, + { + "localized_name": "text", + "name": "text", + "type": "STRING", + "widget": { + "name": "text" + }, + "link": 132 + } + ], + "outputs": [ + 
{ + "localized_name": "CONDITIONING", + "name": "CONDITIONING", + "type": "CONDITIONING", + "slot_index": 0, + "links": [ + 46 + ] + } + ], + "title": "CLIP Text Encode (Positive Prompt)", + "properties": { + "cnr_id": "comfy-core", + "ver": "0.3.48", + "ue_properties": { + "version": "7.7", + "widget_ue_connectable": {}, + "input_ue_unconnectable": {} + }, + "Node name for S&R": "CLIPTextEncode", + "enableTabs": false, + "tabWidth": 65, + "tabXOffset": 10, + "hasSecondTab": false, + "secondTabText": "Send Back", + "secondTabOffset": 80, + "secondTabWidth": 65 + }, + "widgets_values": [ + "" + ], + "color": "#232", + "bgcolor": "#353" + }, + { + "id": 7, + "type": "CLIPTextEncode", + "pos": [ + 130, + 480 + ], + "size": [ + 430, + 180 + ], + "flags": {}, + "order": 7, + "mode": 0, + "inputs": [ + { + "localized_name": "clip", + "name": "clip", + "type": "CLIP", + "link": 75 + }, + { + "localized_name": "text", + "name": "text", + "type": "STRING", + "widget": { + "name": "text" + }, + "link": null + } + ], + "outputs": [ + { + "localized_name": "CONDITIONING", + "name": "CONDITIONING", + "type": "CONDITIONING", + "slot_index": 0, + "links": [ + 52 + ] + } + ], + "title": "CLIP Text Encode (Negative Prompt)", + "properties": { + "cnr_id": "comfy-core", + "ver": "0.3.48", + "ue_properties": { + "version": "7.7", + "widget_ue_connectable": {}, + "input_ue_unconnectable": {} + }, + "Node name for S&R": "CLIPTextEncode", + "enableTabs": false, + "tabWidth": 65, + "tabXOffset": 10, + "hasSecondTab": false, + "secondTabText": "Send Back", + "secondTabOffset": 80, + "secondTabWidth": 65 + }, + "widgets_values": [ + "" + ], + "color": "#322", + "bgcolor": "#533" + }, + { + "id": 8, + "type": "VAEDecode", + "pos": [ + 2190, + 350 + ], + "size": [ + 230, + 100 + ], + "flags": { + "collapsed": false + }, + "order": 8, + "mode": 0, + "inputs": [ + { + "localized_name": "samples", + "name": "samples", + "type": "LATENT", + "link": 128 + }, + { + "localized_name": "vae", + "name": "vae", + "type": "VAE", + "link": 76 + } + ], + "outputs": [ + { + "localized_name": "IMAGE", + "name": "IMAGE", + "type": "IMAGE", + "slot_index": 0, + "links": [ + 110 + ] + } + ], + "properties": { + "cnr_id": "comfy-core", + "ver": "0.3.48", + "ue_properties": { + "version": "7.7", + "widget_ue_connectable": {}, + "input_ue_unconnectable": {} + }, + "Node name for S&R": "VAEDecode", + "enableTabs": false, + "tabWidth": 65, + "tabXOffset": 10, + "hasSecondTab": false, + "secondTabText": "Send Back", + "secondTabOffset": 80, + "secondTabWidth": 65 + } + }, + { + "id": 73, + "type": "LoraLoaderModelOnly", + "pos": [ + 670, + 500 + ], + "size": [ + 400, + 140 + ], + "flags": {}, + "order": 14, + "mode": 0, + "inputs": [ + { + "localized_name": "model", + "name": "model", + "type": "MODEL", + "link": 129 + }, + { + "localized_name": "lora_name", + "name": "lora_name", + "type": "COMBO", + "widget": { + "name": "lora_name" + }, + "link": 140 + }, + { + "localized_name": "strength_model", + "name": "strength_model", + "type": "FLOAT", + "widget": { + "name": "strength_model" + }, + "link": null + } + ], + "outputs": [ + { + "localized_name": "MODEL", + "name": "MODEL", + "type": "MODEL", + "links": [ + 141 + ] + } + ], + "properties": { + "cnr_id": "comfy-core", + "ver": "0.3.49", + "ue_properties": { + "widget_ue_connectable": {}, + "version": "7.7", + "input_ue_unconnectable": {} + }, + "Node name for S&R": "LoraLoaderModelOnly", + "models": [ + { + "name": "Qwen-Image-Lightning-8steps-V1.0.safetensors", + "url": 
"https://huggingface.co/lightx2v/Qwen-Image-Lightning/resolve/main/Qwen-Image-Lightning-8steps-V1.0.safetensors", + "directory": "loras" + } + ] + }, + "widgets_values": [ + "Qwen-Image-Lightning-8steps-V1.0.safetensors", + 1 + ] + }, + { + "id": 3, + "type": "KSampler", + "pos": [ + 1780, + 330 + ], + "size": [ + 300, + 480 + ], + "flags": {}, + "order": 5, + "mode": 0, + "inputs": [ + { + "localized_name": "model", + "name": "model", + "type": "MODEL", + "link": 125 + }, + { + "localized_name": "positive", + "name": "positive", + "type": "CONDITIONING", + "link": 46 + }, + { + "localized_name": "negative", + "name": "negative", + "type": "CONDITIONING", + "link": 52 + }, + { + "localized_name": "latent_image", + "name": "latent_image", + "type": "LATENT", + "link": 107 + }, + { + "localized_name": "seed", + "name": "seed", + "type": "INT", + "widget": { + "name": "seed" + }, + "link": 136 + }, + { + "localized_name": "steps", + "name": "steps", + "type": "INT", + "widget": { + "name": "steps" + }, + "link": 148 + }, + { + "localized_name": "cfg", + "name": "cfg", + "type": "FLOAT", + "widget": { + "name": "cfg" + }, + "link": 149 + }, + { + "localized_name": "sampler_name", + "name": "sampler_name", + "type": "COMBO", + "widget": { + "name": "sampler_name" + }, + "link": null + }, + { + "localized_name": "scheduler", + "name": "scheduler", + "type": "COMBO", + "widget": { + "name": "scheduler" + }, + "link": null + }, + { + "localized_name": "denoise", + "name": "denoise", + "type": "FLOAT", + "widget": { + "name": "denoise" + }, + "link": null + } + ], + "outputs": [ + { + "localized_name": "LATENT", + "name": "LATENT", + "type": "LATENT", + "slot_index": 0, + "links": [ + 128 + ] + } + ], + "properties": { + "cnr_id": "comfy-core", + "ver": "0.3.48", + "ue_properties": { + "version": "7.7", + "widget_ue_connectable": {}, + "input_ue_unconnectable": {} + }, + "Node name for S&R": "KSampler", + "enableTabs": false, + "tabWidth": 65, + "tabXOffset": 10, + "hasSecondTab": false, + "secondTabText": "Send Back", + "secondTabOffset": 80, + "secondTabWidth": 65 + }, + "widgets_values": [ + 50347169638278, + "randomize", + 8, + 1, + "euler", + "simple", + 1 + ] + }, + { + "id": 78, + "type": "ComfySwitchNode", + "pos": [ + 1320, + 180 + ], + "size": [ + 270, + 130 + ], + "flags": {}, + "order": 15, + "mode": 0, + "inputs": [ + { + "localized_name": "on_false", + "name": "on_false", + "type": "*", + "link": 142 + }, + { + "localized_name": "on_true", + "name": "on_true", + "type": "*", + "link": 141 + }, + { + "localized_name": "switch", + "name": "switch", + "type": "BOOLEAN", + "widget": { + "name": "switch" + }, + "link": 150 + } + ], + "outputs": [ + { + "localized_name": "output", + "name": "output", + "type": "*", + "links": [ + 147 + ] + } + ], + "title": "Switch (Model)", + "properties": { + "cnr_id": "comfy-core", + "ver": "0.18.1", + "ue_properties": { + "widget_ue_connectable": {}, + "version": "7.7", + "input_ue_unconnectable": {} + }, + "Node name for S&R": "ComfySwitchNode" + }, + "widgets_values": [ + false + ] + }, + { + "id": 79, + "type": "PrimitiveInt", + "pos": [ + 680, + 710 + ], + "size": [ + 270, + 110 + ], + "flags": {}, + "order": 0, + "mode": 0, + "inputs": [ + { + "localized_name": "value", + "name": "value", + "type": "INT", + "widget": { + "name": "value" + }, + "link": null + } + ], + "outputs": [ + { + "localized_name": "INT", + "name": "INT", + "type": "INT", + "links": [ + 143 + ] + } + ], + "title": "Steps", + "properties": { + "cnr_id": "comfy-core", + "ver": 
"0.18.1", + "ue_properties": { + "widget_ue_connectable": {}, + "version": "7.7", + "input_ue_unconnectable": {} + }, + "Node name for S&R": "PrimitiveInt" + }, + "widgets_values": [ + 8, + "fixed" + ] + }, + { + "id": 81, + "type": "PrimitiveFloat", + "pos": [ + 680, + 870 + ], + "size": [ + 270, + 110 + ], + "flags": {}, + "order": 1, + "mode": 0, + "inputs": [ + { + "localized_name": "value", + "name": "value", + "type": "FLOAT", + "widget": { + "name": "value" + }, + "link": null + } + ], + "outputs": [ + { + "localized_name": "FLOAT", + "name": "FLOAT", + "type": "FLOAT", + "links": [ + 144 + ] + } + ], + "title": "CFG", + "properties": { + "cnr_id": "comfy-core", + "ver": "0.18.1", + "ue_properties": { + "widget_ue_connectable": {}, + "version": "7.7", + "input_ue_unconnectable": {} + }, + "Node name for S&R": "PrimitiveFloat" + }, + "widgets_values": [ + 1 + ] + }, + { + "id": 82, + "type": "ComfySwitchNode", + "pos": [ + 1320, + 400 + ], + "size": [ + 270, + 130 + ], + "flags": {}, + "order": 16, + "mode": 0, + "inputs": [ + { + "localized_name": "on_false", + "name": "on_false", + "type": "*", + "link": 146 + }, + { + "localized_name": "on_true", + "name": "on_true", + "type": "*", + "link": 143 + }, + { + "localized_name": "switch", + "name": "switch", + "type": "BOOLEAN", + "widget": { + "name": "switch" + }, + "link": 151 + } + ], + "outputs": [ + { + "localized_name": "output", + "name": "output", + "type": "*", + "links": [ + 148 + ] + } + ], + "title": "Switch (Steps)", + "properties": { + "cnr_id": "comfy-core", + "ver": "0.18.1", + "ue_properties": { + "widget_ue_connectable": {}, + "version": "7.7", + "input_ue_unconnectable": {} + }, + "Node name for S&R": "ComfySwitchNode" + }, + "widgets_values": [ + false + ] + }, + { + "id": 83, + "type": "ComfySwitchNode", + "pos": [ + 1320, + 600 + ], + "size": [ + 270, + 130 + ], + "flags": {}, + "order": 17, + "mode": 0, + "inputs": [ + { + "localized_name": "on_false", + "name": "on_false", + "type": "*", + "link": 145 + }, + { + "localized_name": "on_true", + "name": "on_true", + "type": "*", + "link": 144 + }, + { + "localized_name": "switch", + "name": "switch", + "type": "BOOLEAN", + "widget": { + "name": "switch" + }, + "link": 152 + } + ], + "outputs": [ + { + "localized_name": "output", + "name": "output", + "type": "*", + "links": [ + 149 + ] + } + ], + "title": "Switch (CFG)", + "properties": { + "cnr_id": "comfy-core", + "ver": "0.18.1", + "ue_properties": { + "widget_ue_connectable": {}, + "version": "7.7", + "input_ue_unconnectable": {} + }, + "Node name for S&R": "ComfySwitchNode" + }, + "widgets_values": [ + false + ] + }, + { + "id": 84, + "type": "PrimitiveInt", + "pos": [ + 680, + 60 + ], + "size": [ + 270, + 110 + ], + "flags": {}, + "order": 2, + "mode": 0, + "inputs": [ + { + "localized_name": "value", + "name": "value", + "type": "INT", + "widget": { + "name": "value" + }, + "link": null + } + ], + "outputs": [ + { + "localized_name": "INT", + "name": "INT", + "type": "INT", + "links": [ + 146 + ] + } + ], + "title": "Steps", + "properties": { + "cnr_id": "comfy-core", + "ver": "0.18.1", + "ue_properties": { + "widget_ue_connectable": {}, + "version": "7.7", + "input_ue_unconnectable": {} + }, + "Node name for S&R": "PrimitiveInt" + }, + "widgets_values": [ + 20, + "fixed" + ] + }, + { + "id": 85, + "type": "PrimitiveFloat", + "pos": [ + 680, + 230 + ], + "size": [ + 270, + 110 + ], + "flags": {}, + "order": 3, + "mode": 0, + "inputs": [ + { + "localized_name": "value", + "name": "value", + "type": "FLOAT", 
+ "widget": { + "name": "value" + }, + "link": null + } + ], + "outputs": [ + { + "localized_name": "FLOAT", + "name": "FLOAT", + "type": "FLOAT", + "links": [ + 145 + ] + } + ], + "title": "CFG", + "properties": { + "cnr_id": "comfy-core", + "ver": "0.18.1", + "ue_properties": { + "widget_ue_connectable": {}, + "version": "7.7", + "input_ue_unconnectable": {} + }, + "Node name for S&R": "PrimitiveFloat" + }, + "widgets_values": [ + 4 + ] + }, + { + "id": 86, + "type": "PrimitiveBoolean", + "pos": [ + 710, + 1070 + ], + "size": [ + 270, + 100 + ], + "flags": {}, + "order": 18, + "mode": 0, + "inputs": [ + { + "localized_name": "value", + "name": "value", + "type": "BOOLEAN", + "widget": { + "name": "value" + }, + "link": 153 + } + ], + "outputs": [ + { + "localized_name": "BOOLEAN", + "name": "BOOLEAN", + "type": "BOOLEAN", + "links": [ + 150, + 151, + 152 + ] + } + ], + "title": "Enable Lightning LoRA", + "properties": { + "cnr_id": "comfy-core", + "ver": "0.18.1", + "ue_properties": { + "widget_ue_connectable": {}, + "version": "7.7", + "input_ue_unconnectable": {} + }, + "Node name for S&R": "PrimitiveBoolean" + }, + "widgets_values": [ + false + ] + }, + { + "id": 87, + "type": "MarkdownNote", + "pos": [ + 620, + -160 + ], + "size": [ + 500, + 120 + ], + "flags": {}, + "order": 4, + "mode": 0, + "inputs": [], + "outputs": [], + "properties": { + "ue_properties": { + "widget_ue_connectable": {}, + "version": "7.7", + "input_ue_unconnectable": {} + } + }, + "widgets_values": [ + "Try 50 steps, if you want original the [qwen image](https://huggingface.co/Qwen/Qwen-Image)'s setting, but it will takes longer" + ], + "color": "#222", + "bgcolor": "#000" + } + ], + "groups": [ + { + "id": 1, + "title": "Step1 - Load models", + "bounding": [ + -280, + -20, + 360, + 700 + ], + "color": "#3f789e", + "font_size": 24, + "flags": {} + }, + { + "id": 2, + "title": "Step2 - Image size", + "bounding": [ + -280, + 710, + 360, + 300 + ], + "color": "#3f789e", + "font_size": 24, + "flags": {} + }, + { + "id": 3, + "title": "Step3 - Prompt", + "bounding": [ + 110, + -20, + 470, + 700 + ], + "color": "#3f789e", + "font_size": 24, + "flags": {} + }, + { + "id": 4, + "title": "Lightx2v 8steps LoRA", + "bounding": [ + 610, + 390, + 520, + 620 + ], + "color": "#3f789e", + "font_size": 24, + "flags": {} + }, + { + "id": 5, + "title": "Original Settings", + "bounding": [ + 610, + -20, + 520, + 380 + ], + "color": "#3f789e", + "font_size": 24, + "flags": {} + } + ], + "links": [ + { + "id": 74, + "origin_id": 38, + "origin_slot": 0, + "target_id": 6, + "target_slot": 0, + "type": "CLIP" + }, + { + "id": 75, + "origin_id": 38, + "origin_slot": 0, + "target_id": 7, + "target_slot": 0, + "type": "CLIP" + }, + { + "id": 129, + "origin_id": 37, + "origin_slot": 0, + "target_id": 73, + "target_slot": 0, + "type": "MODEL" + }, + { + "id": 128, + "origin_id": 3, + "origin_slot": 0, + "target_id": 8, + "target_slot": 0, + "type": "LATENT" + }, + { + "id": 76, + "origin_id": 39, + "origin_slot": 0, + "target_id": 8, + "target_slot": 1, + "type": "VAE" + }, + { + "id": 125, + "origin_id": 66, + "origin_slot": 0, + "target_id": 3, + "target_slot": 0, + "type": "MODEL" + }, + { + "id": 46, + "origin_id": 6, + "origin_slot": 0, + "target_id": 3, + "target_slot": 1, + "type": "CONDITIONING" + }, + { + "id": 52, + "origin_id": 7, + "origin_slot": 0, + "target_id": 3, + "target_slot": 2, + "type": "CONDITIONING" + }, + { + "id": 107, + "origin_id": 58, + "origin_slot": 0, + "target_id": 3, + "target_slot": 3, + "type": "LATENT" + 
}, + { + "id": 110, + "origin_id": 8, + "origin_slot": 0, + "target_id": -20, + "target_slot": 0, + "type": "IMAGE" + }, + { + "id": 132, + "origin_id": -10, + "origin_slot": 0, + "target_id": 6, + "target_slot": 1, + "type": "STRING" + }, + { + "id": 134, + "origin_id": -10, + "origin_slot": 1, + "target_id": 58, + "target_slot": 0, + "type": "INT" + }, + { + "id": 135, + "origin_id": -10, + "origin_slot": 2, + "target_id": 58, + "target_slot": 1, + "type": "INT" + }, + { + "id": 136, + "origin_id": -10, + "origin_slot": 3, + "target_id": 3, + "target_slot": 4, + "type": "INT" + }, + { + "id": 137, + "origin_id": -10, + "origin_slot": 4, + "target_id": 37, + "target_slot": 0, + "type": "COMBO" + }, + { + "id": 138, + "origin_id": -10, + "origin_slot": 5, + "target_id": 38, + "target_slot": 0, + "type": "COMBO" + }, + { + "id": 139, + "origin_id": -10, + "origin_slot": 6, + "target_id": 39, + "target_slot": 0, + "type": "COMBO" + }, + { + "id": 140, + "origin_id": -10, + "origin_slot": 7, + "target_id": 73, + "target_slot": 1, + "type": "COMBO" + }, + { + "id": 141, + "origin_id": 73, + "origin_slot": 0, + "target_id": 78, + "target_slot": 1, + "type": "MODEL" + }, + { + "id": 142, + "origin_id": 37, + "origin_slot": 0, + "target_id": 78, + "target_slot": 0, + "type": "MODEL" + }, + { + "id": 143, + "origin_id": 79, + "origin_slot": 0, + "target_id": 82, + "target_slot": 1, + "type": "INT" + }, + { + "id": 144, + "origin_id": 81, + "origin_slot": 0, + "target_id": 83, + "target_slot": 1, + "type": "FLOAT" + }, + { + "id": 145, + "origin_id": 85, + "origin_slot": 0, + "target_id": 83, + "target_slot": 0, + "type": "FLOAT" + }, + { + "id": 146, + "origin_id": 84, + "origin_slot": 0, + "target_id": 82, + "target_slot": 0, + "type": "INT" + }, + { + "id": 147, + "origin_id": 78, + "origin_slot": 0, + "target_id": 66, + "target_slot": 0, + "type": "MODEL" + }, + { + "id": 148, + "origin_id": 82, + "origin_slot": 0, + "target_id": 3, + "target_slot": 5, + "type": "INT" + }, + { + "id": 149, + "origin_id": 83, + "origin_slot": 0, + "target_id": 3, + "target_slot": 6, + "type": "FLOAT" + }, + { + "id": 150, + "origin_id": 86, + "origin_slot": 0, + "target_id": 78, + "target_slot": 2, + "type": "BOOLEAN" + }, + { + "id": 151, + "origin_id": 86, + "origin_slot": 0, + "target_id": 82, + "target_slot": 2, + "type": "BOOLEAN" + }, + { + "id": 152, + "origin_id": 86, + "origin_slot": 0, + "target_id": 83, + "target_slot": 2, + "type": "BOOLEAN" + }, + { + "id": 153, + "origin_id": -10, + "origin_slot": 8, + "target_id": 86, + "target_slot": 0, + "type": "BOOLEAN" + } + ], + "extra": { + "workflowRendererVersion": "LG" + }, + "category": "Image generation and editing/Text to image" + } + ] + }, + "extra": {} +} \ No newline at end of file diff --git a/blueprints/Text to Video (LTX-2.3).json b/blueprints/Text to Video (LTX-2.3).json new file mode 100644 index 000000000..ff9bc6ccf --- /dev/null +++ b/blueprints/Text to Video (LTX-2.3).json @@ -0,0 +1,4296 @@ +{ + "revision": 0, + "last_node_id": 324, + "last_link_id": 0, + "nodes": [ + { + "id": 324, + "type": "871cf29d-2726-43a5-b61e-01fa939d699d", + "pos": [ + -300, + 4290 + ], + "size": [ + 400, + 170 + ], + "flags": {}, + "order": 1, + "mode": 0, + "inputs": [ + { + "name": "value", + "type": "STRING", + "widget": { + "name": "value" + }, + "link": null + }, + { + "label": "width", + "name": "value_2", + "type": "INT", + "widget": { + "name": "value_2" + }, + "link": null + }, + { + "label": "height", + "name": "value_3", + "type": "INT", + "widget": { 
+ "name": "value_3" + }, + "link": null + }, + { + "label": "duration", + "name": "value_4", + "type": "INT", + "widget": { + "name": "value_4" + }, + "link": null + }, + { + "name": "ckpt_name", + "type": "COMBO", + "widget": { + "name": "ckpt_name" + }, + "link": null + }, + { + "label": "distilled_lora", + "name": "lora_name", + "type": "COMBO", + "widget": { + "name": "lora_name" + }, + "link": null + }, + { + "name": "text_encoder", + "type": "COMBO", + "widget": { + "name": "text_encoder" + }, + "link": null + }, + { + "label": "latent_upscale_model", + "name": "model_name", + "type": "COMBO", + "widget": { + "name": "model_name" + }, + "link": null + }, + { + "label": "fps", + "name": "value_1", + "type": "INT", + "widget": { + "name": "value_1" + }, + "link": null + } + ], + "outputs": [ + { + "localized_name": "VIDEO", + "name": "VIDEO", + "type": "VIDEO", + "links": [] + } + ], + "properties": { + "proxyWidgets": [ + [ + "320", + "value" + ], + [ + "314", + "value" + ], + [ + "301", + "value" + ], + [ + "303", + "value" + ], + [ + "318", + "ckpt_name" + ], + [ + "287", + "lora_name" + ], + [ + "319", + "text_encoder" + ], + [ + "313", + "model_name" + ], + [ + "302", + "value" + ], + [ + "279", + "noise_seed" + ], + [ + "279", + "control_after_generate" + ] + ], + "ue_properties": { + "widget_ue_connectable": { + "value_1": true, + "value_2": true, + "value_3": true, + "value_4": true, + "lora_name": true, + "model_name": true + }, + "version": "7.7", + "input_ue_unconnectable": {} + }, + "cnr_id": "comfy-core", + "ver": "0.16.3", + "enableTabs": false, + "tabWidth": 65, + "tabXOffset": 10, + "hasSecondTab": false, + "secondTabText": "Send Back", + "secondTabOffset": 80, + "secondTabWidth": 65 + }, + "widgets_values": [], + "title": "Text to Video (LTX-2.3)" + } + ], + "links": [], + "version": 0.4, + "definitions": { + "subgraphs": [ + { + "id": "871cf29d-2726-43a5-b61e-01fa939d699d", + "version": 1, + "state": { + "lastGroupId": 26, + "lastNodeId": 324, + "lastLinkId": 631, + "lastRerouteId": 0 + }, + "revision": 0, + "config": {}, + "name": "Text to Video (LTX-2.3)", + "inputNode": { + "id": -10, + "bounding": [ + 720, + 4240, + 162.162109375, + 220 + ] + }, + "outputNode": { + "id": -20, + "bounding": [ + 6100, + 4160, + 120, + 60 + ] + }, + "inputs": [ + { + "id": "9494c550-4172-49c6-930e-5b508f775e77", + "name": "value", + "type": "STRING", + "linkIds": [ + 595 + ], + "pos": [ + 862.162109375, + 4260 + ] + }, + { + "id": "58dbb3f6-f924-4548-96ef-e0e34610bd4e", + "name": "value_2", + "type": "INT", + "linkIds": [ + 597 + ], + "label": "width", + "pos": [ + 862.162109375, + 4280 + ] + }, + { + "id": "6086d5b8-2586-448c-a641-dd14d76dd102", + "name": "value_3", + "type": "INT", + "linkIds": [ + 598 + ], + "label": "height", + "pos": [ + 862.162109375, + 4300 + ] + }, + { + "id": "feb8c2eb-ae48-4fa8-bc24-929552d656c3", + "name": "value_4", + "type": "INT", + "linkIds": [ + 599 + ], + "label": "duration", + "pos": [ + 862.162109375, + 4320 + ] + }, + { + "id": "d7255058-319a-4880-8f9a-7e542c8f3c3c", + "name": "ckpt_name", + "type": "COMBO", + "linkIds": [ + 601, + 604, + 605 + ], + "pos": [ + 862.162109375, + 4340 + ] + }, + { + "id": "4afce68d-8f65-4342-9d6d-ae0a7688c3e3", + "name": "lora_name", + "type": "COMBO", + "linkIds": [ + 602 + ], + "label": "distilled_lora", + "pos": [ + 862.162109375, + 4360 + ] + }, + { + "id": "ab842b4b-c977-4679-b421-424722785b57", + "name": "text_encoder", + "type": "COMBO", + "linkIds": [ + 606 + ], + "pos": [ + 862.162109375, + 4380 + ] + }, + { 
+ "id": "9e47372d-28d9-4311-91e9-e90d03f4eb43", + "name": "model_name", + "type": "COMBO", + "linkIds": [ + 607 + ], + "label": "latent_upscale_model", + "pos": [ + 862.162109375, + 4400 + ] + }, + { + "id": "7951b137-465e-4844-b05f-88b89f0e1ba8", + "name": "value_1", + "type": "INT", + "linkIds": [ + 627 + ], + "label": "fps", + "pos": [ + 862.162109375, + 4420 + ] + } + ], + "outputs": [ + { + "id": "954ef307-c897-4eea-8b5c-5c6ce15a5357", + "name": "VIDEO", + "type": "VIDEO", + "linkIds": [ + 536 + ], + "localized_name": "VIDEO", + "pos": [ + 6120, + 4180 + ] + } + ], + "widgets": [], + "nodes": [ + { + "id": 278, + "type": "RandomNoise", + "pos": [ + 4720, + 3750 + ], + "size": [ + 280, + 110 + ], + "flags": {}, + "order": 0, + "mode": 0, + "inputs": [ + { + "localized_name": "noise_seed", + "name": "noise_seed", + "type": "INT", + "widget": { + "name": "noise_seed" + }, + "link": null + } + ], + "outputs": [ + { + "localized_name": "NOISE", + "name": "NOISE", + "type": "NOISE", + "links": [ + 490 + ] + } + ], + "properties": { + "ue_properties": { + "widget_ue_connectable": {}, + "version": "7.7", + "input_ue_unconnectable": {} + }, + "cnr_id": "comfy-core", + "ver": "0.3.75", + "Node name for S&R": "RandomNoise", + "enableTabs": false, + "tabWidth": 65, + "tabXOffset": 10, + "hasSecondTab": false, + "secondTabText": "Send Back", + "secondTabOffset": 80, + "secondTabWidth": 65 + }, + "widgets_values": [ + 42, + "fixed" + ] + }, + { + "id": 279, + "type": "RandomNoise", + "pos": [ + 3200, + 3900 + ], + "size": [ + 280, + 110 + ], + "flags": {}, + "order": 1, + "mode": 0, + "inputs": [ + { + "localized_name": "noise_seed", + "name": "noise_seed", + "type": "INT", + "widget": { + "name": "noise_seed" + }, + "link": null + } + ], + "outputs": [ + { + "localized_name": "NOISE", + "name": "NOISE", + "type": "NOISE", + "links": [ + 483 + ] + } + ], + "properties": { + "ue_properties": { + "widget_ue_connectable": {}, + "version": "7.7", + "input_ue_unconnectable": {} + }, + "cnr_id": "comfy-core", + "ver": "0.3.56", + "Node name for S&R": "RandomNoise", + "enableTabs": false, + "tabWidth": 65, + "tabXOffset": 10, + "hasSecondTab": false, + "secondTabText": "Send Back", + "secondTabOffset": 80, + "secondTabWidth": 65 + }, + "widgets_values": [ + 343011291748534, + "randomize" + ] + }, + { + "id": 280, + "type": "LTXVConcatAVLatent", + "pos": [ + 4730, + 4520 + ], + "size": [ + 280, + 100 + ], + "flags": {}, + "order": 8, + "mode": 0, + "inputs": [ + { + "localized_name": "video_latent", + "name": "video_latent", + "type": "LATENT", + "link": 512 + }, + { + "localized_name": "audio_latent", + "name": "audio_latent", + "type": "LATENT", + "link": 513 + } + ], + "outputs": [ + { + "localized_name": "latent", + "name": "latent", + "type": "LATENT", + "links": [ + 494 + ] + } + ], + "properties": { + "ue_properties": { + "widget_ue_connectable": {}, + "version": "7.7", + "input_ue_unconnectable": {} + }, + "cnr_id": "comfy-core", + "ver": "0.5.1", + "Node name for S&R": "LTXVConcatAVLatent", + "enableTabs": false, + "tabWidth": 65, + "tabXOffset": 10, + "hasSecondTab": false, + "secondTabText": "Send Back", + "secondTabOffset": 80, + "secondTabWidth": 65 + } + }, + { + "id": 281, + "type": "LTXVAudioVAELoader", + "pos": [ + 1660, + 4140 + ], + "size": [ + 430, + 110 + ], + "flags": {}, + "order": 9, + "mode": 0, + "inputs": [ + { + "localized_name": "ckpt_name", + "name": "ckpt_name", + "type": "COMBO", + "widget": { + "name": "ckpt_name" + }, + "link": 604 + } + ], + "outputs": [ + { + 
"localized_name": "Audio VAE", + "name": "Audio VAE", + "type": "VAE", + "links": [ + 481, + 496 + ] + } + ], + "properties": { + "ue_properties": { + "widget_ue_connectable": {}, + "version": "7.7", + "input_ue_unconnectable": {} + }, + "cnr_id": "comfy-core", + "ver": "0.3.68", + "Node name for S&R": "LTXVAudioVAELoader", + "enableTabs": false, + "tabWidth": 65, + "tabXOffset": 10, + "hasSecondTab": false, + "secondTabText": "Send Back", + "secondTabOffset": 80, + "secondTabWidth": 65, + "models": [ + { + "name": "ltx-2.3-22b-dev-fp8.safetensors", + "url": "https://huggingface.co/Lightricks/LTX-2.3-fp8/resolve/main/ltx-2.3-22b-dev-fp8.safetensors", + "directory": "checkpoints" + } + ] + }, + "widgets_values": [ + "ltx-2.3-22b-dev-fp8.safetensors" + ] + }, + { + "id": 282, + "type": "KSamplerSelect", + "pos": [ + 4720, + 4160 + ], + "size": [ + 280, + 110 + ], + "flags": {}, + "order": 2, + "mode": 0, + "inputs": [ + { + "localized_name": "sampler_name", + "name": "sampler_name", + "type": "COMBO", + "widget": { + "name": "sampler_name" + }, + "link": null + } + ], + "outputs": [ + { + "localized_name": "SAMPLER", + "name": "SAMPLER", + "type": "SAMPLER", + "links": [ + 492 + ] + } + ], + "properties": { + "ue_properties": { + "widget_ue_connectable": {}, + "version": "7.7", + "input_ue_unconnectable": {} + }, + "cnr_id": "comfy-core", + "ver": "0.3.75", + "Node name for S&R": "KSamplerSelect", + "enableTabs": false, + "tabWidth": 65, + "tabXOffset": 10, + "hasSecondTab": false, + "secondTabText": "Send Back", + "secondTabOffset": 80, + "secondTabWidth": 65 + }, + "widgets_values": [ + "euler_cfg_pp" + ] + }, + { + "id": 283, + "type": "ManualSigmas", + "pos": [ + 4720, + 4340 + ], + "size": [ + 280, + 110 + ], + "flags": {}, + "order": 3, + "mode": 0, + "inputs": [ + { + "localized_name": "sigmas", + "name": "sigmas", + "type": "STRING", + "widget": { + "name": "sigmas" + }, + "link": null + } + ], + "outputs": [ + { + "localized_name": "SIGMAS", + "name": "SIGMAS", + "type": "SIGMAS", + "links": [ + 493 + ] + } + ], + "properties": { + "ue_properties": { + "widget_ue_connectable": {}, + "version": "7.7", + "input_ue_unconnectable": {} + }, + "cnr_id": "comfy-core", + "ver": "0.5.1", + "Node name for S&R": "ManualSigmas", + "enableTabs": false, + "tabWidth": 65, + "tabXOffset": 10, + "hasSecondTab": false, + "secondTabText": "Send Back", + "secondTabOffset": 80, + "secondTabWidth": 65 + }, + "widgets_values": [ + "0.85, 0.7250, 0.4219, 0.0" + ] + }, + { + "id": 284, + "type": "CFGGuider", + "pos": [ + 4720, + 3930 + ], + "size": [ + 280, + 160 + ], + "flags": {}, + "order": 10, + "mode": 0, + "inputs": [ + { + "localized_name": "model", + "name": "model", + "type": "MODEL", + "link": 478 + }, + { + "localized_name": "positive", + "name": "positive", + "type": "CONDITIONING", + "link": 479 + }, + { + "localized_name": "negative", + "name": "negative", + "type": "CONDITIONING", + "link": 480 + }, + { + "localized_name": "cfg", + "name": "cfg", + "type": "FLOAT", + "widget": { + "name": "cfg" + }, + "link": null + } + ], + "outputs": [ + { + "localized_name": "GUIDER", + "name": "GUIDER", + "type": "GUIDER", + "links": [ + 491 + ] + } + ], + "properties": { + "ue_properties": { + "widget_ue_connectable": {}, + "version": "7.7", + "input_ue_unconnectable": {} + }, + "cnr_id": "comfy-core", + "ver": "0.3.71", + "Node name for S&R": "CFGGuider", + "enableTabs": false, + "tabWidth": 65, + "tabXOffset": 10, + "hasSecondTab": false, + "secondTabText": "Send Back", + "secondTabOffset": 80, + 
"secondTabWidth": 65 + }, + "widgets_values": [ + 1 + ] + }, + { + "id": 285, + "type": "SamplerCustomAdvanced", + "pos": [ + 3620, + 3990 + ], + "size": [ + 230, + 170 + ], + "flags": {}, + "order": 11, + "mode": 0, + "inputs": [ + { + "localized_name": "noise", + "name": "noise", + "type": "NOISE", + "link": 483 + }, + { + "localized_name": "guider", + "name": "guider", + "type": "GUIDER", + "link": 484 + }, + { + "localized_name": "sampler", + "name": "sampler", + "type": "SAMPLER", + "link": 485 + }, + { + "localized_name": "sigmas", + "name": "sigmas", + "type": "SIGMAS", + "link": 544 + }, + { + "localized_name": "latent_image", + "name": "latent_image", + "type": "LATENT", + "link": 487 + } + ], + "outputs": [ + { + "localized_name": "output", + "name": "output", + "type": "LATENT", + "links": [ + 488 + ] + }, + { + "localized_name": "denoised_output", + "name": "denoised_output", + "type": "LATENT", + "links": [] + } + ], + "properties": { + "ue_properties": { + "widget_ue_connectable": {}, + "version": "7.7", + "input_ue_unconnectable": {} + }, + "cnr_id": "comfy-core", + "ver": "0.3.60", + "Node name for S&R": "SamplerCustomAdvanced", + "enableTabs": false, + "tabWidth": 65, + "tabXOffset": 10, + "hasSecondTab": false, + "secondTabText": "Send Back", + "secondTabOffset": 80, + "secondTabWidth": 65 + } + }, + { + "id": 286, + "type": "LTXVCropGuides", + "pos": [ + 3900, + 3700 + ], + "size": [ + 250, + 120 + ], + "flags": {}, + "order": 12, + "mode": 0, + "inputs": [ + { + "localized_name": "positive", + "name": "positive", + "type": "CONDITIONING", + "link": 475 + }, + { + "localized_name": "negative", + "name": "negative", + "type": "CONDITIONING", + "link": 476 + }, + { + "localized_name": "latent", + "name": "latent", + "type": "LATENT", + "link": 477 + } + ], + "outputs": [ + { + "localized_name": "positive", + "name": "positive", + "type": "CONDITIONING", + "links": [ + 479 + ] + }, + { + "localized_name": "negative", + "name": "negative", + "type": "CONDITIONING", + "links": [ + 480 + ] + }, + { + "localized_name": "latent", + "name": "latent", + "type": "LATENT", + "slot_index": 2, + "links": [] + } + ], + "properties": { + "ue_properties": { + "widget_ue_connectable": {}, + "version": "7.7", + "input_ue_unconnectable": {} + }, + "cnr_id": "comfy-core", + "ver": "0.3.68", + "Node name for S&R": "LTXVCropGuides", + "enableTabs": false, + "tabWidth": 65, + "tabXOffset": 10, + "hasSecondTab": false, + "secondTabText": "Send Back", + "secondTabOffset": 80, + "secondTabWidth": 65 + } + }, + { + "id": 287, + "type": "LoraLoaderModelOnly", + "pos": [ + 1660, + 3910 + ], + "size": [ + 430, + 140 + ], + "flags": {}, + "order": 13, + "mode": 0, + "inputs": [ + { + "localized_name": "model", + "name": "model", + "type": "MODEL", + "link": 520 + }, + { + "localized_name": "lora_name", + "name": "lora_name", + "type": "COMBO", + "widget": { + "name": "lora_name" + }, + "link": 602 + }, + { + "localized_name": "strength_model", + "name": "strength_model", + "type": "FLOAT", + "widget": { + "name": "strength_model" + }, + "link": null + } + ], + "outputs": [ + { + "localized_name": "MODEL", + "name": "MODEL", + "type": "MODEL", + "links": [ + 478, + 541 + ] + } + ], + "properties": { + "ue_properties": { + "widget_ue_connectable": {}, + "version": "7.7", + "input_ue_unconnectable": {} + }, + "cnr_id": "comfy-core", + "ver": "0.3.75", + "Node name for S&R": "LoraLoaderModelOnly", + "enableTabs": false, + "tabWidth": 65, + "tabXOffset": 10, + "hasSecondTab": false, + "secondTabText": "Send 
Back", + "secondTabOffset": 80, + "secondTabWidth": 65, + "models": [ + { + "name": "ltx-2.3-22b-distilled-lora-384.safetensors", + "url": "https://huggingface.co/Lightricks/LTX-2.3/resolve/main/ltx-2.3-22b-distilled-lora-384.safetensors", + "directory": "loras" + } + ] + }, + "widgets_values": [ + "ltx-2.3-22b-distilled-lora-384.safetensors", + 0.5 + ] + }, + { + "id": 288, + "type": "ResizeImagesByLongerEdge", + "pos": [ + 2120, + 5040 + ], + "size": [ + 310, + 110 + ], + "flags": { + "collapsed": false + }, + "order": 14, + "mode": 0, + "inputs": [ + { + "localized_name": "images", + "name": "images", + "type": "IMAGE", + "link": 523 + }, + { + "localized_name": "longer_edge", + "name": "longer_edge", + "type": "INT", + "widget": { + "name": "longer_edge" + }, + "link": null + } + ], + "outputs": [ + { + "localized_name": "images", + "name": "images", + "type": "IMAGE", + "links": [ + 505 + ] + } + ], + "properties": { + "ue_properties": { + "widget_ue_connectable": {}, + "version": "7.7", + "input_ue_unconnectable": {} + }, + "cnr_id": "comfy-core", + "ver": "0.5.1", + "Node name for S&R": "ResizeImagesByLongerEdge", + "enableTabs": false, + "tabWidth": 65, + "tabXOffset": 10, + "hasSecondTab": false, + "secondTabText": "Send Back", + "secondTabOffset": 80, + "secondTabWidth": 65 + }, + "widgets_values": [ + 1536 + ] + }, + { + "id": 289, + "type": "LTXVLatentUpsampler", + "pos": [ + 4270, + 3910 + ], + "size": [ + 330, + 120 + ], + "flags": {}, + "order": 15, + "mode": 0, + "inputs": [ + { + "localized_name": "samples", + "name": "samples", + "type": "LATENT", + "link": 547 + }, + { + "localized_name": "upscale_model", + "name": "upscale_model", + "type": "LATENT_UPSCALE_MODEL", + "link": 545 + }, + { + "localized_name": "vae", + "name": "vae", + "type": "VAE", + "link": 554 + } + ], + "outputs": [ + { + "localized_name": "LATENT", + "name": "LATENT", + "type": "LATENT", + "links": [ + 548 + ] + } + ], + "properties": { + "ue_properties": { + "widget_ue_connectable": {}, + "version": "7.7", + "input_ue_unconnectable": {} + }, + "cnr_id": "comfy-core", + "ver": "0.14.1", + "Node name for S&R": "LTXVLatentUpsampler", + "enableTabs": false, + "tabWidth": 65, + "tabXOffset": 10, + "hasSecondTab": false, + "secondTabText": "Send Back", + "secondTabOffset": 80, + "secondTabWidth": 65 + } + }, + { + "id": 290, + "type": "LTXVImgToVideoInplace", + "pos": [ + 4280, + 4150 + ], + "size": [ + 300, + 180 + ], + "flags": {}, + "order": 16, + "mode": 0, + "inputs": [ + { + "localized_name": "vae", + "name": "vae", + "type": "VAE", + "link": 552 + }, + { + "localized_name": "image", + "name": "image", + "type": "IMAGE", + "link": 515 + }, + { + "localized_name": "latent", + "name": "latent", + "type": "LATENT", + "link": 548 + }, + { + "localized_name": "strength", + "name": "strength", + "type": "FLOAT", + "widget": { + "name": "strength" + }, + "link": null + }, + { + "localized_name": "bypass", + "name": "bypass", + "type": "BOOLEAN", + "widget": { + "name": "bypass" + }, + "link": 543 + } + ], + "outputs": [ + { + "localized_name": "latent", + "name": "latent", + "type": "LATENT", + "links": [ + 512 + ] + } + ], + "properties": { + "ue_properties": { + "widget_ue_connectable": {}, + "version": "7.7", + "input_ue_unconnectable": {} + }, + "cnr_id": "comfy-core", + "ver": "0.7.0", + "Node name for S&R": "LTXVImgToVideoInplace", + "enableTabs": false, + "tabWidth": 65, + "tabXOffset": 10, + "hasSecondTab": false, + "secondTabText": "Send Back", + "secondTabOffset": 80, + "secondTabWidth": 65 + }, + 
"widgets_values": [ + 1, + false + ] + }, + { + "id": 291, + "type": "LTXVPreprocess", + "pos": [ + 2130, + 5190 + ], + "size": [ + 290, + 110 + ], + "flags": {}, + "order": 17, + "mode": 0, + "inputs": [ + { + "localized_name": "image", + "name": "image", + "type": "IMAGE", + "link": 505 + }, + { + "localized_name": "img_compression", + "name": "img_compression", + "type": "INT", + "widget": { + "name": "img_compression" + }, + "link": null + } + ], + "outputs": [ + { + "localized_name": "output_image", + "name": "output_image", + "type": "IMAGE", + "links": [ + 510, + 515 + ] + } + ], + "properties": { + "ue_properties": { + "widget_ue_connectable": {}, + "version": "7.7", + "input_ue_unconnectable": {} + }, + "cnr_id": "comfy-core", + "ver": "0.7.0", + "Node name for S&R": "LTXVPreprocess", + "enableTabs": false, + "tabWidth": 65, + "tabXOffset": 10, + "hasSecondTab": false, + "secondTabText": "Send Back", + "secondTabOffset": 80, + "secondTabWidth": 65 + }, + "widgets_values": [ + 18 + ] + }, + { + "id": 292, + "type": "ResizeImageMaskNode", + "pos": [ + 1670, + 5040 + ], + "size": [ + 300, + 160 + ], + "flags": {}, + "order": 18, + "mode": 0, + "inputs": [ + { + "localized_name": "input", + "name": "input", + "type": "IMAGE,MASK", + "link": 626 + }, + { + "localized_name": "resize_type", + "name": "resize_type", + "type": "COMFY_DYNAMICCOMBO_V3", + "widget": { + "name": "resize_type" + }, + "link": null + }, + { + "localized_name": "width", + "name": "resize_type.width", + "type": "INT", + "widget": { + "name": "resize_type.width" + }, + "link": 558 + }, + { + "localized_name": "height", + "name": "resize_type.height", + "type": "INT", + "widget": { + "name": "resize_type.height" + }, + "link": 559 + }, + { + "localized_name": "crop", + "name": "resize_type.crop", + "type": "COMBO", + "widget": { + "name": "resize_type.crop" + }, + "link": null + }, + { + "localized_name": "scale_method", + "name": "scale_method", + "type": "COMBO", + "widget": { + "name": "scale_method" + }, + "link": null + } + ], + "outputs": [ + { + "localized_name": "resized", + "name": "resized", + "type": "*", + "links": [ + 523 + ] + } + ], + "properties": { + "ue_properties": { + "widget_ue_connectable": {}, + "version": "7.7", + "input_ue_unconnectable": {} + }, + "cnr_id": "comfy-core", + "ver": "0.7.0", + "Node name for S&R": "ResizeImageMaskNode", + "enableTabs": false, + "tabWidth": 65, + "tabXOffset": 10, + "hasSecondTab": false, + "secondTabText": "Send Back", + "secondTabOffset": 80, + "secondTabWidth": 65 + }, + "widgets_values": [ + "scale dimensions", + 1920, + 1088, + "center", + "lanczos" + ] + }, + { + "id": 293, + "type": "KSamplerSelect", + "pos": [ + 3200, + 4350 + ], + "size": [ + 280, + 110 + ], + "flags": {}, + "order": 4, + "mode": 0, + "inputs": [ + { + "localized_name": "sampler_name", + "name": "sampler_name", + "type": "COMBO", + "widget": { + "name": "sampler_name" + }, + "link": null + } + ], + "outputs": [ + { + "localized_name": "SAMPLER", + "name": "SAMPLER", + "type": "SAMPLER", + "links": [ + 485 + ] + } + ], + "properties": { + "ue_properties": { + "widget_ue_connectable": {}, + "version": "7.7", + "input_ue_unconnectable": {} + }, + "cnr_id": "comfy-core", + "ver": "0.3.56", + "Node name for S&R": "KSamplerSelect", + "enableTabs": false, + "tabWidth": 65, + "tabXOffset": 10, + "hasSecondTab": false, + "secondTabText": "Send Back", + "secondTabOffset": 80, + "secondTabWidth": 65 + }, + "widgets_values": [ + "euler_ancestral_cfg_pp" + ] + }, + { + "id": 294, + "type": 
"ComfyMathExpression", + "pos": [ + 2530, + 5070 + ], + "size": [ + 230, + 170 + ], + "flags": { + "collapsed": true + }, + "order": 19, + "mode": 0, + "inputs": [ + { + "label": "a", + "localized_name": "values.a", + "name": "values.a", + "type": "FLOAT,INT", + "link": 560 + }, + { + "label": "b", + "localized_name": "values.b", + "name": "values.b", + "shape": 7, + "type": "FLOAT,INT", + "link": null + }, + { + "localized_name": "expression", + "name": "expression", + "type": "STRING", + "widget": { + "name": "expression" + }, + "link": null + } + ], + "outputs": [ + { + "localized_name": "FLOAT", + "name": "FLOAT", + "type": "FLOAT", + "links": null + }, + { + "localized_name": "INT", + "name": "INT", + "type": "INT", + "links": [ + 561 + ] + } + ], + "properties": { + "ue_properties": { + "widget_ue_connectable": {}, + "version": "7.7", + "input_ue_unconnectable": {} + }, + "cnr_id": "comfy-core", + "ver": "0.16.3", + "Node name for S&R": "ComfyMathExpression", + "enableTabs": false, + "tabWidth": 65, + "tabXOffset": 10, + "hasSecondTab": false, + "secondTabText": "Send Back", + "secondTabOffset": 80, + "secondTabWidth": 65 + }, + "widgets_values": [ + "a/2" + ] + }, + { + "id": 295, + "type": "Reroute", + "pos": [ + 3930, + 4090 + ], + "size": [ + 80, + 30 + ], + "flags": {}, + "order": 20, + "mode": 0, + "inputs": [ + { + "name": "", + "type": "*", + "link": 557 + } + ], + "outputs": [ + { + "name": "", + "type": "VAE", + "links": [ + 552, + 553, + 554 + ] + } + ], + "properties": { + "showOutputText": false, + "horizontal": false, + "ue_properties": { + "widget_ue_connectable": {}, + "version": "7.7", + "input_ue_unconnectable": {} + } + } + }, + { + "id": 296, + "type": "ComfyMathExpression", + "pos": [ + 2530, + 5130 + ], + "size": [ + 230, + 170 + ], + "flags": { + "collapsed": true + }, + "order": 21, + "mode": 0, + "inputs": [ + { + "label": "a", + "localized_name": "values.a", + "name": "values.a", + "type": "FLOAT,INT", + "link": 562 + }, + { + "label": "b", + "localized_name": "values.b", + "name": "values.b", + "shape": 7, + "type": "FLOAT,INT", + "link": null + }, + { + "localized_name": "expression", + "name": "expression", + "type": "STRING", + "widget": { + "name": "expression" + }, + "link": null + } + ], + "outputs": [ + { + "localized_name": "FLOAT", + "name": "FLOAT", + "type": "FLOAT", + "links": null + }, + { + "localized_name": "INT", + "name": "INT", + "type": "INT", + "links": [ + 563 + ] + } + ], + "properties": { + "ue_properties": { + "widget_ue_connectable": {}, + "version": "7.7", + "input_ue_unconnectable": {} + }, + "cnr_id": "comfy-core", + "ver": "0.16.3", + "Node name for S&R": "ComfyMathExpression", + "enableTabs": false, + "tabWidth": 65, + "tabXOffset": 10, + "hasSecondTab": false, + "secondTabText": "Send Back", + "secondTabOffset": 80, + "secondTabWidth": 65 + }, + "widgets_values": [ + "a/2" + ] + }, + { + "id": 297, + "type": "EmptyLTXVLatentVideo", + "pos": [ + 2980, + 5200 + ], + "size": [ + 280, + 200 + ], + "flags": {}, + "order": 22, + "mode": 0, + "inputs": [ + { + "localized_name": "width", + "name": "width", + "type": "INT", + "widget": { + "name": "width" + }, + "link": 561 + }, + { + "localized_name": "height", + "name": "height", + "type": "INT", + "widget": { + "name": "height" + }, + "link": 563 + }, + { + "localized_name": "length", + "name": "length", + "type": "INT", + "widget": { + "name": "length" + }, + "link": 631 + }, + { + "localized_name": "batch_size", + "name": "batch_size", + "type": "INT", + "widget": { + "name": 
"batch_size" + }, + "link": null + } + ], + "outputs": [ + { + "localized_name": "LATENT", + "name": "LATENT", + "type": "LATENT", + "links": [ + 511 + ] + } + ], + "properties": { + "ue_properties": { + "widget_ue_connectable": {}, + "version": "7.7", + "input_ue_unconnectable": {} + }, + "cnr_id": "comfy-core", + "ver": "0.3.60", + "Node name for S&R": "EmptyLTXVLatentVideo", + "enableTabs": false, + "tabWidth": 65, + "tabXOffset": 10, + "hasSecondTab": false, + "secondTabText": "Send Back", + "secondTabOffset": 80, + "secondTabWidth": 65 + }, + "widgets_values": [ + 768, + 512, + 97, + 1 + ] + }, + { + "id": 298, + "type": "LTXVImgToVideoInplace", + "pos": [ + 3420, + 4990 + ], + "size": [ + 280, + 180 + ], + "flags": {}, + "order": 23, + "mode": 0, + "inputs": [ + { + "localized_name": "vae", + "name": "vae", + "type": "VAE", + "link": 556 + }, + { + "localized_name": "image", + "name": "image", + "type": "IMAGE", + "link": 510 + }, + { + "localized_name": "latent", + "name": "latent", + "type": "LATENT", + "link": 511 + }, + { + "localized_name": "strength", + "name": "strength", + "type": "FLOAT", + "widget": { + "name": "strength" + }, + "link": null + }, + { + "localized_name": "bypass", + "name": "bypass", + "type": "BOOLEAN", + "widget": { + "name": "bypass" + }, + "link": 542 + } + ], + "outputs": [ + { + "localized_name": "latent", + "name": "latent", + "type": "LATENT", + "links": [ + 497 + ] + } + ], + "properties": { + "ue_properties": { + "widget_ue_connectable": {}, + "version": "7.7", + "input_ue_unconnectable": {} + }, + "cnr_id": "comfy-core", + "ver": "0.7.0", + "Node name for S&R": "LTXVImgToVideoInplace", + "enableTabs": false, + "tabWidth": 65, + "tabXOffset": 10, + "hasSecondTab": false, + "secondTabText": "Send Back", + "secondTabOffset": 80, + "secondTabWidth": 65 + }, + "widgets_values": [ + 0.7, + false + ] + }, + { + "id": 299, + "type": "LTXVAudioVAEDecode", + "pos": [ + 5770, + 3940 + ], + "size": [ + 270, + 100 + ], + "flags": {}, + "order": 24, + "mode": 0, + "inputs": [ + { + "localized_name": "samples", + "name": "samples", + "type": "LATENT", + "link": 495 + }, + { + "label": "Audio VAE", + "localized_name": "audio_vae", + "name": "audio_vae", + "type": "VAE", + "link": 496 + } + ], + "outputs": [ + { + "localized_name": "Audio", + "name": "Audio", + "type": "AUDIO", + "links": [ + 534 + ] + } + ], + "properties": { + "ue_properties": { + "widget_ue_connectable": {}, + "version": "7.7", + "input_ue_unconnectable": {} + }, + "cnr_id": "comfy-core", + "ver": "0.7.0", + "Node name for S&R": "LTXVAudioVAEDecode", + "enableTabs": false, + "tabWidth": 65, + "tabXOffset": 10, + "hasSecondTab": false, + "secondTabText": "Send Back", + "secondTabOffset": 80, + "secondTabWidth": 65 + } + }, + { + "id": 300, + "type": "ComfyMathExpression", + "pos": [ + 2530, + 5270 + ], + "size": [ + 230, + 170 + ], + "flags": { + "collapsed": true + }, + "order": 25, + "mode": 0, + "inputs": [ + { + "label": "a", + "localized_name": "values.a", + "name": "values.a", + "type": "FLOAT,INT", + "link": 564 + }, + { + "label": "b", + "localized_name": "values.b", + "name": "values.b", + "shape": 7, + "type": "FLOAT,INT", + "link": null + }, + { + "localized_name": "expression", + "name": "expression", + "type": "STRING", + "widget": { + "name": "expression" + }, + "link": null + } + ], + "outputs": [ + { + "localized_name": "FLOAT", + "name": "FLOAT", + "type": "FLOAT", + "links": [ + 566, + 591 + ] + }, + { + "localized_name": "INT", + "name": "INT", + "type": "INT", + "links": [ + 
565 + ] + } + ], + "title": "Math Expression (fps)", + "properties": { + "ue_properties": { + "widget_ue_connectable": {}, + "version": "7.7", + "input_ue_unconnectable": {} + }, + "cnr_id": "comfy-core", + "ver": "0.16.3", + "Node name for S&R": "ComfyMathExpression", + "enableTabs": false, + "tabWidth": 65, + "tabXOffset": 10, + "hasSecondTab": false, + "secondTabText": "Send Back", + "secondTabOffset": 80, + "secondTabWidth": 65 + }, + "widgets_values": [ + "a" + ] + }, + { + "id": 301, + "type": "PrimitiveInt", + "pos": [ + 1160, + 4530 + ], + "size": [ + 370, + 110 + ], + "flags": {}, + "order": 26, + "mode": 0, + "inputs": [ + { + "localized_name": "value", + "name": "value", + "type": "INT", + "widget": { + "name": "value" + }, + "link": 598 + } + ], + "outputs": [ + { + "localized_name": "INT", + "name": "INT", + "type": "INT", + "links": [ + 559, + 562 + ] + } + ], + "title": "Height", + "properties": { + "ue_properties": { + "widget_ue_connectable": {}, + "version": "7.7", + "input_ue_unconnectable": {} + }, + "cnr_id": "comfy-core", + "ver": "0.16.3", + "Node name for S&R": "PrimitiveInt", + "enableTabs": false, + "tabWidth": 65, + "tabXOffset": 10, + "hasSecondTab": false, + "secondTabText": "Send Back", + "secondTabOffset": 80, + "secondTabWidth": 65 + }, + "widgets_values": [ + 720, + "fixed" + ] + }, + { + "id": 302, + "type": "PrimitiveInt", + "pos": [ + 1160, + 4680 + ], + "size": [ + 370, + 110 + ], + "flags": {}, + "order": 27, + "mode": 0, + "inputs": [ + { + "localized_name": "value", + "name": "value", + "type": "INT", + "widget": { + "name": "value" + }, + "link": 627 + } + ], + "outputs": [ + { + "localized_name": "INT", + "name": "INT", + "type": "INT", + "links": [ + 564, + 629 + ] + } + ], + "title": "Frame Rate", + "properties": { + "ue_properties": { + "widget_ue_connectable": {}, + "version": "7.7", + "input_ue_unconnectable": {} + }, + "cnr_id": "comfy-core", + "ver": "0.16.3", + "Node name for S&R": "PrimitiveInt", + "enableTabs": false, + "tabWidth": 65, + "tabXOffset": 10, + "hasSecondTab": false, + "secondTabText": "Send Back", + "secondTabOffset": 80, + "secondTabWidth": 65 + }, + "widgets_values": [ + 25, + "fixed" + ] + }, + { + "id": 303, + "type": "PrimitiveInt", + "pos": [ + 1160, + 4230 + ], + "size": [ + 370, + 110 + ], + "flags": {}, + "order": 28, + "mode": 0, + "inputs": [ + { + "localized_name": "value", + "name": "value", + "type": "INT", + "widget": { + "name": "value" + }, + "link": 599 + } + ], + "outputs": [ + { + "localized_name": "INT", + "name": "INT", + "type": "INT", + "links": [ + 628 + ] + } + ], + "title": "Duration", + "properties": { + "ue_properties": { + "widget_ue_connectable": {}, + "version": "7.7", + "input_ue_unconnectable": {} + }, + "cnr_id": "comfy-core", + "ver": "0.7.0", + "Node name for S&R": "PrimitiveInt", + "enableTabs": false, + "tabWidth": 65, + "tabXOffset": 10, + "hasSecondTab": false, + "secondTabText": "Send Back", + "secondTabOffset": 80, + "secondTabWidth": 65 + }, + "widgets_values": [ + 5, + "fixed" + ] + }, + { + "id": 304, + "type": "PrimitiveBoolean", + "pos": [ + 1170, + 4080 + ], + "size": [ + 370, + 100 + ], + "flags": {}, + "order": 5, + "mode": 0, + "inputs": [ + { + "localized_name": "value", + "name": "value", + "type": "BOOLEAN", + "widget": { + "name": "value" + }, + "link": null + } + ], + "outputs": [ + { + "localized_name": "BOOLEAN", + "name": "BOOLEAN", + "type": "BOOLEAN", + "links": [ + 542, + 543 + ] + } + ], + "title": "Switch to Text to Video?", + "properties": { + "ue_properties": 
{ + "widget_ue_connectable": {}, + "version": "7.7", + "input_ue_unconnectable": {} + }, + "cnr_id": "comfy-core", + "ver": "0.16.0", + "Node name for S&R": "PrimitiveBoolean", + "enableTabs": false, + "tabWidth": 65, + "tabXOffset": 10, + "hasSecondTab": false, + "secondTabText": "Send Back", + "secondTabOffset": 80, + "secondTabWidth": 65 + }, + "widgets_values": [ + true + ] + }, + { + "id": 305, + "type": "CLIPTextEncode", + "pos": [ + 2170, + 3640 + ], + "size": [ + 550, + 740 + ], + "flags": {}, + "order": 29, + "mode": 0, + "inputs": [ + { + "localized_name": "clip", + "name": "clip", + "type": "CLIP", + "link": 615 + }, + { + "localized_name": "text", + "name": "text", + "type": "STRING", + "widget": { + "name": "text" + }, + "link": 623 + } + ], + "outputs": [ + { + "localized_name": "CONDITIONING", + "name": "CONDITIONING", + "type": "CONDITIONING", + "links": [ + 526 + ] + } + ], + "properties": { + "ue_properties": { + "widget_ue_connectable": {}, + "version": "7.7", + "input_ue_unconnectable": {} + }, + "cnr_id": "comfy-core", + "ver": "0.3.56", + "Node name for S&R": "CLIPTextEncode", + "enableTabs": false, + "tabWidth": 65, + "tabXOffset": 10, + "hasSecondTab": false, + "secondTabText": "Send Back", + "secondTabOffset": 80, + "secondTabWidth": 65 + }, + "widgets_values": [ + "" + ], + "color": "#232", + "bgcolor": "#353" + }, + { + "id": 306, + "type": "LTXVConditioning", + "pos": [ + 2790, + 3670 + ], + "size": [ + 280, + 130 + ], + "flags": {}, + "order": 30, + "mode": 0, + "inputs": [ + { + "localized_name": "positive", + "name": "positive", + "type": "CONDITIONING", + "link": 526 + }, + { + "localized_name": "negative", + "name": "negative", + "type": "CONDITIONING", + "link": 527 + }, + { + "localized_name": "frame_rate", + "name": "frame_rate", + "type": "FLOAT", + "widget": { + "name": "frame_rate" + }, + "link": 566 + } + ], + "outputs": [ + { + "localized_name": "positive", + "name": "positive", + "type": "CONDITIONING", + "links": [ + 475, + 518 + ] + }, + { + "localized_name": "negative", + "name": "negative", + "type": "CONDITIONING", + "links": [ + 476, + 519 + ] + } + ], + "properties": { + "ue_properties": { + "widget_ue_connectable": {}, + "version": "7.7", + "input_ue_unconnectable": {} + }, + "cnr_id": "comfy-core", + "ver": "0.3.56", + "Node name for S&R": "LTXVConditioning", + "enableTabs": false, + "tabWidth": 65, + "tabXOffset": 10, + "hasSecondTab": false, + "secondTabText": "Send Back", + "secondTabOffset": 80, + "secondTabWidth": 65 + }, + "widgets_values": [ + 24 + ] + }, + { + "id": 307, + "type": "LTXVEmptyLatentAudio", + "pos": [ + 2970, + 4970 + ], + "size": [ + 280, + 170 + ], + "flags": {}, + "order": 31, + "mode": 0, + "inputs": [ + { + "localized_name": "audio_vae", + "name": "audio_vae", + "type": "VAE", + "link": 481 + }, + { + "localized_name": "frames_number", + "name": "frames_number", + "type": "INT", + "widget": { + "name": "frames_number" + }, + "link": 630 + }, + { + "localized_name": "frame_rate", + "name": "frame_rate", + "type": "INT", + "widget": { + "name": "frame_rate" + }, + "link": 565 + }, + { + "localized_name": "batch_size", + "name": "batch_size", + "type": "INT", + "widget": { + "name": "batch_size" + }, + "link": null + } + ], + "outputs": [ + { + "localized_name": "Latent", + "name": "Latent", + "type": "LATENT", + "links": [ + 498 + ] + } + ], + "properties": { + "ue_properties": { + "widget_ue_connectable": {}, + "version": "7.7", + "input_ue_unconnectable": {} + }, + "cnr_id": "comfy-core", + "ver": "0.3.68", + 
"Node name for S&R": "LTXVEmptyLatentAudio", + "enableTabs": false, + "tabWidth": 65, + "tabXOffset": 10, + "hasSecondTab": false, + "secondTabText": "Send Back", + "secondTabOffset": 80, + "secondTabWidth": 65 + }, + "widgets_values": [ + 97, + 25, + 1 + ] + }, + { + "id": 308, + "type": "ManualSigmas", + "pos": [ + 3200, + 4550 + ], + "size": [ + 500, + 110 + ], + "flags": {}, + "order": 6, + "mode": 0, + "inputs": [ + { + "localized_name": "sigmas", + "name": "sigmas", + "type": "STRING", + "widget": { + "name": "sigmas" + }, + "link": null + } + ], + "outputs": [ + { + "localized_name": "SIGMAS", + "name": "SIGMAS", + "type": "SIGMAS", + "links": [ + 544 + ] + } + ], + "properties": { + "ue_properties": { + "widget_ue_connectable": {}, + "version": "7.7", + "input_ue_unconnectable": {} + }, + "cnr_id": "comfy-core", + "ver": "0.14.1", + "Node name for S&R": "ManualSigmas", + "enableTabs": false, + "tabWidth": 65, + "tabXOffset": 10, + "hasSecondTab": false, + "secondTabText": "Send Back", + "secondTabOffset": 80, + "secondTabWidth": 65 + }, + "widgets_values": [ + "1.0, 0.99375, 0.9875, 0.98125, 0.975, 0.909375, 0.725, 0.421875, 0.0" + ] + }, + { + "id": 309, + "type": "LTXVSeparateAVLatent", + "pos": [ + 3890, + 3910 + ], + "size": [ + 250, + 100 + ], + "flags": {}, + "order": 32, + "mode": 0, + "inputs": [ + { + "localized_name": "av_latent", + "name": "av_latent", + "type": "LATENT", + "link": 488 + } + ], + "outputs": [ + { + "localized_name": "video_latent", + "name": "video_latent", + "type": "LATENT", + "links": [ + 477, + 547 + ] + }, + { + "localized_name": "audio_latent", + "name": "audio_latent", + "type": "LATENT", + "links": [ + 513 + ] + } + ], + "properties": { + "ue_properties": { + "widget_ue_connectable": {}, + "version": "7.7", + "input_ue_unconnectable": {} + }, + "cnr_id": "comfy-core", + "ver": "0.5.1", + "Node name for S&R": "LTXVSeparateAVLatent", + "enableTabs": false, + "tabWidth": 65, + "tabXOffset": 10, + "hasSecondTab": false, + "secondTabText": "Send Back", + "secondTabOffset": 80, + "secondTabWidth": 65 + } + }, + { + "id": 310, + "type": "SamplerCustomAdvanced", + "pos": [ + 5070, + 3750 + ], + "size": [ + 230, + 170 + ], + "flags": {}, + "order": 33, + "mode": 0, + "inputs": [ + { + "localized_name": "noise", + "name": "noise", + "type": "NOISE", + "link": 490 + }, + { + "localized_name": "guider", + "name": "guider", + "type": "GUIDER", + "link": 491 + }, + { + "localized_name": "sampler", + "name": "sampler", + "type": "SAMPLER", + "link": 492 + }, + { + "localized_name": "sigmas", + "name": "sigmas", + "type": "SIGMAS", + "link": 493 + }, + { + "localized_name": "latent_image", + "name": "latent_image", + "type": "LATENT", + "link": 494 + } + ], + "outputs": [ + { + "localized_name": "output", + "name": "output", + "type": "LATENT", + "links": [ + 578 + ] + }, + { + "localized_name": "denoised_output", + "name": "denoised_output", + "type": "LATENT", + "links": [] + } + ], + "properties": { + "ue_properties": { + "widget_ue_connectable": {}, + "version": "7.7", + "input_ue_unconnectable": {} + }, + "cnr_id": "comfy-core", + "ver": "0.3.75", + "Node name for S&R": "SamplerCustomAdvanced", + "enableTabs": false, + "tabWidth": 65, + "tabXOffset": 10, + "hasSecondTab": false, + "secondTabText": "Send Back", + "secondTabOffset": 80, + "secondTabWidth": 65 + } + }, + { + "id": 311, + "type": "LTXVSeparateAVLatent", + "pos": [ + 5410, + 3750 + ], + "size": [ + 230, + 100 + ], + "flags": {}, + "order": 34, + "mode": 0, + "inputs": [ + { + "localized_name": 
"av_latent", + "name": "av_latent", + "type": "LATENT", + "link": 578 + } + ], + "outputs": [ + { + "localized_name": "video_latent", + "name": "video_latent", + "type": "LATENT", + "links": [ + 539 + ] + }, + { + "localized_name": "audio_latent", + "name": "audio_latent", + "type": "LATENT", + "links": [ + 495 + ] + } + ], + "properties": { + "ue_properties": { + "widget_ue_connectable": {}, + "version": "7.7", + "input_ue_unconnectable": {} + }, + "cnr_id": "comfy-core", + "ver": "0.5.1", + "Node name for S&R": "LTXVSeparateAVLatent", + "enableTabs": false, + "tabWidth": 65, + "tabXOffset": 10, + "hasSecondTab": false, + "secondTabText": "Send Back", + "secondTabOffset": 80, + "secondTabWidth": 65 + } + }, + { + "id": 312, + "type": "CreateVideo", + "pos": [ + 5740, + 4610 + ], + "size": [ + 280, + 130 + ], + "flags": {}, + "order": 35, + "mode": 0, + "inputs": [ + { + "localized_name": "images", + "name": "images", + "type": "IMAGE", + "link": 538 + }, + { + "localized_name": "audio", + "name": "audio", + "shape": 7, + "type": "AUDIO", + "link": 534 + }, + { + "localized_name": "fps", + "name": "fps", + "type": "FLOAT", + "widget": { + "name": "fps" + }, + "link": 591 + } + ], + "outputs": [ + { + "localized_name": "VIDEO", + "name": "VIDEO", + "type": "VIDEO", + "links": [ + 536 + ] + } + ], + "properties": { + "ue_properties": { + "widget_ue_connectable": {}, + "version": "7.7", + "input_ue_unconnectable": {} + }, + "cnr_id": "comfy-core", + "ver": "0.5.1", + "Node name for S&R": "CreateVideo", + "enableTabs": false, + "tabWidth": 65, + "tabXOffset": 10, + "hasSecondTab": false, + "secondTabText": "Send Back", + "secondTabOffset": 80, + "secondTabWidth": 65 + }, + "widgets_values": [ + 24 + ] + }, + { + "id": 313, + "type": "LatentUpscaleModelLoader", + "pos": [ + 1670, + 4600 + ], + "size": [ + 400, + 110 + ], + "flags": {}, + "order": 36, + "mode": 0, + "inputs": [ + { + "localized_name": "model_name", + "name": "model_name", + "type": "COMBO", + "widget": { + "name": "model_name" + }, + "link": 607 + } + ], + "outputs": [ + { + "localized_name": "LATENT_UPSCALE_MODEL", + "name": "LATENT_UPSCALE_MODEL", + "type": "LATENT_UPSCALE_MODEL", + "links": [ + 545 + ] + } + ], + "properties": { + "ue_properties": { + "widget_ue_connectable": {}, + "version": "7.7", + "input_ue_unconnectable": {} + }, + "cnr_id": "comfy-core", + "ver": "0.7.0", + "Node name for S&R": "LatentUpscaleModelLoader", + "enableTabs": false, + "tabWidth": 65, + "tabXOffset": 10, + "hasSecondTab": false, + "secondTabText": "Send Back", + "secondTabOffset": 80, + "secondTabWidth": 65, + "models": [ + { + "name": "ltx-2.3-spatial-upscaler-x2-1.1.safetensors", + "url": "https://huggingface.co/Lightricks/LTX-2.3/resolve/main/ltx-2.3-spatial-upscaler-x2-1.1.safetensors", + "directory": "latent_upscale_models" + } + ] + }, + "widgets_values": [ + "ltx-2.3-spatial-upscaler-x2-1.1.safetensors" + ] + }, + { + "id": 314, + "type": "PrimitiveInt", + "pos": [ + 1160, + 4380 + ], + "size": [ + 370, + 110 + ], + "flags": {}, + "order": 37, + "mode": 0, + "inputs": [ + { + "localized_name": "value", + "name": "value", + "type": "INT", + "widget": { + "name": "value" + }, + "link": 597 + } + ], + "outputs": [ + { + "localized_name": "INT", + "name": "INT", + "type": "INT", + "links": [ + 558, + 560 + ] + } + ], + "title": "Width", + "properties": { + "ue_properties": { + "widget_ue_connectable": {}, + "version": "7.7", + "input_ue_unconnectable": {} + }, + "cnr_id": "comfy-core", + "ver": "0.16.3", + "Node name for S&R": 
"PrimitiveInt", + "enableTabs": false, + "tabWidth": 65, + "tabXOffset": 10, + "hasSecondTab": false, + "secondTabText": "Send Back", + "secondTabOffset": 80, + "secondTabWidth": 65 + }, + "widgets_values": [ + 1280, + "fixed" + ] + }, + { + "id": 315, + "type": "CLIPTextEncode", + "pos": [ + 2180, + 4480 + ], + "size": [ + 530, + 240 + ], + "flags": {}, + "order": 38, + "mode": 0, + "inputs": [ + { + "localized_name": "clip", + "name": "clip", + "type": "CLIP", + "link": 625 + }, + { + "localized_name": "text", + "name": "text", + "type": "STRING", + "widget": { + "name": "text" + }, + "link": null + } + ], + "outputs": [ + { + "localized_name": "CONDITIONING", + "name": "CONDITIONING", + "type": "CONDITIONING", + "links": [ + 527 + ] + } + ], + "properties": { + "ue_properties": { + "widget_ue_connectable": {}, + "version": "7.7", + "input_ue_unconnectable": {} + }, + "cnr_id": "comfy-core", + "ver": "0.3.56", + "Node name for S&R": "CLIPTextEncode", + "enableTabs": false, + "tabWidth": 65, + "tabXOffset": 10, + "hasSecondTab": false, + "secondTabText": "Send Back", + "secondTabOffset": 80, + "secondTabWidth": 65 + }, + "widgets_values": [ + "pc game, console game, video game, cartoon, childish, ugly" + ], + "color": "#323", + "bgcolor": "#535" + }, + { + "id": 316, + "type": "CFGGuider", + "pos": [ + 3200, + 4100 + ], + "size": [ + 280, + 160 + ], + "flags": {}, + "order": 39, + "mode": 0, + "inputs": [ + { + "localized_name": "model", + "name": "model", + "type": "MODEL", + "link": 541 + }, + { + "localized_name": "positive", + "name": "positive", + "type": "CONDITIONING", + "link": 518 + }, + { + "localized_name": "negative", + "name": "negative", + "type": "CONDITIONING", + "link": 519 + }, + { + "localized_name": "cfg", + "name": "cfg", + "type": "FLOAT", + "widget": { + "name": "cfg" + }, + "link": null + } + ], + "outputs": [ + { + "localized_name": "GUIDER", + "name": "GUIDER", + "type": "GUIDER", + "links": [ + 484 + ] + } + ], + "properties": { + "ue_properties": { + "widget_ue_connectable": {}, + "version": "7.7", + "input_ue_unconnectable": {} + }, + "cnr_id": "comfy-core", + "ver": "0.3.64", + "Node name for S&R": "CFGGuider", + "enableTabs": false, + "tabWidth": 65, + "tabXOffset": 10, + "hasSecondTab": false, + "secondTabText": "Send Back", + "secondTabOffset": 80, + "secondTabWidth": 65 + }, + "widgets_values": [ + 1 + ] + }, + { + "id": 317, + "type": "VAEDecodeTiled", + "pos": [ + 5760, + 3650 + ], + "size": [ + 280, + 200 + ], + "flags": {}, + "order": 40, + "mode": 0, + "inputs": [ + { + "localized_name": "samples", + "name": "samples", + "type": "LATENT", + "link": 539 + }, + { + "localized_name": "vae", + "name": "vae", + "type": "VAE", + "link": 553 + }, + { + "localized_name": "tile_size", + "name": "tile_size", + "type": "INT", + "widget": { + "name": "tile_size" + }, + "link": null + }, + { + "localized_name": "overlap", + "name": "overlap", + "type": "INT", + "widget": { + "name": "overlap" + }, + "link": null + }, + { + "localized_name": "temporal_size", + "name": "temporal_size", + "type": "INT", + "widget": { + "name": "temporal_size" + }, + "link": null + }, + { + "localized_name": "temporal_overlap", + "name": "temporal_overlap", + "type": "INT", + "widget": { + "name": "temporal_overlap" + }, + "link": null + } + ], + "outputs": [ + { + "localized_name": "IMAGE", + "name": "IMAGE", + "type": "IMAGE", + "links": [ + 538 + ] + } + ], + "properties": { + "ue_properties": { + "widget_ue_connectable": {}, + "version": "7.7", + "input_ue_unconnectable": {} + }, 
+ "cnr_id": "comfy-core", + "ver": "0.14.1", + "Node name for S&R": "VAEDecodeTiled", + "enableTabs": false, + "tabWidth": 65, + "tabXOffset": 10, + "hasSecondTab": false, + "secondTabText": "Send Back", + "secondTabOffset": 80, + "secondTabWidth": 65 + }, + "widgets_values": [ + 768, + 64, + 4096, + 4 + ] + }, + { + "id": 318, + "type": "CheckpointLoaderSimple", + "pos": [ + 1660, + 3660 + ], + "size": [ + 430, + 160 + ], + "flags": {}, + "order": 41, + "mode": 0, + "inputs": [ + { + "localized_name": "ckpt_name", + "name": "ckpt_name", + "type": "COMBO", + "widget": { + "name": "ckpt_name" + }, + "link": 601 + } + ], + "outputs": [ + { + "localized_name": "MODEL", + "name": "MODEL", + "type": "MODEL", + "links": [ + 520 + ] + }, + { + "localized_name": "CLIP", + "name": "CLIP", + "type": "CLIP", + "links": [] + }, + { + "localized_name": "VAE", + "name": "VAE", + "type": "VAE", + "links": [ + 556, + 557 + ] + } + ], + "properties": { + "ue_properties": { + "widget_ue_connectable": {}, + "version": "7.7", + "input_ue_unconnectable": {} + }, + "cnr_id": "comfy-core", + "ver": "0.3.56", + "Node name for S&R": "CheckpointLoaderSimple", + "enableTabs": false, + "tabWidth": 65, + "tabXOffset": 10, + "hasSecondTab": false, + "secondTabText": "Send Back", + "secondTabOffset": 80, + "secondTabWidth": 65, + "models": [ + { + "name": "ltx-2.3-22b-dev-fp8.safetensors", + "url": "https://huggingface.co/Lightricks/LTX-2.3-fp8/resolve/main/ltx-2.3-22b-dev-fp8.safetensors", + "directory": "checkpoints" + } + ] + }, + "widgets_values": [ + "ltx-2.3-22b-dev-fp8.safetensors" + ] + }, + { + "id": 319, + "type": "LTXAVTextEncoderLoader", + "pos": [ + 1660, + 4340 + ], + "size": [ + 430, + 170 + ], + "flags": {}, + "order": 42, + "mode": 0, + "inputs": [ + { + "localized_name": "text_encoder", + "name": "text_encoder", + "type": "COMBO", + "widget": { + "name": "text_encoder" + }, + "link": 606 + }, + { + "localized_name": "ckpt_name", + "name": "ckpt_name", + "type": "COMBO", + "widget": { + "name": "ckpt_name" + }, + "link": 605 + }, + { + "localized_name": "device", + "name": "device", + "type": "COMBO", + "widget": { + "name": "device" + }, + "link": null + } + ], + "outputs": [ + { + "localized_name": "CLIP", + "name": "CLIP", + "type": "CLIP", + "links": [ + 615, + 625 + ] + } + ], + "properties": { + "ue_properties": { + "widget_ue_connectable": {}, + "version": "7.7", + "input_ue_unconnectable": {} + }, + "cnr_id": "comfy-core", + "ver": "0.7.0", + "Node name for S&R": "LTXAVTextEncoderLoader", + "enableTabs": false, + "tabWidth": 65, + "tabXOffset": 10, + "hasSecondTab": false, + "secondTabText": "Send Back", + "secondTabOffset": 80, + "secondTabWidth": 65, + "models": [ + { + "name": "ltx-2.3-22b-dev-fp8.safetensors", + "url": "https://huggingface.co/Lightricks/LTX-2.3-fp8/resolve/main/ltx-2.3-22b-dev-fp8.safetensors", + "directory": "checkpoints" + }, + { + "name": "gemma_3_12B_it_fp4_mixed.safetensors", + "url": "https://huggingface.co/Comfy-Org/ltx-2/resolve/main/split_files/text_encoders/gemma_3_12B_it_fp4_mixed.safetensors", + "directory": "text_encoders" + } + ] + }, + "widgets_values": [ + "gemma_3_12B_it_fp4_mixed.safetensors", + "ltx-2.3-22b-dev-fp8.safetensors", + "default" + ] + }, + { + "id": 320, + "type": "PrimitiveStringMultiline", + "pos": [ + 1160, + 3680 + ], + "size": [ + 370, + 350 + ], + "flags": {}, + "order": 43, + "mode": 0, + "inputs": [ + { + "localized_name": "value", + "name": "value", + "type": "STRING", + "widget": { + "name": "value" + }, + "link": 595 + } + ], + 
"outputs": [ + { + "localized_name": "STRING", + "name": "STRING", + "type": "STRING", + "links": [ + 623 + ] + } + ], + "title": "Prompt", + "properties": { + "ue_properties": { + "widget_ue_connectable": {}, + "version": "7.7", + "input_ue_unconnectable": {} + }, + "cnr_id": "comfy-core", + "ver": "0.16.3", + "Node name for S&R": "PrimitiveStringMultiline", + "enableTabs": false, + "tabWidth": 65, + "tabXOffset": 10, + "hasSecondTab": false, + "secondTabText": "Send Back", + "secondTabOffset": 80, + "secondTabWidth": 65 + }, + "widgets_values": [ + "" + ] + }, + { + "id": 321, + "type": "LTXVConcatAVLatent", + "pos": [ + 3820, + 4990 + ], + "size": [ + 240, + 100 + ], + "flags": {}, + "order": 44, + "mode": 0, + "inputs": [ + { + "localized_name": "video_latent", + "name": "video_latent", + "type": "LATENT", + "link": 497 + }, + { + "localized_name": "audio_latent", + "name": "audio_latent", + "type": "LATENT", + "link": 498 + } + ], + "outputs": [ + { + "localized_name": "latent", + "name": "latent", + "type": "LATENT", + "links": [ + 487 + ] + } + ], + "properties": { + "ue_properties": { + "widget_ue_connectable": {}, + "version": "7.7", + "input_ue_unconnectable": {} + }, + "cnr_id": "comfy-core", + "ver": "0.7.0", + "Node name for S&R": "LTXVConcatAVLatent", + "enableTabs": false, + "tabWidth": 65, + "tabXOffset": 10, + "hasSecondTab": false, + "secondTabText": "Send Back", + "secondTabOffset": 80, + "secondTabWidth": 65 + } + }, + { + "id": 322, + "type": "LoadImage", + "pos": [ + 1150, + 4940 + ], + "size": [ + 400, + 480 + ], + "flags": {}, + "order": 7, + "mode": 0, + "inputs": [ + { + "localized_name": "image", + "name": "image", + "type": "COMBO", + "widget": { + "name": "image" + }, + "link": null + }, + { + "localized_name": "choose file to upload", + "name": "upload", + "type": "IMAGEUPLOAD", + "widget": { + "name": "upload" + }, + "link": null + } + ], + "outputs": [ + { + "localized_name": "IMAGE", + "name": "IMAGE", + "type": "IMAGE", + "links": [ + 626 + ] + }, + { + "localized_name": "MASK", + "name": "MASK", + "type": "MASK", + "links": null + } + ], + "properties": { + "ue_properties": { + "widget_ue_connectable": {}, + "version": "7.7", + "input_ue_unconnectable": {} + }, + "cnr_id": "comfy-core", + "ver": "0.16.3", + "Node name for S&R": "LoadImage", + "enableTabs": false, + "tabWidth": 65, + "tabXOffset": 10, + "hasSecondTab": false, + "secondTabText": "Send Back", + "secondTabOffset": 80, + "secondTabWidth": 65 + }, + "widgets_values": [ + "example.png", + "image" + ] + }, + { + "id": 323, + "type": "ComfyMathExpression", + "pos": [ + 2540, + 5370 + ], + "size": [ + 260, + 190 + ], + "flags": { + "collapsed": true + }, + "order": 45, + "mode": 0, + "inputs": [ + { + "label": "a", + "localized_name": "values.a", + "name": "values.a", + "type": "FLOAT,INT", + "link": 628 + }, + { + "label": "b", + "localized_name": "values.b", + "name": "values.b", + "shape": 7, + "type": "FLOAT,INT", + "link": 629 + }, + { + "label": "c", + "localized_name": "values.c", + "name": "values.c", + "shape": 7, + "type": "FLOAT,INT", + "link": null + }, + { + "localized_name": "expression", + "name": "expression", + "type": "STRING", + "widget": { + "name": "expression" + }, + "link": null + } + ], + "outputs": [ + { + "localized_name": "FLOAT", + "name": "FLOAT", + "type": "FLOAT", + "links": null + }, + { + "localized_name": "INT", + "name": "INT", + "type": "INT", + "links": [ + 630, + 631 + ] + } + ], + "title": "Math Expression (length)", + "properties": { + "ue_properties": { + 
"widget_ue_connectable": {}, + "input_ue_unconnectable": {} + }, + "cnr_id": "comfy-core", + "ver": "0.18.1", + "Node name for S&R": "ComfyMathExpression" + }, + "widgets_values": [ + "a * b + 1" + ] + } + ], + "groups": [ + { + "id": 1, + "title": "Model", + "bounding": [ + 1630, + 3550, + 480, + 1270 + ], + "color": "#3f789e", + "font_size": 24, + "flags": {} + }, + { + "id": 2, + "title": "Generate Low Resolution", + "bounding": [ + 3150, + 3550, + 1020, + 1270 + ], + "color": "#3f789e", + "font_size": 24, + "flags": {} + }, + { + "id": 3, + "title": "Prompt", + "bounding": [ + 2140, + 3550, + 980, + 1270 + ], + "color": "#3f789e", + "font_size": 24, + "flags": {} + }, + { + "id": 6, + "title": "Generate High Resolution", + "bounding": [ + 4690, + 3550, + 960, + 1270 + ], + "color": "#3f789e", + "font_size": 24, + "flags": {} + }, + { + "id": 7, + "title": "Lantent Upscale", + "bounding": [ + 4200, + 3550, + 460, + 1270 + ], + "color": "#3f789e", + "font_size": 24, + "flags": {} + }, + { + "id": 19, + "title": "Video Settings", + "bounding": [ + 1110, + 3550, + 490, + 1270 + ], + "color": "#3f789e", + "font_size": 24, + "flags": {} + }, + { + "id": 20, + "title": "Image Preprocess", + "bounding": [ + 1630, + 4850, + 830, + 610 + ], + "color": "#3f789e", + "font_size": 24, + "flags": {} + }, + { + "id": 21, + "title": "Empty Latent", + "bounding": [ + 2830, + 4850, + 1340, + 610 + ], + "color": "#3f789e", + "font_size": 24, + "flags": {} + }, + { + "id": 22, + "title": "Number conversion", + "bounding": [ + 2490, + 4850, + 320, + 610 + ], + "color": "#3f789e", + "font_size": 24, + "flags": {} + }, + { + "id": 26, + "title": "Image will not affect the video", + "bounding": [ + 1110, + 4850, + 490, + 610 + ], + "color": "#3f789e", + "font_size": 24, + "flags": {} + } + ], + "links": [ + { + "id": 512, + "origin_id": 290, + "origin_slot": 0, + "target_id": 280, + "target_slot": 0, + "type": "LATENT" + }, + { + "id": 513, + "origin_id": 309, + "origin_slot": 1, + "target_id": 280, + "target_slot": 1, + "type": "LATENT" + }, + { + "id": 478, + "origin_id": 287, + "origin_slot": 0, + "target_id": 284, + "target_slot": 0, + "type": "MODEL" + }, + { + "id": 479, + "origin_id": 286, + "origin_slot": 0, + "target_id": 284, + "target_slot": 1, + "type": "CONDITIONING" + }, + { + "id": 480, + "origin_id": 286, + "origin_slot": 1, + "target_id": 284, + "target_slot": 2, + "type": "CONDITIONING" + }, + { + "id": 541, + "origin_id": 287, + "origin_slot": 0, + "target_id": 316, + "target_slot": 0, + "type": "MODEL" + }, + { + "id": 518, + "origin_id": 306, + "origin_slot": 0, + "target_id": 316, + "target_slot": 1, + "type": "CONDITIONING" + }, + { + "id": 519, + "origin_id": 306, + "origin_slot": 1, + "target_id": 316, + "target_slot": 2, + "type": "CONDITIONING" + }, + { + "id": 483, + "origin_id": 279, + "origin_slot": 0, + "target_id": 285, + "target_slot": 0, + "type": "NOISE" + }, + { + "id": 484, + "origin_id": 316, + "origin_slot": 0, + "target_id": 285, + "target_slot": 1, + "type": "GUIDER" + }, + { + "id": 485, + "origin_id": 293, + "origin_slot": 0, + "target_id": 285, + "target_slot": 2, + "type": "SAMPLER" + }, + { + "id": 544, + "origin_id": 308, + "origin_slot": 0, + "target_id": 285, + "target_slot": 3, + "type": "SIGMAS" + }, + { + "id": 487, + "origin_id": 321, + "origin_slot": 0, + "target_id": 285, + "target_slot": 4, + "type": "LATENT" + }, + { + "id": 475, + "origin_id": 306, + "origin_slot": 0, + "target_id": 286, + "target_slot": 0, + "type": "CONDITIONING" + }, + { + "id": 476, 
+ "origin_id": 306, + "origin_slot": 1, + "target_id": 286, + "target_slot": 1, + "type": "CONDITIONING" + }, + { + "id": 477, + "origin_id": 309, + "origin_slot": 0, + "target_id": 286, + "target_slot": 2, + "type": "LATENT" + }, + { + "id": 520, + "origin_id": 318, + "origin_slot": 0, + "target_id": 287, + "target_slot": 0, + "type": "MODEL" + }, + { + "id": 523, + "origin_id": 292, + "origin_slot": 0, + "target_id": 288, + "target_slot": 0, + "type": "IMAGE" + }, + { + "id": 547, + "origin_id": 309, + "origin_slot": 0, + "target_id": 289, + "target_slot": 0, + "type": "LATENT" + }, + { + "id": 545, + "origin_id": 313, + "origin_slot": 0, + "target_id": 289, + "target_slot": 1, + "type": "LATENT_UPSCALE_MODEL" + }, + { + "id": 554, + "origin_id": 295, + "origin_slot": 0, + "target_id": 289, + "target_slot": 2, + "type": "VAE" + }, + { + "id": 552, + "origin_id": 295, + "origin_slot": 0, + "target_id": 290, + "target_slot": 0, + "type": "VAE" + }, + { + "id": 515, + "origin_id": 291, + "origin_slot": 0, + "target_id": 290, + "target_slot": 1, + "type": "IMAGE" + }, + { + "id": 548, + "origin_id": 289, + "origin_slot": 0, + "target_id": 290, + "target_slot": 2, + "type": "LATENT" + }, + { + "id": 543, + "origin_id": 304, + "origin_slot": 0, + "target_id": 290, + "target_slot": 4, + "type": "BOOLEAN" + }, + { + "id": 505, + "origin_id": 288, + "origin_slot": 0, + "target_id": 291, + "target_slot": 0, + "type": "IMAGE" + }, + { + "id": 558, + "origin_id": 314, + "origin_slot": 0, + "target_id": 292, + "target_slot": 2, + "type": "INT" + }, + { + "id": 559, + "origin_id": 301, + "origin_slot": 0, + "target_id": 292, + "target_slot": 3, + "type": "INT" + }, + { + "id": 560, + "origin_id": 314, + "origin_slot": 0, + "target_id": 294, + "target_slot": 0, + "type": "INT" + }, + { + "id": 557, + "origin_id": 318, + "origin_slot": 2, + "target_id": 295, + "target_slot": 0, + "type": "VAE" + }, + { + "id": 562, + "origin_id": 301, + "origin_slot": 0, + "target_id": 296, + "target_slot": 0, + "type": "INT" + }, + { + "id": 561, + "origin_id": 294, + "origin_slot": 1, + "target_id": 297, + "target_slot": 0, + "type": "INT" + }, + { + "id": 563, + "origin_id": 296, + "origin_slot": 1, + "target_id": 297, + "target_slot": 1, + "type": "INT" + }, + { + "id": 556, + "origin_id": 318, + "origin_slot": 2, + "target_id": 298, + "target_slot": 0, + "type": "VAE" + }, + { + "id": 510, + "origin_id": 291, + "origin_slot": 0, + "target_id": 298, + "target_slot": 1, + "type": "IMAGE" + }, + { + "id": 511, + "origin_id": 297, + "origin_slot": 0, + "target_id": 298, + "target_slot": 2, + "type": "LATENT" + }, + { + "id": 542, + "origin_id": 304, + "origin_slot": 0, + "target_id": 298, + "target_slot": 4, + "type": "BOOLEAN" + }, + { + "id": 495, + "origin_id": 311, + "origin_slot": 1, + "target_id": 299, + "target_slot": 0, + "type": "LATENT" + }, + { + "id": 496, + "origin_id": 281, + "origin_slot": 0, + "target_id": 299, + "target_slot": 1, + "type": "VAE" + }, + { + "id": 564, + "origin_id": 302, + "origin_slot": 0, + "target_id": 300, + "target_slot": 0, + "type": "INT" + }, + { + "id": 526, + "origin_id": 305, + "origin_slot": 0, + "target_id": 306, + "target_slot": 0, + "type": "CONDITIONING" + }, + { + "id": 527, + "origin_id": 315, + "origin_slot": 0, + "target_id": 306, + "target_slot": 1, + "type": "CONDITIONING" + }, + { + "id": 566, + "origin_id": 300, + "origin_slot": 0, + "target_id": 306, + "target_slot": 2, + "type": "FLOAT" + }, + { + "id": 497, + "origin_id": 298, + "origin_slot": 0, + 
"target_id": 321, + "target_slot": 0, + "type": "LATENT" + }, + { + "id": 498, + "origin_id": 307, + "origin_slot": 0, + "target_id": 321, + "target_slot": 1, + "type": "LATENT" + }, + { + "id": 481, + "origin_id": 281, + "origin_slot": 0, + "target_id": 307, + "target_slot": 0, + "type": "VAE" + }, + { + "id": 565, + "origin_id": 300, + "origin_slot": 1, + "target_id": 307, + "target_slot": 2, + "type": "INT" + }, + { + "id": 488, + "origin_id": 285, + "origin_slot": 0, + "target_id": 309, + "target_slot": 0, + "type": "LATENT" + }, + { + "id": 490, + "origin_id": 278, + "origin_slot": 0, + "target_id": 310, + "target_slot": 0, + "type": "NOISE" + }, + { + "id": 491, + "origin_id": 284, + "origin_slot": 0, + "target_id": 310, + "target_slot": 1, + "type": "GUIDER" + }, + { + "id": 492, + "origin_id": 282, + "origin_slot": 0, + "target_id": 310, + "target_slot": 2, + "type": "SAMPLER" + }, + { + "id": 493, + "origin_id": 283, + "origin_slot": 0, + "target_id": 310, + "target_slot": 3, + "type": "SIGMAS" + }, + { + "id": 494, + "origin_id": 280, + "origin_slot": 0, + "target_id": 310, + "target_slot": 4, + "type": "LATENT" + }, + { + "id": 578, + "origin_id": 310, + "origin_slot": 0, + "target_id": 311, + "target_slot": 0, + "type": "LATENT" + }, + { + "id": 539, + "origin_id": 311, + "origin_slot": 0, + "target_id": 317, + "target_slot": 0, + "type": "LATENT" + }, + { + "id": 553, + "origin_id": 295, + "origin_slot": 0, + "target_id": 317, + "target_slot": 1, + "type": "VAE" + }, + { + "id": 538, + "origin_id": 317, + "origin_slot": 0, + "target_id": 312, + "target_slot": 0, + "type": "IMAGE" + }, + { + "id": 534, + "origin_id": 299, + "origin_slot": 0, + "target_id": 312, + "target_slot": 1, + "type": "AUDIO" + }, + { + "id": 591, + "origin_id": 300, + "origin_slot": 0, + "target_id": 312, + "target_slot": 2, + "type": "FLOAT" + }, + { + "id": 536, + "origin_id": 312, + "origin_slot": 0, + "target_id": -20, + "target_slot": 0, + "type": "VIDEO" + }, + { + "id": 595, + "origin_id": -10, + "origin_slot": 0, + "target_id": 320, + "target_slot": 0, + "type": "STRING" + }, + { + "id": 597, + "origin_id": -10, + "origin_slot": 1, + "target_id": 314, + "target_slot": 0, + "type": "INT" + }, + { + "id": 598, + "origin_id": -10, + "origin_slot": 2, + "target_id": 301, + "target_slot": 0, + "type": "INT" + }, + { + "id": 599, + "origin_id": -10, + "origin_slot": 3, + "target_id": 303, + "target_slot": 0, + "type": "INT" + }, + { + "id": 601, + "origin_id": -10, + "origin_slot": 4, + "target_id": 318, + "target_slot": 0, + "type": "COMBO" + }, + { + "id": 602, + "origin_id": -10, + "origin_slot": 5, + "target_id": 287, + "target_slot": 1, + "type": "COMBO" + }, + { + "id": 604, + "origin_id": -10, + "origin_slot": 4, + "target_id": 281, + "target_slot": 0, + "type": "COMBO" + }, + { + "id": 605, + "origin_id": -10, + "origin_slot": 4, + "target_id": 319, + "target_slot": 1, + "type": "COMBO" + }, + { + "id": 606, + "origin_id": -10, + "origin_slot": 6, + "target_id": 319, + "target_slot": 0, + "type": "COMBO" + }, + { + "id": 607, + "origin_id": -10, + "origin_slot": 7, + "target_id": 313, + "target_slot": 0, + "type": "COMBO" + }, + { + "id": 615, + "origin_id": 319, + "origin_slot": 0, + "target_id": 305, + "target_slot": 0, + "type": "CLIP" + }, + { + "id": 623, + "origin_id": 320, + "origin_slot": 0, + "target_id": 305, + "target_slot": 1, + "type": "STRING" + }, + { + "id": 625, + "origin_id": 319, + "origin_slot": 0, + "target_id": 315, + "target_slot": 0, + "type": "CLIP" + }, + { + "id": 
626, + "origin_id": 322, + "origin_slot": 0, + "target_id": 292, + "target_slot": 0, + "type": "IMAGE" + }, + { + "id": 627, + "origin_id": -10, + "origin_slot": 8, + "target_id": 302, + "target_slot": 0, + "type": "INT" + }, + { + "id": 628, + "origin_id": 303, + "origin_slot": 0, + "target_id": 323, + "target_slot": 0, + "type": "INT" + }, + { + "id": 629, + "origin_id": 302, + "origin_slot": 0, + "target_id": 323, + "target_slot": 1, + "type": "INT" + }, + { + "id": 630, + "origin_id": 323, + "origin_slot": 1, + "target_id": 307, + "target_slot": 1, + "type": "INT" + }, + { + "id": 631, + "origin_id": 323, + "origin_slot": 1, + "target_id": 297, + "target_slot": 2, + "type": "INT" + } + ], + "extra": { + "workflowRendererVersion": "Vue-corrected" + }, + "category": "Video generation and editing/Text to video" + } + ] + }, + "extra": { + "ue_links": [] + } +} \ No newline at end of file From 115f418b64e0c62251f81aa181b41ed7e933c339 Mon Sep 17 00:00:00 2001 From: comfyanonymous <121283862+comfyanonymous@users.noreply.github.com> Date: Sun, 26 Apr 2026 20:23:57 -0700 Subject: [PATCH 31/81] Make EmptySD3LatentImage node use intermediate dtype. (#13577) --- comfy_extras/nodes_sd3.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/comfy_extras/nodes_sd3.py b/comfy_extras/nodes_sd3.py index c43844a1a..6655c1ba7 100644 --- a/comfy_extras/nodes_sd3.py +++ b/comfy_extras/nodes_sd3.py @@ -54,7 +54,7 @@ class EmptySD3LatentImage(io.ComfyNode): @classmethod def execute(cls, width, height, batch_size=1) -> io.NodeOutput: - latent = torch.zeros([batch_size, 16, height // 8, width // 8], device=comfy.model_management.intermediate_device()) + latent = torch.zeros([batch_size, 16, height // 8, width // 8], device=comfy.model_management.intermediate_device(), dtype=comfy.model_management.intermediate_dtype()) return io.NodeOutput({"samples": latent, "downscale_ratio_spacial": 8}) generate = execute # TODO: remove From 6968a70e603a0d2c80387aa139b06276635b36e3 Mon Sep 17 00:00:00 2001 From: Alexander Piskun <13381981+bigcat88@users.noreply.github.com> Date: Mon, 27 Apr 2026 19:53:08 +0300 Subject: [PATCH 32/81] [Partner Nodes] HappyHorse model (#13582) * feat(api-nodes): add nodes for HappyHorse model Signed-off-by: bigcat88 * fix price badges Signed-off-by: bigcat88 * fix: allow durations up to 15 s Signed-off-by: bigcat88 --------- Signed-off-by: bigcat88 --- comfy_api_nodes/apis/wan.py | 4 +- comfy_api_nodes/nodes_wan.py | 555 +++++++++++++++++++++++++++++++++++ 2 files changed, 557 insertions(+), 2 deletions(-) diff --git a/comfy_api_nodes/apis/wan.py b/comfy_api_nodes/apis/wan.py index 44b65e4f6..c64acae97 100644 --- a/comfy_api_nodes/apis/wan.py +++ b/comfy_api_nodes/apis/wan.py @@ -118,7 +118,7 @@ class Wan27ReferenceVideoInputField(BaseModel): class Wan27ReferenceVideoParametersField(BaseModel): resolution: str = Field(...) ratio: str | None = Field(None) - duration: int = Field(5, ge=2, le=10) + duration: int = Field(5, ge=2, le=15) watermark: bool = Field(False) seed: int = Field(..., ge=0, le=2147483647) @@ -157,7 +157,7 @@ class Wan27VideoEditInputField(BaseModel): class Wan27VideoEditParametersField(BaseModel): resolution: str = Field(...) 
ratio: str | None = Field(None) - duration: int = Field(0) + duration: int | None = Field(0) audio_setting: str = Field("auto") watermark: bool = Field(False) seed: int = Field(..., ge=0, le=2147483647) diff --git a/comfy_api_nodes/nodes_wan.py b/comfy_api_nodes/nodes_wan.py index d1470894a..7d7466fb6 100644 --- a/comfy_api_nodes/nodes_wan.py +++ b/comfy_api_nodes/nodes_wan.py @@ -1646,6 +1646,557 @@ class Wan2ReferenceVideoApi(IO.ComfyNode): return IO.NodeOutput(await download_url_to_video_output(response.output.video_url)) +class HappyHorseTextToVideoApi(IO.ComfyNode): + @classmethod + def define_schema(cls): + return IO.Schema( + node_id="HappyHorseTextToVideoApi", + display_name="HappyHorse Text to Video", + category="api node/video/Wan", + description="Generates a video based on a text prompt using the HappyHorse model.", + inputs=[ + IO.DynamicCombo.Input( + "model", + options=[ + IO.DynamicCombo.Option( + "happyhorse-1.0-t2v", + [ + IO.String.Input( + "prompt", + multiline=True, + default="", + tooltip="Prompt describing the elements and visual features. " + "Supports English and Chinese.", + ), + IO.Combo.Input( + "resolution", + options=["720P", "1080P"], + ), + IO.Combo.Input( + "ratio", + options=["16:9", "9:16", "1:1", "4:3", "3:4"], + ), + IO.Int.Input( + "duration", + default=5, + min=3, + max=15, + step=1, + display_mode=IO.NumberDisplay.number, + ), + ], + ), + ], + ), + IO.Int.Input( + "seed", + default=0, + min=0, + max=2147483647, + step=1, + display_mode=IO.NumberDisplay.number, + control_after_generate=True, + tooltip="Seed to use for generation.", + ), + IO.Boolean.Input( + "watermark", + default=False, + tooltip="Whether to add an AI-generated watermark to the result.", + advanced=True, + ), + ], + outputs=[ + IO.Video.Output(), + ], + hidden=[ + IO.Hidden.auth_token_comfy_org, + IO.Hidden.api_key_comfy_org, + IO.Hidden.unique_id, + ], + is_api_node=True, + price_badge=IO.PriceBadge( + depends_on=IO.PriceBadgeDepends(widgets=["model", "model.resolution", "model.duration"]), + expr=""" + ( + $res := $lookup(widgets, "model.resolution"); + $dur := $lookup(widgets, "model.duration"); + $ppsTable := { "720p": 0.14, "1080p": 0.24 }; + $pps := $lookup($ppsTable, $res); + { "type": "usd", "usd": $pps * $dur } + ) + """, + ), + ) + + @classmethod + async def execute( + cls, + model: dict, + seed: int, + watermark: bool, + ): + validate_string(model["prompt"], strip_whitespace=False, min_length=1) + initial_response = await sync_op( + cls, + ApiEndpoint( + path="/proxy/wan/api/v1/services/aigc/video-generation/video-synthesis", + method="POST", + ), + response_model=TaskCreationResponse, + data=Wan27Text2VideoTaskCreationRequest( + model=model["model"], + input=Text2VideoInputField( + prompt=model["prompt"], + negative_prompt=None, + ), + parameters=Wan27Text2VideoParametersField( + resolution=model["resolution"], + ratio=model["ratio"], + duration=model["duration"], + seed=seed, + watermark=watermark, + ), + ), + ) + if not initial_response.output: + raise Exception(f"An unknown error occurred: {initial_response.code} - {initial_response.message}") + response = await poll_op( + cls, + ApiEndpoint(path=f"/proxy/wan/api/v1/tasks/{initial_response.output.task_id}"), + response_model=VideoTaskStatusResponse, + status_extractor=lambda x: x.output.task_status, + poll_interval=7, + ) + return IO.NodeOutput(await download_url_to_video_output(response.output.video_url)) + + +class HappyHorseImageToVideoApi(IO.ComfyNode): + @classmethod + def define_schema(cls): + return IO.Schema( + 
node_id="HappyHorseImageToVideoApi", + display_name="HappyHorse Image to Video", + category="api node/video/Wan", + description="Generate a video from a first-frame image using the HappyHorse model.", + inputs=[ + IO.DynamicCombo.Input( + "model", + options=[ + IO.DynamicCombo.Option( + "happyhorse-1.0-i2v", + [ + IO.String.Input( + "prompt", + multiline=True, + default="", + tooltip="Prompt describing the elements and visual features. " + "Supports English and Chinese.", + ), + IO.Combo.Input( + "resolution", + options=["720P", "1080P"], + ), + IO.Int.Input( + "duration", + default=5, + min=3, + max=15, + step=1, + display_mode=IO.NumberDisplay.number, + ), + ], + ), + ], + ), + IO.Image.Input( + "first_frame", + tooltip="First frame image. The output aspect ratio is derived from this image.", + ), + IO.Int.Input( + "seed", + default=0, + min=0, + max=2147483647, + step=1, + display_mode=IO.NumberDisplay.number, + control_after_generate=True, + tooltip="Seed to use for generation.", + ), + IO.Boolean.Input( + "watermark", + default=False, + tooltip="Whether to add an AI-generated watermark to the result.", + advanced=True, + ), + ], + outputs=[ + IO.Video.Output(), + ], + hidden=[ + IO.Hidden.auth_token_comfy_org, + IO.Hidden.api_key_comfy_org, + IO.Hidden.unique_id, + ], + is_api_node=True, + price_badge=IO.PriceBadge( + depends_on=IO.PriceBadgeDepends(widgets=["model", "model.resolution", "model.duration"]), + expr=""" + ( + $res := $lookup(widgets, "model.resolution"); + $dur := $lookup(widgets, "model.duration"); + $ppsTable := { "720p": 0.14, "1080p": 0.24 }; + $pps := $lookup($ppsTable, $res); + { "type": "usd", "usd": $pps * $dur } + ) + """, + ), + ) + + @classmethod + async def execute( + cls, + model: dict, + first_frame: Input.Image, + seed: int, + watermark: bool, + ): + media = [ + Wan27MediaItem( + type="first_frame", + url=await upload_image_to_comfyapi(cls, image=first_frame), + ) + ] + initial_response = await sync_op( + cls, + ApiEndpoint( + path="/proxy/wan/api/v1/services/aigc/video-generation/video-synthesis", + method="POST", + ), + response_model=TaskCreationResponse, + data=Wan27ImageToVideoTaskCreationRequest( + model=model["model"], + input=Wan27ImageToVideoInputField( + prompt=model["prompt"] or None, + negative_prompt=None, + media=media, + ), + parameters=Wan27ImageToVideoParametersField( + resolution=model["resolution"], + duration=model["duration"], + seed=seed, + watermark=watermark, + ), + ), + ) + if not initial_response.output: + raise Exception(f"An unknown error occurred: {initial_response.code} - {initial_response.message}") + response = await poll_op( + cls, + ApiEndpoint(path=f"/proxy/wan/api/v1/tasks/{initial_response.output.task_id}"), + response_model=VideoTaskStatusResponse, + status_extractor=lambda x: x.output.task_status, + poll_interval=7, + ) + return IO.NodeOutput(await download_url_to_video_output(response.output.video_url)) + + +class HappyHorseVideoEditApi(IO.ComfyNode): + @classmethod + def define_schema(cls): + return IO.Schema( + node_id="HappyHorseVideoEditApi", + display_name="HappyHorse Video Edit", + category="api node/video/Wan", + description="Edit a video using text instructions or reference images with the HappyHorse model. 
" + "Output duration is 3-15s and matches the input video; inputs longer than 15s are truncated.", + inputs=[ + IO.DynamicCombo.Input( + "model", + options=[ + IO.DynamicCombo.Option( + "happyhorse-1.0-video-edit", + [ + IO.String.Input( + "prompt", + multiline=True, + default="", + tooltip="Editing instructions or style transfer requirements.", + ), + IO.Combo.Input( + "resolution", + options=["720P", "1080P"], + ), + IO.Combo.Input( + "ratio", + options=["16:9", "9:16", "1:1", "4:3", "3:4"], + tooltip="Aspect ratio. If not changed, approximates the input video ratio.", + ), + IO.Autogrow.Input( + "reference_images", + template=IO.Autogrow.TemplateNames( + IO.Image.Input("reference_image"), + names=[ + "image1", + "image2", + "image3", + "image4", + "image5", + ], + min=0, + ), + ), + ], + ), + ], + ), + IO.Video.Input( + "video", + tooltip="The video to edit.", + ), + IO.Int.Input( + "seed", + default=0, + min=0, + max=2147483647, + step=1, + display_mode=IO.NumberDisplay.number, + control_after_generate=True, + tooltip="Seed to use for generation.", + ), + IO.Boolean.Input( + "watermark", + default=False, + tooltip="Whether to add an AI-generated watermark to the result.", + advanced=True, + ), + ], + outputs=[ + IO.Video.Output(), + ], + hidden=[ + IO.Hidden.auth_token_comfy_org, + IO.Hidden.api_key_comfy_org, + IO.Hidden.unique_id, + ], + is_api_node=True, + price_badge=IO.PriceBadge( + depends_on=IO.PriceBadgeDepends(widgets=["model", "model.resolution"]), + expr=""" + ( + $res := $lookup(widgets, "model.resolution"); + $ppsTable := { "720p": 0.14, "1080p": 0.24 }; + $pps := $lookup($ppsTable, $res); + { "type": "usd", "usd": $pps, "format": { "suffix": "/second" } } + ) + """, + ), + ) + + @classmethod + async def execute( + cls, + model: dict, + video: Input.Video, + seed: int, + watermark: bool, + ): + validate_string(model["prompt"], strip_whitespace=False, min_length=1) + validate_video_duration(video, min_duration=3, max_duration=60) + media = [Wan27MediaItem(type="video", url=await upload_video_to_comfyapi(cls, video))] + reference_images = model.get("reference_images", {}) + for key in reference_images: + media.append( + Wan27MediaItem( + type="reference_image", url=await upload_image_to_comfyapi(cls, image=reference_images[key]) + ) + ) + initial_response = await sync_op( + cls, + ApiEndpoint( + path="/proxy/wan/api/v1/services/aigc/video-generation/video-synthesis", + method="POST", + ), + response_model=TaskCreationResponse, + data=Wan27VideoEditTaskCreationRequest( + model=model["model"], + input=Wan27VideoEditInputField(prompt=model["prompt"], media=media), + parameters=Wan27VideoEditParametersField( + resolution=model["resolution"], + ratio=model["ratio"], + duration=None, + watermark=watermark, + seed=seed, + ), + ), + ) + if not initial_response.output: + raise Exception(f"An unknown error occurred: {initial_response.code} - {initial_response.message}") + response = await poll_op( + cls, + ApiEndpoint(path=f"/proxy/wan/api/v1/tasks/{initial_response.output.task_id}"), + response_model=VideoTaskStatusResponse, + status_extractor=lambda x: x.output.task_status, + poll_interval=7, + ) + return IO.NodeOutput(await download_url_to_video_output(response.output.video_url)) + + +class HappyHorseReferenceVideoApi(IO.ComfyNode): + @classmethod + def define_schema(cls): + return IO.Schema( + node_id="HappyHorseReferenceVideoApi", + display_name="HappyHorse Reference to Video", + category="api node/video/Wan", + description="Generate a video featuring a person or object from 
reference materials with the HappyHorse " + "model. Supports single-character performances and multi-character interactions.", + inputs=[ + IO.DynamicCombo.Input( + "model", + options=[ + IO.DynamicCombo.Option( + "happyhorse-1.0-r2v", + [ + IO.String.Input( + "prompt", + multiline=True, + default="", + tooltip="Prompt describing the video. Use identifiers such as 'character1' and " + "'character2' to refer to the reference characters.", + ), + IO.Combo.Input( + "resolution", + options=["720P", "1080P"], + ), + IO.Combo.Input( + "ratio", + options=["16:9", "9:16", "1:1", "4:3", "3:4"], + ), + IO.Int.Input( + "duration", + default=5, + min=3, + max=15, + step=1, + display_mode=IO.NumberDisplay.number, + ), + IO.Autogrow.Input( + "reference_images", + template=IO.Autogrow.TemplateNames( + IO.Image.Input("reference_image"), + names=[ + "image1", + "image2", + "image3", + "image4", + "image5", + "image6", + "image7", + "image8", + "image9", + ], + min=1, + ), + ), + ], + ), + ], + ), + IO.Int.Input( + "seed", + default=0, + min=0, + max=2147483647, + step=1, + display_mode=IO.NumberDisplay.number, + control_after_generate=True, + tooltip="Seed to use for generation.", + ), + IO.Boolean.Input( + "watermark", + default=False, + tooltip="Whether to add an AI-generated watermark to the result.", + advanced=True, + ), + ], + outputs=[ + IO.Video.Output(), + ], + hidden=[ + IO.Hidden.auth_token_comfy_org, + IO.Hidden.api_key_comfy_org, + IO.Hidden.unique_id, + ], + is_api_node=True, + price_badge=IO.PriceBadge( + depends_on=IO.PriceBadgeDepends(widgets=["model", "model.resolution", "model.duration"]), + expr=""" + ( + $res := $lookup(widgets, "model.resolution"); + $dur := $lookup(widgets, "model.duration"); + $ppsTable := { "720p": 0.14, "1080p": 0.24 }; + $pps := $lookup($ppsTable, $res); + { "type": "usd", "usd": $pps * $dur } + ) + """, + ), + ) + + @classmethod + async def execute( + cls, + model: dict, + seed: int, + watermark: bool, + ): + validate_string(model["prompt"], strip_whitespace=False, min_length=1) + media = [] + reference_images = model.get("reference_images", {}) + for key in reference_images: + media.append( + Wan27MediaItem( + type="reference_image", + url=await upload_image_to_comfyapi(cls, image=reference_images[key]), + ) + ) + if not media: + raise ValueError("At least one reference reference image must be provided.") + + initial_response = await sync_op( + cls, + ApiEndpoint( + path="/proxy/wan/api/v1/services/aigc/video-generation/video-synthesis", + method="POST", + ), + response_model=TaskCreationResponse, + data=Wan27ReferenceVideoTaskCreationRequest( + model=model["model"], + input=Wan27ReferenceVideoInputField( + prompt=model["prompt"], + negative_prompt=None, + media=media, + ), + parameters=Wan27ReferenceVideoParametersField( + resolution=model["resolution"], + ratio=model["ratio"], + duration=model["duration"], + watermark=watermark, + seed=seed, + ), + ), + ) + if not initial_response.output: + raise Exception(f"An unknown error occurred: {initial_response.code} - {initial_response.message}") + response = await poll_op( + cls, + ApiEndpoint(path=f"/proxy/wan/api/v1/tasks/{initial_response.output.task_id}"), + response_model=VideoTaskStatusResponse, + status_extractor=lambda x: x.output.task_status, + poll_interval=7, + ) + return IO.NodeOutput(await download_url_to_video_output(response.output.video_url)) + + class WanApiExtension(ComfyExtension): @override async def get_node_list(self) -> list[type[IO.ComfyNode]]: @@ -1660,6 +2211,10 @@ class 
WanApiExtension(ComfyExtension): Wan2VideoContinuationApi, Wan2VideoEditApi, Wan2ReferenceVideoApi, + HappyHorseTextToVideoApi, + HappyHorseImageToVideoApi, + HappyHorseVideoEditApi, + HappyHorseReferenceVideoApi, ] From 1233f077b1b96ec1f8c7c39e83bbe1a734b36424 Mon Sep 17 00:00:00 2001 From: "Daxiong (Lin)" Date: Tue, 28 Apr 2026 01:06:03 +0800 Subject: [PATCH 33/81] chore: update workflow templates to v0.9.63 (#13586) Co-authored-by: Jedrzej Kosinski --- requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/requirements.txt b/requirements.txt index 6c7457e03..66a130a9b 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,5 +1,5 @@ comfyui-frontend-package==1.42.15 -comfyui-workflow-templates==0.9.62 +comfyui-workflow-templates==0.9.63 comfyui-embedded-docs==0.4.4 torch torchsde From 75143eeb06b14bc93db71de207945f6f888be4e0 Mon Sep 17 00:00:00 2001 From: comfyanonymous Date: Mon, 27 Apr 2026 13:24:36 -0400 Subject: [PATCH 34/81] ComfyUI v0.20.0 --- comfyui_version.py | 2 +- pyproject.toml | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/comfyui_version.py b/comfyui_version.py index 2a1eb9905..9c547a228 100644 --- a/comfyui_version.py +++ b/comfyui_version.py @@ -1,3 +1,3 @@ # This file is automatically generated by the build process when version is # updated in pyproject.toml. -__version__ = "0.19.3" +__version__ = "0.20.0" diff --git a/pyproject.toml b/pyproject.toml index 8fa92ecbe..785837c09 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,6 @@ [project] name = "ComfyUI" -version = "0.19.3" +version = "0.20.0" readme = "README.md" license = { file = "LICENSE" } requires-python = ">=3.10" From 64b8457f55cd7fb54ca7a956d9c73b505e903e0c Mon Sep 17 00:00:00 2001 From: comfyanonymous Date: Mon, 27 Apr 2026 16:10:14 -0400 Subject: [PATCH 35/81] ComfyUI v0.20.1 because github is broken again and messed up my release. --- comfyui_version.py | 2 +- pyproject.toml | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/comfyui_version.py b/comfyui_version.py index 9c547a228..53e7156e3 100644 --- a/comfyui_version.py +++ b/comfyui_version.py @@ -1,3 +1,3 @@ # This file is automatically generated by the build process when version is # updated in pyproject.toml. -__version__ = "0.20.0" +__version__ = "0.20.1" diff --git a/pyproject.toml b/pyproject.toml index 785837c09..633dac517 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,6 @@ [project] name = "ComfyUI" -version = "0.20.0" +version = "0.20.1" readme = "README.md" license = { file = "LICENSE" } requires-python = ">=3.10" From 3cbf015578ac04c30b10078887a774a4b4e45fe4 Mon Sep 17 00:00:00 2001 From: comfyanonymous <121283862+comfyanonymous@users.noreply.github.com> Date: Mon, 27 Apr 2026 16:44:12 -0700 Subject: [PATCH 36/81] Read audio and video at the same time in video loader node. 
(#13591) --- comfy_api/latest/_input_impl/video_types.py | 135 ++++++++++++-------- 1 file changed, 82 insertions(+), 53 deletions(-) diff --git a/comfy_api/latest/_input_impl/video_types.py b/comfy_api/latest/_input_impl/video_types.py index eb4d3701d..812b3eb30 100644 --- a/comfy_api/latest/_input_impl/video_types.py +++ b/comfy_api/latest/_input_impl/video_types.py @@ -12,6 +12,7 @@ import numpy as np import math import torch from .._util import VideoContainer, VideoCodec, VideoComponents +import logging def container_to_output_format(container_format: str | None) -> str | None: @@ -238,32 +239,86 @@ class VideoFromFile(VideoInput): start_time = max(self._get_raw_duration() + self.__start_time, 0) else: start_time = self.__start_time + # Get video frames frames = [] + audio_frames = [] alphas = None start_pts = int(start_time / video_stream.time_base) end_pts = int((start_time + self.__duration) / video_stream.time_base) - container.seek(start_pts, stream=video_stream) - image_format = 'gbrpf32le' - for frame in container.decode(video_stream): - if alphas is None: - for comp in frame.format.components: - if comp.is_alpha: - alphas = [] - image_format = 'gbrapf32le' - break - if frame.pts < start_pts: - continue - if self.__duration and frame.pts >= end_pts: + if start_pts != 0: + container.seek(start_pts, stream=video_stream) + + image_format = 'gbrpf32le' + audio = None + + streams = [video_stream] + has_first_audio_frame = False + checked_alpha = False + + # Default to False so we decode until EOF if duration is 0 + video_done = False + audio_done = True + + if len(container.streams.audio): + audio_stream = container.streams.audio[-1] + streams += [audio_stream] + resampler = av.audio.resampler.AudioResampler(format='fltp') + audio_done = False + + for packet in container.demux(*streams): + if video_done and audio_done: break - img = frame.to_ndarray(format=image_format) # shape: (H, W, 4) - if alphas is None: - frames.append(torch.from_numpy(img)) - else: - frames.append(torch.from_numpy(img[..., :-1])) - alphas.append(torch.from_numpy(img[..., -1:])) + if packet.stream.type == "video": + if video_done: + continue + try: + for frame in packet.decode(): + if frame.pts < start_pts: + continue + if self.__duration and frame.pts >= end_pts: + video_done = True + break + + if not checked_alpha: + for comp in frame.format.components: + if comp.is_alpha: + alphas = [] + image_format = 'gbrapf32le' + break + checked_alpha = True + + img = frame.to_ndarray(format=image_format) # shape: (H, W, 4) + if alphas is None: + frames.append(torch.from_numpy(img)) + else: + frames.append(torch.from_numpy(img[..., :-1])) + alphas.append(torch.from_numpy(img[..., -1:])) + except av.error.InvalidDataError: + logging.info("pyav decode error") + + elif packet.stream.type == "audio": + if audio_done: + continue + + aframes = itertools.chain.from_iterable( + map(resampler.resample, packet.decode()) + ) + for frame in aframes: + if self.__duration and frame.time > start_time + self.__duration: + audio_done = True + break + + if not has_first_audio_frame: + offset_seconds = start_time - frame.pts * audio_stream.time_base + to_skip = max(0, int(offset_seconds * audio_stream.sample_rate)) + if to_skip < frame.samples: + has_first_audio_frame = True + audio_frames.append(frame.to_ndarray()[..., to_skip:]) + else: + audio_frames.append(frame.to_ndarray()) images = torch.stack(frames) if len(frames) > 0 else torch.zeros(0, 0, 0, 3) if alphas is not None: @@ -272,42 +327,16 @@ class VideoFromFile(VideoInput): # Get 
frame rate frame_rate = Fraction(video_stream.average_rate) if video_stream.average_rate else Fraction(1) - # Get audio if available - audio = None - container.seek(start_pts, stream=video_stream) - # Use last stream for consistency - if len(container.streams.audio): - audio_stream = container.streams.audio[-1] - audio_frames = [] - resample = av.audio.resampler.AudioResampler(format='fltp').resample - frames = itertools.chain.from_iterable( - map(resample, container.decode(audio_stream)) - ) + if len(audio_frames) > 0: + audio_data = np.concatenate(audio_frames, axis=1) # shape: (channels, total_samples) + if self.__duration: + audio_data = audio_data[..., :int(self.__duration * audio_stream.sample_rate)] - has_first_frame = False - for frame in frames: - offset_seconds = start_time - frame.pts * audio_stream.time_base - to_skip = max(0, int(offset_seconds * audio_stream.sample_rate)) - if to_skip < frame.samples: - has_first_frame = True - break - if has_first_frame: - audio_frames.append(frame.to_ndarray()[..., to_skip:]) - - for frame in frames: - if self.__duration and frame.time > start_time + self.__duration: - break - audio_frames.append(frame.to_ndarray()) # shape: (channels, samples) - if len(audio_frames) > 0: - audio_data = np.concatenate(audio_frames, axis=1) # shape: (channels, total_samples) - if self.__duration: - audio_data = audio_data[..., :int(self.__duration * audio_stream.sample_rate)] - - audio_tensor = torch.from_numpy(audio_data).unsqueeze(0) # shape: (1, channels, total_samples) - audio = AudioInput({ - "waveform": audio_tensor, - "sample_rate": int(audio_stream.sample_rate) if audio_stream.sample_rate else 1, - }) + audio_tensor = torch.from_numpy(audio_data).unsqueeze(0) # shape: (1, channels, total_samples) + audio = AudioInput({ + "waveform": audio_tensor, + "sample_rate": int(audio_stream.sample_rate) if audio_stream.sample_rate else 1, + }) metadata = container.metadata return VideoComponents(images=images, alpha=alphas, audio=audio, frame_rate=frame_rate, metadata=metadata) From b47f15f25a2a96b5e9fd7efb4ffa5d988038d6ff Mon Sep 17 00:00:00 2001 From: rattus <46076784+rattus128@users.noreply.github.com> Date: Tue, 28 Apr 2026 12:22:31 +1000 Subject: [PATCH 37/81] fix: Handle un-inited meta-tensors in models (fixes a CPU TE crash) (CORE-67) (#13578) --- comfy/model_patcher.py | 5 ++++- comfy/ops.py | 16 +++++++++++++++- 2 files changed, 19 insertions(+), 2 deletions(-) diff --git a/comfy/model_patcher.py b/comfy/model_patcher.py index ee56f8523..e259aed63 100644 --- a/comfy/model_patcher.py +++ b/comfy/model_patcher.py @@ -31,6 +31,7 @@ import comfy.float import comfy.hooks import comfy.lora import comfy.model_management +import comfy.ops import comfy.patcher_extension import comfy.utils from comfy.comfy_types import UnetWrapperFunction @@ -856,7 +857,9 @@ class ModelPatcher: if m.comfy_patched_weights == True: continue - for param in params: + for param, param_value in params.items(): + if hasattr(m, "comfy_cast_weights") and getattr(param_value, "is_meta", False): + comfy.ops.disable_weight_init._zero_init_parameter(m, param) key = key_param_name_to_key(n, param) self.unpin_weight(key) self.patch_weight_to_device(key, device_to=device_to) diff --git a/comfy/ops.py b/comfy/ops.py index 7a9b4b84c..050f7cda0 100644 --- a/comfy/ops.py +++ b/comfy/ops.py @@ -79,14 +79,21 @@ def cast_to_input(weight, input, non_blocking=False, copy=True): return comfy.model_management.cast_to(weight, input.dtype, input.device, non_blocking=non_blocking, copy=copy) -def 
cast_bias_weight_with_vbar(s, dtype, device, bias_dtype, non_blocking, compute_dtype, want_requant): +def materialize_meta_param(s, param_keys): + for param_key in param_keys: + param = getattr(s, param_key, None) + if param is not None and getattr(param, "is_meta", False): + setattr(s, param_key, torch.nn.Parameter(torch.zeros(param.shape, dtype=param.dtype), requires_grad=param.requires_grad)) + +def cast_bias_weight_with_vbar(s, dtype, device, bias_dtype, non_blocking, compute_dtype, want_requant): #vbar doesn't support CPU weights, but some custom nodes have weird paths #that might switch the layer to the CPU and expect it to work. We have to take #a clone conservatively as we are mmapped and some SFT files are packed misaligned #If you are a custom node author reading this, please move your layer to the GPU #or declare your ModelPatcher as CPU in the first place. if comfy.model_management.is_device_cpu(device): + materialize_meta_param(s, ["weight", "bias"]) weight = s.weight.to(dtype=dtype, copy=True) if isinstance(weight, QuantizedTensor): weight = weight.dequantize() @@ -108,6 +115,7 @@ def cast_bias_weight_with_vbar(s, dtype, device, bias_dtype, non_blocking, compu xfer_dest = comfy_aimdo.torch.aimdo_to_tensor(s._v, device) if not resident: + materialize_meta_param(s, ["weight", "bias"]) cast_geometry = comfy.memory_management.tensors_to_geometries([ s.weight, s.bias ]) cast_dest = None @@ -306,6 +314,12 @@ class CastWeightBiasOp: bias_function = [] class disable_weight_init: + @staticmethod + def _zero_init_parameter(module, name): + param = getattr(module, name) + device = None if getattr(param, "is_meta", False) else param.device + setattr(module, name, torch.nn.Parameter(torch.zeros(param.shape, device=device, dtype=param.dtype), requires_grad=False)) + @staticmethod def _lazy_load_from_state_dict(module, state_dict, prefix, local_metadata, missing_keys, unexpected_keys, weight_shape, From ed201fff08fbbd3dbcc500b252a9f41e8051c256 Mon Sep 17 00:00:00 2001 From: Matt Miller Date: Mon, 27 Apr 2026 19:51:33 -0700 Subject: [PATCH 38/81] ci: dispatch tag push to Comfy-Org/cloud (#13541) Fires on v* tag push (earlier than release.published, which can lag) and triggers a repository_dispatch on Comfy-Org/cloud with event_type comfyui_tag_pushed. Legacy desktop dispatch in release-webhook.yml is left untouched. --- .github/workflows/tag-dispatch-cloud.yml | 45 ++++++++++++++++++++++++ 1 file changed, 45 insertions(+) create mode 100644 .github/workflows/tag-dispatch-cloud.yml diff --git a/.github/workflows/tag-dispatch-cloud.yml b/.github/workflows/tag-dispatch-cloud.yml new file mode 100644 index 000000000..53a0e91d6 --- /dev/null +++ b/.github/workflows/tag-dispatch-cloud.yml @@ -0,0 +1,45 @@ +name: Tag Dispatch to Cloud + +on: + push: + tags: + - 'v*' + +jobs: + dispatch-cloud: + runs-on: ubuntu-latest + steps: + - name: Send repository dispatch to cloud + env: + DISPATCH_TOKEN: ${{ secrets.CLOUD_REPO_DISPATCH_TOKEN }} + RELEASE_TAG: ${{ github.ref_name }} + run: | + set -euo pipefail + + if [ -z "${DISPATCH_TOKEN:-}" ]; then + echo "::error::CLOUD_REPO_DISPATCH_TOKEN is required but not set." 
+            exit 1
+          fi
+
+          RELEASE_URL="https://github.com/${{ github.repository }}/releases/tag/${RELEASE_TAG}"
+
+          PAYLOAD="$(jq -n \
+            --arg release_tag "$RELEASE_TAG" \
+            --arg release_url "$RELEASE_URL" \
+            '{
+              event_type: "comfyui_tag_pushed",
+              client_payload: {
+                release_tag: $release_tag,
+                release_url: $release_url
+              }
+            }')"
+
+          curl -fsSL \
+            -X POST \
+            -H "Accept: application/vnd.github+json" \
+            -H "Content-Type: application/json" \
+            -H "Authorization: Bearer ${DISPATCH_TOKEN}" \
+            https://api.github.com/repos/Comfy-Org/cloud/dispatches \
+            -d "$PAYLOAD"
+
+          echo "✅ Dispatched ComfyUI tag ${RELEASE_TAG} to Comfy-Org/cloud"

From c0d77a5d53828b8027a4f333e41473253150b614 Mon Sep 17 00:00:00 2001
From: "Daxiong (Lin)"
Date: Tue, 28 Apr 2026 15:59:59 +0800
Subject: [PATCH 39/81] Change the `save 3d model` node's filename prefix to
 `3d/ComfyUI` (CORE-106) (#12826)

* Change save 3d model's filename prefix to 3d/ComfyUI

Since this node has already been renamed from `Save GLB` to
`Save 3D Model`, the filename prefix `3d` is a better fit than `mesh`.

* use lowercase

---------

---
 comfy_extras/nodes_hunyuan3d.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/comfy_extras/nodes_hunyuan3d.py b/comfy_extras/nodes_hunyuan3d.py
index df0c3e4b1..fa55ead59 100644
--- a/comfy_extras/nodes_hunyuan3d.py
+++ b/comfy_extras/nodes_hunyuan3d.py
@@ -637,7 +637,7 @@ class SaveGLB(IO.ComfyNode):
                 ],
                 tooltip="Mesh or 3D file to save",
             ),
-            IO.String.Input("filename_prefix", default="mesh/ComfyUI"),
+            IO.String.Input("filename_prefix", default="3d/ComfyUI"),
         ],
         hidden=[IO.Hidden.prompt, IO.Hidden.extra_pnginfo]
     )

From 24de8dc01bc6c857be12f25ba24fb753a48cb0c2 Mon Sep 17 00:00:00 2001
From: Gilad Schreiber
Date: Tue, 28 Apr 2026 11:18:19 +0300
Subject: [PATCH 40/81] Fix SolidMask and MaskComposite device mismatch with
 --gpu-only (#13296)

SolidMask had a hardcoded device="cpu" while other nodes
(e.g. EmptyImage) follow intermediate_device(). This caused a
RuntimeError when MaskComposite combined masks from different device
sources under --gpu-only.

- SolidMask: use intermediate_device() instead of hardcoded "cpu"
- MaskComposite: align source device to destination before operating

Co-authored-by: Alexis Rolland
Co-authored-by: Jedrzej Kosinski
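The failure mode is easy to reproduce in plain PyTorch. The snippet below is
an illustrative sketch (it needs a CUDA device), not code from this patch:

    import torch

    dst = torch.zeros(1, 64, 64, device="cuda")  # mask produced under --gpu-only
    src = torch.zeros(1, 64, 64)                 # old SolidMask output: always CPU

    # Any binary op across devices fails, e.g. a "multiply" composite:
    #   dst * src  ->  RuntimeError: Expected all tensors to be on the same device
    src = src.to(dst.device)  # the fix: align the source to the destination first
    blended = dst * src       # now fine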
---
 comfy_extras/nodes_mask.py | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/comfy_extras/nodes_mask.py b/comfy_extras/nodes_mask.py
index c44602597..8ca947718 100644
--- a/comfy_extras/nodes_mask.py
+++ b/comfy_extras/nodes_mask.py
@@ -2,6 +2,7 @@ import numpy as np
 import scipy.ndimage
 import torch
 import comfy.utils
+import comfy.model_management
 import node_helpers
 from typing_extensions import override
 from comfy_api.latest import ComfyExtension, IO, UI
@@ -188,7 +189,7 @@ class SolidMask(IO.ComfyNode):

     @classmethod
     def execute(cls, value, width, height) -> IO.NodeOutput:
-        out = torch.full((1, height, width), value, dtype=torch.float32, device="cpu")
+        out = torch.full((1, height, width), value, dtype=torch.float32, device=comfy.model_management.intermediate_device())
         return IO.NodeOutput(out)

     solid = execute  # TODO: remove
@@ -262,6 +263,7 @@ class MaskComposite(IO.ComfyNode):
     def execute(cls, destination, source, x, y, operation) -> IO.NodeOutput:
         output = destination.reshape((-1, destination.shape[-2], destination.shape[-1])).clone()
         source = source.reshape((-1, source.shape[-2], source.shape[-1]))
+        source = source.to(output.device)

         left, top = (x, y,)
         right, bottom = (min(left + source.shape[-1], destination.shape[-1]), min(top + source.shape[-2], destination.shape[-2]))

From 13519934ba4220bba47e51c185a63fc837c3d6e2 Mon Sep 17 00:00:00 2001
From: comfyanonymous <121283862+comfyanonymous@users.noreply.github.com>
Date: Tue, 28 Apr 2026 13:27:42 -0700
Subject: [PATCH 41/81] Handle metadata rotation in pyav code. (#13605)

---
 comfy_api/latest/_input_impl/video_types.py | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/comfy_api/latest/_input_impl/video_types.py b/comfy_api/latest/_input_impl/video_types.py
index 812b3eb30..b2daa3d7d 100644
--- a/comfy_api/latest/_input_impl/video_types.py
+++ b/comfy_api/latest/_input_impl/video_types.py
@@ -291,6 +291,9 @@ class VideoFromFile(VideoInput):
                 checked_alpha = True

             img = frame.to_ndarray(format=image_format)  # shape: (H, W, 4)
+            if frame.rotation != 0:
+                k = int(round(frame.rotation // 90))
+                img = np.rot90(img, k=k, axes=(0, 1)).copy()

             if alphas is None:
                 frames.append(torch.from_numpy(img))
             else:

From e514119e1e3b73d5f4190295f3847f07ba228ea8 Mon Sep 17 00:00:00 2001
From: rattus <46076784+rattus128@users.noreply.github.com>
Date: Wed, 29 Apr 2026 06:34:37 +1000
Subject: [PATCH 42/81] comfy-aimdo v0.3.0 (#13604)

Comfy-aimdo 0.3.0 contains several major new features:

* multi-GPU support
* ARM support
* AMD support

Refactorings include:

* Linkless architecture - linkage is now performed purely at runtime, so
  host library lookups are avoided entirely and only the torch-loaded
  Nvidia stack is touched.
* Elimination of cudart integration on Linux. It is now consistent with
  Windows.

Misc bugfixes and minor features.
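To make the "linkless" idea concrete: instead of resolving CUDA libraries
through the host linker at build or import time, a loader can locate them at
runtime inside the wheels that torch already ships with. The sketch below is
a generic illustration of that pattern, not comfy-aimdo's actual loader; the
library name and directory layout are assumptions:

    import ctypes
    import glob
    import os

    import torch

    # pip-installed NVIDIA components typically live next to torch in
    # site-packages, e.g. site-packages/nvidia/cublas/lib/libcublas.so.12
    site_packages = os.path.dirname(os.path.dirname(torch.__file__))
    candidates = glob.glob(os.path.join(site_packages, "nvidia", "*", "lib", "libcublas.so*"))

    # Resolved purely at runtime: no build-time -lcublas, no host library lookup.
    libcublas = ctypes.CDLL(candidates[0]) if candidates else None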
---
 requirements.txt | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/requirements.txt b/requirements.txt
index 66a130a9b..12c5ff7a9 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -23,7 +23,7 @@ SQLAlchemy>=2.0
 filelock
 av>=14.2.0
 comfy-kitchen>=0.2.8
-comfy-aimdo==0.2.14
+comfy-aimdo==0.3.0
 requests
 simpleeval>=1.0.0
 blake3

From c7a517c2f9d182ea777c7e625ef532865dcff8b6 Mon Sep 17 00:00:00 2001
From: comfyanonymous <121283862+comfyanonymous@users.noreply.github.com>
Date: Tue, 28 Apr 2026 14:59:55 -0700
Subject: [PATCH 43/81] Make pyav loading code handle tRNS PNG. (#13607)

---
 comfy_api/latest/_input_impl/video_types.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/comfy_api/latest/_input_impl/video_types.py b/comfy_api/latest/_input_impl/video_types.py
index b2daa3d7d..6ed41bba8 100644
--- a/comfy_api/latest/_input_impl/video_types.py
+++ b/comfy_api/latest/_input_impl/video_types.py
@@ -284,7 +284,7 @@ class VideoFromFile(VideoInput):

             if not checked_alpha:
                 for comp in frame.format.components:
-                    if comp.is_alpha:
+                    if comp.is_alpha or frame.format.name == "pal8":
                         alphas = []
                         image_format = 'gbrapf32le'
                         break

From dae3d3475179fd796e2901e7d1f9e00aeb515a2f Mon Sep 17 00:00:00 2001
From: comfyanonymous <121283862+comfyanonymous@users.noreply.github.com>
Date: Tue, 28 Apr 2026 15:15:06 -0700
Subject: [PATCH 44/81] Use pyav to load images instead of pillow. (#13594)

On failure (e.g. animated webp files) fall back to the old Pillow code.

This should fix the extra precision in high bit depth images (like
16 bit PNG) being discarded when loaded by Pillow, and potentially adds
support for more image formats.

---
 nodes.py | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/nodes.py b/nodes.py
index fb83da896..e73a0712e 100644
--- a/nodes.py
+++ b/nodes.py
@@ -32,7 +32,7 @@ import comfy.controlnet
 from comfy.comfy_types import IO, ComfyNodeABC, InputTypeDict, FileLocator
 from comfy_api.internal import register_versions, ComfyAPIWithVersion
 from comfy_api.version_list import supported_versions
-from comfy_api.latest import io, ComfyExtension
+from comfy_api.latest import io, ComfyExtension, InputImpl

 import comfy.clip_vision
@@ -1716,6 +1716,10 @@ class LoadImage:
     def load_image(self, image):
         image_path = folder_paths.get_annotated_filepath(image)

+        components = InputImpl.VideoFromFile(image_path).get_components()
+        if components.images.shape[0] > 0:
+            return (components.images, 1.0 - components.alpha[..., -1] if components.alpha is not None else torch.zeros((components.images.shape[0], 64, 64), dtype=torch.float32, device="cpu"))
+
         img = node_helpers.pillow(Image.open, image_path)

         output_images = []

From fce0398470fe3ecdb7ab4c5c69555ad0fcbdc09e Mon Sep 17 00:00:00 2001
From: rattus <46076784+rattus128@users.noreply.github.com>
Date: Wed, 29 Apr 2026 09:15:02 +1000
Subject: [PATCH 45/81] dynamicVRAM + --cache-ram 2 (CORE-117) (#13603)

* pinned_memory: remove JIT RAM pressure release

This doesn't work, as freeing intermediates for pins needs to be higher
priority than freeing pins-for-pins, if and when you are going to do
that. So this is too late, as pins-for-pins happens at model load time
and we don't have JIT pins-for-pins.

* caching: Add a filter to only free intermediates from inactive
  workflows

This is to get the priorities amongst pins straight.

* mm: free inactive RAM from the RAM cache first

Stuff from inactive workflows should be freed before anything else.

* caching: purge old ModelPatchers first

Don't try to score them; just dump them at the first sign of trouble if
they aren't part of the workflow.
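Taken together, these changes define a strict eviction order under RAM
pressure: stale ModelPatchers go first, then cached intermediates from
inactive workflows (scored by age and RAM footprint), and entries from the
active workflow only when explicitly forced. A condensed sketch of that
ordering, using hypothetical names and a placeholder multiplier in place of
the real RAMPressureCache internals:

    # Illustrative only - not the actual ComfyUI implementation.
    OLD_WORKFLOW_MULTIPLIER = 2.0  # stands in for RAM_CACHE_OLD_WORKFLOW_OOM_MULTIPLIER

    def eviction_order(entries, generation, free_active=False):
        """Return cache entries sorted most-evictable first."""
        scored = []
        for e in entries:
            age = generation - e.used_generation
            if age == 0 and not free_active:
                continue  # skip the active workflow unless explicitly forced
            # Old ModelPatchers are dumped first: give them an effectively
            # infinite RAM score instead of measuring their tensors.
            ram = 1e30 if (e.is_model_patcher and age > 0) else e.cpu_tensor_bytes
            scored.append((OLD_WORKFLOW_MULTIPLIER ** age * ram, e))
        scored.sort(key=lambda pair: pair[0], reverse=True)
        return [e for _, e in scored]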
---
 comfy/model_management.py  | 1 +
 comfy/pinned_memory.py     | 6 ------
 comfy_execution/caching.py | 8 +++++++-
 execution.py               | 2 +-
 4 files changed, 9 insertions(+), 8 deletions(-)

diff --git a/comfy/model_management.py b/comfy/model_management.py
index 3b39d6080..95af40012 100644
--- a/comfy/model_management.py
+++ b/comfy/model_management.py
@@ -663,6 +663,7 @@ def minimum_inference_memory():

 def free_memory(memory_required, device, keep_loaded=[], for_dynamic=False, pins_required=0, ram_required=0):
     cleanup_models_gc()
+    comfy.memory_management.extra_ram_release(max(pins_required, ram_required))
     unloaded_model = []
     can_unload = []
     unloaded_models = []

diff --git a/comfy/pinned_memory.py b/comfy/pinned_memory.py
index 6f142282d..6d3ba367a 100644
--- a/comfy/pinned_memory.py
+++ b/comfy/pinned_memory.py
@@ -2,7 +2,6 @@ import comfy.model_management
 import comfy.memory_management
 import comfy_aimdo.host_buffer
 import comfy_aimdo.torch
-import psutil

 from comfy.cli_args import args

@@ -12,11 +11,6 @@ def get_pin(module):
 def pin_memory(module):
     if module.pin_failed or args.disable_pinned_memory or get_pin(module) is not None:
         return
-    #FIXME: This is a RAM cache trigger event
-    ram_headroom = comfy.memory_management.RAM_CACHE_HEADROOM
-    #we split the difference and assume half the RAM cache headroom is for us
-    if ram_headroom > 0 and psutil.virtual_memory().available < (ram_headroom * 0.5):
-        comfy.memory_management.extra_ram_release(ram_headroom)

     size = comfy.memory_management.vram_aligned_size([ module.weight, module.bias ])

diff --git a/comfy_execution/caching.py b/comfy_execution/caching.py
index f9c913bdb..ba1e8bc84 100644
--- a/comfy_execution/caching.py
+++ b/comfy_execution/caching.py
@@ -5,6 +5,7 @@ import psutil
 import time
 import torch
 from typing import Sequence, Mapping, Dict
+from comfy.model_patcher import ModelPatcher
 from comfy_execution.graph import DynamicPrompt

 from abc import ABC, abstractmethod
@@ -523,13 +524,15 @@ class RAMPressureCache(LRUCache):
         self.timestamps[self.cache_key_set.get_data_key(node_id)] = time.time()
         super().set_local(node_id, value)

-    def ram_release(self, target):
+    def ram_release(self, target, free_active=False):
         if psutil.virtual_memory().available >= target:
             return

         clean_list = []

         for key, cache_entry in self.cache.items():
+            if not free_active and self.used_generation[key] == self.generation:
+                continue
             oom_score = RAM_CACHE_OLD_WORKFLOW_OOM_MULTIPLIER ** (self.generation - self.used_generation[key])

             ram_usage = RAM_CACHE_DEFAULT_RAM_USAGE
@@ -542,6 +545,9 @@ class RAMPressureCache(LRUCache):
                         scan_list_for_ram_usage(output)
                 elif isinstance(output, torch.Tensor) and output.device.type == 'cpu':
                     ram_usage += output.numel() * output.element_size()
+                elif isinstance(output, ModelPatcher) and self.used_generation[key] != self.generation:
+                    #old ModelPatchers are the first to go
+                    ram_usage = 1e30
             scan_list_for_ram_usage(cache_entry.outputs)

             oom_score *= ram_usage

diff --git a/execution.py b/execution.py
index e15eb4bda..5a6d3404c 100644
--- a/execution.py
+++ b/execution.py
@@ -779,7 +779,7 @@ class PromptExecutor:

             if self.cache_type == CacheType.RAM_PRESSURE:
                 comfy.model_management.free_memory(0, None, pins_required=ram_headroom, ram_required=ram_headroom)
-                comfy.memory_management.extra_ram_release(ram_headroom)
+                ram_release_callback(ram_headroom, free_active=True)
             else:
                 # Only execute when the 
while-loop ends without break # Send cached UI for intermediate output nodes that weren't executed From 0e25a6936ef41a56af87a4af174fa519da73b37c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jukka=20Sepp=C3=A4nen?= <40791699+kijai@users.noreply.github.com> Date: Wed, 29 Apr 2026 22:15:10 +0300 Subject: [PATCH 46/81] Reduce video tiny VAE peak VRAM and decode time (CORE-127) (#13617) * Update taehv.py * Simplify * Simplify pixel_unshuffle dispatch --- comfy/taesd/taehv.py | 37 ++++++++++++++++++++++--------------- 1 file changed, 22 insertions(+), 15 deletions(-) diff --git a/comfy/taesd/taehv.py b/comfy/taesd/taehv.py index 6c06ce19d..696013200 100644 --- a/comfy/taesd/taehv.py +++ b/comfy/taesd/taehv.py @@ -7,6 +7,7 @@ from tqdm.auto import tqdm from collections import namedtuple, deque import comfy.ops +import comfy.model_management operations=comfy.ops.disable_weight_init DecoderResult = namedtuple("DecoderResult", ("frame", "memory")) @@ -47,11 +48,14 @@ class TGrow(nn.Module): x = self.conv(x) return x.reshape(-1, C, H, W) -def apply_model_with_memblocks(model, x, parallel, show_progress_bar): +def apply_model_with_memblocks(model, x, parallel, show_progress_bar, output_device=None, + patch_size=1, decode=False): B, T, C, H, W = x.shape if parallel: x = x.reshape(B*T, C, H, W) + if not decode and patch_size > 1: + x = F.pixel_unshuffle(x, patch_size) # parallel over input timesteps, iterate over blocks for b in tqdm(model, disable=not show_progress_bar): if isinstance(b, MemBlock): @@ -62,20 +66,27 @@ def apply_model_with_memblocks(model, x, parallel, show_progress_bar): x = b(x, mem) else: x = b(x) - BT, C, H, W = x.shape - T = BT // B - x = x.view(B, T, C, H, W) + if decode and patch_size > 1: + x = F.pixel_shuffle(x, patch_size) + x = x.view(B, x.shape[0] // B, *x.shape[1:]) + x = x.to(output_device) else: out = [] - work_queue = deque([TWorkItem(xt, 0) for t, xt in enumerate(x.reshape(B, T * C, H, W).chunk(T, dim=1))]) + # Chunk along the time dim directly (chunks are [B,1,C,H,W] views, squeeze to [B,C,H,W] views). + # Avoids forcing a contiguous copy when x is non-contiguous (e.g. after movedim in encode/decode). 
+ work_queue = deque([TWorkItem(xt.squeeze(1), 0) for xt in x.chunk(T, dim=1)]) progress_bar = tqdm(range(T), disable=not show_progress_bar) mem = [None] * len(model) while work_queue: xt, i = work_queue.popleft() if i == 0: progress_bar.update(1) + if not decode and patch_size > 1: + xt = F.pixel_unshuffle(xt, patch_size) if i == len(model): - out.append(xt) + if decode and patch_size > 1: + xt = F.pixel_shuffle(xt, patch_size) + out.append(xt.to(output_device)) del xt else: b = model[i] @@ -165,24 +176,20 @@ class TAEHV(nn.Module): def encode(self, x, **kwargs): x = x.movedim(2, 1) # [B, C, T, H, W] -> [B, T, C, H, W] - if self.patch_size > 1: - B, T, C, H, W = x.shape - x = x.reshape(B * T, C, H, W) - x = F.pixel_unshuffle(x, self.patch_size) - x = x.reshape(B, T, C * self.patch_size ** 2, H // self.patch_size, W // self.patch_size) if x.shape[1] % self.t_downscale != 0: # pad at end to multiple of t_downscale n_pad = self.t_downscale - x.shape[1] % self.t_downscale padding = x[:, -1:].repeat_interleave(n_pad, dim=1) x = torch.cat([x, padding], 1) - x = apply_model_with_memblocks(self.encoder, x, self.parallel, self.show_progress_bar).movedim(2, 1) + x = apply_model_with_memblocks(self.encoder, x, self.parallel, self.show_progress_bar, + patch_size=self.patch_size).movedim(2, 1) return self.process_out(x) def decode(self, x, **kwargs): x = x.unsqueeze(0) if x.ndim == 4 else x # [T, C, H, W] -> [1, T, C, H, W] x = x.movedim(1, 2) if x.shape[1] != self.latent_channels else x # [B, T, C, H, W] or [B, C, T, H, W] x = self.process_in(x).movedim(2, 1) # [B, C, T, H, W] -> [B, T, C, H, W] - x = apply_model_with_memblocks(self.decoder, x, self.parallel, self.show_progress_bar) - if self.patch_size > 1: - x = F.pixel_shuffle(x, self.patch_size) + x = apply_model_with_memblocks(self.decoder, x, self.parallel, self.show_progress_bar, + output_device=comfy.model_management.intermediate_device(), + patch_size=self.patch_size, decode=True) return x[:, self.frames_to_trim:].movedim(2, 1) From 5eeae3f1d823e3f072896d6c72185e3c84373739 Mon Sep 17 00:00:00 2001 From: Talmaj Date: Thu, 30 Apr 2026 01:30:08 +0200 Subject: [PATCH 47/81] Cogvideox (#13402) --------- Co-authored-by: kijai <40791699+kijai@users.noreply.github.com> Co-authored-by: Talmaj Marinc --- comfy/latent_formats.py | 7 + comfy/ldm/cogvideo/__init__.py | 0 comfy/ldm/cogvideo/model.py | 573 ++++++++++++++++++++++++++++++++ comfy/ldm/cogvideo/vae.py | 566 +++++++++++++++++++++++++++++++ comfy/model_base.py | 60 ++++ comfy/model_detection.py | 48 +++ comfy/model_sampling.py | 24 ++ comfy/sd.py | 12 + comfy/supported_models.py | 49 ++- comfy/text_encoders/cogvideo.py | 6 + nodes.py | 2 +- 11 files changed, 1345 insertions(+), 2 deletions(-) create mode 100644 comfy/ldm/cogvideo/__init__.py create mode 100644 comfy/ldm/cogvideo/model.py create mode 100644 comfy/ldm/cogvideo/vae.py create mode 100644 comfy/text_encoders/cogvideo.py diff --git a/comfy/latent_formats.py b/comfy/latent_formats.py index 6a57bca1c..0f4059ebe 100644 --- a/comfy/latent_formats.py +++ b/comfy/latent_formats.py @@ -783,3 +783,10 @@ class ZImagePixelSpace(ChromaRadiance): No VAE encoding/decoding — the model operates directly on RGB pixels. 
""" pass + +class CogVideoX(LatentFormat): + latent_channels = 16 + latent_dimensions = 3 + + def __init__(self): + self.scale_factor = 1.15258426 diff --git a/comfy/ldm/cogvideo/__init__.py b/comfy/ldm/cogvideo/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/comfy/ldm/cogvideo/model.py b/comfy/ldm/cogvideo/model.py new file mode 100644 index 000000000..fb475ed53 --- /dev/null +++ b/comfy/ldm/cogvideo/model.py @@ -0,0 +1,573 @@ +# CogVideoX 3D Transformer - ported to ComfyUI native ops +# Architecture reference: diffusers CogVideoXTransformer3DModel +# Style reference: comfy/ldm/wan/model.py + +import math +import torch +import torch.nn as nn +import torch.nn.functional as F + +from comfy.ldm.modules.attention import optimized_attention +import comfy.patcher_extension +import comfy.ldm.common_dit + + +def _get_1d_rotary_pos_embed(dim, pos, theta=10000.0): + """Returns (cos, sin) each with shape [seq_len, dim]. + + Frequencies are computed at dim//2 resolution then repeat_interleaved + to full dim, matching CogVideoX's interleaved (real, imag) pair format. + """ + freqs = 1.0 / (theta ** (torch.arange(0, dim, 2, dtype=torch.float32, device=pos.device) / dim)) + angles = torch.outer(pos.float(), freqs.float()) + cos = angles.cos().repeat_interleave(2, dim=-1).float() + sin = angles.sin().repeat_interleave(2, dim=-1).float() + return (cos, sin) + + +def apply_rotary_emb(x, freqs_cos_sin): + """Apply CogVideoX rotary embedding to query or key tensor. + + x: [B, heads, seq_len, head_dim] + freqs_cos_sin: (cos, sin) each [seq_len, head_dim//2] + + Uses interleaved pair rotation (same as diffusers CogVideoX/Flux). + head_dim is reshaped to (-1, 2) pairs, rotated, then flattened back. + """ + cos, sin = freqs_cos_sin + cos = cos[None, None, :, :].to(x.device) + sin = sin[None, None, :, :].to(x.device) + + # Interleaved pairs: [B, H, S, D] -> [B, H, S, D//2, 2] -> (real, imag) + x_real, x_imag = x.reshape(*x.shape[:-1], -1, 2).unbind(-1) + x_rotated = torch.stack([-x_imag, x_real], dim=-1).flatten(3) + + return (x.float() * cos + x_rotated.float() * sin).to(x.dtype) + + +def get_timestep_embedding(timesteps, dim, flip_sin_to_cos=True, downscale_freq_shift=0, scale=1, max_period=10000): + half = dim // 2 + freqs = torch.exp(-math.log(max_period) * torch.arange(start=0, end=half, dtype=torch.float32, device=timesteps.device) / half) + args = timesteps[:, None].float() * freqs[None] * scale + embedding = torch.cat([torch.sin(args), torch.cos(args)], dim=-1) + if flip_sin_to_cos: + embedding = torch.cat([embedding[:, half:], embedding[:, :half]], dim=-1) + if dim % 2: + embedding = torch.cat([embedding, torch.zeros_like(embedding[:, :1])], dim=-1) + return embedding + + +def get_3d_sincos_pos_embed(embed_dim, spatial_size, temporal_size, spatial_interpolation_scale=1.0, temporal_interpolation_scale=1.0, device=None): + if isinstance(spatial_size, int): + spatial_size = (spatial_size, spatial_size) + + grid_w = torch.arange(spatial_size[0], dtype=torch.float32, device=device) / spatial_interpolation_scale + grid_h = torch.arange(spatial_size[1], dtype=torch.float32, device=device) / spatial_interpolation_scale + grid_t = torch.arange(temporal_size, dtype=torch.float32, device=device) / temporal_interpolation_scale + + grid_t, grid_h, grid_w = torch.meshgrid(grid_t, grid_h, grid_w, indexing="ij") + + embed_dim_spatial = 2 * (embed_dim // 3) + embed_dim_temporal = embed_dim // 3 + + pos_embed_spatial = _get_2d_sincos_pos_embed(embed_dim_spatial, grid_h, grid_w, device=device) + 
pos_embed_temporal = _get_1d_sincos_pos_embed(embed_dim_temporal, grid_t[:, 0, 0], device=device) + + T, H, W = grid_t.shape + pos_embed_temporal = pos_embed_temporal.unsqueeze(1).unsqueeze(1).expand(-1, H, W, -1) + pos_embed = torch.cat([pos_embed_temporal, pos_embed_spatial], dim=-1) + + return pos_embed + + +def _get_2d_sincos_pos_embed(embed_dim, grid_h, grid_w, device=None): + T, H, W = grid_h.shape + half_dim = embed_dim // 2 + pos_h = _get_1d_sincos_pos_embed(half_dim, grid_h.reshape(-1), device=device).reshape(T, H, W, half_dim) + pos_w = _get_1d_sincos_pos_embed(half_dim, grid_w.reshape(-1), device=device).reshape(T, H, W, half_dim) + return torch.cat([pos_h, pos_w], dim=-1) + + +def _get_1d_sincos_pos_embed(embed_dim, pos, device=None): + half = embed_dim // 2 + freqs = torch.exp(-math.log(10000.0) * torch.arange(start=0, end=half, dtype=torch.float32, device=device) / half) + args = pos.float().reshape(-1)[:, None] * freqs[None] + embedding = torch.cat([torch.cos(args), torch.sin(args)], dim=-1) + if embed_dim % 2: + embedding = torch.cat([embedding, torch.zeros_like(embedding[:, :1])], dim=-1) + return embedding + + + +class CogVideoXPatchEmbed(nn.Module): + def __init__(self, patch_size=2, patch_size_t=None, in_channels=16, dim=1920, + text_dim=4096, bias=True, sample_width=90, sample_height=60, + sample_frames=49, temporal_compression_ratio=4, + max_text_seq_length=226, spatial_interpolation_scale=1.875, + temporal_interpolation_scale=1.0, use_positional_embeddings=True, + use_learned_positional_embeddings=True, + device=None, dtype=None, operations=None): + super().__init__() + self.patch_size = patch_size + self.patch_size_t = patch_size_t + self.dim = dim + self.sample_height = sample_height + self.sample_width = sample_width + self.sample_frames = sample_frames + self.temporal_compression_ratio = temporal_compression_ratio + self.max_text_seq_length = max_text_seq_length + self.spatial_interpolation_scale = spatial_interpolation_scale + self.temporal_interpolation_scale = temporal_interpolation_scale + self.use_positional_embeddings = use_positional_embeddings + self.use_learned_positional_embeddings = use_learned_positional_embeddings + + if patch_size_t is None: + self.proj = operations.Conv2d(in_channels, dim, kernel_size=patch_size, stride=patch_size, bias=bias, device=device, dtype=dtype) + else: + self.proj = operations.Linear(in_channels * patch_size * patch_size * patch_size_t, dim, device=device, dtype=dtype) + + self.text_proj = operations.Linear(text_dim, dim, device=device, dtype=dtype) + + if use_positional_embeddings or use_learned_positional_embeddings: + persistent = use_learned_positional_embeddings + pos_embedding = self._get_positional_embeddings(sample_height, sample_width, sample_frames) + self.register_buffer("pos_embedding", pos_embedding, persistent=persistent) + + def _get_positional_embeddings(self, sample_height, sample_width, sample_frames, device=None): + post_patch_height = sample_height // self.patch_size + post_patch_width = sample_width // self.patch_size + post_time_compression_frames = (sample_frames - 1) // self.temporal_compression_ratio + 1 + if self.patch_size_t is not None: + post_time_compression_frames = post_time_compression_frames // self.patch_size_t + num_patches = post_patch_height * post_patch_width * post_time_compression_frames + + pos_embedding = get_3d_sincos_pos_embed( + self.dim, + (post_patch_width, post_patch_height), + post_time_compression_frames, + self.spatial_interpolation_scale, + 
self.temporal_interpolation_scale, + device=device, + ) + pos_embedding = pos_embedding.reshape(-1, self.dim) + joint_pos_embedding = pos_embedding.new_zeros( + 1, self.max_text_seq_length + num_patches, self.dim, requires_grad=False + ) + joint_pos_embedding.data[:, self.max_text_seq_length:].copy_(pos_embedding) + return joint_pos_embedding + + def forward(self, text_embeds, image_embeds): + input_dtype = text_embeds.dtype + text_embeds = self.text_proj(text_embeds.to(self.text_proj.weight.dtype)).to(input_dtype) + batch_size, num_frames, channels, height, width = image_embeds.shape + + proj_dtype = self.proj.weight.dtype + if self.patch_size_t is None: + image_embeds = image_embeds.reshape(-1, channels, height, width) + image_embeds = self.proj(image_embeds.to(proj_dtype)).to(input_dtype) + image_embeds = image_embeds.view(batch_size, num_frames, *image_embeds.shape[1:]) + image_embeds = image_embeds.flatten(3).transpose(2, 3) + image_embeds = image_embeds.flatten(1, 2) + else: + p = self.patch_size + p_t = self.patch_size_t + image_embeds = image_embeds.permute(0, 1, 3, 4, 2) + image_embeds = image_embeds.reshape( + batch_size, num_frames // p_t, p_t, height // p, p, width // p, p, channels + ) + image_embeds = image_embeds.permute(0, 1, 3, 5, 7, 2, 4, 6).flatten(4, 7).flatten(1, 3) + image_embeds = self.proj(image_embeds.to(proj_dtype)).to(input_dtype) + + embeds = torch.cat([text_embeds, image_embeds], dim=1).contiguous() + + if self.use_positional_embeddings or self.use_learned_positional_embeddings: + text_seq_length = text_embeds.shape[1] + num_image_patches = image_embeds.shape[1] + + if self.use_learned_positional_embeddings: + image_pos = self.pos_embedding[ + :, self.max_text_seq_length:self.max_text_seq_length + num_image_patches + ].to(device=embeds.device, dtype=embeds.dtype) + else: + image_pos = get_3d_sincos_pos_embed( + self.dim, + (width // self.patch_size, height // self.patch_size), + num_image_patches // ((height // self.patch_size) * (width // self.patch_size)), + self.spatial_interpolation_scale, + self.temporal_interpolation_scale, + device=embeds.device, + ).reshape(1, num_image_patches, self.dim).to(dtype=embeds.dtype) + + # Build joint: zeros for text + sincos for image + joint_pos = torch.zeros(1, text_seq_length + num_image_patches, self.dim, device=embeds.device, dtype=embeds.dtype) + joint_pos[:, text_seq_length:] = image_pos + embeds = embeds + joint_pos + + return embeds + + +class CogVideoXLayerNormZero(nn.Module): + def __init__(self, time_dim, dim, elementwise_affine=True, eps=1e-5, bias=True, + device=None, dtype=None, operations=None): + super().__init__() + self.silu = nn.SiLU() + self.linear = operations.Linear(time_dim, 6 * dim, bias=bias, device=device, dtype=dtype) + self.norm = operations.LayerNorm(dim, eps=eps, elementwise_affine=elementwise_affine, device=device, dtype=dtype) + + def forward(self, hidden_states, encoder_hidden_states, temb): + shift, scale, gate, enc_shift, enc_scale, enc_gate = self.linear(self.silu(temb)).chunk(6, dim=1) + hidden_states = self.norm(hidden_states) * (1 + scale)[:, None, :] + shift[:, None, :] + encoder_hidden_states = self.norm(encoder_hidden_states) * (1 + enc_scale)[:, None, :] + enc_shift[:, None, :] + return hidden_states, encoder_hidden_states, gate[:, None, :], enc_gate[:, None, :] + + +class CogVideoXAdaLayerNorm(nn.Module): + def __init__(self, time_dim, dim, elementwise_affine=True, eps=1e-5, + device=None, dtype=None, operations=None): + super().__init__() + self.silu = nn.SiLU() + self.linear = 
operations.Linear(time_dim, 2 * dim, device=device, dtype=dtype) + self.norm = operations.LayerNorm(dim, eps=eps, elementwise_affine=elementwise_affine, device=device, dtype=dtype) + + def forward(self, x, temb): + temb = self.linear(self.silu(temb)) + shift, scale = temb.chunk(2, dim=1) + x = self.norm(x) * (1 + scale)[:, None, :] + shift[:, None, :] + return x + + +class CogVideoXBlock(nn.Module): + def __init__(self, dim, num_heads, head_dim, time_dim, + eps=1e-5, ff_inner_dim=None, ff_bias=True, + device=None, dtype=None, operations=None): + super().__init__() + self.dim = dim + self.num_heads = num_heads + self.head_dim = head_dim + + self.norm1 = CogVideoXLayerNormZero(time_dim, dim, eps=eps, device=device, dtype=dtype, operations=operations) + + # Self-attention (joint text + latent) + self.q = operations.Linear(dim, dim, bias=True, device=device, dtype=dtype) + self.k = operations.Linear(dim, dim, bias=True, device=device, dtype=dtype) + self.v = operations.Linear(dim, dim, bias=True, device=device, dtype=dtype) + self.norm_q = operations.LayerNorm(head_dim, eps=1e-6, elementwise_affine=True, device=device, dtype=dtype) + self.norm_k = operations.LayerNorm(head_dim, eps=1e-6, elementwise_affine=True, device=device, dtype=dtype) + self.attn_out = operations.Linear(dim, dim, bias=True, device=device, dtype=dtype) + + self.norm2 = CogVideoXLayerNormZero(time_dim, dim, eps=eps, device=device, dtype=dtype, operations=operations) + + # Feed-forward (GELU approximate) + inner_dim = ff_inner_dim or dim * 4 + self.ff_proj = operations.Linear(dim, inner_dim, bias=ff_bias, device=device, dtype=dtype) + self.ff_out = operations.Linear(inner_dim, dim, bias=ff_bias, device=device, dtype=dtype) + + def forward(self, hidden_states, encoder_hidden_states, temb, image_rotary_emb=None, transformer_options=None): + if transformer_options is None: + transformer_options = {} + text_seq_length = encoder_hidden_states.size(1) + + # Norm & modulate + norm_hidden, norm_encoder, gate_msa, enc_gate_msa = self.norm1(hidden_states, encoder_hidden_states, temb) + + # Joint self-attention + qkv_input = torch.cat([norm_encoder, norm_hidden], dim=1) + b, s, _ = qkv_input.shape + n, d = self.num_heads, self.head_dim + + q = self.q(qkv_input).view(b, s, n, d) + k = self.k(qkv_input).view(b, s, n, d) + v = self.v(qkv_input) + + q = self.norm_q(q).view(b, s, n, d) + k = self.norm_k(k).view(b, s, n, d) + + # Apply rotary embeddings to image tokens only (diffusers format: [B, heads, seq, head_dim]) + if image_rotary_emb is not None: + q_img = q[:, text_seq_length:].transpose(1, 2) # [B, heads, img_seq, head_dim] + k_img = k[:, text_seq_length:].transpose(1, 2) + q_img = apply_rotary_emb(q_img, image_rotary_emb) + k_img = apply_rotary_emb(k_img, image_rotary_emb) + q = torch.cat([q[:, :text_seq_length], q_img.transpose(1, 2)], dim=1) + k = torch.cat([k[:, :text_seq_length], k_img.transpose(1, 2)], dim=1) + + attn_out = optimized_attention( + q.reshape(b, s, n * d), + k.reshape(b, s, n * d), + v, + heads=self.num_heads, + transformer_options=transformer_options, + ) + + attn_out = self.attn_out(attn_out) + + attn_encoder, attn_hidden = attn_out.split([text_seq_length, s - text_seq_length], dim=1) + + hidden_states = hidden_states + gate_msa * attn_hidden + encoder_hidden_states = encoder_hidden_states + enc_gate_msa * attn_encoder + + # Norm & modulate for FF + norm_hidden, norm_encoder, gate_ff, enc_gate_ff = self.norm2(hidden_states, encoder_hidden_states, temb) + + # Feed-forward (GELU on concatenated text + latent) + 
ff_input = torch.cat([norm_encoder, norm_hidden], dim=1) + ff_output = self.ff_out(F.gelu(self.ff_proj(ff_input), approximate="tanh")) + + hidden_states = hidden_states + gate_ff * ff_output[:, text_seq_length:] + encoder_hidden_states = encoder_hidden_states + enc_gate_ff * ff_output[:, :text_seq_length] + + return hidden_states, encoder_hidden_states + + +class CogVideoXTransformer3DModel(nn.Module): + def __init__(self, + num_attention_heads=30, + attention_head_dim=64, + in_channels=16, + out_channels=16, + flip_sin_to_cos=True, + freq_shift=0, + time_embed_dim=512, + ofs_embed_dim=None, + text_embed_dim=4096, + num_layers=30, + dropout=0.0, + attention_bias=True, + sample_width=90, + sample_height=60, + sample_frames=49, + patch_size=2, + patch_size_t=None, + temporal_compression_ratio=4, + max_text_seq_length=226, + spatial_interpolation_scale=1.875, + temporal_interpolation_scale=1.0, + use_rotary_positional_embeddings=False, + use_learned_positional_embeddings=False, + patch_bias=True, + image_model=None, + device=None, + dtype=None, + operations=None, + ): + super().__init__() + self.dtype = dtype + dim = num_attention_heads * attention_head_dim + self.dim = dim + self.num_attention_heads = num_attention_heads + self.attention_head_dim = attention_head_dim + self.in_channels = in_channels + self.out_channels = out_channels + self.patch_size = patch_size + self.patch_size_t = patch_size_t + self.max_text_seq_length = max_text_seq_length + self.use_rotary_positional_embeddings = use_rotary_positional_embeddings + + # 1. Patch embedding + self.patch_embed = CogVideoXPatchEmbed( + patch_size=patch_size, + patch_size_t=patch_size_t, + in_channels=in_channels, + dim=dim, + text_dim=text_embed_dim, + bias=patch_bias, + sample_width=sample_width, + sample_height=sample_height, + sample_frames=sample_frames, + temporal_compression_ratio=temporal_compression_ratio, + max_text_seq_length=max_text_seq_length, + spatial_interpolation_scale=spatial_interpolation_scale, + temporal_interpolation_scale=temporal_interpolation_scale, + use_positional_embeddings=not use_rotary_positional_embeddings, + use_learned_positional_embeddings=use_learned_positional_embeddings, + device=device, dtype=torch.float32, operations=operations, + ) + + # 2. Time embedding + self.time_proj_dim = dim + self.time_proj_flip = flip_sin_to_cos + self.time_proj_shift = freq_shift + self.time_embedding_linear_1 = operations.Linear(dim, time_embed_dim, device=device, dtype=dtype) + self.time_embedding_act = nn.SiLU() + self.time_embedding_linear_2 = operations.Linear(time_embed_dim, time_embed_dim, device=device, dtype=dtype) + + # Optional OFS embedding (CogVideoX 1.5 I2V) + self.ofs_proj_dim = ofs_embed_dim + if ofs_embed_dim: + self.ofs_embedding_linear_1 = operations.Linear(ofs_embed_dim, ofs_embed_dim, device=device, dtype=dtype) + self.ofs_embedding_act = nn.SiLU() + self.ofs_embedding_linear_2 = operations.Linear(ofs_embed_dim, ofs_embed_dim, device=device, dtype=dtype) + else: + self.ofs_embedding_linear_1 = None + + # 3. Transformer blocks + self.blocks = nn.ModuleList([ + CogVideoXBlock( + dim=dim, + num_heads=num_attention_heads, + head_dim=attention_head_dim, + time_dim=time_embed_dim, + eps=1e-5, + device=device, dtype=dtype, operations=operations, + ) + for _ in range(num_layers) + ]) + + self.norm_final = operations.LayerNorm(dim, eps=1e-5, elementwise_affine=True, device=device, dtype=dtype) + + # 4. 
Output + self.norm_out = CogVideoXAdaLayerNorm( + time_dim=time_embed_dim, dim=dim, eps=1e-5, + device=device, dtype=dtype, operations=operations, + ) + + if patch_size_t is None: + output_dim = patch_size * patch_size * out_channels + else: + output_dim = patch_size * patch_size * patch_size_t * out_channels + + self.proj_out = operations.Linear(dim, output_dim, device=device, dtype=dtype) + + self.spatial_interpolation_scale = spatial_interpolation_scale + self.temporal_interpolation_scale = temporal_interpolation_scale + self.temporal_compression_ratio = temporal_compression_ratio + + def forward(self, x, timestep, context, ofs=None, transformer_options=None, **kwargs): + if transformer_options is None: + transformer_options = {} + return comfy.patcher_extension.WrapperExecutor.new_class_executor( + self._forward, + self, + comfy.patcher_extension.get_all_wrappers(comfy.patcher_extension.WrappersMP.DIFFUSION_MODEL, transformer_options) + ).execute(x, timestep, context, ofs, transformer_options, **kwargs) + + def _forward(self, x, timestep, context, ofs=None, transformer_options=None, **kwargs): + if transformer_options is None: + transformer_options = {} + # ComfyUI passes [B, C, T, H, W] + batch_size, channels, t, h, w = x.shape + + # Pad to patch size (temporal + spatial), same pattern as WAN + p_t = self.patch_size_t if self.patch_size_t is not None else 1 + x = comfy.ldm.common_dit.pad_to_patch_size(x, (p_t, self.patch_size, self.patch_size)) + + # CogVideoX expects [B, T, C, H, W] + x = x.permute(0, 2, 1, 3, 4) + batch_size, num_frames, channels, height, width = x.shape + + # Time embedding + t_emb = get_timestep_embedding(timestep, self.time_proj_dim, self.time_proj_flip, self.time_proj_shift) + t_emb = t_emb.to(dtype=x.dtype) + emb = self.time_embedding_linear_2(self.time_embedding_act(self.time_embedding_linear_1(t_emb))) + + if self.ofs_embedding_linear_1 is not None and ofs is not None: + ofs_emb = get_timestep_embedding(ofs, self.ofs_proj_dim, self.time_proj_flip, self.time_proj_shift) + ofs_emb = ofs_emb.to(dtype=x.dtype) + ofs_emb = self.ofs_embedding_linear_2(self.ofs_embedding_act(self.ofs_embedding_linear_1(ofs_emb))) + emb = emb + ofs_emb + + # Patch embedding + hidden_states = self.patch_embed(context, x) + + text_seq_length = context.shape[1] + encoder_hidden_states = hidden_states[:, :text_seq_length] + hidden_states = hidden_states[:, text_seq_length:] + + # Rotary embeddings (if used) + image_rotary_emb = None + if self.use_rotary_positional_embeddings: + post_patch_height = height // self.patch_size + post_patch_width = width // self.patch_size + if self.patch_size_t is None: + post_time = num_frames + else: + post_time = num_frames // self.patch_size_t + image_rotary_emb = self._get_rotary_emb(post_patch_height, post_patch_width, post_time, device=x.device) + + # Transformer blocks + for i, block in enumerate(self.blocks): + hidden_states, encoder_hidden_states = block( + hidden_states=hidden_states, + encoder_hidden_states=encoder_hidden_states, + temb=emb, + image_rotary_emb=image_rotary_emb, + transformer_options=transformer_options, + ) + + hidden_states = self.norm_final(hidden_states) + + # Output projection + hidden_states = self.norm_out(hidden_states, temb=emb) + hidden_states = self.proj_out(hidden_states) + + # Unpatchify + p = self.patch_size + p_t = self.patch_size_t + + if p_t is None: + output = hidden_states.reshape(batch_size, num_frames, height // p, width // p, -1, p, p) + output = output.permute(0, 1, 4, 2, 5, 3, 6).flatten(5, 6).flatten(3, 4) 
+ else: + output = hidden_states.reshape( + batch_size, (num_frames + p_t - 1) // p_t, height // p, width // p, -1, p_t, p, p + ) + output = output.permute(0, 1, 5, 4, 2, 6, 3, 7).flatten(6, 7).flatten(4, 5).flatten(1, 2) + + # Back to ComfyUI format [B, C, T, H, W] and crop padding + output = output.permute(0, 2, 1, 3, 4)[:, :, :t, :h, :w] + return output + + def _get_rotary_emb(self, h, w, t, device): + """Compute CogVideoX 3D rotary positional embeddings. + + For CogVideoX 1.5 (patch_size_t != None): uses "slice" mode — grid positions + are integer arange computed at max_size, then sliced to actual size. + For CogVideoX 1.0 (patch_size_t == None): uses "linspace" mode with crop coords + scaled by spatial_interpolation_scale. + """ + d = self.attention_head_dim + dim_t = d // 4 + dim_h = d // 8 * 3 + dim_w = d // 8 * 3 + + if self.patch_size_t is not None: + # CogVideoX 1.5: "slice" mode — positions are simple integer indices + # Compute at max(sample_size, actual_size) then slice to actual + base_h = self.patch_embed.sample_height // self.patch_size + base_w = self.patch_embed.sample_width // self.patch_size + max_h = max(base_h, h) + max_w = max(base_w, w) + + grid_h = torch.arange(max_h, device=device, dtype=torch.float32) + grid_w = torch.arange(max_w, device=device, dtype=torch.float32) + grid_t = torch.arange(t, device=device, dtype=torch.float32) + else: + # CogVideoX 1.0: "linspace" mode with interpolation scale + grid_h = torch.linspace(0, h - 1, h, device=device, dtype=torch.float32) * self.spatial_interpolation_scale + grid_w = torch.linspace(0, w - 1, w, device=device, dtype=torch.float32) * self.spatial_interpolation_scale + grid_t = torch.arange(t, device=device, dtype=torch.float32) + + freqs_t = _get_1d_rotary_pos_embed(dim_t, grid_t) + freqs_h = _get_1d_rotary_pos_embed(dim_h, grid_h) + freqs_w = _get_1d_rotary_pos_embed(dim_w, grid_w) + + t_cos, t_sin = freqs_t + h_cos, h_sin = freqs_h + w_cos, w_sin = freqs_w + + # Slice to actual size (for "slice" mode where grids may be larger) + t_cos, t_sin = t_cos[:t], t_sin[:t] + h_cos, h_sin = h_cos[:h], h_sin[:h] + w_cos, w_sin = w_cos[:w], w_sin[:w] + + # Broadcast and concatenate into [T*H*W, head_dim] + t_cos = t_cos[:, None, None, :].expand(-1, h, w, -1) + t_sin = t_sin[:, None, None, :].expand(-1, h, w, -1) + h_cos = h_cos[None, :, None, :].expand(t, -1, w, -1) + h_sin = h_sin[None, :, None, :].expand(t, -1, w, -1) + w_cos = w_cos[None, None, :, :].expand(t, h, -1, -1) + w_sin = w_sin[None, None, :, :].expand(t, h, -1, -1) + + cos = torch.cat([t_cos, h_cos, w_cos], dim=-1).reshape(t * h * w, -1) + sin = torch.cat([t_sin, h_sin, w_sin], dim=-1).reshape(t * h * w, -1) + return (cos, sin) diff --git a/comfy/ldm/cogvideo/vae.py b/comfy/ldm/cogvideo/vae.py new file mode 100644 index 000000000..d4e6f321e --- /dev/null +++ b/comfy/ldm/cogvideo/vae.py @@ -0,0 +1,566 @@ +# CogVideoX VAE - ported to ComfyUI native ops +# Architecture reference: diffusers AutoencoderKLCogVideoX +# Style reference: comfy/ldm/wan/vae.py + +import numpy as np + +import torch +import torch.nn as nn +import torch.nn.functional as F + +import comfy.ops +ops = comfy.ops.disable_weight_init + + +class CausalConv3d(nn.Module): + """Causal 3D convolution with temporal padding. + + Uses comfy.ops.Conv3d with autopad='causal_zero' fast path: when input has + a single temporal frame and no cache, the 3D conv weight is sliced to act + as a 2D conv, avoiding computation on zero-padded temporal dimensions. 
+ """ + def __init__(self, in_channels, out_channels, kernel_size, stride=1, dilation=1, pad_mode="constant"): + super().__init__() + if isinstance(kernel_size, int): + kernel_size = (kernel_size,) * 3 + + time_kernel, height_kernel, width_kernel = kernel_size + self.time_kernel_size = time_kernel + self.pad_mode = pad_mode + + height_pad = (height_kernel - 1) // 2 + width_pad = (width_kernel - 1) // 2 + self.time_causal_padding = (width_pad, width_pad, height_pad, height_pad, time_kernel - 1, 0) + + stride = stride if isinstance(stride, tuple) else (stride, 1, 1) + dilation = (dilation, 1, 1) + self.conv = ops.Conv3d( + in_channels, out_channels, kernel_size, + stride=stride, dilation=dilation, + padding=(0, height_pad, width_pad), + ) + + def forward(self, x, conv_cache=None): + if self.pad_mode == "replicate": + x = F.pad(x, self.time_causal_padding, mode="replicate") + conv_cache = None + else: + kernel_t = self.time_kernel_size + if kernel_t > 1: + if conv_cache is None and x.shape[2] == 1: + # Fast path: single frame, no cache. All temporal padding + # frames are copies of the input (replicate-style), so the + # 3D conv reduces to a 2D conv with summed temporal kernel. + w = comfy.ops.cast_to_input(self.conv.weight, x) + b = comfy.ops.cast_to_input(self.conv.bias, x) if self.conv.bias is not None else None + w2d = w.sum(dim=2, keepdim=True) + out = F.conv3d(x, w2d, b, + self.conv.stride, self.conv.padding, + self.conv.dilation, self.conv.groups) + return out, None + cached = [conv_cache] if conv_cache is not None else [x[:, :, :1]] * (kernel_t - 1) + x = torch.cat(cached + [x], dim=2) + conv_cache = x[:, :, -self.time_kernel_size + 1:].clone() if self.time_kernel_size > 1 else None + + out = self.conv(x) + return out, conv_cache + + +def _interpolate_zq(zq, target_size): + """Interpolate latent z to target (T, H, W), matching CogVideoX's first-frame-special handling.""" + t = target_size[0] + if t > 1 and t % 2 == 1: + z_first = F.interpolate(zq[:, :, :1], size=(1, target_size[1], target_size[2])) + z_rest = F.interpolate(zq[:, :, 1:], size=(t - 1, target_size[1], target_size[2])) + return torch.cat([z_first, z_rest], dim=2) + return F.interpolate(zq, size=target_size) + + +class SpatialNorm3D(nn.Module): + """Spatially conditioned normalization.""" + def __init__(self, f_channels, zq_channels, groups=32): + super().__init__() + self.norm_layer = ops.GroupNorm(num_channels=f_channels, num_groups=groups, eps=1e-6, affine=True) + self.conv_y = CausalConv3d(zq_channels, f_channels, kernel_size=1, stride=1) + self.conv_b = CausalConv3d(zq_channels, f_channels, kernel_size=1, stride=1) + + def forward(self, f, zq, conv_cache=None): + new_cache = {} + conv_cache = conv_cache or {} + + if zq.shape[-3:] != f.shape[-3:]: + zq = _interpolate_zq(zq, f.shape[-3:]) + + conv_y, new_cache["conv_y"] = self.conv_y(zq, conv_cache=conv_cache.get("conv_y")) + conv_b, new_cache["conv_b"] = self.conv_b(zq, conv_cache=conv_cache.get("conv_b")) + + return self.norm_layer(f) * conv_y + conv_b, new_cache + + +class ResnetBlock3D(nn.Module): + """3D ResNet block with optional spatial norm.""" + def __init__(self, in_channels, out_channels=None, temb_channels=512, groups=32, + eps=1e-6, act_fn="silu", spatial_norm_dim=None, pad_mode="first"): + super().__init__() + out_channels = out_channels or in_channels + self.in_channels = in_channels + self.out_channels = out_channels + self.spatial_norm_dim = spatial_norm_dim + + if act_fn == "silu": + self.nonlinearity = nn.SiLU() + elif act_fn == "swish": + 
self.nonlinearity = nn.SiLU() + else: + self.nonlinearity = nn.SiLU() + + if spatial_norm_dim is None: + self.norm1 = ops.GroupNorm(num_channels=in_channels, num_groups=groups, eps=eps) + self.norm2 = ops.GroupNorm(num_channels=out_channels, num_groups=groups, eps=eps) + else: + self.norm1 = SpatialNorm3D(in_channels, spatial_norm_dim, groups=groups) + self.norm2 = SpatialNorm3D(out_channels, spatial_norm_dim, groups=groups) + + self.conv1 = CausalConv3d(in_channels, out_channels, kernel_size=3, pad_mode=pad_mode) + + if temb_channels > 0: + self.temb_proj = ops.Linear(temb_channels, out_channels) + + self.conv2 = CausalConv3d(out_channels, out_channels, kernel_size=3, pad_mode=pad_mode) + + if in_channels != out_channels: + self.conv_shortcut = ops.Conv3d(in_channels, out_channels, kernel_size=1, stride=1, padding=0) + else: + self.conv_shortcut = None + + def forward(self, x, temb=None, zq=None, conv_cache=None): + new_cache = {} + conv_cache = conv_cache or {} + residual = x + + if zq is not None: + x, new_cache["norm1"] = self.norm1(x, zq, conv_cache=conv_cache.get("norm1")) + else: + x = self.norm1(x) + + x = self.nonlinearity(x) + x, new_cache["conv1"] = self.conv1(x, conv_cache=conv_cache.get("conv1")) + + if temb is not None and hasattr(self, "temb_proj"): + x = x + self.temb_proj(self.nonlinearity(temb))[:, :, None, None, None] + + if zq is not None: + x, new_cache["norm2"] = self.norm2(x, zq, conv_cache=conv_cache.get("norm2")) + else: + x = self.norm2(x) + + x = self.nonlinearity(x) + x, new_cache["conv2"] = self.conv2(x, conv_cache=conv_cache.get("conv2")) + + if self.conv_shortcut is not None: + residual = self.conv_shortcut(residual) + + return x + residual, new_cache + + +class Downsample3D(nn.Module): + """3D downsampling with optional temporal compression.""" + def __init__(self, in_channels, out_channels, kernel_size=3, stride=2, padding=0, compress_time=False): + super().__init__() + self.conv = ops.Conv2d(in_channels, out_channels, kernel_size=kernel_size, stride=stride, padding=padding) + self.compress_time = compress_time + + def forward(self, x): + if self.compress_time: + b, c, t, h, w = x.shape + x = x.permute(0, 3, 4, 1, 2).reshape(b * h * w, c, t) + if t % 2 == 1: + x_first, x_rest = x[..., 0], x[..., 1:] + if x_rest.shape[-1] > 0: + x_rest = F.avg_pool1d(x_rest, kernel_size=2, stride=2) + x = torch.cat([x_first[..., None], x_rest], dim=-1) + x = x.reshape(b, h, w, c, x.shape[-1]).permute(0, 3, 4, 1, 2) + else: + x = F.avg_pool1d(x, kernel_size=2, stride=2) + x = x.reshape(b, h, w, c, x.shape[-1]).permute(0, 3, 4, 1, 2) + + pad = (0, 1, 0, 1) + x = F.pad(x, pad, mode="constant", value=0) + b, c, t, h, w = x.shape + x = x.permute(0, 2, 1, 3, 4).reshape(b * t, c, h, w) + x = self.conv(x) + x = x.reshape(b, t, x.shape[1], x.shape[2], x.shape[3]).permute(0, 2, 1, 3, 4) + return x + + +class Upsample3D(nn.Module): + """3D upsampling with optional temporal decompression.""" + def __init__(self, in_channels, out_channels, kernel_size=3, stride=1, padding=1, compress_time=False): + super().__init__() + self.conv = ops.Conv2d(in_channels, out_channels, kernel_size=kernel_size, stride=stride, padding=padding) + self.compress_time = compress_time + + def forward(self, x): + if self.compress_time: + if x.shape[2] > 1 and x.shape[2] % 2 == 1: + x_first, x_rest = x[:, :, 0], x[:, :, 1:] + x_first = F.interpolate(x_first, scale_factor=2.0) + x_rest = F.interpolate(x_rest, scale_factor=2.0) + x = torch.cat([x_first[:, :, None, :, :], x_rest], dim=2) + elif x.shape[2] > 1: + x = 
F.interpolate(x, scale_factor=2.0) + else: + x = x.squeeze(2) + x = F.interpolate(x, scale_factor=2.0) + x = x[:, :, None, :, :] + else: + b, c, t, h, w = x.shape + x = x.permute(0, 2, 1, 3, 4).reshape(b * t, c, h, w) + x = F.interpolate(x, scale_factor=2.0) + x = x.reshape(b, t, c, *x.shape[2:]).permute(0, 2, 1, 3, 4) + + b, c, t, h, w = x.shape + x = x.permute(0, 2, 1, 3, 4).reshape(b * t, c, h, w) + x = self.conv(x) + x = x.reshape(b, t, *x.shape[1:]).permute(0, 2, 1, 3, 4) + return x + + +class DownBlock3D(nn.Module): + def __init__(self, in_channels, out_channels, temb_channels=0, num_layers=1, + eps=1e-6, act_fn="silu", groups=32, add_downsample=True, + compress_time=False, pad_mode="first"): + super().__init__() + self.resnets = nn.ModuleList([ + ResnetBlock3D( + in_channels=in_channels if i == 0 else out_channels, + out_channels=out_channels, + temb_channels=temb_channels, + groups=groups, eps=eps, act_fn=act_fn, pad_mode=pad_mode, + ) + for i in range(num_layers) + ]) + self.downsamplers = nn.ModuleList([Downsample3D(out_channels, out_channels, compress_time=compress_time)]) if add_downsample else None + + def forward(self, x, temb=None, zq=None, conv_cache=None): + new_cache = {} + conv_cache = conv_cache or {} + for i, resnet in enumerate(self.resnets): + x, new_cache[f"resnet_{i}"] = resnet(x, temb, zq, conv_cache=conv_cache.get(f"resnet_{i}")) + if self.downsamplers is not None: + for ds in self.downsamplers: + x = ds(x) + return x, new_cache + + +class MidBlock3D(nn.Module): + def __init__(self, in_channels, temb_channels=0, num_layers=1, + eps=1e-6, act_fn="silu", groups=32, spatial_norm_dim=None, pad_mode="first"): + super().__init__() + self.resnets = nn.ModuleList([ + ResnetBlock3D( + in_channels=in_channels, out_channels=in_channels, + temb_channels=temb_channels, groups=groups, eps=eps, + act_fn=act_fn, spatial_norm_dim=spatial_norm_dim, pad_mode=pad_mode, + ) + for _ in range(num_layers) + ]) + + def forward(self, x, temb=None, zq=None, conv_cache=None): + new_cache = {} + conv_cache = conv_cache or {} + for i, resnet in enumerate(self.resnets): + x, new_cache[f"resnet_{i}"] = resnet(x, temb, zq, conv_cache=conv_cache.get(f"resnet_{i}")) + return x, new_cache + + +class UpBlock3D(nn.Module): + def __init__(self, in_channels, out_channels, temb_channels=0, num_layers=1, + eps=1e-6, act_fn="silu", groups=32, spatial_norm_dim=16, + add_upsample=True, compress_time=False, pad_mode="first"): + super().__init__() + self.resnets = nn.ModuleList([ + ResnetBlock3D( + in_channels=in_channels if i == 0 else out_channels, + out_channels=out_channels, + temb_channels=temb_channels, groups=groups, eps=eps, + act_fn=act_fn, spatial_norm_dim=spatial_norm_dim, pad_mode=pad_mode, + ) + for i in range(num_layers) + ]) + self.upsamplers = nn.ModuleList([Upsample3D(out_channels, out_channels, compress_time=compress_time)]) if add_upsample else None + + def forward(self, x, temb=None, zq=None, conv_cache=None): + new_cache = {} + conv_cache = conv_cache or {} + for i, resnet in enumerate(self.resnets): + x, new_cache[f"resnet_{i}"] = resnet(x, temb, zq, conv_cache=conv_cache.get(f"resnet_{i}")) + if self.upsamplers is not None: + for us in self.upsamplers: + x = us(x) + return x, new_cache + + +class Encoder3D(nn.Module): + def __init__(self, in_channels=3, out_channels=16, + block_out_channels=(128, 256, 256, 512), + layers_per_block=3, act_fn="silu", + eps=1e-6, groups=32, pad_mode="first", + temporal_compression_ratio=4): + super().__init__() + temporal_compress_level = 
int(np.log2(temporal_compression_ratio)) + + self.conv_in = CausalConv3d(in_channels, block_out_channels[0], kernel_size=3, pad_mode=pad_mode) + + self.down_blocks = nn.ModuleList() + output_channel = block_out_channels[0] + for i in range(len(block_out_channels)): + input_channel = output_channel + output_channel = block_out_channels[i] + is_final = i == len(block_out_channels) - 1 + compress_time = i < temporal_compress_level + + self.down_blocks.append(DownBlock3D( + in_channels=input_channel, out_channels=output_channel, + temb_channels=0, num_layers=layers_per_block, + eps=eps, act_fn=act_fn, groups=groups, + add_downsample=not is_final, compress_time=compress_time, + )) + + self.mid_block = MidBlock3D( + in_channels=block_out_channels[-1], temb_channels=0, + num_layers=2, eps=eps, act_fn=act_fn, groups=groups, pad_mode=pad_mode, + ) + + self.norm_out = ops.GroupNorm(groups, block_out_channels[-1], eps=1e-6) + self.conv_act = nn.SiLU() + self.conv_out = CausalConv3d(block_out_channels[-1], 2 * out_channels, kernel_size=3, pad_mode=pad_mode) + + def forward(self, x, conv_cache=None): + new_cache = {} + conv_cache = conv_cache or {} + + x, new_cache["conv_in"] = self.conv_in(x, conv_cache=conv_cache.get("conv_in")) + + for i, block in enumerate(self.down_blocks): + key = f"down_block_{i}" + x, new_cache[key] = block(x, None, None, conv_cache.get(key)) + + x, new_cache["mid_block"] = self.mid_block(x, None, None, conv_cache=conv_cache.get("mid_block")) + + x = self.norm_out(x) + x = self.conv_act(x) + x, new_cache["conv_out"] = self.conv_out(x, conv_cache=conv_cache.get("conv_out")) + + return x, new_cache + + +class Decoder3D(nn.Module): + def __init__(self, in_channels=16, out_channels=3, + block_out_channels=(128, 256, 256, 512), + layers_per_block=3, act_fn="silu", + eps=1e-6, groups=32, pad_mode="first", + temporal_compression_ratio=4): + super().__init__() + reversed_channels = list(reversed(block_out_channels)) + temporal_compress_level = int(np.log2(temporal_compression_ratio)) + + self.conv_in = CausalConv3d(in_channels, reversed_channels[0], kernel_size=3, pad_mode=pad_mode) + + self.mid_block = MidBlock3D( + in_channels=reversed_channels[0], temb_channels=0, + num_layers=2, eps=eps, act_fn=act_fn, groups=groups, + spatial_norm_dim=in_channels, pad_mode=pad_mode, + ) + + self.up_blocks = nn.ModuleList() + output_channel = reversed_channels[0] + for i in range(len(block_out_channels)): + prev_channel = output_channel + output_channel = reversed_channels[i] + is_final = i == len(block_out_channels) - 1 + compress_time = i < temporal_compress_level + + self.up_blocks.append(UpBlock3D( + in_channels=prev_channel, out_channels=output_channel, + temb_channels=0, num_layers=layers_per_block + 1, + eps=eps, act_fn=act_fn, groups=groups, + spatial_norm_dim=in_channels, + add_upsample=not is_final, compress_time=compress_time, + )) + + self.norm_out = SpatialNorm3D(reversed_channels[-1], in_channels, groups=groups) + self.conv_act = nn.SiLU() + self.conv_out = CausalConv3d(reversed_channels[-1], out_channels, kernel_size=3, pad_mode=pad_mode) + + def forward(self, sample, conv_cache=None): + new_cache = {} + conv_cache = conv_cache or {} + + x, new_cache["conv_in"] = self.conv_in(sample, conv_cache=conv_cache.get("conv_in")) + + x, new_cache["mid_block"] = self.mid_block(x, None, sample, conv_cache=conv_cache.get("mid_block")) + + for i, block in enumerate(self.up_blocks): + key = f"up_block_{i}" + x, new_cache[key] = block(x, None, sample, conv_cache=conv_cache.get(key)) + + x, 
new_cache["norm_out"] = self.norm_out(x, sample, conv_cache=conv_cache.get("norm_out")) + x = self.conv_act(x) + x, new_cache["conv_out"] = self.conv_out(x, conv_cache=conv_cache.get("conv_out")) + + return x, new_cache + + + +class AutoencoderKLCogVideoX(nn.Module): + """CogVideoX VAE. Spatial tiling/slicing handled by ComfyUI's VAE wrapper. + + Uses rolling temporal decode: conv_in + mid_block + temporal up_blocks run + on the full (low-res) tensor, then the expensive spatial-only up_blocks + + norm_out + conv_out are processed in small temporal chunks with conv_cache + carrying causal state between chunks. This keeps peak VRAM proportional to + chunk_size rather than total frame count. + """ + + def __init__(self, + in_channels=3, out_channels=3, + block_out_channels=(128, 256, 256, 512), + latent_channels=16, layers_per_block=3, + act_fn="silu", eps=1e-6, groups=32, + temporal_compression_ratio=4, + ): + super().__init__() + self.latent_channels = latent_channels + self.temporal_compression_ratio = temporal_compression_ratio + + self.encoder = Encoder3D( + in_channels=in_channels, out_channels=latent_channels, + block_out_channels=block_out_channels, layers_per_block=layers_per_block, + act_fn=act_fn, eps=eps, groups=groups, + temporal_compression_ratio=temporal_compression_ratio, + ) + self.decoder = Decoder3D( + in_channels=latent_channels, out_channels=out_channels, + block_out_channels=block_out_channels, layers_per_block=layers_per_block, + act_fn=act_fn, eps=eps, groups=groups, + temporal_compression_ratio=temporal_compression_ratio, + ) + + self.num_latent_frames_batch_size = 2 + self.num_sample_frames_batch_size = 8 + + def encode(self, x): + t = x.shape[2] + frame_batch = self.num_sample_frames_batch_size + remainder = t % frame_batch + conv_cache = None + enc = [] + + # Process remainder frames first so only the first chunk can have an + # odd temporal dimension — where Downsample3D's first-frame-special + # handling in temporal compression is actually correct. + if remainder > 0: + chunk, conv_cache = self.encoder(x[:, :, :remainder], conv_cache=conv_cache) + enc.append(chunk.to(x.device)) + + for start in range(remainder, t, frame_batch): + chunk, conv_cache = self.encoder(x[:, :, start:start + frame_batch], conv_cache=conv_cache) + enc.append(chunk.to(x.device)) + + enc = torch.cat(enc, dim=2) + mean, _ = enc.chunk(2, dim=1) + return mean + + def decode(self, z): + return self._decode_rolling(z) + + def _decode_batched(self, z): + """Original batched decode - processes 2 latent frames through full decoder.""" + t = z.shape[2] + frame_batch = self.num_latent_frames_batch_size + num_batches = max(t // frame_batch, 1) + conv_cache = None + dec = [] + for i in range(num_batches): + remaining = t % frame_batch + start = frame_batch * i + (0 if i == 0 else remaining) + end = frame_batch * (i + 1) + remaining + chunk, conv_cache = self.decoder(z[:, :, start:end], conv_cache=conv_cache) + dec.append(chunk.cpu()) + return torch.cat(dec, dim=2).to(z.device) + + def _decode_rolling(self, z): + """Rolling decode - processes low-res layers on full tensor, then rolls + through expensive high-res layers in temporal chunks.""" + decoder = self.decoder + device = z.device + + # Determine which up_blocks have temporal upsample vs spatial-only. + # Temporal up_blocks are cheap (low res), spatial-only are expensive. 
+ temporal_compress_level = int(np.log2(self.temporal_compression_ratio)) + split_at = temporal_compress_level # first N up_blocks do temporal upsample + + # Phase 1: conv_in + mid_block + temporal up_blocks on full tensor (low/medium res) + x, _ = decoder.conv_in(z) + x, _ = decoder.mid_block(x, None, z) + + for i in range(split_at): + x, _ = decoder.up_blocks[i](x, None, z) + + # Phase 2: remaining spatial-only up_blocks + norm_out + conv_out in temporal chunks + remaining_blocks = list(range(split_at, len(decoder.up_blocks))) + chunk_size = 4 # pixel frames per chunk through high-res layers + t_expanded = x.shape[2] + + if t_expanded <= chunk_size or len(remaining_blocks) == 0: + # Small enough to process in one go + for i in remaining_blocks: + x, _ = decoder.up_blocks[i](x, None, z) + x, _ = decoder.norm_out(x, z) + x = decoder.conv_act(x) + x, _ = decoder.conv_out(x) + return x + + # Expand z temporally once to match Phase 2's time dimension. + # z stays at latent spatial resolution so this is small (~16 MB vs ~1.3 GB + # for the old approach of pre-interpolating to every pixel resolution). + z_time_expanded = _interpolate_zq(z, (t_expanded, z.shape[3], z.shape[4])) + + # Process in temporal chunks, interpolating spatially per-chunk to avoid + # allocating full [B, C, t_expanded, H, W] tensors at each resolution. + dec_out = [] + conv_caches = {} + + for chunk_start in range(0, t_expanded, chunk_size): + chunk_end = min(chunk_start + chunk_size, t_expanded) + x_chunk = x[:, :, chunk_start:chunk_end] + z_t_chunk = z_time_expanded[:, :, chunk_start:chunk_end] + z_spatial_cache = {} + + for i in remaining_blocks: + block = decoder.up_blocks[i] + cache_key = f"up_block_{i}" + hw_key = (x_chunk.shape[3], x_chunk.shape[4]) + if hw_key not in z_spatial_cache: + if z_t_chunk.shape[3] == hw_key[0] and z_t_chunk.shape[4] == hw_key[1]: + z_spatial_cache[hw_key] = z_t_chunk + else: + z_spatial_cache[hw_key] = F.interpolate(z_t_chunk, size=(z_t_chunk.shape[2], hw_key[0], hw_key[1])) + x_chunk, new_cache = block(x_chunk, None, z_spatial_cache[hw_key], conv_cache=conv_caches.get(cache_key)) + conv_caches[cache_key] = new_cache + + hw_key = (x_chunk.shape[3], x_chunk.shape[4]) + if hw_key not in z_spatial_cache: + z_spatial_cache[hw_key] = F.interpolate(z_t_chunk, size=(z_t_chunk.shape[2], hw_key[0], hw_key[1])) + x_chunk, new_cache = decoder.norm_out(x_chunk, z_spatial_cache[hw_key], conv_cache=conv_caches.get("norm_out")) + conv_caches["norm_out"] = new_cache + x_chunk = decoder.conv_act(x_chunk) + x_chunk, new_cache = decoder.conv_out(x_chunk, conv_cache=conv_caches.get("conv_out")) + conv_caches["conv_out"] = new_cache + + dec_out.append(x_chunk.cpu()) + del z_spatial_cache + + del x, z_time_expanded + return torch.cat(dec_out, dim=2).to(device) diff --git a/comfy/model_base.py b/comfy/model_base.py index 787ea1145..50dab5782 100644 --- a/comfy/model_base.py +++ b/comfy/model_base.py @@ -52,6 +52,7 @@ import comfy.ldm.qwen_image.model import comfy.ldm.kandinsky5.model import comfy.ldm.anima.model import comfy.ldm.ace.ace_step15 +import comfy.ldm.cogvideo.model import comfy.ldm.rt_detr.rtdetr_v4 import comfy.ldm.ernie.model import comfy.ldm.sam3.detector @@ -81,6 +82,7 @@ class ModelType(Enum): IMG_TO_IMG = 9 FLOW_COSMOS = 10 IMG_TO_IMG_FLOW = 11 + V_PREDICTION_DDPM = 12 def model_sampling(model_config, model_type): @@ -115,6 +117,8 @@ def model_sampling(model_config, model_type): s = comfy.model_sampling.ModelSamplingCosmosRFlow elif model_type == ModelType.IMG_TO_IMG_FLOW: c = 
comfy.model_sampling.IMG_TO_IMG_FLOW + elif model_type == ModelType.V_PREDICTION_DDPM: + c = comfy.model_sampling.V_PREDICTION_DDPM class ModelSampling(s, c): pass @@ -1979,3 +1983,59 @@ class ErnieImage(BaseModel): class SAM3(BaseModel): def __init__(self, model_config, model_type=ModelType.FLOW, device=None): super().__init__(model_config, model_type, device=device, unet_model=comfy.ldm.sam3.detector.SAM3Model) + +class CogVideoX(BaseModel): + def __init__(self, model_config, model_type=ModelType.V_PREDICTION_DDPM, image_to_video=False, device=None): + super().__init__(model_config, model_type, device=device, unet_model=comfy.ldm.cogvideo.model.CogVideoXTransformer3DModel) + self.image_to_video = image_to_video + + def concat_cond(self, **kwargs): + noise = kwargs.get("noise", None) + # Detect extra channels needed (e.g. 32 - 16 = 16 for ref latent) + extra_channels = self.diffusion_model.in_channels - noise.shape[1] + if extra_channels == 0: + return None + + image = kwargs.get("concat_latent_image", None) + device = kwargs["device"] + + if image is None: + shape = list(noise.shape) + shape[1] = extra_channels + return torch.zeros(shape, dtype=noise.dtype, layout=noise.layout, device=noise.device) + + latent_dim = self.latent_format.latent_channels + image = utils.common_upscale(image.to(device), noise.shape[-1], noise.shape[-2], "bilinear", "center") + + if noise.ndim == 5 and image.ndim == 5: + if image.shape[-3] < noise.shape[-3]: + image = torch.nn.functional.pad(image, (0, 0, 0, 0, 0, noise.shape[-3] - image.shape[-3]), "constant", 0) + elif image.shape[-3] > noise.shape[-3]: + image = image[:, :, :noise.shape[-3]] + + for i in range(0, image.shape[1], latent_dim): + image[:, i:i + latent_dim] = self.process_latent_in(image[:, i:i + latent_dim]) + image = utils.resize_to_batch_size(image, noise.shape[0]) + + if image.shape[1] > extra_channels: + image = image[:, :extra_channels] + elif image.shape[1] < extra_channels: + repeats = extra_channels // image.shape[1] + remainder = extra_channels % image.shape[1] + parts = [image] * repeats + if remainder > 0: + parts.append(image[:, :remainder]) + image = torch.cat(parts, dim=1) + + return image + + def extra_conds(self, **kwargs): + out = super().extra_conds(**kwargs) + # OFS embedding (CogVideoX 1.5 I2V), default 2.0 as used by SparkVSR + if self.diffusion_model.ofs_proj_dim is not None: + ofs = kwargs.get("ofs", None) + if ofs is None: + noise = kwargs.get("noise", None) + ofs = torch.full((noise.shape[0],), 2.0, device=noise.device, dtype=noise.dtype) + out['ofs'] = comfy.conds.CONDRegular(ofs) + return out diff --git a/comfy/model_detection.py b/comfy/model_detection.py index 724a241bf..d9b67dcdf 100644 --- a/comfy/model_detection.py +++ b/comfy/model_detection.py @@ -490,6 +490,54 @@ def detect_unet_config(state_dict, key_prefix, metadata=None): return dit_config + if '{}blocks.0.norm1.linear.weight'.format(key_prefix) in state_dict_keys: # CogVideoX + dit_config = {} + dit_config["image_model"] = "cogvideox" + + # Extract config from weight shapes + norm1_weight = state_dict['{}blocks.0.norm1.linear.weight'.format(key_prefix)] + time_embed_dim = norm1_weight.shape[1] + dim = norm1_weight.shape[0] // 6 + + dit_config["num_attention_heads"] = dim // 64 + dit_config["attention_head_dim"] = 64 + dit_config["time_embed_dim"] = time_embed_dim + dit_config["num_layers"] = count_blocks(state_dict_keys, '{}blocks.'.format(key_prefix) + '{}.') + + # Detect in_channels from patch_embed + patch_proj_key = 
'{}patch_embed.proj.weight'.format(key_prefix) + if patch_proj_key in state_dict_keys: + w = state_dict[patch_proj_key] + if w.ndim == 4: + # Conv2d: [out, in, kh, kw] — CogVideoX 1.0 + dit_config["in_channels"] = w.shape[1] + dit_config["patch_size"] = w.shape[2] + elif w.ndim == 2: + # Linear: [out, in_channels * patch_size * patch_size * patch_size_t] — CogVideoX 1.5 + dit_config["patch_size"] = 2 + dit_config["patch_size_t"] = 2 + dit_config["in_channels"] = w.shape[1] // (2 * 2 * 2) # 256 // 8 = 32 + + text_proj_key = '{}patch_embed.text_proj.weight'.format(key_prefix) + if text_proj_key in state_dict_keys: + dit_config["text_embed_dim"] = state_dict[text_proj_key].shape[1] + + # Detect OFS embedding + ofs_key = '{}ofs_embedding_linear_1.weight'.format(key_prefix) + if ofs_key in state_dict_keys: + dit_config["ofs_embed_dim"] = state_dict[ofs_key].shape[1] + + # Detect positional embedding type + pos_key = '{}patch_embed.pos_embedding'.format(key_prefix) + if pos_key in state_dict_keys: + dit_config["use_learned_positional_embeddings"] = True + dit_config["use_rotary_positional_embeddings"] = False + else: + dit_config["use_learned_positional_embeddings"] = False + dit_config["use_rotary_positional_embeddings"] = True + + return dit_config + if '{}head.modulation'.format(key_prefix) in state_dict_keys: # Wan 2.1 dit_config = {} dit_config["image_model"] = "wan2.1" diff --git a/comfy/model_sampling.py b/comfy/model_sampling.py index 13860e6a2..cf2b5db5f 100644 --- a/comfy/model_sampling.py +++ b/comfy/model_sampling.py @@ -54,6 +54,30 @@ class V_PREDICTION(EPS): sigma = reshape_sigma(sigma, model_output.ndim) return model_input * self.sigma_data ** 2 / (sigma ** 2 + self.sigma_data ** 2) - model_output * sigma * self.sigma_data / (sigma ** 2 + self.sigma_data ** 2) ** 0.5 +class V_PREDICTION_DDPM: + """CogVideoX v-prediction: model receives raw x_t (unscaled), predicts velocity v. 
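+    Here alpha denotes the schedule's cumulative alpha_bar; writing the discrete
+    sigma as sqrt((1 - alpha) / alpha) gives sqrt(alpha) = 1 / sqrt(sigma^2 + 1)
+    and sqrt(1 - alpha) = sigma / sqrt(sigma^2 + 1), hence: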
+ x_0 = sqrt(alpha) * x_t - sqrt(1-alpha) * v + = x_t / sqrt(sigma^2 + 1) - v * sigma / sqrt(sigma^2 + 1) + """ + def calculate_input(self, sigma, noise): + return noise + + def calculate_denoised(self, sigma, model_output, model_input): + sigma = reshape_sigma(sigma, model_output.ndim) + return model_input / (sigma ** 2 + 1.0) ** 0.5 - model_output * sigma / (sigma ** 2 + 1.0) ** 0.5 + + def noise_scaling(self, sigma, noise, latent_image, max_denoise=False): + sigma = reshape_sigma(sigma, noise.ndim) + if max_denoise: + noise = noise * torch.sqrt(1.0 + sigma ** 2.0) + else: + noise = noise * sigma + noise += latent_image + return noise + + def inverse_noise_scaling(self, sigma, latent): + return latent + class EDM(V_PREDICTION): def calculate_denoised(self, sigma, model_output, model_input): sigma = reshape_sigma(sigma, model_output.ndim) diff --git a/comfy/sd.py b/comfy/sd.py index 736fe35de..9158317f1 100644 --- a/comfy/sd.py +++ b/comfy/sd.py @@ -18,6 +18,7 @@ import comfy.ldm.wan.vae import comfy.ldm.wan.vae2_2 import comfy.ldm.hunyuan3d.vae import comfy.ldm.ace.vae.music_dcae_pipeline +import comfy.ldm.cogvideo.vae import comfy.ldm.hunyuan_video.vae import comfy.ldm.mmaudio.vae.autoencoder import comfy.pixel_space_convert @@ -652,6 +653,17 @@ class VAE: self.memory_used_encode = lambda shape, dtype: (1400 * 9 * shape[-2] * shape[-1]) * model_management.dtype_size(dtype) self.memory_used_decode = lambda shape, dtype: (3600 * 4 * shape[-2] * shape[-1] * 16 * 16) * model_management.dtype_size(dtype) + elif "decoder.conv_in.conv.weight" in sd and "decoder.mid_block.resnets.0.norm1.norm_layer.weight" in sd: # CogVideoX VAE + self.upscale_ratio = (lambda a: max(0, a * 4 - 3), 8, 8) + self.upscale_index_formula = (4, 8, 8) + self.downscale_ratio = (lambda a: max(0, math.floor((a + 3) / 4)), 8, 8) + self.downscale_index_formula = (4, 8, 8) + self.latent_dim = 3 + self.latent_channels = sd["encoder.conv_out.conv.weight"].shape[0] // 2 + self.first_stage_model = comfy.ldm.cogvideo.vae.AutoencoderKLCogVideoX(latent_channels=self.latent_channels) + self.memory_used_decode = lambda shape, dtype: (2800 * max(2, ((shape[2] - 1) * 4) + 1) * shape[3] * shape[4] * (8 * 8)) * model_management.dtype_size(dtype) + self.memory_used_encode = lambda shape, dtype: (1400 * max(1, shape[2]) * shape[3] * shape[4]) * model_management.dtype_size(dtype) + self.working_dtypes = [torch.bfloat16, torch.float16, torch.float32] elif "decoder.conv_in.conv.weight" in sd: ddconfig = {'double_z': True, 'z_channels': 4, 'resolution': 256, 'in_channels': 3, 'out_ch': 3, 'ch': 128, 'ch_mult': [1, 2, 4, 4], 'num_res_blocks': 2, 'attn_resolutions': [], 'dropout': 0.0} ddconfig["conv3d"] = True diff --git a/comfy/supported_models.py b/comfy/supported_models.py index 8886f32d5..92d0305c5 100644 --- a/comfy/supported_models.py +++ b/comfy/supported_models.py @@ -27,6 +27,7 @@ import comfy.text_encoders.anima import comfy.text_encoders.ace15 import comfy.text_encoders.longcat_image import comfy.text_encoders.ernie +import comfy.text_encoders.cogvideo from . import supported_models_base from . 
import latent_formats @@ -1832,6 +1833,52 @@ class SAM31(SAM3): unet_config = {"image_model": "SAM31"} -models = [LotusD, Stable_Zero123, SD15_instructpix2pix, SD15, SD20, SD21UnclipL, SD21UnclipH, SDXL_instructpix2pix, SDXLRefiner, SDXL, SSD1B, KOALA_700M, KOALA_1B, Segmind_Vega, SD_X4Upscaler, Stable_Cascade_C, Stable_Cascade_B, SV3D_u, SV3D_p, SD3, StableAudio, AuraFlow, PixArtAlpha, PixArtSigma, HunyuanDiT, HunyuanDiT1, FluxInpaint, Flux, LongCatImage, FluxSchnell, GenmoMochi, LTXV, LTXAV, HunyuanVideo15_SR_Distilled, HunyuanVideo15, HunyuanImage21Refiner, HunyuanImage21, HunyuanVideoSkyreelsI2V, HunyuanVideoI2V, HunyuanVideo, CosmosT2V, CosmosI2V, CosmosT2IPredict2, CosmosI2VPredict2, ZImagePixelSpace, ZImage, Lumina2, WAN22_T2V, WAN21_T2V, WAN21_I2V, WAN21_FunControl2V, WAN21_Vace, WAN21_Camera, WAN22_Camera, WAN22_S2V, WAN21_HuMo, WAN22_Animate, WAN21_FlowRVS, WAN21_SCAIL, Hunyuan3Dv2mini, Hunyuan3Dv2, Hunyuan3Dv2_1, HiDream, Chroma, ChromaRadiance, ACEStep, ACEStep15, Omnigen2, QwenImage, Flux2, Kandinsky5Image, Kandinsky5, Anima, RT_DETR_v4, ErnieImage, SAM3, SAM31] +class CogVideoX_T2V(supported_models_base.BASE): + unet_config = { + "image_model": "cogvideox", + } + + sampling_settings = { + "linear_start": 0.00085, + "linear_end": 0.012, + "beta_schedule": "linear", + "zsnr": True, + } + + unet_extra_config = {} + latent_format = latent_formats.CogVideoX + + supported_inference_dtypes = [torch.bfloat16, torch.float16, torch.float32] + + vae_key_prefix = ["vae."] + text_encoder_key_prefix = ["text_encoders."] + + def get_model(self, state_dict, prefix="", device=None): + # CogVideoX 1.5 (patch_size_t=2) has different training base dimensions for RoPE + if self.unet_config.get("patch_size_t") is not None: + self.unet_config.setdefault("sample_height", 96) + self.unet_config.setdefault("sample_width", 170) + self.unet_config.setdefault("sample_frames", 81) + out = model_base.CogVideoX(self, device=device) + return out + + def clip_target(self, state_dict={}): + return supported_models_base.ClipTarget(comfy.text_encoders.cogvideo.CogVideoXT5Tokenizer, comfy.text_encoders.sd3_clip.T5XXLModel) + +class CogVideoX_I2V(CogVideoX_T2V): + unet_config = { + "image_model": "cogvideox", + "in_channels": 32, + } + + def get_model(self, state_dict, prefix="", device=None): + if self.unet_config.get("patch_size_t") is not None: + self.unet_config.setdefault("sample_height", 96) + self.unet_config.setdefault("sample_width", 170) + self.unet_config.setdefault("sample_frames", 81) + out = model_base.CogVideoX(self, image_to_video=True, device=device) + return out + +models = [LotusD, Stable_Zero123, SD15_instructpix2pix, SD15, SD20, SD21UnclipL, SD21UnclipH, SDXL_instructpix2pix, SDXLRefiner, SDXL, SSD1B, KOALA_700M, KOALA_1B, Segmind_Vega, SD_X4Upscaler, Stable_Cascade_C, Stable_Cascade_B, SV3D_u, SV3D_p, SD3, StableAudio, AuraFlow, PixArtAlpha, PixArtSigma, HunyuanDiT, HunyuanDiT1, FluxInpaint, Flux, LongCatImage, FluxSchnell, GenmoMochi, LTXV, LTXAV, HunyuanVideo15_SR_Distilled, HunyuanVideo15, HunyuanImage21Refiner, HunyuanImage21, HunyuanVideoSkyreelsI2V, HunyuanVideoI2V, HunyuanVideo, CosmosT2V, CosmosI2V, CosmosT2IPredict2, CosmosI2VPredict2, ZImagePixelSpace, ZImage, Lumina2, WAN22_T2V, WAN21_T2V, WAN21_I2V, WAN21_FunControl2V, WAN21_Vace, WAN21_Camera, WAN22_Camera, WAN22_S2V, WAN21_HuMo, WAN22_Animate, WAN21_FlowRVS, WAN21_SCAIL, Hunyuan3Dv2mini, Hunyuan3Dv2, Hunyuan3Dv2_1, HiDream, Chroma, ChromaRadiance, ACEStep, ACEStep15, Omnigen2, QwenImage, Flux2, Kandinsky5Image, Kandinsky5, 
Anima, RT_DETR_v4, ErnieImage, SAM3, SAM31, CogVideoX_I2V, CogVideoX_T2V] models += [SVD_img2vid] diff --git a/comfy/text_encoders/cogvideo.py b/comfy/text_encoders/cogvideo.py new file mode 100644 index 000000000..f1e8e3f5d --- /dev/null +++ b/comfy/text_encoders/cogvideo.py @@ -0,0 +1,6 @@ +import comfy.text_encoders.sd3_clip + + +class CogVideoXT5Tokenizer(comfy.text_encoders.sd3_clip.T5XXLTokenizer): + def __init__(self, embedding_directory=None, tokenizer_data={}): + super().__init__(embedding_directory=embedding_directory, tokenizer_data=tokenizer_data, min_length=226) diff --git a/nodes.py b/nodes.py index e73a0712e..7aeb05b32 100644 --- a/nodes.py +++ b/nodes.py @@ -2463,7 +2463,7 @@ async def init_builtin_extra_nodes(): "nodes_curve.py", "nodes_rtdetr.py", "nodes_frame_interpolation.py", - "nodes_sam3.py" + "nodes_sam3.py", ] import_failed = [] From a164c82913d3e04d92d0f6630fc4c850ec184ef3 Mon Sep 17 00:00:00 2001 From: blepping <157360029+blepping@users.noreply.github.com> Date: Wed, 29 Apr 2026 17:37:30 -0600 Subject: [PATCH 48/81] Add high quality preview support for Flux2 latents (#13496) --- comfy/latent_formats.py | 1 + comfy/sd.py | 5 +- comfy/taesd/taesd.py | 102 ++++++++++++++++++++++++++++++---------- nodes.py | 53 +++++++-------------- 4 files changed, 100 insertions(+), 61 deletions(-) diff --git a/comfy/latent_formats.py b/comfy/latent_formats.py index 0f4059ebe..3dac5be18 100644 --- a/comfy/latent_formats.py +++ b/comfy/latent_formats.py @@ -224,6 +224,7 @@ class Flux2(LatentFormat): self.latent_rgb_factors_bias = [-0.0329, -0.0718, -0.0851] self.latent_rgb_factors_reshape = lambda t: t.reshape(t.shape[0], 32, 2, 2, t.shape[-2], t.shape[-1]).permute(0, 1, 4, 2, 5, 3).reshape(t.shape[0], 32, t.shape[-2] * 2, t.shape[-1] * 2) + self.taesd_decoder_name = "taef2_decoder" def process_in(self, latent): return latent diff --git a/comfy/sd.py b/comfy/sd.py index 9158317f1..ee66490f5 100644 --- a/comfy/sd.py +++ b/comfy/sd.py @@ -479,7 +479,10 @@ class VAE: encoder_config={'target': "comfy.ldm.modules.diffusionmodules.model.Encoder", 'params': encoder_config}, decoder_config={'target': "comfy.ldm.modules.temporal_ae.VideoDecoder", 'params': decoder_config}) elif "taesd_decoder.1.weight" in sd: - self.latent_channels = sd["taesd_decoder.1.weight"].shape[1] + if isinstance(metadata, dict) and "tae_latent_channels" in metadata: + self.latent_channels = metadata["tae_latent_channels"] + else: + self.latent_channels = sd["taesd_decoder.1.weight"].shape[1] self.first_stage_model = comfy.taesd.taesd.TAESD(latent_channels=self.latent_channels) elif "vquantizer.codebook.weight" in sd: #VQGan: stage a of stable cascade self.first_stage_model = StageA() diff --git a/comfy/taesd/taesd.py b/comfy/taesd/taesd.py index ce36f1a84..05d370209 100644 --- a/comfy/taesd/taesd.py +++ b/comfy/taesd/taesd.py @@ -17,32 +17,79 @@ class Clamp(nn.Module): return torch.tanh(x / 3) * 3 class Block(nn.Module): - def __init__(self, n_in, n_out): + def __init__(self, n_in: int, n_out: int, use_midblock_gn: bool = False): super().__init__() self.conv = nn.Sequential(conv(n_in, n_out), nn.ReLU(), conv(n_out, n_out), nn.ReLU(), conv(n_out, n_out)) self.skip = comfy.ops.disable_weight_init.Conv2d(n_in, n_out, 1, bias=False) if n_in != n_out else nn.Identity() self.fuse = nn.ReLU() - def forward(self, x): + if not use_midblock_gn: + self.pool = None + return + n_gn = n_in * 4 + self.pool = nn.Sequential( + comfy.ops.disable_weight_init.Conv2d(n_in, n_gn, 1, bias=False), + 
comfy.ops.disable_weight_init.GroupNorm(4, n_gn), + nn.ReLU(inplace=True), + comfy.ops.disable_weight_init.Conv2d(n_gn, n_in, 1, bias=False), + ) + + def forward(self, x: torch.Tensor) -> torch.Tensor: + if self.pool is not None: + x = x + self.pool(x) return self.fuse(self.conv(x) + self.skip(x)) -def Encoder(latent_channels=4): - return nn.Sequential( - conv(3, 64), Block(64, 64), - conv(64, 64, stride=2, bias=False), Block(64, 64), Block(64, 64), Block(64, 64), - conv(64, 64, stride=2, bias=False), Block(64, 64), Block(64, 64), Block(64, 64), - conv(64, 64, stride=2, bias=False), Block(64, 64), Block(64, 64), Block(64, 64), - conv(64, latent_channels), - ) +class Encoder(nn.Sequential): + def __init__(self, latent_channels: int = 4, use_gn: bool = False): + super().__init__( + conv(3, 64), Block(64, 64), + conv(64, 64, stride=2, bias=False), Block(64, 64), Block(64, 64), Block(64, 64), + conv(64, 64, stride=2, bias=False), Block(64, 64), Block(64, 64), Block(64, 64), + conv(64, 64, stride=2, bias=False), Block(64, 64, use_gn), Block(64, 64, use_gn), Block(64, 64, use_gn), + conv(64, latent_channels), + ) +class Decoder(nn.Sequential): + def __init__(self, latent_channels: int = 4, use_gn: bool = False): + super().__init__( + Clamp(), conv(latent_channels, 64), nn.ReLU(), + Block(64, 64, use_gn), Block(64, 64, use_gn), Block(64, 64, use_gn), nn.Upsample(scale_factor=2), conv(64, 64, bias=False), + Block(64, 64), Block(64, 64), Block(64, 64), nn.Upsample(scale_factor=2), conv(64, 64, bias=False), + Block(64, 64), Block(64, 64), Block(64, 64), nn.Upsample(scale_factor=2), conv(64, 64, bias=False), + Block(64, 64), conv(64, 3), + ) + +class DecoderFlux2(Decoder): + def __init__(self, latent_channels: int = 128, use_gn: bool = True): + if latent_channels != 128 or not use_gn: + raise ValueError("Unexpected parameters for Flux2 TAE module") + super().__init__(latent_channels=32, use_gn=True) + + def forward(self, x: torch.Tensor) -> torch.Tensor: + B, C, H, W = x.shape + x = ( + x + .reshape(B, 32, 2, 2, H, W) + .permute(0, 1, 4, 2, 5, 3) + .reshape(B, 32, H * 2, W * 2) + ) + return super().forward(x) + +class EncoderFlux2(Encoder): + def __init__(self, latent_channels: int = 128, use_gn: bool = True): + if latent_channels != 128 or not use_gn: + raise ValueError("Unexpected parameters for Flux2 TAE module") + super().__init__(latent_channels=32, use_gn=True) + + def forward(self, x: torch.Tensor) -> torch.Tensor: + result = super().forward(x) + B, C, H, W = result.shape + return ( + result + .reshape(B, C, H // 2, 2, W // 2, 2) + .permute(0, 1, 3, 5, 2, 4) + .reshape(B, 128, H // 2, W // 2) + ) -def Decoder(latent_channels=4): - return nn.Sequential( - Clamp(), conv(latent_channels, 64), nn.ReLU(), - Block(64, 64), Block(64, 64), Block(64, 64), nn.Upsample(scale_factor=2), conv(64, 64, bias=False), - Block(64, 64), Block(64, 64), Block(64, 64), nn.Upsample(scale_factor=2), conv(64, 64, bias=False), - Block(64, 64), Block(64, 64), Block(64, 64), nn.Upsample(scale_factor=2), conv(64, 64, bias=False), - Block(64, 64), conv(64, 3), - ) class TAESD(nn.Module): latent_magnitude = 3 @@ -51,8 +98,15 @@ class TAESD(nn.Module): def __init__(self, encoder_path=None, decoder_path=None, latent_channels=4): """Initialize pretrained TAESD on the given device from the given checkpoints.""" super().__init__() - self.taesd_encoder = Encoder(latent_channels=latent_channels) - self.taesd_decoder = Decoder(latent_channels=latent_channels) + if latent_channels == 128: + encoder_class = EncoderFlux2 + 
decoder_class = DecoderFlux2 + else: + encoder_class = Encoder + decoder_class = Decoder + self.taesd_encoder = encoder_class(latent_channels=latent_channels) + self.taesd_decoder = decoder_class(latent_channels=latent_channels) + self.vae_scale = torch.nn.Parameter(torch.tensor(1.0)) self.vae_shift = torch.nn.Parameter(torch.tensor(0.0)) if encoder_path is not None: @@ -61,19 +115,19 @@ class TAESD(nn.Module): self.taesd_decoder.load_state_dict(comfy.utils.load_torch_file(decoder_path, safe_load=True)) @staticmethod - def scale_latents(x): + def scale_latents(x: torch.Tensor) -> torch.Tensor: """raw latents -> [0, 1]""" return x.div(2 * TAESD.latent_magnitude).add(TAESD.latent_shift).clamp(0, 1) @staticmethod - def unscale_latents(x): + def unscale_latents(x: torch.Tensor) -> torch.Tensor: """[0, 1] -> raw latents""" return x.sub(TAESD.latent_shift).mul(2 * TAESD.latent_magnitude) - def decode(self, x): + def decode(self, x: torch.Tensor) -> torch.Tensor: x_sample = self.taesd_decoder((x - self.vae_shift) * self.vae_scale) x_sample = x_sample.sub(0.5).mul(2) return x_sample - def encode(self, x): + def encode(self, x: torch.Tensor) -> torch.Tensor: return (self.taesd_encoder(x * 0.5 + 0.5) / self.vae_scale) + self.vae_shift diff --git a/nodes.py b/nodes.py index 7aeb05b32..99dc07227 100644 --- a/nodes.py +++ b/nodes.py @@ -728,50 +728,26 @@ class LoraLoaderModelOnly(LoraLoader): class VAELoader: video_taes = ["taehv", "lighttaew2_2", "lighttaew2_1", "lighttaehy1_5", "taeltx_2"] - image_taes = ["taesd", "taesdxl", "taesd3", "taef1"] + image_taes = ["taesd", "taesdxl", "taesd3", "taef1", "taef2"] + @staticmethod def vae_list(s): vaes = folder_paths.get_filename_list("vae") approx_vaes = folder_paths.get_filename_list("vae_approx") - sdxl_taesd_enc = False - sdxl_taesd_dec = False - sd1_taesd_enc = False - sd1_taesd_dec = False - sd3_taesd_enc = False - sd3_taesd_dec = False - f1_taesd_enc = False - f1_taesd_dec = False - + have_img_encoder, have_img_decoder = set(), set() for v in approx_vaes: - if v.startswith("taesd_decoder."): - sd1_taesd_dec = True - elif v.startswith("taesd_encoder."): - sd1_taesd_enc = True - elif v.startswith("taesdxl_decoder."): - sdxl_taesd_dec = True - elif v.startswith("taesdxl_encoder."): - sdxl_taesd_enc = True - elif v.startswith("taesd3_decoder."): - sd3_taesd_dec = True - elif v.startswith("taesd3_encoder."): - sd3_taesd_enc = True - elif v.startswith("taef1_encoder."): - f1_taesd_dec = True - elif v.startswith("taef1_decoder."): - f1_taesd_enc = True - else: + parts = v.split("_", 1) + if len(parts) != 2 or parts[0] not in s.image_taes: for tae in s.video_taes: if v.startswith(tae): vaes.append(v) - - if sd1_taesd_dec and sd1_taesd_enc: - vaes.append("taesd") - if sdxl_taesd_dec and sdxl_taesd_enc: - vaes.append("taesdxl") - if sd3_taesd_dec and sd3_taesd_enc: - vaes.append("taesd3") - if f1_taesd_dec and f1_taesd_enc: - vaes.append("taef1") + break + continue + if parts[1].startswith("encoder."): + have_img_encoder.add(parts[0]) + elif parts[1].startswith("decoder."): + have_img_decoder.add(parts[0]) + vaes += [k for k in have_img_decoder if k in have_img_encoder] vaes.append("pixel_space") return vaes @@ -827,6 +803,11 @@ class VAELoader: else: vae_path = folder_paths.get_full_path_or_raise("vae", vae_name) sd, metadata = comfy.utils.load_torch_file(vae_path, return_metadata=True) + if vae_name == "taef2": + if metadata is None: + metadata = {"tae_latent_channels": 128} + else: + metadata["tae_latent_channels"] = 128 vae = comfy.sd.VAE(sd=sd, 
metadata=metadata) vae.throw_exception_if_invalid() return (vae,) From d10fc2d6524043d2322968b518168910b1e9b530 Mon Sep 17 00:00:00 2001 From: comfyanonymous <121283862+comfyanonymous@users.noreply.github.com> Date: Wed, 29 Apr 2026 20:05:31 -0700 Subject: [PATCH 49/81] Lower peak mem usage for 8 bit formats with pyav. (#13626) --- comfy_api/latest/_input_impl/video_types.py | 21 ++++++++++++++++++--- 1 file changed, 18 insertions(+), 3 deletions(-) diff --git a/comfy_api/latest/_input_impl/video_types.py b/comfy_api/latest/_input_impl/video_types.py index 6ed41bba8..9a107fb76 100644 --- a/comfy_api/latest/_input_impl/video_types.py +++ b/comfy_api/latest/_input_impl/video_types.py @@ -251,6 +251,7 @@ class VideoFromFile(VideoInput): container.seek(start_pts, stream=video_stream) image_format = 'gbrpf32le' + process_image_format = lambda a: a audio = None streams = [video_stream] @@ -283,11 +284,25 @@ class VideoFromFile(VideoInput): break if not checked_alpha: + alpha_channel = False for comp in frame.format.components: if comp.is_alpha or frame.format.name == "pal8": alphas = [] - image_format = 'gbrapf32le' + alpha_channel = True break + if frame.format.name in ("yuvj420p", "rgb24", "rgba", "pal8"): + process_image_format = lambda a: a.float() / 255.0 + if alpha_channel: + image_format = 'rgba' + else: + image_format = 'rgb24' + else: + process_image_format = lambda a: a + if alpha_channel: + image_format = 'gbrapf32le' + else: + image_format = 'gbrpf32le' + checked_alpha = True img = frame.to_ndarray(format=image_format) # shape: (H, W, 4) @@ -323,9 +338,9 @@ class VideoFromFile(VideoInput): else: audio_frames.append(frame.to_ndarray()) - images = torch.stack(frames) if len(frames) > 0 else torch.zeros(0, 0, 0, 3) + images = process_image_format(torch.stack(frames)) if len(frames) > 0 else torch.zeros(0, 0, 0, 3) if alphas is not None: - alphas = torch.stack(alphas) if len(alphas) > 0 else torch.zeros(0, 0, 0, 1) + alphas = process_image_format(torch.stack(alphas)) if len(alphas) > 0 else torch.zeros(0, 0, 0, 1) # Get frame rate frame_rate = Fraction(video_stream.average_rate) if video_stream.average_rate else Fraction(1) From a7d82baa06e6b2e3d19c38c244118909fe270d49 Mon Sep 17 00:00:00 2001 From: Jedrzej Kosinski Date: Wed, 29 Apr 2026 20:30:01 -0700 Subject: [PATCH 50/81] Fix SQLAlchemy version format in requirements.txt (#13547) Change SQLAlchemy>=2.0 to SQLAlchemy>=2.0.0 to satisfy the X.Y.Z version format expected by install_util.is_valid_version(). 
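
A minimal sketch of the kind of X.Y.Z check this satisfies (the helper body and
its regex are assumptions for illustration, not the actual install_util
implementation):

    import re

    def is_valid_version(version: str) -> bool:
        # Hypothetical approximation: accept only MAJOR.MINOR.PATCH.
        return re.fullmatch(r"\d+\.\d+\.\d+", version) is not None

    print(is_valid_version("2.0"))    # False: why "SQLAlchemy>=2.0" was rejected
    print(is_valid_version("2.0.0"))  # True: the corrected pin passes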
--- requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/requirements.txt b/requirements.txt index 12c5ff7a9..c3d51e2fa 100644 --- a/requirements.txt +++ b/requirements.txt @@ -19,7 +19,7 @@ scipy tqdm psutil alembic -SQLAlchemy>=2.0 +SQLAlchemy>=2.0.0 filelock av>=14.2.0 comfy-kitchen>=0.2.8 From 38ecad8f8af30965eb1017b0eb6a552c751b84a4 Mon Sep 17 00:00:00 2001 From: Alexander Piskun <13381981+bigcat88@users.noreply.github.com> Date: Thu, 30 Apr 2026 11:09:33 +0300 Subject: [PATCH 51/81] feat(api-nodes): allow custom resolutions for GPTImage2 node (#13631) Signed-off-by: bigcat88 --- comfy_api_nodes/nodes_openai.py | 51 +++++++++++++++++++++++++++++---- 1 file changed, 46 insertions(+), 5 deletions(-) diff --git a/comfy_api_nodes/nodes_openai.py b/comfy_api_nodes/nodes_openai.py index bbb758068..843681817 100644 --- a/comfy_api_nodes/nodes_openai.py +++ b/comfy_api_nodes/nodes_openai.py @@ -415,8 +415,9 @@ class OpenAIGPTImage1(IO.ComfyNode): "1152x2048", "3840x2160", "2160x3840", + "Custom", ], - tooltip="Image size", + tooltip="Image size. Select 'Custom' to use the custom width and height (GPT Image 2 only).", optional=True, ), IO.Int.Input( @@ -445,6 +446,26 @@ class OpenAIGPTImage1(IO.ComfyNode): default="gpt-image-2", optional=True, ), + IO.Int.Input( + "custom_width", + default=1024, + min=1024, + max=3840, + step=16, + tooltip="Used only when `size` is 'Custom'. Must be a multiple of 16 (GPT Image 2 only).", + optional=True, + advanced=True, + ), + IO.Int.Input( + "custom_height", + default=1024, + min=1024, + max=3840, + step=16, + tooltip="Used only when `size` is 'Custom'. Must be a multiple of 16 (GPT Image 2 only).", + optional=True, + advanced=True, + ), ], outputs=[ IO.Image.Output(), @@ -471,9 +492,9 @@ class OpenAIGPTImage1(IO.ComfyNode): "high": [0.133, 0.22] }, "gpt-image-2": { - "low": [0.0048, 0.012], - "medium": [0.041, 0.112], - "high": [0.165, 0.43] + "low": [0.0048, 0.019], + "medium": [0.041, 0.168], + "high": [0.165, 0.67] } }; $range := $lookup($lookup($ranges, widgets.model), widgets.quality); @@ -503,6 +524,8 @@ class OpenAIGPTImage1(IO.ComfyNode): mask: Input.Image | None = None, n: int = 1, size: str = "1024x1024", + custom_width: int = 1024, + custom_height: int = 1024, model: str = "gpt-image-1", ) -> IO.NodeOutput: validate_string(prompt, strip_whitespace=False) @@ -510,7 +533,25 @@ class OpenAIGPTImage1(IO.ComfyNode): if mask is not None and image is None: raise ValueError("Cannot use a mask without an input image") - if model in ("gpt-image-1", "gpt-image-1.5"): + if size == "Custom": + if model != "gpt-image-2": + raise ValueError("Custom resolution is only supported by GPT Image 2 model") + if custom_width % 16 != 0 or custom_height % 16 != 0: + raise ValueError(f"Custom width and height must be multiples of 16, got {custom_width}x{custom_height}") + if max(custom_width, custom_height) > 3840: + raise ValueError(f"Custom resolution max edge must be <= 3840, got {custom_width}x{custom_height}") + ratio = max(custom_width, custom_height) / min(custom_width, custom_height) + if ratio > 3: + raise ValueError( + f"Custom resolution aspect ratio must not exceed 3:1, got {custom_width}x{custom_height}" + ) + total_pixels = custom_width * custom_height + if not 655_360 <= total_pixels <= 8_294_400: + raise ValueError( + f"Custom resolution total pixels must be between 655,360 and 8,294,400, got {total_pixels}" + ) + size = f"{custom_width}x{custom_height}" + elif model in ("gpt-image-1", "gpt-image-1.5"): if size not in ("auto", 
"1024x1024", "1024x1536", "1536x1024"): raise ValueError(f"Resolution {size} is only supported by GPT Image 2 model") From b633244635e577e199944cd4f027df79afa16dbf Mon Sep 17 00:00:00 2001 From: Alexander Piskun <13381981+bigcat88@users.noreply.github.com> Date: Thu, 30 Apr 2026 21:49:08 +0300 Subject: [PATCH 52/81] [Partner Nodes] ByteDance: virtual portrait library for regular images (#13638) * feat(api-nodes-bytedance): use the virtual portrait library for regular images Signed-off-by: bigcat88 * fix: include shape in image dedup hash Signed-off-by: bigcat88 --------- Signed-off-by: bigcat88 --- comfy_api_nodes/apis/bytedance.py | 5 ++++ comfy_api_nodes/nodes_bytedance.py | 38 ++++++++++++++++++++++++++---- 2 files changed, 39 insertions(+), 4 deletions(-) diff --git a/comfy_api_nodes/apis/bytedance.py b/comfy_api_nodes/apis/bytedance.py index eafabbefe..c05bd6893 100644 --- a/comfy_api_nodes/apis/bytedance.py +++ b/comfy_api_nodes/apis/bytedance.py @@ -157,6 +157,11 @@ class SeedanceCreateAssetResponse(BaseModel): asset_id: str = Field(...) +class SeedanceVirtualLibraryCreateAssetRequest(BaseModel): + url: str = Field(..., description="Publicly accessible URL of the image asset to upload.") + hash: str = Field(..., description="Dedup key. Re-submitting the same hash returns the existing asset id.") + + # Dollars per 1K tokens, keyed by (model_id, has_video_input). SEEDANCE2_PRICE_PER_1K_TOKENS = { ("dreamina-seedance-2-0-260128", False): 0.007, diff --git a/comfy_api_nodes/nodes_bytedance.py b/comfy_api_nodes/nodes_bytedance.py index de192c5ac..fee0ab888 100644 --- a/comfy_api_nodes/nodes_bytedance.py +++ b/comfy_api_nodes/nodes_bytedance.py @@ -1,3 +1,4 @@ +import hashlib import logging import math import re @@ -20,6 +21,7 @@ from comfy_api_nodes.apis.bytedance import ( SeedanceCreateAssetResponse, SeedanceCreateVisualValidateSessionResponse, SeedanceGetVisualValidateSessionResponse, + SeedanceVirtualLibraryCreateAssetRequest, Seedream4Options, Seedream4TaskCreationRequest, TaskAudioContent, @@ -271,6 +273,30 @@ async def _wait_for_asset_active(cls: type[IO.ComfyNode], asset_id: str, group_i ) +async def _seedance_virtual_library_upload_image_asset( + cls: type[IO.ComfyNode], + image: torch.Tensor, + *, + wait_label: str = "Uploading image", +) -> str: + """Upload an image into the caller's per-customer Seedance virtual library.""" + public_url = await upload_image_to_comfyapi(cls, image, wait_label=wait_label) + normalized = image.detach().cpu().contiguous().to(torch.float32) + digest = hashlib.sha256() + digest.update(str(tuple(normalized.shape)).encode("utf-8")) + digest.update(b"\0") + digest.update(normalized.numpy().tobytes()) + image_hash = digest.hexdigest() + create_resp = await sync_op( + cls, + ApiEndpoint(path="/proxy/seedance/virtual-library/assets", method="POST"), + response_model=SeedanceCreateAssetResponse, + data=SeedanceVirtualLibraryCreateAssetRequest(url=public_url, hash=image_hash), + ) + await _wait_for_asset_active(cls, create_resp.asset_id, group_id="virtual-library") + return f"asset://{create_resp.asset_id}" + + def _seedance2_price_extractor(model_id: str, has_video_input: bool): """Returns a price_extractor closure for Seedance 2.0 poll_op.""" rate = SEEDANCE2_PRICE_PER_1K_TOKENS.get((model_id, has_video_input)) @@ -1507,7 +1533,9 @@ class ByteDance2FirstLastFrameNode(IO.ComfyNode): if first_frame_asset_id: first_frame_url = image_assets[first_frame_asset_id] else: - first_frame_url = await upload_image_to_comfyapi(cls, first_frame, wait_label="Uploading 
first frame.") + first_frame_url = await _seedance_virtual_library_upload_image_asset( + cls, first_frame, wait_label="Uploading first frame." + ) content: list[TaskTextContent | TaskImageContent] = [ TaskTextContent(text=model["prompt"]), @@ -1527,7 +1555,9 @@ class ByteDance2FirstLastFrameNode(IO.ComfyNode): content.append( TaskImageContent( image_url=TaskImageContentUrl( - url=await upload_image_to_comfyapi(cls, last_frame, wait_label="Uploading last frame.") + url=await _seedance_virtual_library_upload_image_asset( + cls, last_frame, wait_label="Uploading last frame." + ) ), role="last_frame", ), @@ -1805,9 +1835,9 @@ class ByteDance2ReferenceNode(IO.ComfyNode): content.append( TaskImageContent( image_url=TaskImageContentUrl( - url=await upload_image_to_comfyapi( + url=await _seedance_virtual_library_upload_image_asset( cls, - image=reference_images[key], + reference_images[key], wait_label=f"Uploading image {i}", ), ), From e6e0936128858608c5cc45585be3583176d748b2 Mon Sep 17 00:00:00 2001 From: comfyanonymous <121283862+comfyanonymous@users.noreply.github.com> Date: Thu, 30 Apr 2026 16:33:09 -0700 Subject: [PATCH 53/81] Load other jpeg formats without taking so much memory. (#13642) --- comfy_api/latest/_input_impl/video_types.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/comfy_api/latest/_input_impl/video_types.py b/comfy_api/latest/_input_impl/video_types.py index 9a107fb76..942278d88 100644 --- a/comfy_api/latest/_input_impl/video_types.py +++ b/comfy_api/latest/_input_impl/video_types.py @@ -290,7 +290,7 @@ class VideoFromFile(VideoInput): alphas = [] alpha_channel = True break - if frame.format.name in ("yuvj420p", "rgb24", "rgba", "pal8"): + if frame.format.name in ("yuvj420p", "yuvj422p", "yuvj444p", "rgb24", "rgba", "pal8"): process_image_format = lambda a: a.float() / 255.0 if alpha_channel: image_format = 'rgba' From e9c311b2458a327585a7e387558377c8190eebb0 Mon Sep 17 00:00:00 2001 From: Rainer Date: Fri, 1 May 2026 02:33:41 +0300 Subject: [PATCH 54/81] OneTainer ERNIE LoRA support (#13640) --- comfy/lora.py | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/comfy/lora.py b/comfy/lora.py index 63ee85323..e4337c729 100644 --- a/comfy/lora.py +++ b/comfy/lora.py @@ -342,6 +342,12 @@ def model_lora_keys_unet(model, key_map={}): key_map["base_model.model.{}".format(key_lora)] = k # Official base model loras key_map["lycoris_{}".format(key_lora.replace(".", "_"))] = k # LyCORIS/LoKR format + if isinstance(model, comfy.model_base.ErnieImage): + for k in sdk: + if k.startswith("diffusion_model.") and k.endswith(".weight"): + key_lora = k[len("diffusion_model."):-len(".weight")] + key_map["transformer.{}".format(key_lora)] = k + return key_map From e8e8fee22476a926090df9f719acd0a553ff8165 Mon Sep 17 00:00:00 2001 From: "Daxiong (Lin)" Date: Fri, 1 May 2026 09:14:28 +0800 Subject: [PATCH 55/81] chore: update workflow templates to v0.9.65 (#13644) --- requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/requirements.txt b/requirements.txt index c3d51e2fa..cb85d970b 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,5 +1,5 @@ comfyui-frontend-package==1.42.15 -comfyui-workflow-templates==0.9.63 +comfyui-workflow-templates==0.9.65 comfyui-embedded-docs==0.4.4 torch torchsde From 97f58baaaf89e2232b735fab2a3f2d4e24d134c3 Mon Sep 17 00:00:00 2001 From: Jedrzej Kosinski Date: Thu, 30 Apr 2026 18:49:31 -0700 Subject: [PATCH 56/81] Add alexisrolland and rattus128 as code owners (#13648) --- CODEOWNERS | 2 +- 1 file changed, 1 
insertion(+), 1 deletion(-) diff --git a/CODEOWNERS b/CODEOWNERS index 4d5448636..e693955a0 100644 --- a/CODEOWNERS +++ b/CODEOWNERS @@ -1,2 +1,2 @@ # Admins -* @comfyanonymous @kosinkadink @guill +* @comfyanonymous @kosinkadink @guill @alexisrolland @rattus128 From 96f1cee9f5304c1f4e3a176ed02a44cf0a0116ad Mon Sep 17 00:00:00 2001 From: Alexander Piskun <13381981+bigcat88@users.noreply.github.com> Date: Fri, 1 May 2026 09:15:11 +0300 Subject: [PATCH 57/81] chore(api-nodes): always display the custom width and height in GPTImage2 node (#13651) Signed-off-by: bigcat88 --- comfy_api_nodes/nodes_openai.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/comfy_api_nodes/nodes_openai.py b/comfy_api_nodes/nodes_openai.py index 843681817..21fe470ce 100644 --- a/comfy_api_nodes/nodes_openai.py +++ b/comfy_api_nodes/nodes_openai.py @@ -454,7 +454,6 @@ class OpenAIGPTImage1(IO.ComfyNode): step=16, tooltip="Used only when `size` is 'Custom'. Must be a multiple of 16 (GPT Image 2 only).", optional=True, - advanced=True, ), IO.Int.Input( "custom_height", @@ -464,7 +463,6 @@ class OpenAIGPTImage1(IO.ComfyNode): step=16, tooltip="Used only when `size` is 'Custom'. Must be a multiple of 16 (GPT Image 2 only).", optional=True, - advanced=True, ), ], outputs=[ From cf9cbec5960e38368393137419637f6b9ca7691b Mon Sep 17 00:00:00 2001 From: Talmaj Date: Fri, 1 May 2026 11:20:11 +0200 Subject: [PATCH 58/81] Reformat models variable into multiline array CORE-59 (#13513) Co-authored-by: Talmaj Marinc --- comfy/supported_models.py | 84 ++++++++++++++++++++++++++++++++++++++- 1 file changed, 82 insertions(+), 2 deletions(-) diff --git a/comfy/supported_models.py b/comfy/supported_models.py index 92d0305c5..e6c17fb98 100644 --- a/comfy/supported_models.py +++ b/comfy/supported_models.py @@ -1879,6 +1879,86 @@ class CogVideoX_I2V(CogVideoX_T2V): out = model_base.CogVideoX(self, image_to_video=True, device=device) return out -models = [LotusD, Stable_Zero123, SD15_instructpix2pix, SD15, SD20, SD21UnclipL, SD21UnclipH, SDXL_instructpix2pix, SDXLRefiner, SDXL, SSD1B, KOALA_700M, KOALA_1B, Segmind_Vega, SD_X4Upscaler, Stable_Cascade_C, Stable_Cascade_B, SV3D_u, SV3D_p, SD3, StableAudio, AuraFlow, PixArtAlpha, PixArtSigma, HunyuanDiT, HunyuanDiT1, FluxInpaint, Flux, LongCatImage, FluxSchnell, GenmoMochi, LTXV, LTXAV, HunyuanVideo15_SR_Distilled, HunyuanVideo15, HunyuanImage21Refiner, HunyuanImage21, HunyuanVideoSkyreelsI2V, HunyuanVideoI2V, HunyuanVideo, CosmosT2V, CosmosI2V, CosmosT2IPredict2, CosmosI2VPredict2, ZImagePixelSpace, ZImage, Lumina2, WAN22_T2V, WAN21_T2V, WAN21_I2V, WAN21_FunControl2V, WAN21_Vace, WAN21_Camera, WAN22_Camera, WAN22_S2V, WAN21_HuMo, WAN22_Animate, WAN21_FlowRVS, WAN21_SCAIL, Hunyuan3Dv2mini, Hunyuan3Dv2, Hunyuan3Dv2_1, HiDream, Chroma, ChromaRadiance, ACEStep, ACEStep15, Omnigen2, QwenImage, Flux2, Kandinsky5Image, Kandinsky5, Anima, RT_DETR_v4, ErnieImage, SAM3, SAM31, CogVideoX_I2V, CogVideoX_T2V] -models += [SVD_img2vid] +models = [ + LotusD, + Stable_Zero123, + SD15_instructpix2pix, + SD15, + SD20, + SD21UnclipL, + SD21UnclipH, + SDXL_instructpix2pix, + SDXLRefiner, + SDXL, + SSD1B, + KOALA_700M, + KOALA_1B, + Segmind_Vega, + SD_X4Upscaler, + Stable_Cascade_C, + Stable_Cascade_B, + SV3D_u, + SV3D_p, + SD3, + StableAudio, + AuraFlow, + PixArtAlpha, + PixArtSigma, + HunyuanDiT, + HunyuanDiT1, + FluxInpaint, + Flux, + LongCatImage, + FluxSchnell, + GenmoMochi, + LTXV, + LTXAV, + HunyuanVideo15_SR_Distilled, + HunyuanVideo15, + HunyuanImage21Refiner, + HunyuanImage21, + HunyuanVideoSkyreelsI2V, 
+ HunyuanVideoI2V, + HunyuanVideo, + CosmosT2V, + CosmosI2V, + CosmosT2IPredict2, + CosmosI2VPredict2, + ZImagePixelSpace, + ZImage, + Lumina2, + WAN22_T2V, + WAN21_T2V, + WAN21_I2V, + WAN21_FunControl2V, + WAN21_Vace, + WAN21_Camera, + WAN22_Camera, + WAN22_S2V, + WAN21_HuMo, + WAN22_Animate, + WAN21_FlowRVS, + WAN21_SCAIL, + Hunyuan3Dv2mini, + Hunyuan3Dv2, + Hunyuan3Dv2_1, + HiDream, + Chroma, + ChromaRadiance, + ACEStep, + ACEStep15, + Omnigen2, + QwenImage, + Flux2, + Kandinsky5Image, + Kandinsky5, + Anima, + RT_DETR_v4, + ErnieImage, + SAM3, + SAM31, + CogVideoX_I2V, + CogVideoX_T2V, + SVD_img2vid, +] From fa7553138e3c75befe6aaf988048d4a0a95c1a32 Mon Sep 17 00:00:00 2001 From: Alexander Piskun <13381981+bigcat88@users.noreply.github.com> Date: Fri, 1 May 2026 21:09:25 +0300 Subject: [PATCH 59/81] chore(api-nodes): remove Moonvalley API nodes (#13659) Signed-off-by: bigcat88 --- comfy_api_nodes/apis/moonvalley.py | 152 -------- comfy_api_nodes/nodes_moonvalley.py | 534 ---------------------------- 2 files changed, 686 deletions(-) delete mode 100644 comfy_api_nodes/apis/moonvalley.py delete mode 100644 comfy_api_nodes/nodes_moonvalley.py diff --git a/comfy_api_nodes/apis/moonvalley.py b/comfy_api_nodes/apis/moonvalley.py deleted file mode 100644 index 7ec7a4ade..000000000 --- a/comfy_api_nodes/apis/moonvalley.py +++ /dev/null @@ -1,152 +0,0 @@ -from enum import Enum -from typing import Optional, Dict, Any - -from pydantic import BaseModel, Field, StrictBytes - - -class MoonvalleyPromptResponse(BaseModel): - error: Optional[Dict[str, Any]] = None - frame_conditioning: Optional[Dict[str, Any]] = None - id: Optional[str] = None - inference_params: Optional[Dict[str, Any]] = None - meta: Optional[Dict[str, Any]] = None - model_params: Optional[Dict[str, Any]] = None - output_url: Optional[str] = None - prompt_text: Optional[str] = None - status: Optional[str] = None - - -class MoonvalleyTextToVideoInferenceParams(BaseModel): - add_quality_guidance: Optional[bool] = Field( - True, description='Whether to add quality guidance' - ) - caching_coefficient: Optional[float] = Field( - 0.3, description='Caching coefficient for optimization' - ) - caching_cooldown: Optional[int] = Field( - 3, description='Number of caching cooldown steps' - ) - caching_warmup: Optional[int] = Field( - 3, description='Number of caching warmup steps' - ) - clip_value: Optional[float] = Field( - 3, description='CLIP value for generation control' - ) - conditioning_frame_index: Optional[int] = Field( - 0, description='Index of the conditioning frame' - ) - cooldown_steps: Optional[int] = Field( - 75, description='Number of cooldown steps (calculated based on num_frames)' - ) - fps: Optional[int] = Field( - 24, description='Frames per second of the generated video' - ) - guidance_scale: Optional[float] = Field( - 10, description='Guidance scale for generation control' - ) - height: Optional[int] = Field( - 1080, description='Height of the generated video in pixels' - ) - negative_prompt: Optional[str] = Field(None, description='Negative prompt text') - num_frames: Optional[int] = Field(64, description='Number of frames to generate') - seed: Optional[int] = Field( - None, description='Random seed for generation (default: random)' - ) - shift_value: Optional[float] = Field( - 3, description='Shift value for generation control' - ) - steps: Optional[int] = Field(80, description='Number of denoising steps') - use_guidance_schedule: Optional[bool] = Field( - True, description='Whether to use guidance scheduling' - ) - 
use_negative_prompts: Optional[bool] = Field( - False, description='Whether to use negative prompts' - ) - use_timestep_transform: Optional[bool] = Field( - True, description='Whether to use timestep transformation' - ) - warmup_steps: Optional[int] = Field( - 0, description='Number of warmup steps (calculated based on num_frames)' - ) - width: Optional[int] = Field( - 1920, description='Width of the generated video in pixels' - ) - - -class MoonvalleyTextToVideoRequest(BaseModel): - image_url: Optional[str] = None - inference_params: Optional[MoonvalleyTextToVideoInferenceParams] = None - prompt_text: Optional[str] = None - webhook_url: Optional[str] = None - - -class MoonvalleyUploadFileRequest(BaseModel): - file: Optional[StrictBytes] = None - - -class MoonvalleyUploadFileResponse(BaseModel): - access_url: Optional[str] = None - - -class MoonvalleyVideoToVideoInferenceParams(BaseModel): - add_quality_guidance: Optional[bool] = Field( - True, description='Whether to add quality guidance' - ) - caching_coefficient: Optional[float] = Field( - 0.3, description='Caching coefficient for optimization' - ) - caching_cooldown: Optional[int] = Field( - 3, description='Number of caching cooldown steps' - ) - caching_warmup: Optional[int] = Field( - 3, description='Number of caching warmup steps' - ) - clip_value: Optional[float] = Field( - 3, description='CLIP value for generation control' - ) - conditioning_frame_index: Optional[int] = Field( - 0, description='Index of the conditioning frame' - ) - cooldown_steps: Optional[int] = Field( - 36, description='Number of cooldown steps (calculated based on num_frames)' - ) - guidance_scale: Optional[float] = Field( - 15, description='Guidance scale for generation control' - ) - negative_prompt: Optional[str] = Field(None, description='Negative prompt text') - seed: Optional[int] = Field( - None, description='Random seed for generation (default: random)' - ) - shift_value: Optional[float] = Field( - 3, description='Shift value for generation control' - ) - steps: Optional[int] = Field(80, description='Number of denoising steps') - use_guidance_schedule: Optional[bool] = Field( - True, description='Whether to use guidance scheduling' - ) - use_negative_prompts: Optional[bool] = Field( - False, description='Whether to use negative prompts' - ) - use_timestep_transform: Optional[bool] = Field( - True, description='Whether to use timestep transformation' - ) - warmup_steps: Optional[int] = Field( - 24, description='Number of warmup steps (calculated based on num_frames)' - ) - - -class ControlType(str, Enum): - motion_control = 'motion_control' - pose_control = 'pose_control' - - -class MoonvalleyVideoToVideoRequest(BaseModel): - control_type: ControlType = Field( - ..., description='Supported types for video control' - ) - inference_params: Optional[MoonvalleyVideoToVideoInferenceParams] = None - prompt_text: str = Field(..., description='Describes the video to generate') - video_url: str = Field(..., description='Url to control video') - webhook_url: Optional[str] = Field( - None, description='Optional webhook URL for notifications' - ) diff --git a/comfy_api_nodes/nodes_moonvalley.py b/comfy_api_nodes/nodes_moonvalley.py deleted file mode 100644 index 78a230529..000000000 --- a/comfy_api_nodes/nodes_moonvalley.py +++ /dev/null @@ -1,534 +0,0 @@ -import logging - -from typing_extensions import override - -from comfy_api.latest import IO, ComfyExtension, Input -from comfy_api_nodes.apis.moonvalley import ( - MoonvalleyPromptResponse, - 
MoonvalleyTextToVideoInferenceParams, - MoonvalleyTextToVideoRequest, - MoonvalleyVideoToVideoInferenceParams, - MoonvalleyVideoToVideoRequest, -) -from comfy_api_nodes.util import ( - ApiEndpoint, - download_url_to_video_output, - poll_op, - sync_op, - trim_video, - upload_images_to_comfyapi, - upload_video_to_comfyapi, - validate_container_format_is_mp4, - validate_image_dimensions, - validate_string, -) - -API_UPLOADS_ENDPOINT = "/proxy/moonvalley/uploads" -API_PROMPTS_ENDPOINT = "/proxy/moonvalley/prompts" -API_VIDEO2VIDEO_ENDPOINT = "/proxy/moonvalley/prompts/video-to-video" -API_TXT2VIDEO_ENDPOINT = "/proxy/moonvalley/prompts/text-to-video" -API_IMG2VIDEO_ENDPOINT = "/proxy/moonvalley/prompts/image-to-video" - -MIN_WIDTH = 300 -MIN_HEIGHT = 300 - -MAX_WIDTH = 10000 -MAX_HEIGHT = 10000 - -MIN_VID_WIDTH = 300 -MIN_VID_HEIGHT = 300 - -MAX_VID_WIDTH = 10000 -MAX_VID_HEIGHT = 10000 - -MAX_VIDEO_SIZE = 1024 * 1024 * 1024 # 1 GB max for in-memory video processing - -MOONVALLEY_MAREY_MAX_PROMPT_LENGTH = 5000 - - -def is_valid_task_creation_response(response: MoonvalleyPromptResponse) -> bool: - """Verifies that the initial response contains a task ID.""" - return bool(response.id) - - -def validate_task_creation_response(response) -> None: - if not is_valid_task_creation_response(response): - error_msg = f"Moonvalley Marey API: Initial request failed. Code: {response.code}, Message: {response.message}, Data: {response}" - logging.error(error_msg) - raise RuntimeError(error_msg) - - -def validate_video_to_video_input(video: Input.Video) -> Input.Video: - """ - Validates and processes video input for Moonvalley Video-to-Video generation. - - Args: - video: Input video to validate - - Returns: - Validated and potentially trimmed video - - Raises: - ValueError: If video doesn't meet requirements - MoonvalleyApiError: If video duration is too short - """ - width, height = _get_video_dimensions(video) - _validate_video_dimensions(width, height) - validate_container_format_is_mp4(video) - - return _validate_and_trim_duration(video) - - -def _get_video_dimensions(video: Input.Video) -> tuple[int, int]: - """Extracts video dimensions with error handling.""" - try: - return video.get_dimensions() - except Exception as e: - logging.error("Error getting dimensions of video: %s", e) - raise ValueError(f"Cannot get video dimensions: {e}") from e - - -def _validate_video_dimensions(width: int, height: int) -> None: - """Validates video dimensions meet Moonvalley V2V requirements.""" - supported_resolutions = { - (1920, 1080), - (1080, 1920), - (1152, 1152), - (1536, 1152), - (1152, 1536), - } - - if (width, height) not in supported_resolutions: - supported_list = ", ".join([f"{w}x{h}" for w, h in sorted(supported_resolutions)]) - raise ValueError(f"Resolution {width}x{height} not supported. 
Supported: {supported_list}") - - -def _validate_and_trim_duration(video: Input.Video) -> Input.Video: - """Validates video duration and trims to 5 seconds if needed.""" - duration = video.get_duration() - _validate_minimum_duration(duration) - return _trim_if_too_long(video, duration) - - -def _validate_minimum_duration(duration: float) -> None: - """Ensures video is at least 5 seconds long.""" - if duration < 5: - raise ValueError("Input video must be at least 5 seconds long.") - - -def _trim_if_too_long(video: Input.Video, duration: float) -> Input.Video: - """Trims video to 5 seconds if longer.""" - if duration > 5: - return trim_video(video, 5) - return video - - -def parse_width_height_from_res(resolution: str): - # Accepts a string like "16:9 (1920 x 1080)" and returns width, height as a dict - res_map = { - "16:9 (1920 x 1080)": {"width": 1920, "height": 1080}, - "9:16 (1080 x 1920)": {"width": 1080, "height": 1920}, - "1:1 (1152 x 1152)": {"width": 1152, "height": 1152}, - "4:3 (1536 x 1152)": {"width": 1536, "height": 1152}, - "3:4 (1152 x 1536)": {"width": 1152, "height": 1536}, - # "21:9 (2560 x 1080)": {"width": 2560, "height": 1080}, - } - return res_map.get(resolution, {"width": 1920, "height": 1080}) - - -def parse_control_parameter(value): - control_map = { - "Motion Transfer": "motion_control", - "Canny": "canny_control", - "Pose Transfer": "pose_control", - "Depth": "depth_control", - } - return control_map.get(value, control_map["Motion Transfer"]) - - -async def get_response(cls: type[IO.ComfyNode], task_id: str) -> MoonvalleyPromptResponse: - return await poll_op( - cls, - ApiEndpoint(path=f"{API_PROMPTS_ENDPOINT}/{task_id}"), - response_model=MoonvalleyPromptResponse, - status_extractor=lambda r: (r.status if r and r.status else None), - poll_interval=16.0, - max_poll_attempts=240, - ) - - -class MoonvalleyImg2VideoNode(IO.ComfyNode): - - @classmethod - def define_schema(cls) -> IO.Schema: - return IO.Schema( - node_id="MoonvalleyImg2VideoNode", - display_name="Moonvalley Marey Image to Video", - category="api node/video/Moonvalley Marey", - description="Moonvalley Marey Image to Video Node", - inputs=[ - IO.Image.Input( - "image", - tooltip="The reference image used to generate the video", - ), - IO.String.Input( - "prompt", - multiline=True, - ), - IO.String.Input( - "negative_prompt", - multiline=True, - default=" gopro, bright, contrast, static, overexposed, vignette, " - "artifacts, still, noise, texture, scanlines, videogame, 360 camera, VR, transition, " - "flare, saturation, distorted, warped, wide angle, saturated, vibrant, glowing, " - "cross dissolve, cheesy, ugly hands, mutated hands, mutant, disfigured, extra fingers, " - "blown out, horrible, blurry, worst quality, bad, dissolve, melt, fade in, fade out, " - "wobbly, weird, low quality, plastic, stock footage, video camera, boring", - tooltip="Negative prompt text", - ), - IO.Combo.Input( - "resolution", - options=[ - "16:9 (1920 x 1080)", - "9:16 (1080 x 1920)", - "1:1 (1152 x 1152)", - "4:3 (1536 x 1152)", - "3:4 (1152 x 1536)", - # "21:9 (2560 x 1080)", - ], - default="16:9 (1920 x 1080)", - tooltip="Resolution of the output video", - ), - IO.Float.Input( - "prompt_adherence", - default=4.5, - min=1.0, - max=20.0, - step=1.0, - tooltip="Guidance scale for generation control", - ), - IO.Int.Input( - "seed", - default=9, - min=0, - max=4294967295, - step=1, - display_mode=IO.NumberDisplay.number, - tooltip="Random seed value", - control_after_generate=True, - ), - IO.Int.Input( - "steps", - default=80, 
- min=75, # steps should be greater or equal to cooldown_steps(75) + warmup_steps(0) - max=100, - step=1, - tooltip="Number of denoising steps", - ), - ], - outputs=[IO.Video.Output()], - hidden=[ - IO.Hidden.auth_token_comfy_org, - IO.Hidden.api_key_comfy_org, - IO.Hidden.unique_id, - ], - is_api_node=True, - price_badge=IO.PriceBadge( - depends_on=IO.PriceBadgeDepends(), - expr="""{"type":"usd","usd": 1.5}""", - ), - ) - - @classmethod - async def execute( - cls, - image: Input.Image, - prompt: str, - negative_prompt: str, - resolution: str, - prompt_adherence: float, - seed: int, - steps: int, - ) -> IO.NodeOutput: - validate_image_dimensions(image, min_width=300, min_height=300, max_height=MAX_HEIGHT, max_width=MAX_WIDTH) - validate_string(prompt, min_length=1, max_length=MOONVALLEY_MAREY_MAX_PROMPT_LENGTH) - validate_string(negative_prompt, field_name="negative_prompt", max_length=MOONVALLEY_MAREY_MAX_PROMPT_LENGTH) - width_height = parse_width_height_from_res(resolution) - - inference_params = MoonvalleyTextToVideoInferenceParams( - negative_prompt=negative_prompt, - steps=steps, - seed=seed, - guidance_scale=prompt_adherence, - width=width_height["width"], - height=width_height["height"], - use_negative_prompts=True, - ) - - # Get MIME type from tensor - assuming PNG format for image tensors - mime_type = "image/png" - image_url = (await upload_images_to_comfyapi(cls, image, max_images=1, mime_type=mime_type))[0] - task_creation_response = await sync_op( - cls, - endpoint=ApiEndpoint(path=API_IMG2VIDEO_ENDPOINT, method="POST"), - response_model=MoonvalleyPromptResponse, - data=MoonvalleyTextToVideoRequest( - image_url=image_url, prompt_text=prompt, inference_params=inference_params - ), - ) - validate_task_creation_response(task_creation_response) - final_response = await get_response(cls, task_creation_response.id) - video = await download_url_to_video_output(final_response.output_url) - return IO.NodeOutput(video) - - -class MoonvalleyVideo2VideoNode(IO.ComfyNode): - - @classmethod - def define_schema(cls) -> IO.Schema: - return IO.Schema( - node_id="MoonvalleyVideo2VideoNode", - display_name="Moonvalley Marey Video to Video", - category="api node/video/Moonvalley Marey", - description="", - inputs=[ - IO.String.Input( - "prompt", - multiline=True, - tooltip="Describes the video to generate", - ), - IO.String.Input( - "negative_prompt", - multiline=True, - default=" gopro, bright, contrast, static, overexposed, vignette, " - "artifacts, still, noise, texture, scanlines, videogame, 360 camera, VR, transition, " - "flare, saturation, distorted, warped, wide angle, saturated, vibrant, glowing, " - "cross dissolve, cheesy, ugly hands, mutated hands, mutant, disfigured, extra fingers, " - "blown out, horrible, blurry, worst quality, bad, dissolve, melt, fade in, fade out, " - "wobbly, weird, low quality, plastic, stock footage, video camera, boring", - tooltip="Negative prompt text", - ), - IO.Int.Input( - "seed", - default=9, - min=0, - max=4294967295, - step=1, - display_mode=IO.NumberDisplay.number, - tooltip="Random seed value", - control_after_generate=False, - ), - IO.Video.Input( - "video", - tooltip="The reference video used to generate the output video. Must be at least 5 seconds long. " - "Videos longer than 5s will be automatically trimmed. 
Only MP4 format supported.", - ), - IO.Combo.Input( - "control_type", - options=["Motion Transfer", "Pose Transfer"], - default="Motion Transfer", - optional=True, - ), - IO.Int.Input( - "motion_intensity", - default=100, - min=0, - max=100, - step=1, - tooltip="Only used if control_type is 'Motion Transfer'", - optional=True, - ), - IO.Int.Input( - "steps", - default=60, - min=60, # steps should be greater or equal to cooldown_steps(36) + warmup_steps(24) - max=100, - step=1, - display_mode=IO.NumberDisplay.number, - tooltip="Number of inference steps", - ), - ], - outputs=[IO.Video.Output()], - hidden=[ - IO.Hidden.auth_token_comfy_org, - IO.Hidden.api_key_comfy_org, - IO.Hidden.unique_id, - ], - is_api_node=True, - price_badge=IO.PriceBadge( - depends_on=IO.PriceBadgeDepends(), - expr="""{"type":"usd","usd": 2.25}""", - ), - ) - - @classmethod - async def execute( - cls, - prompt: str, - negative_prompt: str, - seed: int, - video: Input.Video | None = None, - control_type: str = "Motion Transfer", - motion_intensity: int | None = 100, - steps=60, - prompt_adherence=4.5, - ) -> IO.NodeOutput: - validated_video = validate_video_to_video_input(video) - video_url = await upload_video_to_comfyapi(cls, validated_video) - validate_string(prompt, min_length=1, max_length=MOONVALLEY_MAREY_MAX_PROMPT_LENGTH) - validate_string(negative_prompt, field_name="negative_prompt", max_length=MOONVALLEY_MAREY_MAX_PROMPT_LENGTH) - - # Only include motion_intensity for Motion Transfer - control_params = {} - if control_type == "Motion Transfer" and motion_intensity is not None: - control_params["motion_intensity"] = motion_intensity - - inference_params = MoonvalleyVideoToVideoInferenceParams( - negative_prompt=negative_prompt, - seed=seed, - control_params=control_params, - steps=steps, - guidance_scale=prompt_adherence, - ) - - task_creation_response = await sync_op( - cls, - endpoint=ApiEndpoint(path=API_VIDEO2VIDEO_ENDPOINT, method="POST"), - response_model=MoonvalleyPromptResponse, - data=MoonvalleyVideoToVideoRequest( - control_type=parse_control_parameter(control_type), - video_url=video_url, - prompt_text=prompt, - inference_params=inference_params, - ), - ) - validate_task_creation_response(task_creation_response) - final_response = await get_response(cls, task_creation_response.id) - return IO.NodeOutput(await download_url_to_video_output(final_response.output_url)) - - -class MoonvalleyTxt2VideoNode(IO.ComfyNode): - - @classmethod - def define_schema(cls) -> IO.Schema: - return IO.Schema( - node_id="MoonvalleyTxt2VideoNode", - display_name="Moonvalley Marey Text to Video", - category="api node/video/Moonvalley Marey", - description="", - inputs=[ - IO.String.Input( - "prompt", - multiline=True, - ), - IO.String.Input( - "negative_prompt", - multiline=True, - default=" gopro, bright, contrast, static, overexposed, vignette, " - "artifacts, still, noise, texture, scanlines, videogame, 360 camera, VR, transition, " - "flare, saturation, distorted, warped, wide angle, saturated, vibrant, glowing, " - "cross dissolve, cheesy, ugly hands, mutated hands, mutant, disfigured, extra fingers, " - "blown out, horrible, blurry, worst quality, bad, dissolve, melt, fade in, fade out, " - "wobbly, weird, low quality, plastic, stock footage, video camera, boring", - tooltip="Negative prompt text", - ), - IO.Combo.Input( - "resolution", - options=[ - "16:9 (1920 x 1080)", - "9:16 (1080 x 1920)", - "1:1 (1152 x 1152)", - "4:3 (1536 x 1152)", - "3:4 (1152 x 1536)", - "21:9 (2560 x 1080)", - ], - default="16:9 (1920 x 
1080)", - tooltip="Resolution of the output video", - ), - IO.Float.Input( - "prompt_adherence", - default=4.0, - min=1.0, - max=20.0, - step=1.0, - tooltip="Guidance scale for generation control", - ), - IO.Int.Input( - "seed", - default=9, - min=0, - max=4294967295, - step=1, - display_mode=IO.NumberDisplay.number, - control_after_generate=True, - tooltip="Random seed value", - ), - IO.Int.Input( - "steps", - default=80, - min=75, # steps should be greater or equal to cooldown_steps(75) + warmup_steps(0) - max=100, - step=1, - tooltip="Inference steps", - ), - ], - outputs=[IO.Video.Output()], - hidden=[ - IO.Hidden.auth_token_comfy_org, - IO.Hidden.api_key_comfy_org, - IO.Hidden.unique_id, - ], - is_api_node=True, - price_badge=IO.PriceBadge( - depends_on=IO.PriceBadgeDepends(), - expr="""{"type":"usd","usd": 1.5}""", - ), - ) - - @classmethod - async def execute( - cls, - prompt: str, - negative_prompt: str, - resolution: str, - prompt_adherence: float, - seed: int, - steps: int, - ) -> IO.NodeOutput: - validate_string(prompt, min_length=1, max_length=MOONVALLEY_MAREY_MAX_PROMPT_LENGTH) - validate_string(negative_prompt, field_name="negative_prompt", max_length=MOONVALLEY_MAREY_MAX_PROMPT_LENGTH) - width_height = parse_width_height_from_res(resolution) - - inference_params = MoonvalleyTextToVideoInferenceParams( - negative_prompt=negative_prompt, - steps=steps, - seed=seed, - guidance_scale=prompt_adherence, - num_frames=128, - width=width_height["width"], - height=width_height["height"], - ) - - task_creation_response = await sync_op( - cls, - endpoint=ApiEndpoint(path=API_TXT2VIDEO_ENDPOINT, method="POST"), - response_model=MoonvalleyPromptResponse, - data=MoonvalleyTextToVideoRequest(prompt_text=prompt, inference_params=inference_params), - ) - validate_task_creation_response(task_creation_response) - final_response = await get_response(cls, task_creation_response.id) - return IO.NodeOutput(await download_url_to_video_output(final_response.output_url)) - - -class MoonvalleyExtension(ComfyExtension): - @override - async def get_node_list(self) -> list[type[IO.ComfyNode]]: - return [ - MoonvalleyImg2VideoNode, - MoonvalleyTxt2VideoNode, - MoonvalleyVideo2VideoNode, - ] - - -async def comfy_entrypoint() -> MoonvalleyExtension: - return MoonvalleyExtension() From 10b45a71cdac2898693bb42aa0a21e2cb23a2daa Mon Sep 17 00:00:00 2001 From: "Daxiong (Lin)" Date: Sat, 2 May 2026 03:11:30 +0800 Subject: [PATCH 60/81] chore: update workflow templates to v0.9.66 (#13662) Co-authored-by: Jedrzej Kosinski --- requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/requirements.txt b/requirements.txt index cb85d970b..932034076 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,5 +1,5 @@ comfyui-frontend-package==1.42.15 -comfyui-workflow-templates==0.9.65 +comfyui-workflow-templates==0.9.66 comfyui-embedded-docs==0.4.4 torch torchsde From cf758bd2566a04a156496fa77ec2c7fa76ff8873 Mon Sep 17 00:00:00 2001 From: Alexander Piskun <13381981+bigcat88@users.noreply.github.com> Date: Fri, 1 May 2026 22:48:41 +0300 Subject: [PATCH 61/81] chore(api-nodes): increase default timeout for partner API node tasks (#13663) Signed-off-by: bigcat88 Co-authored-by: Jedrzej Kosinski --- comfy_api_nodes/nodes_bytedance.py | 3 --- comfy_api_nodes/nodes_hitpaw.py | 2 -- comfy_api_nodes/nodes_kling.py | 3 --- comfy_api_nodes/nodes_magnific.py | 5 ----- comfy_api_nodes/nodes_topaz.py | 1 - comfy_api_nodes/nodes_vidu.py | 3 +-- comfy_api_nodes/nodes_wan.py | 1 - 
comfy_api_nodes/nodes_wavespeed.py | 2 -- comfy_api_nodes/util/client.py | 4 ++-- 9 files changed, 3 insertions(+), 21 deletions(-) diff --git a/comfy_api_nodes/nodes_bytedance.py b/comfy_api_nodes/nodes_bytedance.py index fee0ab888..2f241a775 100644 --- a/comfy_api_nodes/nodes_bytedance.py +++ b/comfy_api_nodes/nodes_bytedance.py @@ -1403,7 +1403,6 @@ class ByteDance2TextToVideoNode(IO.ComfyNode): status_extractor=lambda r: r.status, price_extractor=_seedance2_price_extractor(model_id, has_video_input=False), poll_interval=9, - max_poll_attempts=180, ) return IO.NodeOutput(await download_url_to_video_output(response.content.video_url)) @@ -1585,7 +1584,6 @@ class ByteDance2FirstLastFrameNode(IO.ComfyNode): status_extractor=lambda r: r.status, price_extractor=_seedance2_price_extractor(model_id, has_video_input=False), poll_interval=9, - max_poll_attempts=180, ) return IO.NodeOutput(await download_url_to_video_output(response.content.video_url)) @@ -1907,7 +1905,6 @@ class ByteDance2ReferenceNode(IO.ComfyNode): status_extractor=lambda r: r.status, price_extractor=_seedance2_price_extractor(model_id, has_video_input=has_video_input), poll_interval=9, - max_poll_attempts=180, ) return IO.NodeOutput(await download_url_to_video_output(response.content.video_url)) diff --git a/comfy_api_nodes/nodes_hitpaw.py b/comfy_api_nodes/nodes_hitpaw.py index 488080a74..bca5170e4 100644 --- a/comfy_api_nodes/nodes_hitpaw.py +++ b/comfy_api_nodes/nodes_hitpaw.py @@ -178,7 +178,6 @@ class HitPawGeneralImageEnhance(IO.ComfyNode): status_extractor=lambda x: x.data.status, price_extractor=lambda x: request_price, poll_interval=10.0, - max_poll_attempts=480, ) return IO.NodeOutput(await download_url_to_image_tensor(final_response.data.res_url)) @@ -324,7 +323,6 @@ class HitPawVideoEnhance(IO.ComfyNode): status_extractor=lambda x: x.data.status, price_extractor=lambda x: request_price, poll_interval=10.0, - max_poll_attempts=320, ) return IO.NodeOutput(await download_url_to_video_output(final_response.data.res_url)) diff --git a/comfy_api_nodes/nodes_kling.py b/comfy_api_nodes/nodes_kling.py index 709b3726c..efd58fac3 100644 --- a/comfy_api_nodes/nodes_kling.py +++ b/comfy_api_nodes/nodes_kling.py @@ -276,7 +276,6 @@ async def finish_omni_video_task(cls: type[IO.ComfyNode], response: TaskStatusRe cls, ApiEndpoint(path=f"/proxy/kling/v1/videos/omni-video/{response.data.task_id}"), response_model=TaskStatusResponse, - max_poll_attempts=280, status_extractor=lambda r: (r.data.task_status if r.data else None), ) return IO.NodeOutput(await download_url_to_video_output(final_response.data.task_result.videos[0].url)) @@ -3062,7 +3061,6 @@ class KlingVideoNode(IO.ComfyNode): cls, ApiEndpoint(path=poll_path), response_model=TaskStatusResponse, - max_poll_attempts=280, status_extractor=lambda r: (r.data.task_status if r.data else None), ) return IO.NodeOutput(await download_url_to_video_output(final_response.data.task_result.videos[0].url)) @@ -3188,7 +3186,6 @@ class KlingFirstLastFrameNode(IO.ComfyNode): cls, ApiEndpoint(path=f"/proxy/kling/v1/videos/image2video/{response.data.task_id}"), response_model=TaskStatusResponse, - max_poll_attempts=280, status_extractor=lambda r: (r.data.task_status if r.data else None), ) return IO.NodeOutput(await download_url_to_video_output(final_response.data.task_result.videos[0].url)) diff --git a/comfy_api_nodes/nodes_magnific.py b/comfy_api_nodes/nodes_magnific.py index 0f53208d4..38b881fea 100644 --- a/comfy_api_nodes/nodes_magnific.py +++ b/comfy_api_nodes/nodes_magnific.py @@ 
-230,7 +230,6 @@ class MagnificImageUpscalerCreativeNode(IO.ComfyNode): status_extractor=lambda x: x.status, price_extractor=lambda _: price_usd, poll_interval=10.0, - max_poll_attempts=480, ) return IO.NodeOutput(await download_url_to_image_tensor(final_response.generated[0])) @@ -391,7 +390,6 @@ class MagnificImageUpscalerPreciseV2Node(IO.ComfyNode): status_extractor=lambda x: x.status, price_extractor=lambda _: price_usd, poll_interval=10.0, - max_poll_attempts=480, ) return IO.NodeOutput(await download_url_to_image_tensor(final_response.generated[0])) @@ -541,7 +539,6 @@ class MagnificImageStyleTransferNode(IO.ComfyNode): response_model=TaskResponse, status_extractor=lambda x: x.status, poll_interval=10.0, - max_poll_attempts=480, ) return IO.NodeOutput(await download_url_to_image_tensor(final_response.generated[0])) @@ -782,7 +779,6 @@ class MagnificImageRelightNode(IO.ComfyNode): response_model=TaskResponse, status_extractor=lambda x: x.status, poll_interval=10.0, - max_poll_attempts=480, ) return IO.NodeOutput(await download_url_to_image_tensor(final_response.generated[0])) @@ -924,7 +920,6 @@ class MagnificImageSkinEnhancerNode(IO.ComfyNode): response_model=TaskResponse, status_extractor=lambda x: x.status, poll_interval=10.0, - max_poll_attempts=480, ) return IO.NodeOutput(await download_url_to_image_tensor(final_response.generated[0])) diff --git a/comfy_api_nodes/nodes_topaz.py b/comfy_api_nodes/nodes_topaz.py index b18b31af1..fe3666ec9 100644 --- a/comfy_api_nodes/nodes_topaz.py +++ b/comfy_api_nodes/nodes_topaz.py @@ -453,7 +453,6 @@ class TopazVideoEnhance(IO.ComfyNode): progress_extractor=lambda x: getattr(x, "progress", 0), price_extractor=lambda x: (x.estimates.cost[0] * 0.08 if x.estimates and x.estimates.cost[0] else None), poll_interval=10.0, - max_poll_attempts=320, ) return IO.NodeOutput(await download_url_to_video_output(final_response.download.url)) diff --git a/comfy_api_nodes/nodes_vidu.py b/comfy_api_nodes/nodes_vidu.py index f04407eb5..8d90cefeb 100644 --- a/comfy_api_nodes/nodes_vidu.py +++ b/comfy_api_nodes/nodes_vidu.py @@ -38,7 +38,7 @@ async def execute_task( cls: type[IO.ComfyNode], vidu_endpoint: str, payload: TaskCreationRequest | TaskExtendCreationRequest | TaskMultiFrameCreationRequest, - max_poll_attempts: int = 320, + max_poll_attempts: int = 480, ) -> list[TaskResult]: task_creation_response = await sync_op( cls, @@ -1097,7 +1097,6 @@ class ViduExtendVideoNode(IO.ComfyNode): video_url=await upload_video_to_comfyapi(cls, video, wait_label="Uploading video"), images=[image_url] if image_url else None, ), - max_poll_attempts=480, ) return IO.NodeOutput(await download_url_to_video_output(results[0].url)) diff --git a/comfy_api_nodes/nodes_wan.py b/comfy_api_nodes/nodes_wan.py index 7d7466fb6..68061bb5c 100644 --- a/comfy_api_nodes/nodes_wan.py +++ b/comfy_api_nodes/nodes_wan.py @@ -818,7 +818,6 @@ class WanReferenceVideoApi(IO.ComfyNode): response_model=VideoTaskStatusResponse, status_extractor=lambda x: x.output.task_status, poll_interval=6, - max_poll_attempts=280, ) return IO.NodeOutput(await download_url_to_video_output(response.output.video_url)) diff --git a/comfy_api_nodes/nodes_wavespeed.py b/comfy_api_nodes/nodes_wavespeed.py index c59fafd3b..65e45f60a 100644 --- a/comfy_api_nodes/nodes_wavespeed.py +++ b/comfy_api_nodes/nodes_wavespeed.py @@ -84,7 +84,6 @@ class WavespeedFlashVSRNode(IO.ComfyNode): response_model=TaskResultResponse, status_extractor=lambda x: "failed" if x.data is None else x.data.status, poll_interval=10.0, - 
max_poll_attempts=480, ) if final_response.code != 200: raise ValueError( @@ -156,7 +155,6 @@ class WavespeedImageUpscaleNode(IO.ComfyNode): response_model=TaskResultResponse, status_extractor=lambda x: "failed" if x.data is None else x.data.status, poll_interval=10.0, - max_poll_attempts=480, ) if final_response.code != 200: raise ValueError( diff --git a/comfy_api_nodes/util/client.py b/comfy_api_nodes/util/client.py index b0cf97ae4..a0b8d35e1 100644 --- a/comfy_api_nodes/util/client.py +++ b/comfy_api_nodes/util/client.py @@ -148,7 +148,7 @@ async def poll_op( queued_statuses: list[str | int] | None = None, data: BaseModel | None = None, poll_interval: float = 5.0, - max_poll_attempts: int = 160, + max_poll_attempts: int = 480, timeout_per_poll: float = 120.0, max_retries_per_poll: int = 10, retry_delay_per_poll: float = 1.0, @@ -254,7 +254,7 @@ async def poll_op_raw( queued_statuses: list[str | int] | None = None, data: dict[str, Any] | BaseModel | None = None, poll_interval: float = 5.0, - max_poll_attempts: int = 160, + max_poll_attempts: int = 480, timeout_per_poll: float = 120.0, max_retries_per_poll: int = 10, retry_delay_per_poll: float = 1.0, From 63103d519ec960701438e8617452ef64b02609c7 Mon Sep 17 00:00:00 2001 From: Simon Lui <502929+simonlui@users.noreply.github.com> Date: Fri, 1 May 2026 14:16:41 -0700 Subject: [PATCH 62/81] Remove IPEX and clean up checks and add missing synchronize during empty cache. (#13653) --- comfy/cli_args.py | 1 - comfy/model_management.py | 18 +++--------------- 2 files changed, 3 insertions(+), 16 deletions(-) diff --git a/comfy/cli_args.py b/comfy/cli_args.py index dbaadf723..cef1a5e6b 100644 --- a/comfy/cli_args.py +++ b/comfy/cli_args.py @@ -90,7 +90,6 @@ parser.add_argument("--force-channels-last", action="store_true", help="Force ch parser.add_argument("--directml", type=int, nargs="?", metavar="DIRECTML_DEVICE", const=-1, help="Use torch-directml.") parser.add_argument("--oneapi-device-selector", type=str, default=None, metavar="SELECTOR_STRING", help="Sets the oneAPI device(s) this instance will use.") -parser.add_argument("--disable-ipex-optimize", action="store_true", help="Disables ipex.optimize default when loading models with Intel's Extension for Pytorch.") parser.add_argument("--supports-fp8-compute", action="store_true", help="ComfyUI will act like if the device supports fp8 compute.") class LatentPreviewMethod(enum.Enum): diff --git a/comfy/model_management.py b/comfy/model_management.py index 95af40012..f86e2a4aa 100644 --- a/comfy/model_management.py +++ b/comfy/model_management.py @@ -112,10 +112,6 @@ if args.directml is not None: # torch_directml.disable_tiled_resources(True) lowvram_available = False #TODO: need to find a way to get free memory in directml before this can be enabled by default. 
-try: - import intel_extension_for_pytorch as ipex # noqa: F401 -except: - pass try: _ = torch.xpu.device_count() @@ -583,9 +579,6 @@ class LoadedModel: real_model = self.model.model - if is_intel_xpu() and not args.disable_ipex_optimize and 'ipex' in globals() and real_model is not None: - with torch.no_grad(): - real_model = ipex.optimize(real_model.eval(), inplace=True, graph_mode=True, concat_linear=True) self.real_model = weakref.ref(real_model) self.model_finalizer = weakref.finalize(real_model, cleanup_models) @@ -1581,10 +1574,7 @@ def should_use_fp16(device=None, model_params=0, prioritize_performance=True, ma return False if is_intel_xpu(): - if torch_version_numeric < (2, 3): - return True - else: - return torch.xpu.get_device_properties(device).has_fp16 + return torch.xpu.get_device_properties(device).has_fp16 if is_ascend_npu(): return True @@ -1650,10 +1640,7 @@ def should_use_bf16(device=None, model_params=0, prioritize_performance=True, ma return False if is_intel_xpu(): - if torch_version_numeric < (2, 3): - return True - else: - return torch.xpu.is_bf16_supported() + return torch.xpu.is_bf16_supported() if is_ascend_npu(): return True @@ -1784,6 +1771,7 @@ def soft_empty_cache(force=False): if cpu_state == CPUState.MPS: torch.mps.empty_cache() elif is_intel_xpu(): + torch.xpu.synchronize() torch.xpu.empty_cache() elif is_ascend_npu(): torch.npu.empty_cache() From b5921c8ac2d3cd1171bb33245f4343b1471224ff Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jukka=20Sepp=C3=A4nen?= <40791699+kijai@users.noreply.github.com> Date: Sat, 2 May 2026 00:17:25 +0300 Subject: [PATCH 63/81] SDPose: resize fix (#13656) --- comfy_extras/nodes_sdpose.py | 38 ++++++++++++++++-------------------- 1 file changed, 17 insertions(+), 21 deletions(-) diff --git a/comfy_extras/nodes_sdpose.py b/comfy_extras/nodes_sdpose.py index 7d54967d5..96b6821bd 100644 --- a/comfy_extras/nodes_sdpose.py +++ b/comfy_extras/nodes_sdpose.py @@ -459,27 +459,23 @@ class SDPoseKeypointExtractor(io.ComfyNode): total_images = image.shape[0] captured_feat = None - model_h = int(head.heatmap_size[0]) * 4 # e.g. 192 * 4 = 768 - model_w = int(head.heatmap_size[1]) * 4 # e.g. 256 * 4 = 1024 + model_w = int(head.heatmap_size[0]) * 4 # 192 * 4 = 768 + model_h = int(head.heatmap_size[1]) * 4 # 256 * 4 = 1024 def _resize_to_model(imgs): - """Aspect-preserving resize + zero-pad BHWC images to (model_h, model_w). 
Returns (resized_bhwc, scale, pad_top, pad_left).""" + """Stretch BHWC images to (model_h, model_w), model expects no aspect preservation.""" h, w = imgs.shape[-3], imgs.shape[-2] - scale = min(model_h / h, model_w / w) - sh, sw = int(round(h * scale)), int(round(w * scale)) - pt, pl = (model_h - sh) // 2, (model_w - sw) // 2 + method = "area" if (model_h <= h and model_w <= w) else "bilinear" chw = imgs.permute(0, 3, 1, 2).float() - scaled = comfy.utils.common_upscale(chw, sw, sh, upscale_method="bilinear", crop="disabled") - padded = torch.zeros(scaled.shape[0], scaled.shape[1], model_h, model_w, dtype=scaled.dtype, device=scaled.device) - padded[:, :, pt:pt + sh, pl:pl + sw] = scaled - return padded.permute(0, 2, 3, 1), scale, pt, pl + scaled = comfy.utils.common_upscale(chw, model_w, model_h, upscale_method=method, crop="disabled") + return scaled.permute(0, 2, 3, 1), model_w / w, model_h / h - def _remap_keypoints(kp, scale, pad_top, pad_left, offset_x=0, offset_y=0): + def _remap_keypoints(kp, scale_x, scale_y, offset_x=0, offset_y=0): """Remap keypoints from model space back to original image space.""" kp = kp.copy() if isinstance(kp, np.ndarray) else np.array(kp, dtype=np.float32) invalid = kp[..., 0] < 0 - kp[..., 0] = (kp[..., 0] - pad_left) / scale + offset_x - kp[..., 1] = (kp[..., 1] - pad_top) / scale + offset_y + kp[..., 0] = kp[..., 0] / scale_x + offset_x + kp[..., 1] = kp[..., 1] / scale_y + offset_y kp[invalid] = -1 return kp @@ -529,18 +525,18 @@ class SDPoseKeypointExtractor(io.ComfyNode): continue crop = img[:, y1:y2, x1:x2, :] # (1, crop_h, crop_w, C) - crop_resized, scale, pad_top, pad_left = _resize_to_model(crop) + crop_resized, sx, sy = _resize_to_model(crop) latent_crop = vae.encode(crop_resized) kp_batch, sc_batch = _run_on_latent(latent_crop) - kp = _remap_keypoints(kp_batch[0], scale, pad_top, pad_left, x1, y1) + kp = _remap_keypoints(kp_batch[0], sx, sy, x1, y1) img_keypoints.append(kp) img_scores.append(sc_batch[0]) else: - img_resized, scale, pad_top, pad_left = _resize_to_model(img) + img_resized, sx, sy = _resize_to_model(img) latent_img = vae.encode(img_resized) kp_batch, sc_batch = _run_on_latent(latent_img) - img_keypoints.append(_remap_keypoints(kp_batch[0], scale, pad_top, pad_left)) + img_keypoints.append(_remap_keypoints(kp_batch[0], sx, sy)) img_scores.append(sc_batch[0]) all_keypoints.append(img_keypoints) @@ -549,12 +545,12 @@ class SDPoseKeypointExtractor(io.ComfyNode): else: # full-image mode, batched for batch_start in tqdm(range(0, total_images, batch_size), desc="Extracting keypoints"): - batch_resized, scale, pad_top, pad_left = _resize_to_model(image[batch_start:batch_start + batch_size]) + batch_resized, sx, sy = _resize_to_model(image[batch_start:batch_start + batch_size]) latent_batch = vae.encode(batch_resized) kp_batch, sc_batch = _run_on_latent(latent_batch) for kp, sc in zip(kp_batch, sc_batch): - all_keypoints.append([_remap_keypoints(kp, scale, pad_top, pad_left)]) + all_keypoints.append([_remap_keypoints(kp, sx, sy)]) all_scores.append([sc]) pbar.update(len(kp_batch)) @@ -727,13 +723,13 @@ class CropByBBoxes(io.ComfyNode): scale = min(output_width / crop_w, output_height / crop_h) scaled_w = int(round(crop_w * scale)) scaled_h = int(round(crop_h * scale)) - scaled = comfy.utils.common_upscale(crop_chw, scaled_w, scaled_h, upscale_method="bilinear", crop="disabled") + scaled = comfy.utils.common_upscale(crop_chw, scaled_w, scaled_h, upscale_method="area", crop="disabled") pad_left = (output_width - scaled_w) // 2 pad_top = 
(output_height - scaled_h) // 2 resized = torch.zeros(1, num_ch, output_height, output_width, dtype=image.dtype, device=image.device) resized[:, :, pad_top:pad_top + scaled_h, pad_left:pad_left + scaled_w] = scaled else: # "stretch" - resized = comfy.utils.common_upscale(crop_chw, output_width, output_height, upscale_method="bilinear", crop="disabled") + resized = comfy.utils.common_upscale(crop_chw, output_width, output_height, upscale_method="area", crop="disabled") crops.append(resized) if not crops: From 0230e0e7cc389979e509cd6237a7b9244798e69c Mon Sep 17 00:00:00 2001 From: Alexis Rolland Date: Sat, 2 May 2026 06:37:18 +0800 Subject: [PATCH 64/81] Adding kijai (#13664) Co-authored-by: Jedrzej Kosinski --- CODEOWNERS | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/CODEOWNERS b/CODEOWNERS index e693955a0..946dbf946 100644 --- a/CODEOWNERS +++ b/CODEOWNERS @@ -1,2 +1,2 @@ # Admins -* @comfyanonymous @kosinkadink @guill @alexisrolland @rattus128 +* @comfyanonymous @kosinkadink @guill @alexisrolland @rattus128 @kijai From 67f6cb35273d00278d2b1ef2a8c3efe21238f22d Mon Sep 17 00:00:00 2001 From: comfyanonymous <121283862+comfyanonymous@users.noreply.github.com> Date: Fri, 1 May 2026 17:19:32 -0700 Subject: [PATCH 65/81] List all the portable downloads in the README section. (#13666) --- README.md | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/README.md b/README.md index f05311421..3b5114633 100644 --- a/README.md +++ b/README.md @@ -193,13 +193,15 @@ If you have trouble extracting it, right click the file -> properties -> unblock The portable above currently comes with python 3.13 and pytorch cuda 13.0. Update your Nvidia drivers if it doesn't start. -#### Alternative Downloads: +#### All Official Portable Downloads: [Portable for AMD GPUs](https://github.com/comfyanonymous/ComfyUI/releases/latest/download/ComfyUI_windows_portable_amd.7z) -[Experimental portable for Intel GPUs](https://github.com/comfyanonymous/ComfyUI/releases/latest/download/ComfyUI_windows_portable_intel.7z) +[Portable for Intel GPUs](https://github.com/comfyanonymous/ComfyUI/releases/latest/download/ComfyUI_windows_portable_intel.7z) -[Portable with pytorch cuda 12.6 and python 3.12](https://github.com/comfyanonymous/ComfyUI/releases/latest/download/ComfyUI_windows_portable_nvidia_cu126.7z) (Supports Nvidia 10 series and older GPUs). +[Portable for Nvidia GPUs](https://github.com/comfyanonymous/ComfyUI/releases/latest/download/ComfyUI_windows_portable_nvidia.7z) (supports 20 series and above). + +[Portable for Nvidia GPUs with pytorch cuda 12.6 and python 3.12](https://github.com/comfyanonymous/ComfyUI/releases/latest/download/ComfyUI_windows_portable_nvidia_cu126.7z) (Supports Nvidia 10 series and older GPUs). #### How do I share models between another UI and ComfyUI? From 3e3ed8cc2aaa142711e89e1e799956e1e57af62f Mon Sep 17 00:00:00 2001 From: comfyanonymous <121283862+comfyanonymous@users.noreply.github.com> Date: Fri, 1 May 2026 17:19:46 -0700 Subject: [PATCH 66/81] Add script in AMD portable to launch with dynamic vram. 
(#13667)

---
 ...ble_smart_memory.bat => run_amd_gpu_enable_dynamic_vram.bat} | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)
 rename .ci/windows_amd_base_files/{run_amd_gpu_disable_smart_memory.bat => run_amd_gpu_enable_dynamic_vram.bat} (66%)

diff --git a/.ci/windows_amd_base_files/run_amd_gpu_disable_smart_memory.bat b/.ci/windows_amd_base_files/run_amd_gpu_enable_dynamic_vram.bat
similarity index 66%
rename from .ci/windows_amd_base_files/run_amd_gpu_disable_smart_memory.bat
rename to .ci/windows_amd_base_files/run_amd_gpu_enable_dynamic_vram.bat
index cece0aeb2..94ad31942 100755
--- a/.ci/windows_amd_base_files/run_amd_gpu_disable_smart_memory.bat
+++ b/.ci/windows_amd_base_files/run_amd_gpu_enable_dynamic_vram.bat
@@ -1,2 +1,2 @@
-.\python_embeded\python.exe -s ComfyUI\main.py --windows-standalone-build --disable-smart-memory
+.\python_embeded\python.exe -s ComfyUI\main.py --windows-standalone-build --enable-dynamic-vram
 pause

From 783782d5d742a7bc38dd0b661e030813bc50839a Mon Sep 17 00:00:00 2001
From: rattus <46076784+rattus128@users.noreply.github.com>
Date: Sun, 3 May 2026 09:23:24 +1000
Subject: [PATCH 67/81] Implement block prefetch + LoRA async load, and adopt in LTX (Speedup!) (CORE-111) (#13618)

* mm: Use Aimdo raw allocator for cast buffers

pytorch manages allocation of growing buffers on streams poorly. pytorch
has no Windows support for the expandable segments allocator (which is
the right tool for this job), while also segmenting the memory by stream
such that it cannot be generally re-used. So kick the problem to aimdo,
which can just grow a virtual region that is freed per stream.

* plan

* ops: move cpu handler up to the caller

* ops: split up prefetch from weight prep

Split up the casting and weight formatting/lora stuff in prep for
arbitrary prefetch support.

* ops: implement block prefetching API

Allow a model to construct a prefetch list and operate it for increased
async offload.

* ltxv2: Implement block prefetching

* Implement lora async offload

Implement async offload of loras.
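For models adopting the new API, the usage pattern mirrors the LTXAV change
below. A minimal sketch of a model's block loop, assuming a module with a
transformer_blocks list and a simple block(x) call signature (both
illustrative; only the comfy.model_prefetch calls come from this patch):

    import comfy.model_prefetch

    def _run_blocks(self, x, transformer_options):
        # make_prefetch_queue returns None (and the pops become no-ops) when
        # prefetching is unavailable: CPU / non-async devices, zero offload
        # streams, or a model patcher that is not dynamic.
        prefetch_queue = comfy.model_prefetch.make_prefetch_queue(
            list(self.transformer_blocks), x.device, transformer_options)
        for block in self.transformer_blocks:
            # Release the previously consumed entry and stage this block's
            # weight transfer on an offload stream before it executes.
            comfy.model_prefetch.prefetch_queue_pop(prefetch_queue, x.device, block)
            x = block(x)
        # Trailing pop cleans up the last block's prefetched state.
        comfy.model_prefetch.prefetch_queue_pop(prefetch_queue, x.device, None)
        return x

The queue is padded with None sentinels at both ends, so the first pop only
stages work and the trailing pop only cleans up.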
--- comfy/ldm/lightricks/av_model.py | 5 + comfy/lora.py | 15 +++ comfy/model_base.py | 5 + comfy/model_management.py | 22 +++- comfy/model_patcher.py | 13 ++- comfy/model_prefetch.py | 65 +++++++++++ comfy/ops.py | 181 ++++++++++++++++++++++--------- execution.py | 2 + 8 files changed, 251 insertions(+), 57 deletions(-) create mode 100644 comfy/model_prefetch.py diff --git a/comfy/ldm/lightricks/av_model.py b/comfy/ldm/lightricks/av_model.py index 6f2ba41ef..3fb87b4a3 100644 --- a/comfy/ldm/lightricks/av_model.py +++ b/comfy/ldm/lightricks/av_model.py @@ -16,6 +16,7 @@ from comfy.ldm.lightricks.model import ( from comfy.ldm.lightricks.symmetric_patchifier import AudioPatchifier from comfy.ldm.lightricks.embeddings_connector import Embeddings1DConnector import comfy.ldm.common_dit +import comfy.model_prefetch class CompressedTimestep: """Store video timestep embeddings in compressed form using per-frame indexing.""" @@ -907,9 +908,11 @@ class LTXAVModel(LTXVModel): """Process transformer blocks for LTXAV.""" patches_replace = transformer_options.get("patches_replace", {}) blocks_replace = patches_replace.get("dit", {}) + prefetch_queue = comfy.model_prefetch.make_prefetch_queue(list(self.transformer_blocks), vx.device, transformer_options) # Process transformer blocks for i, block in enumerate(self.transformer_blocks): + comfy.model_prefetch.prefetch_queue_pop(prefetch_queue, vx.device, block) if ("double_block", i) in blocks_replace: def block_wrap(args): @@ -982,6 +985,8 @@ class LTXAVModel(LTXVModel): a_prompt_timestep=a_prompt_timestep, ) + comfy.model_prefetch.prefetch_queue_pop(prefetch_queue, vx.device, None) + return [vx, ax] def _process_output(self, x, embedded_timestep, keyframe_idxs, **kwargs): diff --git a/comfy/lora.py b/comfy/lora.py index e4337c729..db8f16bcb 100644 --- a/comfy/lora.py +++ b/comfy/lora.py @@ -17,6 +17,7 @@ """ from __future__ import annotations +import comfy.memory_management import comfy.utils import comfy.model_management import comfy.model_base @@ -473,3 +474,17 @@ def calculate_weight(patches, weight, key, intermediate_dtype=torch.float32, ori weight = old_weight return weight + +def prefetch_prepared_value(value, allocate_buffer, stream): + if isinstance(value, torch.Tensor): + dest = allocate_buffer(comfy.memory_management.vram_aligned_size(value)) + comfy.model_management.cast_to_gathered([value], dest, non_blocking=True, stream=stream) + return comfy.memory_management.interpret_gathered_like([value], dest)[0] + elif isinstance(value, weight_adapter.WeightAdapterBase): + return type(value)(value.loaded_keys, prefetch_prepared_value(value.weights, allocate_buffer, stream)) + elif isinstance(value, tuple): + return tuple(prefetch_prepared_value(item, allocate_buffer, stream) for item in value) + elif isinstance(value, list): + return [prefetch_prepared_value(item, allocate_buffer, stream) for item in value] + + return value diff --git a/comfy/model_base.py b/comfy/model_base.py index 50dab5782..b61a2aa09 100644 --- a/comfy/model_base.py +++ b/comfy/model_base.py @@ -214,6 +214,11 @@ class BaseModel(torch.nn.Module): if "latent_shapes" in extra_conds: xc = utils.unpack_latents(xc, extra_conds.pop("latent_shapes")) + transformer_options = transformer_options.copy() + transformer_options["prefetch_dynamic_vbars"] = ( + self.current_patcher is not None and self.current_patcher.is_dynamic() + ) + model_output = self.diffusion_model(xc, t, context=context, control=control, transformer_options=transformer_options, **extra_conds) if len(model_output) > 1 and 
not torch.is_tensor(model_output): model_output, _ = utils.pack_latents(model_output) diff --git a/comfy/model_management.py b/comfy/model_management.py index f86e2a4aa..02ad66656 100644 --- a/comfy/model_management.py +++ b/comfy/model_management.py @@ -31,6 +31,7 @@ from contextlib import nullcontext import comfy.memory_management import comfy.utils import comfy.quant_ops +import comfy_aimdo.vram_buffer class VRAMState(Enum): DISABLED = 0 #No vram present: no need to move models to vram @@ -1175,6 +1176,10 @@ stream_counters = {} STREAM_CAST_BUFFERS = {} LARGEST_CASTED_WEIGHT = (None, 0) +STREAM_AIMDO_CAST_BUFFERS = {} +LARGEST_AIMDO_CASTED_WEIGHT = (None, 0) + +DEFAULT_AIMDO_CAST_BUFFER_RESERVATION_SIZE = 16 * 1024 ** 3 def get_cast_buffer(offload_stream, device, size, ref): global LARGEST_CASTED_WEIGHT @@ -1208,13 +1213,26 @@ def get_cast_buffer(offload_stream, device, size, ref): return cast_buffer +def get_aimdo_cast_buffer(offload_stream, device): + cast_buffer = STREAM_AIMDO_CAST_BUFFERS.get(offload_stream, None) + if cast_buffer is None: + cast_buffer = comfy_aimdo.vram_buffer.VRAMBuffer(DEFAULT_AIMDO_CAST_BUFFER_RESERVATION_SIZE, device.index) + STREAM_AIMDO_CAST_BUFFERS[offload_stream] = cast_buffer + + return cast_buffer def reset_cast_buffers(): global LARGEST_CASTED_WEIGHT + global LARGEST_AIMDO_CASTED_WEIGHT + LARGEST_CASTED_WEIGHT = (None, 0) - for offload_stream in STREAM_CAST_BUFFERS: - offload_stream.synchronize() + LARGEST_AIMDO_CASTED_WEIGHT = (None, 0) + for offload_stream in set(STREAM_CAST_BUFFERS) | set(STREAM_AIMDO_CAST_BUFFERS): + if offload_stream is not None: + offload_stream.synchronize() synchronize() + STREAM_CAST_BUFFERS.clear() + STREAM_AIMDO_CAST_BUFFERS.clear() soft_empty_cache() def get_offload_stream(device): diff --git a/comfy/model_patcher.py b/comfy/model_patcher.py index e259aed63..7d2d6883f 100644 --- a/comfy/model_patcher.py +++ b/comfy/model_patcher.py @@ -121,9 +121,20 @@ class LowVramPatch: self.patches = patches self.convert_func = convert_func # TODO: remove self.set_func = set_func + self.prepared_patches = None + + def prepare(self, allocate_buffer, stream): + self.prepared_patches = [ + (patch[0], comfy.lora.prefetch_prepared_value(patch[1], allocate_buffer, stream), patch[2], patch[3], patch[4]) + for patch in self.patches[self.key] + ] + + def clear_prepared(self): + self.prepared_patches = None def __call__(self, weight): - return comfy.lora.calculate_weight(self.patches[self.key], weight, self.key, intermediate_dtype=weight.dtype) + patches = self.prepared_patches if self.prepared_patches is not None else self.patches[self.key] + return comfy.lora.calculate_weight(patches, weight, self.key, intermediate_dtype=weight.dtype) LOWVRAM_PATCH_ESTIMATE_MATH_FACTOR = 2 diff --git a/comfy/model_prefetch.py b/comfy/model_prefetch.py new file mode 100644 index 000000000..0ad35deb5 --- /dev/null +++ b/comfy/model_prefetch.py @@ -0,0 +1,65 @@ +import comfy_aimdo.model_vbar +import comfy.model_management +import comfy.ops + +PREFETCH_QUEUES = [] + +def cleanup_prefetched_modules(comfy_modules): + for s in comfy_modules: + prefetch = getattr(s, "_prefetch", None) + if prefetch is None: + continue + for param_key in ("weight", "bias"): + lowvram_fn = getattr(s, param_key + "_lowvram_function", None) + if lowvram_fn is not None: + lowvram_fn.clear_prepared() + if prefetch["signature"] is not None: + comfy_aimdo.model_vbar.vbar_unpin(s._v) + delattr(s, "_prefetch") + +def cleanup_prefetch_queues(): + global PREFETCH_QUEUES + + for queue in 
PREFETCH_QUEUES: + for entry in queue: + if entry is None or not isinstance(entry, tuple): + continue + _, prefetch_state = entry + comfy_modules = prefetch_state[1] + if comfy_modules is not None: + cleanup_prefetched_modules(comfy_modules) + PREFETCH_QUEUES = [] + +def prefetch_queue_pop(queue, device, module): + if queue is None: + return + + consumed = queue.pop(0) + if consumed is not None: + offload_stream, prefetch_state = consumed + offload_stream.wait_stream(comfy.model_management.current_stream(device)) + _, comfy_modules = prefetch_state + if comfy_modules is not None: + cleanup_prefetched_modules(comfy_modules) + + prefetch = queue[0] + if prefetch is not None: + comfy_modules = [] + for s in prefetch.modules(): + if hasattr(s, "_v"): + comfy_modules.append(s) + + offload_stream = comfy.ops.cast_modules_with_vbar(comfy_modules, None, device, None, True) + comfy.model_management.sync_stream(device, offload_stream) + queue[0] = (offload_stream, (prefetch, comfy_modules)) + +def make_prefetch_queue(queue, device, transformer_options): + if (not transformer_options.get("prefetch_dynamic_vbars", False) + or comfy.model_management.NUM_STREAMS == 0 + or comfy.model_management.is_device_cpu(device) + or not comfy.model_management.device_supports_non_blocking(device)): + return None + + queue = [None] + queue + [None] + PREFETCH_QUEUES.append(queue) + return queue diff --git a/comfy/ops.py b/comfy/ops.py index 050f7cda0..96db1411c 100644 --- a/comfy/ops.py +++ b/comfy/ops.py @@ -86,38 +86,61 @@ def materialize_meta_param(s, param_keys): setattr(s, param_key, torch.nn.Parameter(torch.zeros(param.shape, dtype=param.dtype), requires_grad=param.requires_grad)) -def cast_bias_weight_with_vbar(s, dtype, device, bias_dtype, non_blocking, compute_dtype, want_requant): - #vbar doesn't support CPU weights, but some custom nodes have weird paths - #that might switch the layer to the CPU and expect it to work. We have to take - #a clone conservatively as we are mmapped and some SFT files are packed misaligned - #If you are a custom node author reading this, please move your layer to the GPU - #or declare your ModelPatcher as CPU in the first place. 
- if comfy.model_management.is_device_cpu(device): - materialize_meta_param(s, ["weight", "bias"]) - weight = s.weight.to(dtype=dtype, copy=True) - if isinstance(weight, QuantizedTensor): - weight = weight.dequantize() - bias = None - if s.bias is not None: - bias = s.bias.to(dtype=bias_dtype, copy=True) - return weight, bias, (None, None, None) - +# FIXME: add n=1 cache hit fast path +def cast_modules_with_vbar(comfy_modules, dtype, device, bias_dtype, non_blocking): offload_stream = None - xfer_dest = None + cast_buffer = None + cast_buffer_offset = 0 + + def ensure_offload_stream(module, required_size, check_largest): + nonlocal offload_stream + nonlocal cast_buffer + + if offload_stream is None: + offload_stream = comfy.model_management.get_offload_stream(device) + if offload_stream is None or not check_largest or len(comfy_modules) != 1: + return + + current_size = 0 if cast_buffer is None else cast_buffer.size() + if current_size < required_size and module is comfy.model_management.LARGEST_AIMDO_CASTED_WEIGHT[0]: + offload_stream = comfy.model_management.get_offload_stream(device) + cast_buffer = None + if required_size > comfy.model_management.LARGEST_AIMDO_CASTED_WEIGHT[1]: + comfy.model_management.LARGEST_AIMDO_CASTED_WEIGHT = (module, required_size) + + def get_cast_buffer(buffer_size): + nonlocal offload_stream + nonlocal cast_buffer + nonlocal cast_buffer_offset + + if buffer_size == 0: + return None + + if offload_stream is None: + return torch.empty((buffer_size,), dtype=torch.uint8, device=device) + + cast_buffer = comfy.model_management.get_aimdo_cast_buffer(offload_stream, device) + buffer = comfy_aimdo.torch.aimdo_to_tensor(cast_buffer.get(buffer_size, cast_buffer_offset), device) + cast_buffer_offset += buffer_size + return buffer + + for s in comfy_modules: + signature = comfy_aimdo.model_vbar.vbar_fault(s._v) + resident = comfy_aimdo.model_vbar.vbar_signature_compare(signature, s._v_signature) + prefetch = { + "signature": signature, + "resident": resident, + } - signature = comfy_aimdo.model_vbar.vbar_fault(s._v) - resident = comfy_aimdo.model_vbar.vbar_signature_compare(signature, s._v_signature) - if signature is not None: if resident: - weight = s._v_weight - bias = s._v_bias - else: - xfer_dest = comfy_aimdo.torch.aimdo_to_tensor(s._v, device) + s._prefetch = prefetch + continue - if not resident: materialize_meta_param(s, ["weight", "bias"]) + xfer_dest = comfy_aimdo.torch.aimdo_to_tensor(s._v, device) if signature is not None else None cast_geometry = comfy.memory_management.tensors_to_geometries([ s.weight, s.bias ]) cast_dest = None + needs_cast = False xfer_source = [ s.weight, s.bias ] @@ -129,22 +152,15 @@ def cast_bias_weight_with_vbar(s, dtype, device, bias_dtype, non_blocking, compu if data is None: continue if data.dtype != geometry.dtype: + needs_cast = True cast_dest = xfer_dest - if cast_dest is None: - cast_dest = torch.empty((comfy.memory_management.vram_aligned_size(cast_geometry),), dtype=torch.uint8, device=device) xfer_dest = None break dest_size = comfy.memory_management.vram_aligned_size(xfer_source) - offload_stream = comfy.model_management.get_offload_stream(device) - if xfer_dest is None and offload_stream is not None: - xfer_dest = comfy.model_management.get_cast_buffer(offload_stream, device, dest_size, s) - if xfer_dest is None: - offload_stream = comfy.model_management.get_offload_stream(device) - xfer_dest = comfy.model_management.get_cast_buffer(offload_stream, device, dest_size, s) + ensure_offload_stream(s, dest_size if xfer_dest 
is None else 0, True) if xfer_dest is None: - xfer_dest = torch.empty((dest_size,), dtype=torch.uint8, device=device) - offload_stream = None + xfer_dest = get_cast_buffer(dest_size) if signature is None and pin is None: comfy.pinned_memory.pin_memory(s) @@ -157,27 +173,54 @@ def cast_bias_weight_with_vbar(s, dtype, device, bias_dtype, non_blocking, compu xfer_source = [ pin ] #send it over comfy.model_management.cast_to_gathered(xfer_source, xfer_dest, non_blocking=non_blocking, stream=offload_stream) - comfy.model_management.sync_stream(device, offload_stream) - if cast_dest is not None: + for param_key in ("weight", "bias"): + lowvram_fn = getattr(s, param_key + "_lowvram_function", None) + if lowvram_fn is not None: + ensure_offload_stream(s, cast_buffer_offset, False) + lowvram_fn.prepare(lambda size: get_cast_buffer(size), offload_stream) + + prefetch["xfer_dest"] = xfer_dest + prefetch["cast_dest"] = cast_dest + prefetch["cast_geometry"] = cast_geometry + prefetch["needs_cast"] = needs_cast + s._prefetch = prefetch + + return offload_stream + + +def resolve_cast_module_with_vbar(s, dtype, device, bias_dtype, compute_dtype, want_requant): + + prefetch = getattr(s, "_prefetch", None) + + if prefetch["resident"]: + weight = s._v_weight + bias = s._v_bias + else: + xfer_dest = prefetch["xfer_dest"] + if prefetch["needs_cast"]: + cast_dest = prefetch["cast_dest"] if prefetch["cast_dest"] is not None else torch.empty((comfy.memory_management.vram_aligned_size(prefetch["cast_geometry"]),), dtype=torch.uint8, device=device) for pre_cast, post_cast in zip(comfy.memory_management.interpret_gathered_like([s.weight, s.bias ], xfer_dest), - comfy.memory_management.interpret_gathered_like(cast_geometry, cast_dest)): + comfy.memory_management.interpret_gathered_like(prefetch["cast_geometry"], cast_dest)): if post_cast is not None: post_cast.copy_(pre_cast) xfer_dest = cast_dest - params = comfy.memory_management.interpret_gathered_like(cast_geometry, xfer_dest) + params = comfy.memory_management.interpret_gathered_like(prefetch["cast_geometry"], xfer_dest) weight = params[0] bias = params[1] - if signature is not None: + if prefetch["signature"] is not None: s._v_weight = weight s._v_bias = bias - s._v_signature=signature + s._v_signature = prefetch["signature"] def post_cast(s, param_key, x, dtype, resident, update_weight): lowvram_fn = getattr(s, param_key + "_lowvram_function", None) fns = getattr(s, param_key + "_function", []) + if x is None: + return None + orig = x def to_dequant(tensor, dtype): @@ -205,14 +248,12 @@ def cast_bias_weight_with_vbar(s, dtype, device, bias_dtype, non_blocking, compu x = f(x) return x - update_weight = signature is not None + update_weight = prefetch["signature"] is not None + weight = post_cast(s, "weight", weight, dtype, prefetch["resident"], update_weight) + if bias is not None: + bias = post_cast(s, "bias", bias, bias_dtype, prefetch["resident"], update_weight) - weight = post_cast(s, "weight", weight, dtype, resident, update_weight) - if s.bias is not None: - bias = post_cast(s, "bias", bias, bias_dtype, resident, update_weight) - - #FIXME: weird offload return protocol - return weight, bias, (offload_stream, device if signature is not None else None, None) + return weight, bias def cast_bias_weight(s, input=None, dtype=None, device=None, bias_dtype=None, offloadable=False, compute_dtype=None, want_requant=False): @@ -230,10 +271,46 @@ def cast_bias_weight(s, input=None, dtype=None, device=None, bias_dtype=None, of if device is None: device = input.device 
+ def format_return(result, offloadable): + weight, bias, offload_stream = result + return (weight, bias, offload_stream) if offloadable else (weight, bias) + non_blocking = comfy.model_management.device_supports_non_blocking(device) if hasattr(s, "_v"): - return cast_bias_weight_with_vbar(s, dtype, device, bias_dtype, non_blocking, compute_dtype, want_requant) + + #vbar doesn't support CPU weights, but some custom nodes have weird paths + #that might switch the layer to the CPU and expect it to work. We have to take + #a clone conservatively as we are mmapped and some SFT files are packed misaligned + #If you are a custom node author reading this, please move your layer to the GPU + #or declare your ModelPatcher as CPU in the first place. + if comfy.model_management.is_device_cpu(device): + materialize_meta_param(s, ["weight", "bias"]) + weight = s.weight.to(dtype=dtype, copy=True) + if isinstance(weight, QuantizedTensor): + weight = weight.dequantize() + bias = s.bias.to(dtype=bias_dtype, copy=True) if s.bias is not None else None + return format_return((weight, bias, (None, None, None)), offloadable) + + prefetched = hasattr(s, "_prefetch") + offload_stream = None + offload_device = None + if not prefetched: + offload_stream = cast_modules_with_vbar([s], dtype, device, bias_dtype, non_blocking) + comfy.model_management.sync_stream(device, offload_stream) + + weight, bias = resolve_cast_module_with_vbar(s, dtype, device, bias_dtype, compute_dtype, want_requant) + + if not prefetched: + if getattr(s, "_prefetch")["signature"] is not None: + offload_device = device + for param_key in ("weight", "bias"): + lowvram_fn = getattr(s, param_key + "_lowvram_function", None) + if lowvram_fn is not None: + lowvram_fn.clear_prepared() + delattr(s, "_prefetch") + return format_return((weight, bias, (offload_stream, offload_device, None)), offloadable) + if offloadable and (device != s.weight.device or (s.bias is not None and device != s.bias.device)): @@ -280,11 +357,7 @@ def cast_bias_weight(s, input=None, dtype=None, device=None, bias_dtype=None, of for f in s.weight_function: weight = f(weight) - if offloadable: - return weight, bias, (offload_stream, weight_a, bias_a) - else: - #Legacy function signature - return weight, bias + return format_return((weight, bias, (offload_stream, weight_a, bias_a)), offloadable) def uncast_bias_weight(s, weight, bias, offload_stream): diff --git a/execution.py b/execution.py index 5a6d3404c..654db8426 100644 --- a/execution.py +++ b/execution.py @@ -15,6 +15,7 @@ import torch from comfy.cli_args import args import comfy.memory_management import comfy.model_management +import comfy.model_prefetch import comfy_aimdo.model_vbar from latent_preview import set_preview_method @@ -537,6 +538,7 @@ async def execute(server, dynprompt, caches, current_item, extra_data, executed, if args.verbose == "DEBUG": comfy_aimdo.control.analyze() comfy.model_management.reset_cast_buffers() + comfy.model_prefetch.cleanup_prefetch_queues() comfy_aimdo.model_vbar.vbars_reset_watermark_limits() if has_pending_tasks: From ef6722f6be7bf073d225d21da47354905a6abd2b Mon Sep 17 00:00:00 2001 From: comfyanonymous <121283862+comfyanonymous@users.noreply.github.com> Date: Sat, 2 May 2026 17:34:27 -0700 Subject: [PATCH 68/81] Some cleanups to the load image node. 
(#13677) --- nodes.py | 29 ++++++++++------------------- 1 file changed, 10 insertions(+), 19 deletions(-) diff --git a/nodes.py b/nodes.py index 99dc07227..710cccffe 100644 --- a/nodes.py +++ b/nodes.py @@ -1694,26 +1694,27 @@ class LoadImage: RETURN_TYPES = ("IMAGE", "MASK") FUNCTION = "load_image" + def load_image(self, image): image_path = folder_paths.get_annotated_filepath(image) + dtype = comfy.model_management.intermediate_dtype() + device = comfy.model_management.intermediate_device() + components = InputImpl.VideoFromFile(image_path).get_components() if components.images.shape[0] > 0: - return (components.images, 1.0 - components.alpha[..., -1] if components.alpha is not None else torch.zeros((components.images.shape[0], 64, 64), dtype=torch.float32, device="cpu")) + return (components.images.to(device=device, dtype=dtype), (1.0 - components.alpha[..., -1]).to(device=device, dtype=dtype) if components.alpha is not None else torch.zeros((components.images.shape[0], 64, 64), dtype=dtype, device=device)) + # This code is left here to handle animated webp which pyav does not support loading img = node_helpers.pillow(Image.open, image_path) output_images = [] output_masks = [] w, h = None, None - dtype = comfy.model_management.intermediate_dtype() - for i in ImageSequence.Iterator(img): i = node_helpers.pillow(ImageOps.exif_transpose, i) - if i.mode == 'I': - i = i.point(lambda i: i * (1 / 255)) image = i.convert("RGB") if len(output_images) == 0: @@ -1728,25 +1729,15 @@ class LoadImage: if 'A' in i.getbands(): mask = np.array(i.getchannel('A')).astype(np.float32) / 255.0 mask = 1. - torch.from_numpy(mask) - elif i.mode == 'P' and 'transparency' in i.info: - mask = np.array(i.convert('RGBA').getchannel('A')).astype(np.float32) / 255.0 - mask = 1. 
- torch.from_numpy(mask) else: - mask = torch.zeros((64,64), dtype=torch.float32, device="cpu") + mask = torch.zeros((64, 64), dtype=torch.float32, device="cpu") output_images.append(image.to(dtype=dtype)) output_masks.append(mask.unsqueeze(0).to(dtype=dtype)) - if img.format == "MPO": - break # ignore all frames except the first one for MPO format + output_image = torch.cat(output_images, dim=0) + output_mask = torch.cat(output_masks, dim=0) - if len(output_images) > 1: - output_image = torch.cat(output_images, dim=0) - output_mask = torch.cat(output_masks, dim=0) - else: - output_image = output_images[0] - output_mask = output_masks[0] - - return (output_image, output_mask) + return (output_image.to(device=device, dtype=dtype), output_mask.to(device=device, dtype=dtype)) @classmethod def IS_CHANGED(s, image): From 1d23a875ed0d4644538265635c1259be08a3370e Mon Sep 17 00:00:00 2001 From: "Daxiong (Lin)" Date: Sun, 3 May 2026 10:06:55 +0800 Subject: [PATCH 69/81] chore: update workflow templates to v0.9.68 (#13678) --- requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/requirements.txt b/requirements.txt index 932034076..32826e25a 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,5 +1,5 @@ comfyui-frontend-package==1.42.15 -comfyui-workflow-templates==0.9.66 +comfyui-workflow-templates==0.9.68 comfyui-embedded-docs==0.4.4 torch torchsde From f756d801a1e5fbafe81cdfdf8a1c0aadf54c9bea Mon Sep 17 00:00:00 2001 From: Alexander Piskun <13381981+bigcat88@users.noreply.github.com> Date: Sun, 3 May 2026 05:29:00 +0300 Subject: [PATCH 70/81] [Partner Nodes] Topaz Astra 2 model (#13672) * feat(api-nodes): add Topaz Astra 2 model Signed-off-by: bigcat88 * feat(api-nodes): make Astra 2 the default Topaz upscaler model Reorder UPSCALER_MODELS_MAP and the upscaler_model dynamic combo so "Astra 2" appears first, surfacing it as the default selection. --------- Signed-off-by: bigcat88 Co-authored-by: Marwan Mostafa --- comfy_api_nodes/apis/topaz.py | 9 +- comfy_api_nodes/nodes_topaz.py | 361 ++++++++++++++++++++++++++++++++- 2 files changed, 365 insertions(+), 5 deletions(-) diff --git a/comfy_api_nodes/apis/topaz.py b/comfy_api_nodes/apis/topaz.py index a9e6235a7..f91980e3d 100644 --- a/comfy_api_nodes/apis/topaz.py +++ b/comfy_api_nodes/apis/topaz.py @@ -1,4 +1,4 @@ -from typing import Optional, Union +from typing import Optional from pydantic import BaseModel, Field @@ -72,8 +72,11 @@ class VideoEnhancementFilter(BaseModel): grain: Optional[float] = Field(None, description="Grain after AI model processing") grainSize: Optional[float] = Field(None, description="Size of generated grain") recoverOriginalDetailValue: Optional[float] = Field(None, description="Source details into the output video") - creativity: Optional[str] = Field(None, description="Creativity level(high, low) for slc-1 only") + creativity: float | str | None = Field(None, description="slc-1/slp-2.5: enum (low/middle/high). ast-2: decimal 0.0-1.0.") isOptimizedMode: Optional[bool] = Field(None, description="Set to true for Starlight Creative (slc-1) only") + prompt: str | None = Field(None, description="Descriptive scene prompt (ast-2 only)") + sharp: float | None = Field(None, description="ast-2 pre-enhance sharpness") + realism: float | None = Field(None, description="ast-2 realism control") class OutputInformationVideo(BaseModel): @@ -90,7 +93,7 @@ class Overrides(BaseModel): class CreateVideoRequest(BaseModel): source: CreateVideoRequestSource = Field(...) 
- filters: list[Union[VideoFrameInterpolationFilter, VideoEnhancementFilter]] = Field(...) + filters: list[VideoFrameInterpolationFilter | VideoEnhancementFilter] = Field(...) output: OutputInformationVideo = Field(...) overrides: Overrides = Field(Overrides(isPaidDiffusion=True)) diff --git a/comfy_api_nodes/nodes_topaz.py b/comfy_api_nodes/nodes_topaz.py index fe3666ec9..e79c16d3c 100644 --- a/comfy_api_nodes/nodes_topaz.py +++ b/comfy_api_nodes/nodes_topaz.py @@ -36,11 +36,15 @@ from comfy_api_nodes.util import ( ) UPSCALER_MODELS_MAP = { + "Astra 2": "ast-2", "Starlight (Astra) Fast": "slf-1", "Starlight (Astra) Creative": "slc-1", "Starlight Precise 2.5": "slp-2.5", } +AST2_MAX_FRAMES = 9000 +AST2_MAX_FRAMES_WITH_PROMPT = 450 + class TopazImageEnhance(IO.ComfyNode): @classmethod @@ -230,13 +234,20 @@ class TopazVideoEnhance(IO.ComfyNode): def define_schema(cls): return IO.Schema( node_id="TopazVideoEnhance", - display_name="Topaz Video Enhance", + display_name="Topaz Video Enhance (Legacy)", category="api node/video/Topaz", description="Breathe new life into video with powerful upscaling and recovery technology.", inputs=[ IO.Video.Input("video"), IO.Boolean.Input("upscaler_enabled", default=True), - IO.Combo.Input("upscaler_model", options=list(UPSCALER_MODELS_MAP.keys())), + IO.Combo.Input( + "upscaler_model", + options=[ + "Starlight (Astra) Fast", + "Starlight (Astra) Creative", + "Starlight Precise 2.5", + ], + ), IO.Combo.Input("upscaler_resolution", options=["FullHD (1080p)", "4K (2160p)"]), IO.Combo.Input( "upscaler_creativity", @@ -304,6 +315,7 @@ class TopazVideoEnhance(IO.ComfyNode): IO.Hidden.unique_id, ], is_api_node=True, + is_deprecated=True, ) @classmethod @@ -457,12 +469,357 @@ class TopazVideoEnhance(IO.ComfyNode): return IO.NodeOutput(await download_url_to_video_output(final_response.download.url)) +class TopazVideoEnhanceV2(IO.ComfyNode): + @classmethod + def define_schema(cls): + return IO.Schema( + node_id="TopazVideoEnhanceV2", + display_name="Topaz Video Enhance", + category="api node/video/Topaz", + description="Breathe new life into video with powerful upscaling and recovery technology.", + inputs=[ + IO.Video.Input("video"), + IO.DynamicCombo.Input( + "upscaler_model", + options=[ + IO.DynamicCombo.Option( + "Astra 2", + [ + IO.Combo.Input("upscaler_resolution", options=["FullHD (1080p)", "4K (2160p)"]), + IO.Float.Input( + "creativity", + default=0.5, + min=0.0, + max=1.0, + step=0.1, + display_mode=IO.NumberDisplay.slider, + tooltip="Creative strength of the upscale.", + ), + IO.String.Input( + "prompt", + multiline=True, + default="", + tooltip="Optional descriptive (not instructive) scene prompt." + f"Capping input at {AST2_MAX_FRAMES_WITH_PROMPT} frames (~15s @ 30fps) when set.", + ), + IO.Float.Input( + "sharp", + default=0.5, + min=0.0, + max=1.0, + step=0.01, + display_mode=IO.NumberDisplay.slider, + tooltip="Pre-enhance sharpness: " + "0.0=Gaussian blur, 0.5=passthrough (default), 1.0=USM sharpening.", + advanced=True, + ), + IO.Float.Input( + "realism", + default=0.0, + min=0.0, + max=1.0, + step=0.01, + display_mode=IO.NumberDisplay.slider, + tooltip="Pulls output toward photographic realism." 
+ "Leave at 0 for the model default.", + advanced=True, + ), + ], + ), + IO.DynamicCombo.Option( + "Starlight (Astra) Fast", + [IO.Combo.Input("upscaler_resolution", options=["FullHD (1080p)", "4K (2160p)"]),], + ), + IO.DynamicCombo.Option( + "Starlight (Astra) Creative", + [ + IO.Combo.Input("upscaler_resolution", options=["FullHD (1080p)", "4K (2160p)"]), + IO.Combo.Input( + "creativity", + options=["low", "middle", "high"], + default="low", + tooltip="Creative strength of the upscale.", + ), + ], + ), + IO.DynamicCombo.Option( + "Starlight Precise 2.5", + [IO.Combo.Input("upscaler_resolution", options=["FullHD (1080p)", "4K (2160p)"])], + ), + IO.DynamicCombo.Option("Disabled", []), + ], + ), + IO.DynamicCombo.Input( + "interpolation_model", + options=[ + IO.DynamicCombo.Option("Disabled", []), + IO.DynamicCombo.Option( + "apo-8", + [ + IO.Int.Input( + "interpolation_frame_rate", + default=60, + min=15, + max=240, + display_mode=IO.NumberDisplay.number, + tooltip="Output frame rate.", + ), + IO.Int.Input( + "interpolation_slowmo", + default=1, + min=1, + max=16, + display_mode=IO.NumberDisplay.number, + tooltip="Slow-motion factor applied to the input video. " + "For example, 2 makes the output twice as slow and doubles the duration.", + advanced=True, + ), + IO.Boolean.Input( + "interpolation_duplicate", + default=False, + tooltip="Analyze the input for duplicate frames and remove them.", + advanced=True, + ), + IO.Float.Input( + "interpolation_duplicate_threshold", + default=0.01, + min=0.001, + max=0.1, + step=0.001, + display_mode=IO.NumberDisplay.number, + tooltip="Detection sensitivity for duplicate frames.", + advanced=True, + ), + ], + ), + ], + ), + IO.Combo.Input( + "dynamic_compression_level", + options=["Low", "Mid", "High"], + default="Low", + tooltip="CQP level.", + optional=True, + ), + ], + outputs=[ + IO.Video.Output(), + ], + hidden=[ + IO.Hidden.auth_token_comfy_org, + IO.Hidden.api_key_comfy_org, + IO.Hidden.unique_id, + ], + is_api_node=True, + price_badge=IO.PriceBadge( + depends_on=IO.PriceBadgeDepends(widgets=[ + "upscaler_model", + "upscaler_model.upscaler_resolution", + "interpolation_model", + ]), + expr=""" + ( + $model := $lookup(widgets, "upscaler_model"); + $res := $lookup(widgets, "upscaler_model.upscaler_resolution"); + $interp := $lookup(widgets, "interpolation_model"); + $is4k := $contains($res, "4k"); + $hasInterp := $interp != "disabled"; + $rates := { + "starlight (astra) fast": {"hd": 0.43, "uhd": 0.85}, + "starlight precise 2.5": {"hd": 0.70, "uhd": 1.54}, + "astra 2": {"hd": 1.72, "uhd": 2.85}, + "starlight (astra) creative": {"hd": 2.25, "uhd": 3.99} + }; + $surcharge := $is4k ? 0.28 : 0.14; + $entry := $lookup($rates, $model); + $base := $is4k ? $entry.uhd : $entry.hd; + $hi := $base + ($hasInterp ? $surcharge : 0); + $model = "disabled" + ? {"type":"text","text":"Interpolation only"} + : ($hasInterp + ? 
{"type":"text","text":"~" & $string($base) & "–" & $string($hi) & " credits/src frame"} + : {"type":"text","text":"~" & $string($base) & " credits/src frame"}) + ) + """, + ), + ) + + @classmethod + async def execute( + cls, + video: Input.Video, + upscaler_model: dict, + interpolation_model: dict, + dynamic_compression_level: str = "Low", + ) -> IO.NodeOutput: + upscaler_choice = upscaler_model["upscaler_model"] + interpolation_choice = interpolation_model["interpolation_model"] + if upscaler_choice == "Disabled" and interpolation_choice == "Disabled": + raise ValueError("There is nothing to do: both upscaling and interpolation are disabled.") + validate_container_format_is_mp4(video) + src_width, src_height = video.get_dimensions() + src_frame_rate = int(video.get_frame_rate()) + duration_sec = video.get_duration() + src_video_stream = video.get_stream_source() + target_width = src_width + target_height = src_height + target_frame_rate = src_frame_rate + filters = [] + if upscaler_choice != "Disabled": + if "1080p" in upscaler_model["upscaler_resolution"]: + target_pixel_p = 1080 + max_long_side = 1920 + else: + target_pixel_p = 2160 + max_long_side = 3840 + ar = src_width / src_height + if src_width >= src_height: + # Landscape or Square; Attempt to set height to target (e.g., 2160), calculate width + target_height = target_pixel_p + target_width = int(target_height * ar) + # Check if width exceeds standard bounds (for ultra-wide e.g., 21:9 ARs) + if target_width > max_long_side: + target_width = max_long_side + target_height = int(target_width / ar) + else: + # Portrait; Attempt to set width to target (e.g., 2160), calculate height + target_width = target_pixel_p + target_height = int(target_width / ar) + # Check if height exceeds standard bounds + if target_height > max_long_side: + target_height = max_long_side + target_width = int(target_height * ar) + if target_width % 2 != 0: + target_width += 1 + if target_height % 2 != 0: + target_height += 1 + model_id = UPSCALER_MODELS_MAP[upscaler_choice] + if model_id == "slc-1": + filters.append( + VideoEnhancementFilter( + model=model_id, + creativity=upscaler_model["creativity"], + isOptimizedMode=True, + ) + ) + elif model_id == "ast-2": + n_frames = video.get_frame_count() + ast2_prompt = (upscaler_model["prompt"] or "").strip() + if ast2_prompt and n_frames > AST2_MAX_FRAMES_WITH_PROMPT: + raise ValueError( + f"Astra 2 with a prompt is limited to {AST2_MAX_FRAMES_WITH_PROMPT} input frames " + f"(~15s @ 30fps); video has {n_frames}. Clear the prompt or shorten the clip." 
+ ) + if n_frames > AST2_MAX_FRAMES: + raise ValueError(f"Astra 2 is limited to {AST2_MAX_FRAMES} input frames; video has {n_frames}.") + realism = upscaler_model["realism"] + filters.append( + VideoEnhancementFilter( + model=model_id, + creativity=upscaler_model["creativity"], + prompt=(ast2_prompt or None), + sharp=upscaler_model["sharp"], + realism=(realism if realism > 0 else None), + ) + ) + else: + filters.append(VideoEnhancementFilter(model=model_id)) + if interpolation_choice != "Disabled": + target_frame_rate = interpolation_model["interpolation_frame_rate"] + filters.append( + VideoFrameInterpolationFilter( + model=interpolation_choice, + slowmo=interpolation_model["interpolation_slowmo"], + fps=interpolation_model["interpolation_frame_rate"], + duplicate=interpolation_model["interpolation_duplicate"], + duplicate_threshold=interpolation_model["interpolation_duplicate_threshold"], + ), + ) + initial_res = await sync_op( + cls, + ApiEndpoint(path="/proxy/topaz/video/", method="POST"), + response_model=CreateVideoResponse, + data=CreateVideoRequest( + source=CreateVideoRequestSource( + container="mp4", + size=get_fs_object_size(src_video_stream), + duration=int(duration_sec), + frameCount=video.get_frame_count(), + frameRate=src_frame_rate, + resolution=Resolution(width=src_width, height=src_height), + ), + filters=filters, + output=OutputInformationVideo( + resolution=Resolution(width=target_width, height=target_height), + frameRate=target_frame_rate, + audioCodec="AAC", + audioTransfer="Copy", + dynamicCompressionLevel=dynamic_compression_level, + ), + ), + wait_label="Creating task", + final_label_on_success="Task created", + ) + upload_res = await sync_op( + cls, + ApiEndpoint( + path=f"/proxy/topaz/video/{initial_res.requestId}/accept", + method="PATCH", + ), + response_model=VideoAcceptResponse, + wait_label="Preparing upload", + final_label_on_success="Upload started", + ) + if len(upload_res.urls) > 1: + raise NotImplementedError( + "Large files are not currently supported. Please open an issue in the ComfyUI repository." 
+ ) + async with aiohttp.ClientSession(headers={"Content-Type": "video/mp4"}) as session: + if isinstance(src_video_stream, BytesIO): + src_video_stream.seek(0) + async with session.put(upload_res.urls[0], data=src_video_stream, raise_for_status=True) as res: + upload_etag = res.headers["Etag"] + else: + with builtins.open(src_video_stream, "rb") as video_file: + async with session.put(upload_res.urls[0], data=video_file, raise_for_status=True) as res: + upload_etag = res.headers["Etag"] + await sync_op( + cls, + ApiEndpoint( + path=f"/proxy/topaz/video/{initial_res.requestId}/complete-upload", + method="PATCH", + ), + response_model=VideoCompleteUploadResponse, + data=VideoCompleteUploadRequest( + uploadResults=[ + VideoCompleteUploadRequestPart( + partNum=1, + eTag=upload_etag, + ), + ], + ), + wait_label="Finalizing upload", + final_label_on_success="Upload completed", + ) + final_response = await poll_op( + cls, + ApiEndpoint(path=f"/proxy/topaz/video/{initial_res.requestId}/status"), + response_model=VideoStatusResponse, + status_extractor=lambda x: x.status, + progress_extractor=lambda x: getattr(x, "progress", 0), + price_extractor=lambda x: (x.estimates.cost[0] * 0.08 if x.estimates and x.estimates.cost[0] else None), + poll_interval=10.0, + ) + return IO.NodeOutput(await download_url_to_video_output(final_response.download.url)) + + class TopazExtension(ComfyExtension): @override async def get_node_list(self) -> list[type[IO.ComfyNode]]: return [ TopazImageEnhance, TopazVideoEnhance, + TopazVideoEnhanceV2, ] From be95871adccfac92a91ebdc06e52a85511f7b85c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jukka=20Sepp=C3=A4nen?= <40791699+kijai@users.noreply.github.com> Date: Sun, 3 May 2026 05:46:15 +0300 Subject: [PATCH 71/81] feat: Gemma4 text generation support (CORE-30) (#13376) * initial gemma4 support * parity with reference implementation outputs can 100% match transformers with same sdpa flags, checkpoint this and then optimize * Cleanup, video fixes * cleanup, enable fused rms norm by default * update comment * Cleanup * Update sd.py * Various fixes * Add fp8 scaled embedding support * small fixes * Translate think tokens * Fix image encoder attention mask type So it works with basic attention * Handle thinking tokens different only for Gemma4 * Code cleanup * Update nodes_textgen.py * Use embed scale class instead of buffer Slight difference to HF, but technically more accurate and simpler code * Default to fused rms_norm * Update gemma4.py --- comfy/ldm/modules/attention.py | 24 +- comfy/ops.py | 87 +++ comfy/rmsnorm.py | 1 + comfy/sd.py | 17 + comfy/text_encoders/gemma4.py | 1298 ++++++++++++++++++++++++++++++++ comfy/text_encoders/llama.py | 40 +- comfy/text_encoders/lt.py | 3 +- comfy/text_encoders/lumina2.py | 3 +- comfy/text_encoders/qwen35.py | 2 - comfy/utils.py | 7 - comfy_extras/nodes_textgen.py | 13 +- 11 files changed, 1453 insertions(+), 42 deletions(-) create mode 100644 comfy/text_encoders/gemma4.py diff --git a/comfy/ldm/modules/attention.py b/comfy/ldm/modules/attention.py index b193fe5e8..a68cb8439 100644 --- a/comfy/ldm/modules/attention.py +++ b/comfy/ldm/modules/attention.py @@ -14,6 +14,8 @@ from .sub_quadratic_attention import efficient_dot_product_attention from comfy import model_management +TORCH_HAS_GQA = model_management.torch_version_numeric >= (2, 5) + if model_management.xformers_enabled(): import xformers import xformers.ops @@ -150,7 +152,12 @@ def attention_basic(q, k, v, heads, mask=None, attn_precision=None, skip_reshape b, _, dim_head = q.shape 
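+    # GQA in a nutshell (illustrative shapes only): with H query heads and Hkv < H
+    # key/value heads, each KV head serves H // Hkv query heads; the fallback below
+    # simply repeats KV along the head dimension:
+    #   k: [B, Hkv, S, D] -> k.repeat_interleave(H // Hkv, dim=-3) -> [B, H, S, D]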
dim_head //= heads - scale = dim_head ** -0.5 + if kwargs.get("enable_gqa", False) and q.shape[-3] != k.shape[-3]: + n_rep = q.shape[-3] // k.shape[-3] + k = k.repeat_interleave(n_rep, dim=-3) + v = v.repeat_interleave(n_rep, dim=-3) + + scale = kwargs.get("scale", dim_head ** -0.5) h = heads if skip_reshape: @@ -219,6 +226,10 @@ def attention_sub_quad(query, key, value, heads, mask=None, attn_precision=None, b, _, dim_head = query.shape dim_head //= heads + if "scale" in kwargs: + # Pre-scale query to match requested scale (cancels internal 1/sqrt(dim_head)) + query = query * (kwargs["scale"] * dim_head ** 0.5) + if skip_reshape: query = query.reshape(b * heads, -1, dim_head) value = value.reshape(b * heads, -1, dim_head) @@ -290,7 +301,7 @@ def attention_split(q, k, v, heads, mask=None, attn_precision=None, skip_reshape b, _, dim_head = q.shape dim_head //= heads - scale = dim_head ** -0.5 + scale = kwargs.get("scale", dim_head ** -0.5) if skip_reshape: q, k, v = map( @@ -500,8 +511,13 @@ def attention_pytorch(q, k, v, heads, mask=None, attn_precision=None, skip_resha if mask.ndim == 3: mask = mask.unsqueeze(1) + # Pass through extra SDPA kwargs (scale, enable_gqa) if provided + # enable_gqa requires PyTorch 2.5+; older versions use manual KV expansion above + sdpa_keys = ("scale", "enable_gqa") if TORCH_HAS_GQA else ("scale",) + sdpa_extra = {k: v for k, v in kwargs.items() if k in sdpa_keys} + if SDP_BATCH_LIMIT >= b: - out = comfy.ops.scaled_dot_product_attention(q, k, v, attn_mask=mask, dropout_p=0.0, is_causal=False) + out = comfy.ops.scaled_dot_product_attention(q, k, v, attn_mask=mask, dropout_p=0.0, is_causal=False, **sdpa_extra) if not skip_output_reshape: out = ( out.transpose(1, 2).reshape(b, -1, heads * dim_head) @@ -519,7 +535,7 @@ def attention_pytorch(q, k, v, heads, mask=None, attn_precision=None, skip_resha k[i : i + SDP_BATCH_LIMIT], v[i : i + SDP_BATCH_LIMIT], attn_mask=m, - dropout_p=0.0, is_causal=False + dropout_p=0.0, is_causal=False, **sdpa_extra ).transpose(1, 2).reshape(-1, q.shape[2], heads * dim_head) return out diff --git a/comfy/ops.py b/comfy/ops.py index 96db1411c..4f0338346 100644 --- a/comfy/ops.py +++ b/comfy/ops.py @@ -1246,6 +1246,93 @@ def mixed_precision_ops(quant_config={}, compute_dtype=torch.bfloat16, full_prec self._buffers[key] = fn(buf) return self + class Embedding(manual_cast.Embedding): + def _load_from_state_dict(self, state_dict, prefix, local_metadata, + strict, missing_keys, unexpected_keys, error_msgs): + weight_key = f"{prefix}weight" + layer_conf = state_dict.pop(f"{prefix}comfy_quant", None) + if layer_conf is not None: + layer_conf = json.loads(layer_conf.numpy().tobytes()) + + # Only fp8 makes sense for embeddings (per-row dequant via index select). + # Block-scaled formats (NVFP4, MXFP8) can't do per-row lookup efficiently. 
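+            # Minimal sketch of the per-row path this enables (names illustrative):
+            #   rows = F.embedding(ids, fp8_table)        # gather rows, still fp8
+            #   out = rows.to(compute_dtype) * scale      # dequantize only gathered rows
+            # so e.g. a [262144, 2560] Gemma4 vocab table is never dequantized in full.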
+ quant_format = layer_conf.get("format", None) if layer_conf is not None else None + if quant_format in ["float8_e4m3fn", "float8_e5m2"] and weight_key in state_dict: + self.quant_format = quant_format + qconfig = QUANT_ALGOS[quant_format] + layout_cls = get_layout_class(qconfig["comfy_tensor_layout"]) + weight = state_dict.pop(weight_key) + manually_loaded_keys = [weight_key] + + scale_key = f"{prefix}weight_scale" + scale = state_dict.pop(scale_key, None) + if scale is not None: + scale = scale.float() + manually_loaded_keys.append(scale_key) + + params = layout_cls.Params( + scale=scale if scale is not None else torch.ones((), dtype=torch.float32), + orig_dtype=MixedPrecisionOps._compute_dtype, + orig_shape=(self.num_embeddings, self.embedding_dim), + ) + self.weight = torch.nn.Parameter( + QuantizedTensor(weight.to(dtype=qconfig["storage_t"]), qconfig["comfy_tensor_layout"], params), + requires_grad=False) + + super()._load_from_state_dict(state_dict, prefix, local_metadata, strict, missing_keys, unexpected_keys, error_msgs) + for k in manually_loaded_keys: + if k in missing_keys: + missing_keys.remove(k) + else: + if layer_conf is not None: + state_dict[f"{prefix}comfy_quant"] = torch.tensor(list(json.dumps(layer_conf).encode('utf-8')), dtype=torch.uint8) + super()._load_from_state_dict(state_dict, prefix, local_metadata, strict, missing_keys, unexpected_keys, error_msgs) + + def state_dict(self, *args, destination=None, prefix="", **kwargs): + if destination is not None: + sd = destination + else: + sd = {} + + if not hasattr(self, 'weight') or self.weight is None: + return sd + + if isinstance(self.weight, QuantizedTensor): + sd_out = self.weight.state_dict("{}weight".format(prefix)) + for k in sd_out: + sd[k] = sd_out[k] + + quant_conf = {"format": self.quant_format} + sd["{}comfy_quant".format(prefix)] = torch.tensor(list(json.dumps(quant_conf).encode('utf-8')), dtype=torch.uint8) + else: + sd["{}weight".format(prefix)] = self.weight + return sd + + def forward_comfy_cast_weights(self, input, out_dtype=None): + weight = self.weight + + # Optimized path: lookup in fp8, dequantize only the selected rows. + if isinstance(weight, QuantizedTensor) and len(self.weight_function) == 0: + qdata, _, offload_stream = cast_bias_weight(self, device=input.device, dtype=weight.dtype, offloadable=True) + if isinstance(qdata, QuantizedTensor): + scale = qdata._params.scale + qdata = qdata._qdata + else: + scale = None + + x = torch.nn.functional.embedding( + input, qdata, self.padding_idx, self.max_norm, + self.norm_type, self.scale_grad_by_freq, self.sparse) + uncast_bias_weight(self, qdata, None, offload_stream) + target_dtype = out_dtype if out_dtype is not None else weight._params.orig_dtype + x = x.to(dtype=target_dtype) + if scale is not None and scale != 1.0: + x = x * scale.to(dtype=target_dtype) + return x + + # Fallback for non-quantized or weight_function (LoRA) case + return super().forward_comfy_cast_weights(input, out_dtype=out_dtype) + return MixedPrecisionOps def pick_operations(weight_dtype, compute_dtype, load_device=None, disable_fast_fp8=False, fp8_optimizations=False, model_config=None): diff --git a/comfy/rmsnorm.py b/comfy/rmsnorm.py index ab7cf14fa..e54be98d6 100644 --- a/comfy/rmsnorm.py +++ b/comfy/rmsnorm.py @@ -3,6 +3,7 @@ import comfy.model_management RMSNorm = torch.nn.RMSNorm +# Note: torch's fused F.rms_norm is faster but produces slightly different output than manual implementations (rsqrt/reduction rounding). 
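+# For comparison, a manual (unfused) form of the same computation:
+#   x * torch.rsqrt(x.pow(2).mean(dim=-1, keepdim=True) + eps) * weight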
 def rms_norm(x, weight=None, eps=1e-6):
     if weight is None:
         return torch.nn.functional.rms_norm(x, (x.shape[-1],), eps=eps)
diff --git a/comfy/sd.py b/comfy/sd.py
index ee66490f5..9fce0e7d0 100644
--- a/comfy/sd.py
+++ b/comfy/sd.py
@@ -65,6 +65,7 @@ import comfy.text_encoders.ace15
 import comfy.text_encoders.longcat_image
 import comfy.text_encoders.qwen35
 import comfy.text_encoders.ernie
+import comfy.text_encoders.gemma4
 import comfy.model_patcher
 import comfy.lora
 
@@ -1271,6 +1272,9 @@ class TEModel(Enum):
     QWEN35_9B = 26
     QWEN35_27B = 27
     MINISTRAL_3_3B = 28
+    GEMMA_4_E4B = 29
+    GEMMA_4_E2B = 30
+    GEMMA_4_31B = 31
 
 
 def detect_te_model(sd):
@@ -1296,6 +1300,12 @@ def detect_te_model(sd):
             return TEModel.BYT5_SMALL_GLYPH
         return TEModel.T5_BASE
     if 'model.layers.0.post_feedforward_layernorm.weight' in sd:
+        if 'model.layers.59.self_attn.q_norm.weight' in sd:
+            return TEModel.GEMMA_4_31B
+        if 'model.layers.41.self_attn.q_norm.weight' in sd and 'model.layers.47.self_attn.q_norm.weight' not in sd:
+            return TEModel.GEMMA_4_E4B
+        if 'model.layers.34.self_attn.q_norm.weight' in sd and 'model.layers.41.self_attn.q_norm.weight' not in sd:
+            return TEModel.GEMMA_4_E2B
         if 'model.layers.47.self_attn.q_norm.weight' in sd:
             return TEModel.GEMMA_3_12B
         if 'model.layers.0.self_attn.q_norm.weight' in sd:
@@ -1435,6 +1445,13 @@ def load_text_encoder_state_dicts(state_dicts=[], embedding_directory=None, clip
             else:
                 clip_target.clip = comfy.text_encoders.sa_t5.SAT5Model
                 clip_target.tokenizer = comfy.text_encoders.sa_t5.SAT5Tokenizer
+        elif te_model in (TEModel.GEMMA_4_E4B, TEModel.GEMMA_4_E2B, TEModel.GEMMA_4_31B):
+            variant = {TEModel.GEMMA_4_E4B: comfy.text_encoders.gemma4.Gemma4_E4B,
+                       TEModel.GEMMA_4_E2B: comfy.text_encoders.gemma4.Gemma4_E2B,
+                       TEModel.GEMMA_4_31B: comfy.text_encoders.gemma4.Gemma4_31B}[te_model]
+            clip_target.clip = comfy.text_encoders.gemma4.gemma4_te(**llama_detect(clip_data), model_class=variant)
+            clip_target.tokenizer = variant.tokenizer
+            tokenizer_data["tokenizer_json"] = clip_data[0].get("tokenizer_json", None)
         elif te_model == TEModel.GEMMA_2_2B:
             clip_target.clip = comfy.text_encoders.lumina2.te(**llama_detect(clip_data))
             clip_target.tokenizer = comfy.text_encoders.lumina2.LuminaTokenizer
diff --git a/comfy/text_encoders/gemma4.py b/comfy/text_encoders/gemma4.py
new file mode 100644
index 000000000..f050061ed
--- /dev/null
+++ b/comfy/text_encoders/gemma4.py
@@ -0,0 +1,1298 @@
+import torch
+import torch.nn as nn
+import numpy as np
+from dataclasses import dataclass
+import math
+
+from comfy import sd1_clip
+import comfy.model_management
+from comfy.ldm.modules.attention import optimized_attention_for_device
+from comfy.rmsnorm import rms_norm
+from comfy.text_encoders.llama import RMSNorm, MLP, BaseLlama, BaseGenerate, _make_scaled_embedding
+
+
+# Intentional minor divergences from the transformers reference implementation:
+# - Embedding sqrt(hidden_size) scale applied as a Python scalar (full precision) instead of a dtype-matched buffer tensor.
+# - RMSNorm uses torch fused F.rms_norm, very slight numerical differences, but considerably faster +# - Input image and audio resizing/resampling slightly different numerically + + +GEMMA4_VISION_CONFIG = {"hidden_size": 768, "image_size": 896, "intermediate_size": 3072, "num_attention_heads": 12, "num_hidden_layers": 16, "patch_size": 16, "head_dim": 64, "rms_norm_eps": 1e-6, "position_embedding_size": 10240, "pooling_kernel_size": 3} +GEMMA4_VISION_31B_CONFIG = {"hidden_size": 1152, "image_size": 896, "intermediate_size": 4304, "num_attention_heads": 16, "num_hidden_layers": 27, "patch_size": 16, "head_dim": 72, "rms_norm_eps": 1e-6, "position_embedding_size": 10240, "pooling_kernel_size": 3} +GEMMA4_AUDIO_CONFIG = {"hidden_size": 1024, "num_hidden_layers": 12, "num_attention_heads": 8, "intermediate_size": 4096, "conv_kernel_size": 5, "attention_chunk_size": 12, "attention_context_left": 13, "attention_context_right": 0, "attention_logit_cap": 50.0, "output_proj_dims": 1536, "rms_norm_eps": 1e-6, "residual_weight": 0.5} + +@dataclass +class Gemma4Config: + vocab_size: int = 262144 + hidden_size: int = 2560 + intermediate_size: int = 10240 + num_hidden_layers: int = 42 + num_attention_heads: int = 8 + num_key_value_heads: int = 2 + max_position_embeddings: int = 131072 + rms_norm_eps: float = 1e-6 + rope_theta = [1000000.0, 10000.0] + transformer_type: str = "gemma4" + head_dim = 256 + global_head_dim = 512 + rms_norm_add = False + mlp_activation = "gelu_pytorch_tanh" + qkv_bias = False + rope_dims = None + q_norm = "gemma3" + k_norm = "gemma3" + sliding_attention = [512, 512, 512, 512, 512, False] + rope_scale = None + partial_rotary_factor: float = 0.25 + final_norm: bool = True + lm_head: bool = False + final_logit_softcapping: float = 30.0 + hidden_size_per_layer_input: int = 256 + num_kv_shared_layers: int = 18 + use_double_wide_mlp: bool = False + stop_tokens = [1, 50, 106] + vision_config = GEMMA4_VISION_CONFIG + audio_config = GEMMA4_AUDIO_CONFIG + mm_tokens_per_image = 280 + +@dataclass +class Gemma4_E2B_Config(Gemma4Config): + hidden_size: int = 1536 + intermediate_size: int = 6144 + num_hidden_layers: int = 35 + num_key_value_heads: int = 1 + sliding_attention = [512, 512, 512, 512, False] + num_kv_shared_layers: int = 20 + use_double_wide_mlp: bool = True + +@dataclass +class Gemma4_31B_Config(Gemma4Config): + hidden_size: int = 5376 + intermediate_size: int = 21504 + num_hidden_layers: int = 60 + num_attention_heads: int = 32 + num_key_value_heads: int = 16 + sliding_attention = [1024, 1024, 1024, 1024, 1024, False] + hidden_size_per_layer_input: int = 0 + num_kv_shared_layers: int = 0 + audio_config = None + vision_config = GEMMA4_VISION_31B_CONFIG + + +# unfused RoPE as addcmul_ RoPE diverges from reference code +def _apply_rotary_pos_emb(x, freqs_cis): + cos, sin = freqs_cis[0], freqs_cis[1] + half = x.shape[-1] // 2 + out = x * cos + out[..., :half] -= x[..., half:] * sin[..., :half] + out[..., half:] += x[..., :half] * sin[..., half:] + return out + +class Gemma4Attention(nn.Module): + def __init__(self, config, head_dim, device=None, dtype=None, ops=None): + super().__init__() + self.num_heads = config.num_attention_heads + self.num_kv_heads = config.num_key_value_heads + self.hidden_size = config.hidden_size + self.head_dim = head_dim + self.inner_size = self.num_heads * head_dim + + self.q_proj = ops.Linear(config.hidden_size, self.inner_size, bias=config.qkv_bias, device=device, dtype=dtype) + self.k_proj = ops.Linear(config.hidden_size, self.num_kv_heads * 
head_dim, bias=config.qkv_bias, device=device, dtype=dtype) + self.v_proj = ops.Linear(config.hidden_size, self.num_kv_heads * head_dim, bias=config.qkv_bias, device=device, dtype=dtype) + self.o_proj = ops.Linear(self.inner_size, config.hidden_size, bias=False, device=device, dtype=dtype) + + self.q_norm = None + self.k_norm = None + if config.q_norm == "gemma3": + self.q_norm = RMSNorm(head_dim, eps=config.rms_norm_eps, device=device, dtype=dtype) + if config.k_norm == "gemma3": + self.k_norm = RMSNorm(head_dim, eps=config.rms_norm_eps, device=device, dtype=dtype) + + def forward( + self, + hidden_states: torch.Tensor, + attention_mask=None, + freqs_cis=None, + past_key_value=None, + sliding_window=None, + shared_kv=None, + ): + batch_size, seq_length, _ = hidden_states.shape + + xq = self.q_proj(hidden_states) + xq = xq.view(batch_size, seq_length, self.num_heads, self.head_dim).transpose(1, 2) + if self.q_norm is not None: + xq = self.q_norm(xq) + + if shared_kv is not None: + xk, xv = shared_kv + # Apply RoPE to Q only (K already has RoPE from source layer) + xq = _apply_rotary_pos_emb(xq, freqs_cis) + present_key_value = None + shareable_kv = None + else: + xk = self.k_proj(hidden_states).view(batch_size, seq_length, self.num_kv_heads, self.head_dim) + xv = self.v_proj(hidden_states).view(batch_size, seq_length, self.num_kv_heads, self.head_dim) + if self.k_norm is not None: + xk = self.k_norm(xk) + xv = rms_norm(xv) + xk = xk.transpose(1, 2) + xv = xv.transpose(1, 2) + xq = _apply_rotary_pos_emb(xq, freqs_cis) + xk = _apply_rotary_pos_emb(xk, freqs_cis) + + present_key_value = None + if past_key_value is not None: + cumulative_len = 0 + if len(past_key_value) > 0: + past_key, past_value, cumulative_len = past_key_value + xk = torch.cat((past_key, xk), dim=2) + xv = torch.cat((past_value, xv), dim=2) + new_cumulative = cumulative_len + seq_length + if sliding_window is not None and xk.shape[2] > sliding_window - 1: + cache_k = xk[:, :, -(sliding_window - 1):] + cache_v = xv[:, :, -(sliding_window - 1):] + else: + cache_k = xk + cache_v = xv + present_key_value = (cache_k, cache_v, new_cumulative) + + # KV for sharing: full xk/xv that SDPA sees (not evicted cache) + shareable_kv = (xk, xv) + + # GQA: pass unexpanded KV with enable_gqa when no sliding mask, + # expand heads when sliding mask is present + # has to be done within SDPA itself to match the reference code, pre-scaling expansion causes numerical differences + expand_kv = (self.num_heads != self.num_kv_heads and + sliding_window is not None and + xk.shape[2] >= sliding_window) + if expand_kv: + xk = xk.repeat_interleave(self.num_heads // self.num_kv_heads, dim=1) + xv = xv.repeat_interleave(self.num_heads // self.num_kv_heads, dim=1) + gqa_kwargs = {} if expand_kv else ({"enable_gqa": True} if self.num_heads != self.num_kv_heads else {}) + output = optimized_attention_for_device(xq.device, mask=attention_mask is not None, small_input=True)(xq, xk, xv, self.num_heads, mask=attention_mask, skip_reshape=True, scale=1.0, **gqa_kwargs) + + return self.o_proj(output), present_key_value, shareable_kv + + +class TransformerBlockGemma4(nn.Module): + def __init__(self, config, index, device=None, dtype=None, ops=None): + super().__init__() + if config.sliding_attention is not None: + self.sliding_attention = config.sliding_attention[index % len(config.sliding_attention)] + else: + self.sliding_attention = False + + head_dim = config.head_dim if self.sliding_attention else config.global_head_dim + + self.self_attn = 
Gemma4Attention(config, head_dim=head_dim, device=device, dtype=dtype, ops=ops) + + num_kv_shared = config.num_kv_shared_layers + first_kv_shared = config.num_hidden_layers - num_kv_shared + mlp_size = config.intermediate_size * 2 if config.use_double_wide_mlp and index >= first_kv_shared else None + self.mlp = MLP(config, device=device, dtype=dtype, ops=ops, intermediate_size=mlp_size) + + self.input_layernorm = RMSNorm(config.hidden_size, eps=config.rms_norm_eps, device=device, dtype=dtype) + self.post_attention_layernorm = RMSNorm(config.hidden_size, eps=config.rms_norm_eps, device=device, dtype=dtype) + self.pre_feedforward_layernorm = RMSNorm(config.hidden_size, eps=config.rms_norm_eps, device=device, dtype=dtype) + self.post_feedforward_layernorm = RMSNorm(config.hidden_size, eps=config.rms_norm_eps, device=device, dtype=dtype) + + self.hidden_size_per_layer_input = config.hidden_size_per_layer_input + if self.hidden_size_per_layer_input: + self.per_layer_input_gate = ops.Linear(config.hidden_size, self.hidden_size_per_layer_input, bias=False, device=device, dtype=dtype) + self.per_layer_projection = ops.Linear(self.hidden_size_per_layer_input, config.hidden_size, bias=False, device=device, dtype=dtype) + self.post_per_layer_input_norm = RMSNorm(config.hidden_size, eps=config.rms_norm_eps, device=device, dtype=dtype) + self.register_buffer("layer_scalar", torch.ones(1, device=device, dtype=dtype)) + else: + self.layer_scalar = None + + def forward(self, x, attention_mask=None, freqs_cis=None, past_key_value=None, per_layer_input=None, shared_kv=None): + sliding_window = None + if self.sliding_attention: + sliding_window = self.sliding_attention + # For prefill > sliding window, add sliding window restriction to the causal mask. + if x.shape[1] > self.sliding_attention: + sw_mask = torch.zeros(x.shape[1], x.shape[1], dtype=x.dtype, device=x.device) + sw_mask.masked_fill_(torch.ones_like(sw_mask, dtype=torch.bool).tril_(-self.sliding_attention), torch.finfo(x.dtype).min) + attention_mask = attention_mask + sw_mask if attention_mask is not None else sw_mask + freqs_cis = freqs_cis[1] + else: + freqs_cis = freqs_cis[0] + + residual = x + x = self.input_layernorm(x) + x, present_key_value, shareable_kv = self.self_attn( + hidden_states=x, attention_mask=attention_mask, freqs_cis=freqs_cis, + past_key_value=past_key_value, sliding_window=sliding_window, shared_kv=shared_kv, + ) + x = self.post_attention_layernorm(x) + x = residual + x + + residual = x + x = self.pre_feedforward_layernorm(x) + x = self.mlp(x) + x = self.post_feedforward_layernorm(x) + x = residual + x + + if self.hidden_size_per_layer_input and per_layer_input is not None: + residual = x + x = self.per_layer_input_gate(x) + x = torch.nn.functional.gelu(x, approximate="tanh") + x = x * per_layer_input + x = self.per_layer_projection(x) + x = self.post_per_layer_input_norm(x) + x = residual + x + + if self.layer_scalar is not None: + x = x * self.layer_scalar + + return x, present_key_value, shareable_kv + + +class Gemma4Transformer(nn.Module): + def __init__(self, config, device=None, dtype=None, ops=None): + super().__init__() + self.config = config + + self.embed_tokens = _make_scaled_embedding(ops, config.vocab_size, config.hidden_size, config.hidden_size ** 0.5, device, dtype) + + self.layers = nn.ModuleList([ + TransformerBlockGemma4(config, index=i, device=device, dtype=dtype, ops=ops) + for i in range(config.num_hidden_layers) + ]) + + self.norm = RMSNorm(config.hidden_size, eps=config.rms_norm_eps, device=device, 
dtype=dtype) if config.final_norm else None + + # Precompute RoPE inv_freq on CPU to match reference code's exact value + rope_angles_global = int(config.partial_rotary_factor * config.global_head_dim // 2) + nope_global = config.global_head_dim // 2 - rope_angles_global + global_inv = 1.0 / (config.rope_theta[0] ** (torch.arange(0, 2 * rope_angles_global, 2).float() / config.global_head_dim)) + if nope_global > 0: + global_inv = torch.cat([global_inv, torch.zeros(nope_global)]) + self.register_buffer("_global_inv_freq", global_inv, persistent=False) + + sliding_inv = 1.0 / (config.rope_theta[1] ** (torch.arange(0, config.head_dim, 2).float() / config.head_dim)) + self.register_buffer("_sliding_inv_freq", sliding_inv, persistent=False) + + # Per-layer input mechanism + self.hidden_size_per_layer_input = config.hidden_size_per_layer_input + if self.hidden_size_per_layer_input: + self.embed_tokens_per_layer = _make_scaled_embedding(ops, config.vocab_size, config.num_hidden_layers * self.hidden_size_per_layer_input, self.hidden_size_per_layer_input ** 0.5, device, dtype) + self.per_layer_model_projection = ops.Linear( + config.hidden_size, config.num_hidden_layers * self.hidden_size_per_layer_input, + bias=False, device=device, dtype=dtype) + self.per_layer_projection_norm = RMSNorm( + self.hidden_size_per_layer_input, eps=config.rms_norm_eps, + device=device, dtype=dtype) + + def get_past_len(self, past_key_values): + for kv in past_key_values: + if len(kv) >= 3: + return kv[2] + return 0 + + def _freqs_from_inv(self, inv_freq, position_ids, device, dtype): + """Compute cos/sin from stored inv_freq""" + inv_exp = inv_freq[None, :, None].float().expand(position_ids.shape[0], -1, 1).to(device) + pos_exp = position_ids[:, None, :].float() + freqs = (inv_exp @ pos_exp).transpose(1, 2) + emb = torch.cat((freqs, freqs), dim=-1) + return emb.cos().unsqueeze(1).to(dtype), emb.sin().unsqueeze(1).to(dtype) + + def compute_freqs_cis(self, position_ids, device, dtype=None): + global_freqs = self._freqs_from_inv(self._global_inv_freq, position_ids, device, dtype) + sliding_freqs = self._freqs_from_inv(self._sliding_inv_freq, position_ids, device, dtype) + return [global_freqs, sliding_freqs] + + def forward(self, x, attention_mask=None, embeds=None, num_tokens=None, intermediate_output=None, + final_layer_norm_intermediate=True, dtype=None, position_ids=None, embeds_info=None, + past_key_values=None, input_ids=None): + if embeds is not None: + x = embeds + else: + x = self.embed_tokens(x, out_dtype=dtype) + + seq_len = x.shape[1] + past_len = 0 + if past_key_values is not None and len(past_key_values) > 0: + past_len = self.get_past_len(past_key_values) + + if position_ids is None: + position_ids = torch.arange(past_len, past_len + seq_len, device=x.device).unsqueeze(0) + + freqs_cis = self.compute_freqs_cis(position_ids, x.device, dtype=x.dtype) + + mask = None + min_val = torch.finfo(x.dtype).min + if attention_mask is not None: + mask = 1.0 - attention_mask.to(x.dtype).reshape((attention_mask.shape[0], 1, -1, attention_mask.shape[-1])).expand(attention_mask.shape[0], 1, seq_len, attention_mask.shape[-1]) + mask = mask.masked_fill(mask.to(torch.bool), min_val) + + if seq_len > 1: + causal_mask = torch.zeros(past_len + seq_len, past_len + seq_len, dtype=x.dtype, device=x.device) + causal_mask.masked_fill_(torch.ones_like(causal_mask, dtype=torch.bool).triu_(1), min_val) + mask = mask + causal_mask if mask is not None else causal_mask + + # Per-layer inputs + per_layer_inputs = None + if 
self.hidden_size_per_layer_input: + num_layers = self.config.num_hidden_layers + hpl = self.hidden_size_per_layer_input + per_layer_proj = self.per_layer_model_projection(x) * (1.0 / (self.config.hidden_size ** 0.5)) + per_layer_proj = self.per_layer_projection_norm(per_layer_proj.reshape(*x.shape[:-1], num_layers, hpl)) + if input_ids is not None and input_ids.shape[1] == x.shape[1]: + per_layer_emb = self.embed_tokens_per_layer(input_ids).reshape(*input_ids.shape, num_layers, hpl) + per_layer_inputs = (per_layer_proj + per_layer_emb) * (0.5 ** 0.5) + else: + per_layer_inputs = per_layer_proj + + # KV sharing: later layers reuse KV from the last non-shared sliding/global layer + num_kv_shared = self.config.num_kv_shared_layers + first_kv_shared = self.config.num_hidden_layers - num_kv_shared if num_kv_shared > 0 else self.config.num_hidden_layers + shared_sliding_kv = None # KV from last non-shared sliding layer + shared_global_kv = None # KV from last non-shared global layer + + intermediate = None + next_key_values = [] + for i, layer in enumerate(self.layers): + past_kv = past_key_values[i] if past_key_values is not None and len(past_key_values) > 0 else None + + layer_kwargs = {} + if per_layer_inputs is not None: + layer_kwargs['per_layer_input'] = per_layer_inputs[:, :, i, :] + + is_sliding = hasattr(layer, 'sliding_attention') and layer.sliding_attention + if i >= first_kv_shared and num_kv_shared > 0: + shared = shared_sliding_kv if is_sliding else shared_global_kv + if shared is not None: + layer_kwargs['shared_kv'] = shared + + x, current_kv, shareable_kv = layer(x=x, attention_mask=mask, freqs_cis=freqs_cis, past_key_value=past_kv, **layer_kwargs) + + next_key_values.append(current_kv if current_kv is not None else ()) + + # Only track the last sliding/global before the sharing boundary + if i < first_kv_shared and shareable_kv is not None: + if is_sliding: + shared_sliding_kv = shareable_kv + else: + shared_global_kv = shareable_kv + + if i == intermediate_output: + intermediate = x.clone() + + if self.norm is not None: + x = self.norm(x) + + if len(next_key_values) > 0: + return x, intermediate, next_key_values + return x, intermediate + + +class Gemma4Base(BaseLlama, BaseGenerate, torch.nn.Module): + """Common base for all Gemma4 variants: text model + vision.""" + def _init_model(self, config, dtype, device, operations): + self.num_layers = config.num_hidden_layers + self.model = Gemma4Transformer(config, device=device, dtype=dtype, ops=operations) + self.dtype = dtype + self.multi_modal_projector = Gemma4MultiModalProjector(config, dtype=dtype, device=device, ops=operations) + self.vision_model = Gemma4VisionEncoder(config.vision_config, dtype=dtype, device=device, ops=operations) + + def logits(self, x): + logits = super().logits(x) + cap = self.model.config.final_logit_softcapping + if cap: + logits = cap * torch.tanh(logits / cap) + return logits + + def init_kv_cache(self, batch, max_cache_len, device, execution_dtype): + past_key_values = [] + for _ in range(self.model.config.num_hidden_layers): + past_key_values.append(()) + return past_key_values + + def preprocess_embed(self, embed, device): + if embed["type"] == "image": + image = embed.pop("data").movedim(-1, 1) # [B, H, W, C] -> [B, C, H, W] + max_soft_tokens = embed.get("max_soft_tokens", None) + vision_out = self.vision_model(image.to(device, dtype=torch.float32), max_soft_tokens=max_soft_tokens) + return self.multi_modal_projector(vision_out), None + return None, None + + +class Gemma4AudioMixin: + """Adds 
audio support to a Gemma4 model.""" + def _init_audio(self, config, dtype, device, operations): + self.audio_model = Gemma4AudioEncoder(config.audio_config, dtype=dtype, device=device, ops=operations) + self.audio_projector = Gemma4AudioProjector({"audio_output_proj_dims": config.audio_config["output_proj_dims"], "text_hidden_size": config.hidden_size, "rms_norm_eps": config.rms_norm_eps}, dtype=dtype, device=device, ops=operations) + + def preprocess_embed(self, embed, device): + result, extra = super().preprocess_embed(embed, device) + if result is not None: + return result, extra + if embed["type"] == "audio": + audio = embed.pop("data").to(device, dtype=torch.float32) + audio_mask = embed.pop("mask", None) + if audio_mask is not None: + audio_mask = audio_mask.to(device) + audio_out = self.audio_model(audio, audio_mask=audio_mask) + return self.audio_projector(audio_out), None + return None, None + + +# Vision Encoder + +def _compute_vision_2d_rope(head_dim, pixel_position_ids, theta=100.0, device=None): + """Compute 2D RoPE for vision: separate frequencies for x and y dimensions. + + Args: + head_dim: dimension per head (e.g. 64) + pixel_position_ids: [batch, num_patches, 2] with (x, y) coords + theta: RoPE base frequency + Returns: + (cos, sin) each of shape [batch, num_patches, head_dim] + """ + rotary_dim_per_axis = head_dim // 2 + freq_indices = torch.arange(0, rotary_dim_per_axis, 2, device=device).float() + inv_freq = 1.0 / (theta ** (freq_indices / rotary_dim_per_axis)) + + all_cos, all_sin = [], [] + for i in range(2): # x and y + dim_positions = pixel_position_ids[:, :, i].float() # [batch, num_patches] + freqs = torch.einsum('bi,j->bij', dim_positions, inv_freq.to(device)) # [batch, num_patches, rotary_dim/2] + emb = torch.cat([freqs, freqs], dim=-1) # [batch, num_patches, rotary_dim] + all_cos.append(emb.cos()) + all_sin.append(emb.sin()) + + cos = torch.cat(all_cos, dim=-1).to(pixel_position_ids.device) # [batch, num_patches, head_dim] + sin = torch.cat(all_sin, dim=-1).to(pixel_position_ids.device) + return cos, sin + + +def _apply_vision_2d_rope(x, freqs): + """Apply 2D RoPE (multidimensional) to vision query/key states. + + Splits x and cos/sin into ndim=2 parts, applies 1D RoPE to each independently. + + x: [batch, heads, seq, head_dim] + freqs: (cos, sin) each [batch, seq, head_dim] + """ + cos = freqs[0].unsqueeze(1) # [batch, 1, seq, head_dim] + sin = freqs[1].unsqueeze(1) + half = x.shape[-1] // 2 + a = _apply_rotary_pos_emb(x[..., :half], (cos[..., :half], sin[..., :half])) + b = _apply_rotary_pos_emb(x[..., half:], (cos[..., half:], sin[..., half:])) + return torch.cat([a, b], dim=-1) + + +class ClippedLinear(nn.Module): + """Linear layer with activation clipping (from quantization-aware training). + + Stores input_max/min and output_max/min as buffers loaded from checkpoint. 
+ """ + def __init__(self, in_features, out_features, bias=False, device=None, dtype=None, ops=None): + super().__init__() + self.linear = ops.Linear(in_features, out_features, bias=bias, device=device, dtype=dtype) + self.register_buffer('input_max', torch.tensor(float('inf'), device=device, dtype=dtype)) + self.register_buffer('input_min', torch.tensor(float('-inf'), device=device, dtype=dtype)) + self.register_buffer('output_max', torch.tensor(float('inf'), device=device, dtype=dtype)) + self.register_buffer('output_min', torch.tensor(float('-inf'), device=device, dtype=dtype)) + + @property + def weight(self): + return self.linear.weight + + def forward(self, x): + x = x.clamp(min=self.input_min, max=self.input_max) + x = self.linear(x) + return x.clamp_(min=self.output_min, max=self.output_max) + + +class Gemma4VisionMLP(nn.Module): + """SwiGLU MLP matching gate_proj/up_proj/down_proj structure.""" + def __init__(self, config, device=None, dtype=None, ops=None): + super().__init__() + hidden_size = config["hidden_size"] + intermediate_size = config["intermediate_size"] + self.gate_proj = ClippedLinear(hidden_size, intermediate_size, device=device, dtype=dtype, ops=ops) + self.up_proj = ClippedLinear(hidden_size, intermediate_size, device=device, dtype=dtype, ops=ops) + self.down_proj = ClippedLinear(intermediate_size, hidden_size, device=device, dtype=dtype, ops=ops) + + def forward(self, x): + return self.down_proj(torch.nn.functional.gelu(self.gate_proj(x), approximate="tanh") * self.up_proj(x)) + + +class Gemma4VisionAttention(nn.Module): + def __init__(self, config, device=None, dtype=None, ops=None): + super().__init__() + self.hidden_size = config["hidden_size"] + self.num_heads = config["num_attention_heads"] + self.head_dim = config.get("head_dim", self.hidden_size // self.num_heads) + + self.q_proj = ClippedLinear(self.hidden_size, self.num_heads * self.head_dim, device=device, dtype=dtype, ops=ops) + self.k_proj = ClippedLinear(self.hidden_size, self.num_heads * self.head_dim, device=device, dtype=dtype, ops=ops) + self.v_proj = ClippedLinear(self.hidden_size, self.num_heads * self.head_dim, device=device, dtype=dtype, ops=ops) + self.o_proj = ClippedLinear(self.num_heads * self.head_dim, self.hidden_size, device=device, dtype=dtype, ops=ops) + + self.q_norm = RMSNorm(self.head_dim, eps=config["rms_norm_eps"], device=device, dtype=dtype) + self.k_norm = RMSNorm(self.head_dim, eps=config["rms_norm_eps"], device=device, dtype=dtype) + + def forward(self, x, freqs, attention_mask=None): + batch_size, seq_length, _ = x.shape + + xq = self.q_proj(x).view(batch_size, seq_length, self.num_heads, self.head_dim) + xk = self.k_proj(x).view(batch_size, seq_length, self.num_heads, self.head_dim) + xv = self.v_proj(x).view(batch_size, seq_length, self.num_heads, self.head_dim) + + xq = self.q_norm(xq).transpose(1, 2) + xk = self.k_norm(xk).transpose(1, 2) + xv = rms_norm(xv) + + xq = _apply_vision_2d_rope(xq, freqs) + xk = _apply_vision_2d_rope(xk, freqs) + + xv = xv.to(xq.dtype).transpose(1, 2) + + output = optimized_attention_for_device(xq.device, mask=attention_mask is not None, small_input=True)(xq, xk, xv, self.num_heads, mask=attention_mask, skip_reshape=True, scale=1.0) + return self.o_proj(output) + + +class Gemma4VisionLayer(nn.Module): + def __init__(self, config, device=None, dtype=None, ops=None): + super().__init__() + self.self_attn = Gemma4VisionAttention(config, device=device, dtype=dtype, ops=ops) + self.mlp = Gemma4VisionMLP(config, device=device, dtype=dtype, ops=ops) 
+ norm_kwargs = dict(eps=config["rms_norm_eps"], device=device, dtype=dtype) + hidden = config["hidden_size"] + self.input_layernorm = RMSNorm(hidden, **norm_kwargs) + self.post_attention_layernorm = RMSNorm(hidden, **norm_kwargs) + self.pre_feedforward_layernorm = RMSNorm(hidden, **norm_kwargs) + self.post_feedforward_layernorm = RMSNorm(hidden, **norm_kwargs) + + def forward(self, x, freqs, attention_mask=None): + residual = x + x = self.input_layernorm(x) + x = self.self_attn(x, freqs, attention_mask=attention_mask) + x = self.post_attention_layernorm(x) + x = residual + x + + residual = x + x = self.pre_feedforward_layernorm(x) + x = self.mlp(x) + x = self.post_feedforward_layernorm(x) + x = residual + x + return x + + +class Gemma4PatchEmbedder(nn.Module): + """Patch embedding with learned 2D position embeddings via one-hot lookup.""" + def __init__(self, config, device=None, dtype=None, ops=None): + super().__init__() + hidden_size = config["hidden_size"] + patch_size = config["patch_size"] + self.patch_size = patch_size + self.position_embedding_size = config.get("position_embedding_size", 10240) + + self.input_proj = ops.Linear(3 * patch_size * patch_size, hidden_size, bias=False, device=device, dtype=dtype) + self.position_embedding_table = nn.Parameter( + torch.empty(2, self.position_embedding_size, hidden_size, device=device, dtype=dtype) + ) + + def forward(self, patches, pixel_position_ids): + """ + patches: [B, num_patches, 3*patch_size²] in [0,1] range (normalized to [-1,1] inside, matching HF) + pixel_position_ids: [B, num_patches, 2] with (x,y) positions, (-1,-1) for padding + """ + hidden_states = self.input_proj((2.0 * (patches - 0.5)).to(self.input_proj.weight.dtype)) + + clamped_positions = pixel_position_ids.clamp(min=0) + pos_table = comfy.model_management.cast_to_device(self.position_embedding_table, hidden_states.device, hidden_states.dtype) + position_embeddings = pos_table[0][clamped_positions[..., 0]] + pos_table[1][clamped_positions[..., 1]] + + # Zero out position embeddings for padding patches (matching HF) + padding_positions = (pixel_position_ids == -1).all(dim=-1) + position_embeddings = torch.where(padding_positions.unsqueeze(-1), 0.0, position_embeddings) + + return hidden_states + position_embeddings + + +class Gemma4VisionEncoderLayers(nn.Module): + """Wrapper to produce state dict keys as encoder.layers.X.*""" + def __init__(self, config, dtype=None, device=None, ops=None): + super().__init__() + self.layers = nn.ModuleList([ + Gemma4VisionLayer(config, device=device, dtype=dtype, ops=ops) + for _ in range(config["num_hidden_layers"]) + ]) + + +class Gemma4VisionEncoder(nn.Module): + def __init__(self, config, dtype=None, device=None, ops=None): + super().__init__() + self.config = config + self.hidden_size = config["hidden_size"] + self.head_dim = config.get("head_dim", config["hidden_size"] // config["num_attention_heads"]) + self.patch_size = config["patch_size"] + self.pooling_kernel_size = config.get("pooling_kernel_size", 3) + self.root_hidden_size = self.hidden_size ** 0.5 + + self.patch_embedder = Gemma4PatchEmbedder(config, device=device, dtype=dtype, ops=ops) + self.encoder = Gemma4VisionEncoderLayers(config, dtype=dtype, device=device, ops=ops) + + def forward(self, pixel_values, max_soft_tokens=None): + """ + pixel_values: [B, C, H, W] in [0,1] range + max_soft_tokens: if provided, pad to max_soft_tokens * k² total patches + """ + batch_size, _, height, width = pixel_values.shape + ps = self.patch_size + k = self.pooling_kernel_size + 
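+        # Patchify sketch: [B, C, H, W] -> [B, (H/ps)*(W/ps), 3*ps*ps] non-overlapping
+        # ps x ps patches in row-major order; position_ids record each patch's (x, y)
+        # grid coordinate, with (-1, -1) reserved for padding patches.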
patches_h, patches_w = height // ps, width // ps + num_patches = patches_h * patches_w + output_length = max_soft_tokens if max_soft_tokens is not None else num_patches // (k * k) + n_padding = output_length * k * k - num_patches + + # Patchify and build position grid + patches = pixel_values.reshape(batch_size, -1, patches_h, ps, patches_w, ps) + patches = patches.permute(0, 2, 4, 3, 5, 1).reshape(batch_size, num_patches, -1) + grid_y, grid_x = torch.meshgrid(torch.arange(patches_h, device=pixel_values.device), torch.arange(patches_w, device=pixel_values.device), indexing='ij') + position_ids = torch.stack([grid_x.flatten(), grid_y.flatten()], dim=-1).unsqueeze(0).expand(batch_size, -1, -1) + + # Append zero-pixel padding with (-1,-1) positions + if n_padding > 0: + patches = torch.cat([patches, patches.new_zeros(batch_size, n_padding, patches.shape[-1])], dim=1) + position_ids = torch.cat([position_ids, position_ids.new_full((batch_size, n_padding, 2), -1)], dim=1) + + padding = (position_ids == -1).all(dim=-1) + + # Embed, encode, pool + x = self.patch_embedder(patches, position_ids) + freqs = _compute_vision_2d_rope(self.head_dim, position_ids, device=pixel_values.device) + freqs = tuple(t.to(x.dtype) for t in freqs) + if n_padding > 0: + mask = padding.unsqueeze(1).unsqueeze(2).expand(-1, 1, position_ids.shape[1], -1) + mask = torch.zeros_like(mask, dtype=x.dtype).masked_fill_(mask, torch.finfo(x.dtype).min) + else: + mask = None + + for layer in self.encoder.layers: + x = layer(x, freqs, attention_mask=mask) + + if n_padding > 0: + x = x.masked_fill(padding.unsqueeze(-1), 0.0) + + # Average pool by spatial position + clamped = position_ids.clamp(min=0) + max_x = clamped[:, :, 0].max(dim=-1, keepdim=True)[0] + 1 + ki = torch.div(clamped, k, rounding_mode="floor") + ki = ki[:, :, 0] + (max_x // k) * ki[:, :, 1] + weights = torch.nn.functional.one_hot(ki.long(), output_length).float() / (k * k) + x = (weights.transpose(1, 2) @ x.float()).to(x.dtype) + + # Strip empty output tokens + valid_out = ~((weights == 0).all(dim=1)) + if valid_out.any() and not valid_out.all(): + x = x[:, valid_out[0]] if batch_size > 1 else x[valid_out].unsqueeze(0) + + return x * self.root_hidden_size + + +class Gemma4RMSNormProjector(nn.Module): + """Shared projector: parameterless RMSNorm → linear. 
Used for both vision and audio.""" + def __init__(self, in_dim, out_dim, dtype=None, device=None, ops=None): + super().__init__() + self.embedding_projection = ops.Linear(in_dim, out_dim, bias=False, device=device, dtype=dtype) + + def forward(self, x): + return self.embedding_projection(rms_norm(x)) + + +class Gemma4MultiModalProjector(Gemma4RMSNormProjector): + def __init__(self, config, dtype=None, device=None, ops=None): + super().__init__(config.vision_config["hidden_size"], config.hidden_size, dtype=dtype, device=device, ops=ops) + + +# Audio Encoder + +class Gemma4AudioConvSubsampler(nn.Module): + """2D convolution subsampling for audio features""" + def __init__(self, config, device=None, dtype=None, ops=None): + super().__init__() + eps = config["rms_norm_eps"] + self.layer0 = nn.ModuleDict({ + 'conv': ops.Conv2d(1, 128, kernel_size=3, stride=2, padding=1, bias=False, device=device, dtype=dtype), + 'norm': ops.LayerNorm(128, eps=eps, elementwise_affine=True, bias=False, device=device, dtype=dtype), + }) + self.layer1 = nn.ModuleDict({ + 'conv': ops.Conv2d(128, 32, kernel_size=3, stride=2, padding=1, bias=False, device=device, dtype=dtype), + 'norm': ops.LayerNorm(32, eps=eps, elementwise_affine=True, bias=False, device=device, dtype=dtype), + }) + # proj_input_dim = (128 // 4) * 32 = 1024 + self.input_proj_linear = ops.Linear(1024, config["hidden_size"], bias=False, device=device, dtype=dtype) + + def _conv_layer(self, x, layer, mask): + if mask is not None: + x = x * mask[:, None, :, None].to(x.device) + x = layer['conv'](x.to(layer['conv'].weight.dtype)) + x = torch.relu(layer['norm'](x.permute(0, 2, 3, 1)).permute(0, 3, 1, 2).contiguous()) + if mask is not None: + mask = mask[:, ::2] + return x, mask + + def forward(self, x, mask=None): + x = x.unsqueeze(1) + x, mask = self._conv_layer(x, self.layer0, mask) + x, mask = self._conv_layer(x, self.layer1, mask) + batch_size, _, seq_len, _ = x.shape + x = x.permute(0, 2, 3, 1).contiguous().reshape(batch_size, seq_len, -1) + return self.input_proj_linear(x), mask + + +class Gemma4AudioFeedForward(nn.Module): + """Conformer feed-forward with residual scaling.""" + def __init__(self, config, device=None, dtype=None, ops=None): + super().__init__() + hidden_size = config["hidden_size"] + intermediate_size = config.get("intermediate_size", hidden_size * 4) + self.pre_layer_norm = RMSNorm(hidden_size, eps=config["rms_norm_eps"], device=device, dtype=dtype) + self.ffw_layer_1 = ClippedLinear(hidden_size, intermediate_size, device=device, dtype=dtype, ops=ops) + self.ffw_layer_2 = ClippedLinear(intermediate_size, hidden_size, device=device, dtype=dtype, ops=ops) + self.post_layer_norm = RMSNorm(hidden_size, eps=config["rms_norm_eps"], device=device, dtype=dtype) + self.post_layer_scale = config.get("residual_weight", 0.5) + + def forward(self, x): + residual = x + x = self.pre_layer_norm(x) + x = torch.nn.functional.silu(self.ffw_layer_1(x)) + x = self.ffw_layer_2(x) + x = self.post_layer_norm(x) + x = x * self.post_layer_scale + return x + residual + + +class Gemma4AudioRelPositionalEncoding(nn.Module): + """Sinusoidal relative positional encoding for audio attention.""" + def __init__(self, config, device=None, dtype=None): + super().__init__() + hidden_size = config["hidden_size"] + context_left = config.get("attention_context_left", 13) + context_right = config.get("attention_context_right", 0) + self.chunk_size = config.get("attention_chunk_size", 12) + self.context_size = self.chunk_size + context_left - 1 + context_right + + 
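+        # Standard transformer sinusoid schedule: timescales run geometrically from
+        # 1 to 10000 over N = hidden_size // 2 channels, i.e.
+        #   inv_timescales[i] = exp(-i * log(10000) / (N - 1))
+        # and sin/cos of (position * inv_timescale) are concatenated in forward().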
num_timescales = hidden_size // 2 + log_inc = math.log(10000.0) / max(num_timescales - 1, 1) + inv_timescales = torch.exp(torch.arange(num_timescales) * -log_inc).to(dtype=dtype).unsqueeze(0).unsqueeze(0) + self.register_buffer("inv_timescales", inv_timescales, persistent=False) + + def forward(self, hidden_states): + positions = torch.arange(self.chunk_size, -1, -1, device=hidden_states.device).unsqueeze(-1) + scaled = positions * self.inv_timescales.to(device=hidden_states.device) + return torch.cat([torch.sin(scaled), torch.cos(scaled)], dim=-1).to(dtype=hidden_states.dtype) + + +class Gemma4AudioAttention(nn.Module): + """Chunked block attention with relative position bias and softcap.""" + def __init__(self, config, device=None, dtype=None, ops=None): + super().__init__() + self.hidden_size = config["hidden_size"] + self.num_heads = config["num_attention_heads"] + self.head_dim = self.hidden_size // self.num_heads + self.chunk_size = config.get("attention_chunk_size", 12) + self.max_past_horizon = config.get("attention_context_left", 13) - 1 + self.max_future_horizon = config.get("attention_context_right", 0) + self.context_size = self.chunk_size + self.max_past_horizon + self.max_future_horizon + + self.q_scale = (self.head_dim ** -0.5) / math.log(2) + self.k_scale = math.log(1 + math.e) / math.log(2) + self.register_buffer("softcap", torch.tensor(config.get("attention_logit_cap", 50.0), dtype=dtype), persistent=False) + + self.q_proj = ClippedLinear(self.hidden_size, self.hidden_size, device=device, dtype=dtype, ops=ops) + self.k_proj = ClippedLinear(self.hidden_size, self.hidden_size, device=device, dtype=dtype, ops=ops) + self.v_proj = ClippedLinear(self.hidden_size, self.hidden_size, device=device, dtype=dtype, ops=ops) + self.post = ClippedLinear(self.hidden_size, self.hidden_size, device=device, dtype=dtype, ops=ops) + self.per_dim_scale = nn.Parameter(torch.empty(self.head_dim, device=device, dtype=dtype)) + self.relative_k_proj = ops.Linear(self.hidden_size, self.hidden_size, bias=False, device=device, dtype=dtype) + + def _convert_to_block(self, x): + B, S, H, D = x.shape + num_blocks = (S + self.chunk_size - 1) // self.chunk_size + pad = num_blocks * self.chunk_size - S + x = torch.nn.functional.pad(x, (0, 0, 0, 0, 0, pad)) + return x.reshape(B, num_blocks, self.chunk_size, H, D).contiguous() + + def _extract_block_context(self, x): + x = torch.nn.functional.pad(x, (0, 0, 0, 0, self.max_past_horizon, self.max_future_horizon + self.chunk_size - 1)) + x = x.unfold(1, self.context_size, self.chunk_size) + return torch.movedim(x, -1, 2).contiguous() + + def _rel_shift(self, x): + B, H, NB, BS, PL = x.shape + CS = self.context_size + x = torch.nn.functional.pad(x, (0, CS + 1 - PL)) + x = x.view(B, H, NB, BS * (CS + 1)) + x = x[..., :BS * CS] + return x.view(B, H, NB, BS, CS) + + def _build_blocked_mask(self, seq_len, num_blocks, device, audio_mask=None): + """Build 5D boolean blocked attention mask (True=attend, False=mask)""" + q = torch.arange(seq_len, device=device) + dist = q[:, None] - q[None, :] + mask = (dist >= 0) & (dist < self.max_past_horizon) + if self.max_future_horizon > 0: + mask = mask | ((dist < 0) & ((-dist) < self.max_future_horizon)) + if audio_mask is not None: + mask = mask & audio_mask[0, None, :].bool() + m = mask[None, None] + # Reshape to blocked 5D matching reference code + p = num_blocks * self.chunk_size - seq_len + m = torch.nn.functional.pad(m, (0, p, 0, p), value=False) + m = m.reshape(1, 1, num_blocks, self.chunk_size, -1) + m = 
torch.nn.functional.pad(m, (self.max_past_horizon, self.max_future_horizon), value=False) + idx = (torch.arange(num_blocks, device=device) * self.chunk_size)[:, None] + torch.arange(self.context_size, device=device)[None, :] + return m.gather(-1, idx[None, None, :, None, :].expand(1, 1, -1, self.chunk_size, -1)) + + def forward(self, x, position_embeddings=None, attn_mask=None): + B, S, _ = x.shape + + q = self.q_proj(x).float().view(B, S, self.num_heads, self.head_dim) + k = self.k_proj(x).float().view(B, S, self.num_heads, self.head_dim) + v = self.v_proj(x).float().view(B, S, self.num_heads, self.head_dim) + + q = q * self.q_scale * torch.nn.functional.softplus(self.per_dim_scale) + k = k * self.k_scale + + q_blocks = self._convert_to_block(q) + k_context = self._extract_block_context(k) + v_context = self._extract_block_context(v) + num_blocks = q_blocks.shape[1] + + rel_k = self.relative_k_proj(position_embeddings).view(-1, self.num_heads, self.head_dim).to(q.dtype) + + queries = q_blocks.permute(0, 3, 1, 2, 4) # [B, H, NB, CS, D] + matrix_ac = queries @ k_context.permute(0, 3, 1, 4, 2) + + queries_flat = queries.reshape(B, self.num_heads, -1, self.head_dim) + matrix_bd = queries_flat @ rel_k.permute(1, 2, 0) + matrix_bd = matrix_bd.reshape(B, self.num_heads, num_blocks, self.chunk_size, -1) + matrix_bd = self._rel_shift(matrix_bd) + + attn_weights = matrix_ac + matrix_bd + attn_weights = torch.tanh(attn_weights / self.softcap) * self.softcap + + # Mask out invalid positions in chunk context (matching reference's masked_fill approach) + if attn_mask is None: + attn_mask = self._build_blocked_mask(S, num_blocks, x.device) + attn_weights = attn_weights.masked_fill(attn_mask.logical_not(), -1e9) + + attn_weights = torch.nn.functional.softmax(attn_weights, dim=-1, dtype=torch.float32).to(v.dtype) + out = attn_weights @ v_context.permute(0, 3, 1, 2, 4) + out = out.permute(0, 2, 3, 1, 4).reshape(B, num_blocks * self.chunk_size, -1) + out = out[:, :S].contiguous() + return self.post(out.to(self.post.linear.weight.dtype)) + + +class Gemma4AudioLConv1d(nn.Module): + """Lightweight convolution with standard GLU.""" + def __init__(self, config, device=None, dtype=None, ops=None): + super().__init__() + hidden_size = config["hidden_size"] + conv_kernel_size = config.get("conv_kernel_size", 5) + self.pre_layer_norm = RMSNorm(hidden_size, eps=config["rms_norm_eps"], device=device, dtype=dtype) + self.linear_start = ClippedLinear(hidden_size, hidden_size * 2, device=device, dtype=dtype, ops=ops) + # Causal conv: left-pad only + self.depthwise_conv1d = ops.Conv1d(hidden_size, hidden_size, kernel_size=conv_kernel_size, padding=0, groups=hidden_size, bias=False, device=device, dtype=dtype) + self.conv_left_pad = conv_kernel_size - 1 # causal: pad left by kernel-1 + self.conv_norm = RMSNorm(hidden_size, eps=config["rms_norm_eps"], device=device, dtype=dtype) + self.linear_end = ClippedLinear(hidden_size, hidden_size, device=device, dtype=dtype, ops=ops) + + def forward(self, x): + residual = x + x = self.pre_layer_norm(x) + x = self.linear_start(x) + x = torch.nn.functional.glu(x, dim=-1) + x = x.transpose(1, 2) + x = torch.nn.functional.pad(x, (self.conv_left_pad, 0)) + x = self.depthwise_conv1d(x).transpose(1, 2) + x = self.conv_norm(x) + x = torch.nn.functional.silu(x) + x = self.linear_end(x) + return x + residual + + +class Gemma4AudioLayer(nn.Module): + """Conformer block: FFN1 -> Attention -> LConv -> FFN2.""" + def __init__(self, config, device=None, dtype=None, ops=None): + super().__init__() + 
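+        # Conformer "macaron" layout: the feed-forwards contribute half-step residuals
+        # (post_layer_scale = 0.5 in Gemma4AudioFeedForward), so the block is roughly
+        # (norms omitted):
+        #   x = x + 0.5*ffn1(x);  x = x + attn(x);  x = x + lconv(x);  x = x + 0.5*ffn2(x)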
self.feed_forward1 = Gemma4AudioFeedForward(config, device=device, dtype=dtype, ops=ops) + self.self_attn = Gemma4AudioAttention(config, device=device, dtype=dtype, ops=ops) + norm_kwargs = dict(eps=config["rms_norm_eps"], device=device, dtype=dtype) + hidden_size = config["hidden_size"] + self.norm_pre_attn = RMSNorm(hidden_size, **norm_kwargs) + self.norm_post_attn = RMSNorm(hidden_size, **norm_kwargs) + self.lconv1d = Gemma4AudioLConv1d(config, device=device, dtype=dtype, ops=ops) + self.feed_forward2 = Gemma4AudioFeedForward(config, device=device, dtype=dtype, ops=ops) + self.norm_out = RMSNorm(hidden_size, **norm_kwargs) + + def forward(self, x, position_embeddings=None, attn_mask=None): + x = self.feed_forward1(x) + + residual = x + x = self.norm_pre_attn(x) + x = self.self_attn(x, position_embeddings=position_embeddings, attn_mask=attn_mask) + x = self.norm_post_attn(x) + x = x + residual + + x = self.lconv1d(x) + x = self.feed_forward2(x) + + x = self.norm_out(x) + return x + + +class Gemma4AudioEncoder(nn.Module): + def __init__(self, config, dtype=None, device=None, ops=None): + super().__init__() + self.hidden_size = config["hidden_size"] + self.output_proj_dims = config.get("output_proj_dims", 1536) + + self.subsample_conv_projection = Gemma4AudioConvSubsampler(config, device=device, dtype=dtype, ops=ops) + self.rel_pos_enc = Gemma4AudioRelPositionalEncoding(config, device=device, dtype=dtype) + + self.layers = nn.ModuleList([ + Gemma4AudioLayer(config, device=device, dtype=dtype, ops=ops) + for _ in range(config["num_hidden_layers"]) + ]) + + self.output_proj = ops.Linear(self.hidden_size, self.output_proj_dims, bias=True, device=device, dtype=dtype) + + def forward(self, audio_features, audio_mask=None): + x, audio_mask = self.subsample_conv_projection(audio_features, audio_mask) + position_embeddings = self.rel_pos_enc(x) + + # Build blocked attention mask once for all layers + attn_mask = self.layers[0].self_attn._build_blocked_mask( + x.shape[1], (x.shape[1] + self.layers[0].self_attn.chunk_size - 1) // self.layers[0].self_attn.chunk_size, + x.device, audio_mask=audio_mask) + + for layer in self.layers: + x = layer(x, position_embeddings=position_embeddings, attn_mask=attn_mask) + + x = self.output_proj(x) + return x + + +class Gemma4AudioProjector(Gemma4RMSNormProjector): + def __init__(self, config, dtype=None, device=None, ops=None): + super().__init__(config.get("audio_output_proj_dims", 1536), config.get("text_hidden_size", 2560), dtype=dtype, device=device, ops=ops) + + +# Tokenizer and Wrappers + +class Gemma4_Tokenizer(): + tokenizer_json_data = None + + def state_dict(self): + if self.tokenizer_json_data is not None: + return {"tokenizer_json": self.tokenizer_json_data} + return {} + + def _extract_mel_spectrogram(self, waveform, sample_rate): + """Extract 128-bin log mel spectrogram. + Uses numpy for FFT/matmul/log to produce bit-identical results with reference code. 
+        """
+        # Mix to mono first, then resample to 16kHz
+        if waveform.dim() > 1 and waveform.shape[0] > 1:
+            waveform = waveform.mean(dim=0, keepdim=True)
+        if waveform.dim() == 1:
+            waveform = waveform.unsqueeze(0)
+        audio = waveform.squeeze(0).float().numpy()
+        if sample_rate != 16000:
+            # Use scipy's resample_poly with a high-quality FIR filter to get as close as possible to librosa's resampling (still not an exact match)
+            from scipy.signal import resample_poly, firwin
+            from math import gcd
+            g = gcd(sample_rate, 16000)
+            up, down = 16000 // g, sample_rate // g
+            L = max(up, down)
+            h = firwin(160 * L + 1, 0.96 / L, window=('kaiser', 6.5))
+            audio = resample_poly(audio, up, down, window=h).astype(np.float32)
+        n = len(audio)
+
+        # Pad to multiple of 128, build sample-level mask
+        if n % 128 != 0:
+            audio = np.pad(audio, (0, 128 - n % 128))
+        mask_raw = np.ones(len(audio), dtype=np.float32)
+        mask_raw[n:] = 0.0
+
+        # Semicausal padding: 160 zeros prepended
+        audio = np.pad(audio, (160, 0))
+        mask_raw = np.pad(mask_raw, (160, 0))
+
+        # Extract 321-sample frames via stride tricks, drop last → 320
+        nf = (len(audio) - 321) // 160 + 1
+        strides = (audio.strides[0] * 160, audio.strides[0])
+        frames = np.lib.stride_tricks.as_strided(audio, (nf, 321), strides)[..., :-1].copy()
+
+        # Periodic Hann window, FFT magnitude, mel filterbank, log
+        window = (0.5 - 0.5 * np.cos(2 * np.pi * np.arange(320) / 320)).astype(np.float32)
+        magnitude = np.abs(np.fft.rfft(frames * window, n=512, axis=-1))
+        mel_fb = self._build_mel_filterbank()
+        log_mel = np.log(np.matmul(magnitude, mel_fb) + np.float64(0.001)).astype(np.float32)
+
+        # Frame mask: valid when last sample in window is real audio
+        mask = mask_raw[np.arange(nf) * 160 + 320].astype(bool)
+        log_mel = log_mel * mask[:, None]
+        return torch.from_numpy(log_mel), torch.from_numpy(mask) # [T, 128], [T]
+
+    @staticmethod
+    def _build_mel_filterbank():
+        """Build 128-bin HTK mel filterbank [257, 128] for 512-pt FFT at 16kHz."""
+        mel_freqs = np.linspace(0.0, 2595.0 * np.log10(1.0 + 8000.0 / 700.0), 130)
+        filter_freqs = 700.0 * (10.0 ** (mel_freqs / 2595.0) - 1.0)
+        fft_freqs = np.linspace(0, 16000 // 2, 257)
+        filter_diff = np.diff(filter_freqs)
+        slopes = np.expand_dims(filter_freqs, 0) - np.expand_dims(fft_freqs, 1)
+        down_slopes = -slopes[:, :-2] / filter_diff[:-1]
+        up_slopes = slopes[:, 2:] / filter_diff[1:]
+        return np.maximum(np.zeros(1), np.minimum(down_slopes, up_slopes))
+
+    def tokenize_with_weights(self, text, return_word_ids=False, image=None, audio=None, video=None, llama_template=None, skip_template=True, thinking=False, **kwargs):
+
+        # Process audio
+        audio_features = []
+        if audio is not None:
+            waveform = audio["waveform"].squeeze(0) if hasattr(audio, "__getitem__") else audio
+            sample_rate = audio.get("sample_rate", 16000) if hasattr(audio, "get") else 16000
+            mel, mel_mask = self._extract_mel_spectrogram(waveform, sample_rate)
+            audio_features = [(mel.unsqueeze(0), mel_mask.unsqueeze(0))] # ([1, T, 128], [1, T])
+
+        # Process image/video frames
+        is_video = video is not None
+        source = video if is_video else image
+        images = []
+        if source is not None:
+            samples = source.movedim(-1, 1) # [B, C, H, W]
+            num_frames = samples.shape[0]
+
+            # Subsample video to 1fps
+            if is_video:
+                fps = kwargs.get("fps", 24)
+                step = max(1, round(fps))
+                indices = list(range(0, num_frames, step))
+                if len(indices) == 0:
+                    indices = [0]
+                samples = samples[indices]
+                num_frames = len(indices)
+
+            h, w = samples.shape[2], samples.shape[3]
+            patch_size = 16
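+            # Sizing: each soft token covers a pooling_k x pooling_k group of
+            # patch_size x patch_size pixel patches (a 48x48 block), so the pixel
+            # budget below is max_soft_tokens * pooling_k**2 * patch_size**2, and
+            # both sides are snapped down to multiples of pooling_k * patch_size.
+            # Example: a 1024x1024 image with max_soft_tokens=280 is resized to
+            # 768x768, which yields (768 / 48)**2 = 256 soft tokens.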
+ pooling_k = 3 + max_soft_tokens = 70 if is_video else 280 # video uses smaller token budget per frame + max_patches = max_soft_tokens * pooling_k * pooling_k + target_px = max_patches * patch_size * patch_size + factor = (target_px / (h * w)) ** 0.5 + side_mult = pooling_k * patch_size + target_h = max(int(factor * h // side_mult) * side_mult, side_mult) + target_w = max(int(factor * w // side_mult) * side_mult, side_mult) + + import torchvision.transforms.functional as TVF + for i in range(num_frames): + # rescaling to match reference code + s = (samples[i].clamp(0, 1) * 255).to(torch.uint8) # [C, H, W] uint8 + if target_h != h or target_w != w: + s = TVF.resize(s, [target_h, target_w], interpolation=TVF.InterpolationMode.BICUBIC, antialias=True) + s = s.float() * (1.0 / 255.0) + images.append({"pixels": s.unsqueeze(0).movedim(1, -1)[:, :, :, :3], "max_soft_tokens": max_soft_tokens}) + + if text.startswith('<|turn>'): + skip_template = True + + if skip_template: + llama_text = text + else: + if llama_template is not None: + llama_text = llama_template.format(text) + else: + # Build template from modalities present + system = "<|turn>system\n<|think|>\n" if thinking else "" + media = "" + if len(images) > 0: + if is_video: + media += "\n\n" + for i in range(len(images)): + ts = f"{int(i // 60):02d}:{int(i % 60):02d}" + sep = "" if i == 0 else " " + media += f"{sep}{ts} <|image><|video|>" + media += "\n\n" + else: + media += "\n\n" + for i in range(len(images)): + if i > 0: + media += "\n\n\n\n" + media += "<|image><|image|>" + media += "\n\n" + if len(audio_features) > 0: + # Compute audio token count (always at 16kHz) + num_samples = int(waveform.shape[-1] * 16000 / sample_rate) if sample_rate != 16000 else waveform.shape[-1] + _fl = 320 # int(round(16000 * 20.0 / 1000.0)) + _hl = 160 # int(round(16000 * 10.0 / 1000.0)) + _nmel = (num_samples + _fl // 2 - (_fl + 1)) // _hl + 1 + _t = _nmel + for _ in range(2): + _t = (_t + 2 - 3) // 2 + 1 + n_audio_tokens = min(_t, 750) + media += "<|audio>" + "<|audio|>" * n_audio_tokens + "" + llama_text = f"{system}<|turn>user\n{media}{text}\n<|turn>model\n" + + text_tokens = super().tokenize_with_weights(llama_text, return_word_ids) + + def _replace_placeholders(token_list, token_id, embeds): + """Replace first placeholder with embed dict, remove remaining consecutive ones.""" + embed_idx = 0 + i = 0 + while i < len(token_list): + if token_list[i][0] == token_id and embed_idx < len(embeds): + token_list[i] = (embeds[embed_idx],) + token_list[i][1:] + embed_idx += 1 + i += 1 + while i < len(token_list) and token_list[i][0] == token_id: + token_list.pop(i) + else: + i += 1 + + if len(images) > 0: + img_token_id = 258884 if is_video else 258880 + img_embeds = [{"type": "image", "data": img["pixels"], "max_soft_tokens": img["max_soft_tokens"]} for img in images] + for r in text_tokens: + _replace_placeholders(r, img_token_id, img_embeds) + + if len(audio_features) > 0: + aud_embeds = [{"type": "audio", "data": mel, "mask": mask} for mel, mask in audio_features] + for r in text_tokens: + _replace_placeholders(r, 258881, aud_embeds) + + return text_tokens + + +class _Gemma4Tokenizer: + """Tokenizer using the tokenizers (Gemma4 doesn't come with sentencepiece model)""" + def __init__(self, tokenizer_json_bytes=None, **kwargs): + from tokenizers import Tokenizer + if isinstance(tokenizer_json_bytes, torch.Tensor): + tokenizer_json_bytes = bytes(tokenizer_json_bytes.tolist()) + self.tokenizer = Tokenizer.from_str(tokenizer_json_bytes.decode("utf-8")) + + 
@classmethod + def from_pretrained(cls, tokenizer_data, **kwargs): + return cls(tokenizer_json_bytes=tokenizer_data, **kwargs) + + def __call__(self, text): + return {"input_ids": self.tokenizer.encode(text, add_special_tokens=False).ids} + + def get_vocab(self): + return self.tokenizer.get_vocab() + + def convert_tokens_to_ids(self, tokens): + return [self.tokenizer.token_to_id(t) for t in tokens] + + def decode(self, ids, **kwargs): + return self.tokenizer.decode(ids, skip_special_tokens=kwargs.get("skip_special_tokens", False)) + + +# Tokenizer +class Gemma4SDTokenizer(Gemma4_Tokenizer, sd1_clip.SDTokenizer): + embedding_size = 2560 + def __init__(self, embedding_directory=None, tokenizer_data={}): + tokenizer_json = tokenizer_data.get("tokenizer_json", None) + self.tokenizer_json_data = tokenizer_json + super().__init__(tokenizer_json, pad_with_end=False, embedding_size=self.embedding_size, embedding_key='gemma4', tokenizer_class=_Gemma4Tokenizer, has_start_token=True, has_end_token=False, pad_to_max_length=False, max_length=99999999, min_length=1, pad_left=True, disable_weights=True, start_token=2, tokenizer_data=tokenizer_data) + + def decode(self, token_ids, **kwargs): + text = super().decode(token_ids, skip_special_tokens=False) + # Translate thinking channel markers to standard / tags + text = text.replace("<|channel>thought\n", "\n") + text = text.replace("", "") + # Strip remaining special tokens + text = text.replace("", "").replace("", "").strip() + return text + + +class Gemma4Tokenizer(sd1_clip.SD1Tokenizer): + tokenizer_class = Gemma4SDTokenizer + def __init__(self, embedding_directory=None, tokenizer_data={}): + super().__init__(embedding_directory=embedding_directory, tokenizer_data=tokenizer_data, name="gemma4", tokenizer=self.tokenizer_class) + + +# Model wrappers +class Gemma4Model(sd1_clip.SDClipModel): + model_class = None + def __init__(self, device="cpu", layer="all", layer_idx=None, dtype=None, attention_mask=True, model_options={}): + self.dtypes = set() + self.dtypes.add(dtype) + super().__init__(device=device, layer=layer, layer_idx=layer_idx, textmodel_json_config={}, dtype=dtype, special_tokens={"start": 2, "pad": 0}, layer_norm_hidden_state=False, model_class=self.model_class, enable_attention_masks=attention_mask, return_attention_masks=attention_mask, model_options=model_options) + + def process_tokens(self, tokens, device): + embeds, _, _, _ = super().process_tokens(tokens, device) + return embeds + + def generate(self, tokens, do_sample, max_length, temperature, top_k, top_p, min_p, repetition_penalty, seed, presence_penalty=0.0): + if isinstance(tokens, dict): + tokens = next(iter(tokens.values())) + tokens_only = [[t[0] for t in b] for b in tokens] + embeds, _, _, embeds_info = sd1_clip.SDClipModel.process_tokens(self, tokens_only, self.execution_device) + seq_len = embeds.shape[1] + ids = [0] * seq_len + expanded_idx = 0 + embed_map = {info["index"]: info["size"] for info in embeds_info} + for t in tokens_only[0]: + if expanded_idx in embed_map: + expanded_idx += embed_map[expanded_idx] + elif isinstance(t, int): + if expanded_idx < seq_len: + ids[expanded_idx] = t + expanded_idx += 1 + else: + expanded_idx += 1 + initial_token_ids = [ids] + input_ids = torch.tensor(initial_token_ids, device=self.execution_device) + return self.transformer.generate(embeds, do_sample, max_length, temperature, top_k, top_p, min_p, repetition_penalty, seed, initial_tokens=initial_token_ids[0], presence_penalty=presence_penalty, initial_input_ids=input_ids) + + +def 
gemma4_te(dtype_llama=None, llama_quantization_metadata=None, model_class=None): + clip_model = type('Gemma4Model_', (Gemma4Model,), {'model_class': model_class}) + class Gemma4TEModel_(sd1_clip.SD1ClipModel): + def __init__(self, device="cpu", dtype=None, model_options={}): + if llama_quantization_metadata is not None: + model_options = model_options.copy() + model_options["quantization_metadata"] = llama_quantization_metadata + if dtype_llama is not None: + dtype = dtype_llama + super().__init__(device=device, dtype=dtype, name="gemma4", clip_model=clip_model, model_options=model_options) + return Gemma4TEModel_ + + +# Variants + +def _make_variant(config_cls): + audio = config_cls.audio_config is not None + bases = (Gemma4AudioMixin, Gemma4Base) if audio else (Gemma4Base,) + class Variant(*bases): + def __init__(self, config_dict, dtype, device, operations): + super().__init__() + self._init_model(config_cls(**config_dict), dtype, device, operations) + if audio: + self._init_audio(self.model.config, dtype, device, operations) + embedding_size = config_cls.hidden_size + if embedding_size != Gemma4SDTokenizer.embedding_size: + tok_cls = type('T', (Gemma4SDTokenizer,), {'embedding_size': embedding_size}) + class Tokenizer(Gemma4Tokenizer): + tokenizer_class = tok_cls + Variant.tokenizer = Tokenizer + else: + Variant.tokenizer = Gemma4Tokenizer + return Variant + +Gemma4_E4B = _make_variant(Gemma4Config) +Gemma4_E2B = _make_variant(Gemma4_E2B_Config) +Gemma4_31B = _make_variant(Gemma4_31B_Config) diff --git a/comfy/text_encoders/llama.py b/comfy/text_encoders/llama.py index 6ea8e36b1..a34c41144 100644 --- a/comfy/text_encoders/llama.py +++ b/comfy/text_encoders/llama.py @@ -521,7 +521,7 @@ class Attention(nn.Module): else: present_key_value = (xk, xv, index + num_tokens) - if sliding_window is not None and xk.shape[2] > sliding_window: + if sliding_window is not None and xk.shape[2] > sliding_window and seq_length == 1: xk = xk[:, :, -sliding_window:] xv = xv[:, :, -sliding_window:] attention_mask = attention_mask[..., -sliding_window:] if attention_mask is not None else None @@ -533,12 +533,12 @@ class Attention(nn.Module): return self.o_proj(output), present_key_value class MLP(nn.Module): - def __init__(self, config: Llama2Config, device=None, dtype=None, ops: Any = None): + def __init__(self, config: Llama2Config, device=None, dtype=None, ops: Any = None, intermediate_size=None): super().__init__() - ops = ops or nn - self.gate_proj = ops.Linear(config.hidden_size, config.intermediate_size, bias=False, device=device, dtype=dtype) - self.up_proj = ops.Linear(config.hidden_size, config.intermediate_size, bias=False, device=device, dtype=dtype) - self.down_proj = ops.Linear(config.intermediate_size, config.hidden_size, bias=False, device=device, dtype=dtype) + intermediate_size = intermediate_size or config.intermediate_size + self.gate_proj = ops.Linear(config.hidden_size, intermediate_size, bias=False, device=device, dtype=dtype) + self.up_proj = ops.Linear(config.hidden_size, intermediate_size, bias=False, device=device, dtype=dtype) + self.down_proj = ops.Linear(intermediate_size, config.hidden_size, bias=False, device=device, dtype=dtype) if config.mlp_activation == "silu": self.activation = torch.nn.functional.silu elif config.mlp_activation == "gelu_pytorch_tanh": @@ -647,24 +647,25 @@ class TransformerBlockGemma2(nn.Module): return x, present_key_value +def _make_scaled_embedding(ops, vocab_size, hidden_size, scale, device, dtype): + class ScaledEmbedding(ops.Embedding): + def 
forward(self, input_ids, out_dtype=None): + return super().forward(input_ids, out_dtype=out_dtype) * scale + return ScaledEmbedding(vocab_size, hidden_size, device=device, dtype=dtype) + + class Llama2_(nn.Module): def __init__(self, config, device=None, dtype=None, ops=None): super().__init__() self.config = config self.vocab_size = config.vocab_size - self.embed_tokens = ops.Embedding( - config.vocab_size, - config.hidden_size, - device=device, - dtype=dtype - ) if self.config.transformer_type == "gemma2" or self.config.transformer_type == "gemma3": transformer = TransformerBlockGemma2 - self.normalize_in = True + self.embed_tokens = _make_scaled_embedding(ops, config.vocab_size, config.hidden_size, config.hidden_size ** 0.5, device, dtype) else: transformer = TransformerBlock - self.normalize_in = False + self.embed_tokens = ops.Embedding(config.vocab_size, config.hidden_size, device=device, dtype=dtype) self.layers = nn.ModuleList([ transformer(config, index=i, device=device, dtype=dtype, ops=ops) @@ -690,15 +691,12 @@ class Llama2_(nn.Module): self.config.rope_dims, device=device) - def forward(self, x, attention_mask=None, embeds=None, num_tokens=None, intermediate_output=None, final_layer_norm_intermediate=True, dtype=None, position_ids=None, embeds_info=[], past_key_values=None): + def forward(self, x, attention_mask=None, embeds=None, num_tokens=None, intermediate_output=None, final_layer_norm_intermediate=True, dtype=None, position_ids=None, embeds_info=[], past_key_values=None, input_ids=None): if embeds is not None: x = embeds else: x = self.embed_tokens(x, out_dtype=dtype) - if self.normalize_in: - x *= self.config.hidden_size ** 0.5 - seq_len = x.shape[1] past_len = 0 if past_key_values is not None and len(past_key_values) > 0: @@ -850,7 +848,7 @@ class BaseGenerate: torch.empty([batch, model_config.num_key_value_heads, max_cache_len, model_config.head_dim], device=device, dtype=execution_dtype), 0)) return past_key_values - def generate(self, embeds=None, do_sample=True, max_length=256, temperature=1.0, top_k=50, top_p=0.9, min_p=0.0, repetition_penalty=1.0, seed=42, stop_tokens=None, initial_tokens=[], execution_dtype=None, min_tokens=0, presence_penalty=0.0): + def generate(self, embeds=None, do_sample=True, max_length=256, temperature=1.0, top_k=50, top_p=0.9, min_p=0.0, repetition_penalty=1.0, seed=42, stop_tokens=None, initial_tokens=[], execution_dtype=None, min_tokens=0, presence_penalty=0.0, initial_input_ids=None): device = embeds.device if stop_tokens is None: @@ -875,14 +873,16 @@ class BaseGenerate: pbar = comfy.utils.ProgressBar(max_length) # Generation loop + current_input_ids = initial_input_ids for step in tqdm(range(max_length), desc="Generating tokens"): - x, _, past_key_values = self.model.forward(None, embeds=embeds, attention_mask=None, past_key_values=past_key_values) + x, _, past_key_values = self.model.forward(None, embeds=embeds, attention_mask=None, past_key_values=past_key_values, input_ids=current_input_ids) logits = self.logits(x)[:, -1] next_token = self.sample_token(logits, temperature, top_k, top_p, min_p, repetition_penalty, initial_tokens + generated_token_ids, generator, do_sample=do_sample, presence_penalty=presence_penalty) token_id = next_token[0].item() generated_token_ids.append(token_id) embeds = self.model.embed_tokens(next_token).to(execution_dtype) + current_input_ids = next_token if initial_input_ids is not None else None pbar.update(1) if token_id in stop_tokens: diff --git a/comfy/text_encoders/lt.py b/comfy/text_encoders/lt.py 
b/comfy/text_encoders/lt.py
index 5aee1f4c0..bc5cbae28 100644
--- a/comfy/text_encoders/lt.py
+++ b/comfy/text_encoders/lt.py
@@ -93,8 +93,7 @@ class Gemma3_12BModel(sd1_clip.SDClipModel):
 
     def generate(self, tokens, do_sample, max_length, temperature, top_k, top_p, min_p, repetition_penalty, seed, presence_penalty):
         tokens_only = [[t[0] for t in b] for b in tokens]
-        embeds, _, _, embeds_info = self.process_tokens(tokens_only, self.execution_device)
-        comfy.utils.normalize_image_embeddings(embeds, embeds_info, self.transformer.model.config.hidden_size ** 0.5)
+        embeds, _, _, _ = self.process_tokens(tokens_only, self.execution_device)
         return self.transformer.generate(embeds, do_sample, max_length, temperature, top_k, top_p, min_p, repetition_penalty, seed, stop_tokens=[106], presence_penalty=presence_penalty) # 106 is 
 
 class DualLinearProjection(torch.nn.Module):
diff --git a/comfy/text_encoders/lumina2.py b/comfy/text_encoders/lumina2.py
index 01ebdfabe..b1f1dbb9f 100644
--- a/comfy/text_encoders/lumina2.py
+++ b/comfy/text_encoders/lumina2.py
@@ -50,8 +50,7 @@ class Gemma3_4B_Vision_Model(sd1_clip.SDClipModel):
         super().__init__(device=device, layer=layer, layer_idx=layer_idx, textmodel_json_config={}, dtype=dtype, special_tokens={"start": 2, "pad": 0}, layer_norm_hidden_state=False, model_class=comfy.text_encoders.llama.Gemma3_4B_Vision, enable_attention_masks=attention_mask, return_attention_masks=attention_mask, model_options=model_options)
 
     def process_tokens(self, tokens, device):
-        embeds, _, _, embeds_info = super().process_tokens(tokens, device)
-        comfy.utils.normalize_image_embeddings(embeds, embeds_info, self.transformer.model.config.hidden_size ** 0.5)
+        embeds, _, _, _ = super().process_tokens(tokens, device)
         return embeds
 
 class LuminaModel(sd1_clip.SD1ClipModel):
diff --git a/comfy/text_encoders/qwen35.py b/comfy/text_encoders/qwen35.py
index ce9b07464..d8ed9cd32 100644
--- a/comfy/text_encoders/qwen35.py
+++ b/comfy/text_encoders/qwen35.py
@@ -408,8 +408,6 @@ class Qwen35Transformer(Llama2_):
         nn.Module.__init__(self)
         self.config = config
         self.vocab_size = config.vocab_size
-        self.normalize_in = False
-        self.embed_tokens = ops.Embedding(config.vocab_size, config.hidden_size, device=device, dtype=dtype)
 
         self.layers = nn.ModuleList([
             Qwen35TransformerBlock(config, index=i, device=device, dtype=dtype, ops=ops)
diff --git a/comfy/utils.py b/comfy/utils.py
index 78c491b98..7b7faad3a 100644
--- a/comfy/utils.py
+++ b/comfy/utils.py
@@ -1446,10 +1446,3 @@ def deepcopy_list_dict(obj, memo=None):
         memo[obj_id] = res
     return res
 
-def normalize_image_embeddings(embeds, embeds_info, scale_factor):
-    """Normalize image embeddings to match text embedding scale"""
-    for info in embeds_info:
-        if info.get("type") == "image":
-            start_idx = info["index"]
-            end_idx = start_idx + info["size"]
-            embeds[:, start_idx:end_idx, :] /= scale_factor
diff --git a/comfy_extras/nodes_textgen.py b/comfy_extras/nodes_textgen.py
index 1f46d820f..1661a1011 100644
--- a/comfy_extras/nodes_textgen.py
+++ b/comfy_extras/nodes_textgen.py
@@ -32,6 +32,8 @@ class TextGenerate(io.ComfyNode):
                 io.Clip.Input("clip"),
                 io.String.Input("prompt", multiline=True, dynamic_prompts=True, default=""),
                 io.Image.Input("image", optional=True),
+                io.Image.Input("video", optional=True, tooltip="Video frames as image batch. 
Assumed to be 24 FPS; subsampled to 1 FPS internally."),
+                io.Audio.Input("audio", optional=True),
                 io.Int.Input("max_length", default=256, min=1, max=2048),
                 io.DynamicCombo.Input("sampling_mode", options=sampling_options, display_name="Sampling Mode"),
                 io.Boolean.Input("thinking", optional=True, default=False, tooltip="Operate in thinking mode if the model supports it."),
@@ -43,9 +45,9 @@
             )
 
     @classmethod
-    def execute(cls, clip, prompt, max_length, sampling_mode, image=None, thinking=False, use_default_template=True) -> io.NodeOutput:
+    def execute(cls, clip, prompt, max_length, sampling_mode, image=None, thinking=False, use_default_template=True, video=None, audio=None) -> io.NodeOutput:
 
-        tokens = clip.tokenize(prompt, image=image, skip_template=not use_default_template, min_length=1, thinking=thinking)
+        tokens = clip.tokenize(prompt, image=image, skip_template=not use_default_template, min_length=1, thinking=thinking, video=video, audio=audio)
 
         # Get sampling parameters from dynamic combo
         do_sample = sampling_mode.get("sampling_mode") == "on"
@@ -70,7 +72,8 @@
             seed=seed
         )
 
-        generated_text = clip.decode(generated_ids, skip_special_tokens=True)
+        generated_text = clip.decode(generated_ids)
+
         return io.NodeOutput(generated_text)
 
 
@@ -161,12 +164,12 @@ class TextGenerateLTX2Prompt(TextGenerate):
         )
 
     @classmethod
-    def execute(cls, clip, prompt, max_length, sampling_mode, image=None, thinking=False, use_default_template=True) -> io.NodeOutput:
+    def execute(cls, clip, prompt, max_length, sampling_mode, image=None, thinking=False, use_default_template=True, video=None, audio=None) -> io.NodeOutput:
         if image is None:
             formatted_prompt = f"system\n{LTX2_T2V_SYSTEM_PROMPT.strip()}\nuser\nUser Raw Input Prompt: {prompt}.\nmodel\n"
        else:
             formatted_prompt = f"system\n{LTX2_I2V_SYSTEM_PROMPT.strip()}\nuser\n\n\n\nUser Raw Input Prompt: {prompt}.\nmodel\n"
-        return super().execute(clip, formatted_prompt, max_length, sampling_mode, image, thinking, use_default_template)
+        return super().execute(clip, formatted_prompt, max_length, sampling_mode, image=image, thinking=thinking, use_default_template=use_default_template, video=video, audio=audio)
 
 
 class TextgenExtension(ComfyExtension):
From f6d5068ac0163e7f626c9cec2e7c663cf6fa64a8 Mon Sep 17 00:00:00 2001
From: Alexis Rolland
Date: Sun, 3 May 2026 12:20:17 +0800
Subject: [PATCH 72/81] Update README (#13679)

Updated the README to include a new screenshot, improved the description, and added Ernie Image to the supported models list.
---
 README.md | 12 +++++++++---
 1 file changed, 9 insertions(+), 3 deletions(-)

diff --git a/README.md b/README.md
index 3b5114633..ee68e8bb8 100644
--- a/README.md
+++ b/README.md
@@ -1,7 +1,7 @@
# ComfyUI -**The most powerful and modular visual AI engine and application.** +**The most powerful and modular AI engine for content creation.** [![Website][website-shield]][website-url] @@ -31,10 +31,15 @@ [github-downloads-latest-shield]: https://img.shields.io/github/downloads/comfyanonymous/ComfyUI/latest/total?style=flat&label=downloads%40latest [github-downloads-link]: https://github.com/comfyanonymous/ComfyUI/releases -![ComfyUI Screenshot](https://github.com/user-attachments/assets/7ccaf2c1-9b72-41ae-9a89-5688c94b7abe) +ComfyUI Screenshot
 
-ComfyUI lets you design and execute advanced stable diffusion pipelines using a graph/nodes/flowchart based interface. Available on Windows, Linux, and macOS.
+ComfyUI is the AI creation engine for visual professionals who demand control over every model, every parameter, and every output. Its powerful and modular node-graph interface empowers creatives to generate images, videos, 3D models, audio, and more.
+- ComfyUI natively supports the latest state-of-the-art open-source models.
+- API nodes provide access to the best closed-source models such as Nano Banana, Seedance, Hunyuan3D, etc.
+- It is available on Windows, Linux, and macOS, locally with our desktop application or on our cloud.
+- The most sophisticated workflows can be exposed through a simple UI thanks to App Mode.
+- It integrates seamlessly into production pipelines with our API endpoints.
 
 ## Get Started
 
@@ -77,6 +82,7 @@ See what ComfyUI can do with the [newer template workflows](https://comfy.org/wo
   - [Hunyuan Image 2.1](https://comfyanonymous.github.io/ComfyUI_examples/hunyuan_image/)
   - [Flux 2](https://comfyanonymous.github.io/ComfyUI_examples/flux2/)
   - [Z Image](https://comfyanonymous.github.io/ComfyUI_examples/z_image/)
+  - Ernie Image
 - Image Editing Models
   - [Omnigen 2](https://comfyanonymous.github.io/ComfyUI_examples/omnigen/)
   - [Flux Kontext](https://comfyanonymous.github.io/ComfyUI_examples/flux/#flux-kontext-image-editing-model)
From b5bb83c964519b7574ce9229b2314e04c17592c0 Mon Sep 17 00:00:00 2001
From: Alexis Rolland
Date: Sun, 3 May 2026 18:17:08 +0800
Subject: [PATCH 73/81] Fix issue blending images with alpha (#13615)

Make ImageBlend and ImageCompositeMasked nodes handle images with different channel counts
---
 node_helpers.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/node_helpers.py b/node_helpers.py
index d3d834516..cac4e88dd 100644
--- a/node_helpers.py
+++ b/node_helpers.py
@@ -86,6 +86,6 @@ def image_alpha_fix(destination, source):
     if destination.shape[-1] < source.shape[-1]:
         source = source[...,:destination.shape[-1]]
     elif destination.shape[-1] > source.shape[-1]:
-        destination = torch.nn.functional.pad(destination, (0, 1))
-        destination[..., -1] = 1.0
+        source = torch.nn.functional.pad(source, (0, 1))
+        source[..., -1] = 1.0
     return destination, source
From d0f0b15cf5d1fbff67390c8d90ec8654c2582f7a Mon Sep 17 00:00:00 2001
From: Alexis Rolland
Date: Sun, 3 May 2026 18:48:58 +0800
Subject: [PATCH 74/81] Update ComfyUI screenshot in README (#13683)

Update ComfyUI screenshot to showcase a more modern workflow
---
 README.md | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/README.md b/README.md
index ee68e8bb8..a3bd3ba0a 100644
--- a/README.md
+++ b/README.md
@@ -31,7 +31,8 @@
 [github-downloads-latest-shield]: https://img.shields.io/github/downloads/comfyanonymous/ComfyUI/latest/total?style=flat&label=downloads%40latest
 [github-downloads-link]: https://github.com/comfyanonymous/ComfyUI/releases
 
-ComfyUI Screenshot
+ComfyUI Screenshot
+
 
ComfyUI is the AI creation engine for visual professionals who demand control over every model, every parameter, and every output. Its powerful and modular node graph interface empowers creatives to generate images, videos, 3D models, audio, and more... From 867b8d2408a8f3062f25bd6707a4b96755d70e1d Mon Sep 17 00:00:00 2001 From: Luke Mino-Altherr Date: Sun, 3 May 2026 05:44:20 -0700 Subject: [PATCH 75/81] fix: gracefully handle port-in-use error on server startup (#13001) Catch EADDRINUSE OSError when binding the TCP site and exit with a clear error message instead of an unhandled traceback. --- server.py | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/server.py b/server.py index 881da8e66..2f3b438bb 100644 --- a/server.py +++ b/server.py @@ -1,3 +1,4 @@ +import errno import os import sys import asyncio @@ -1245,7 +1246,13 @@ class PromptServer(): address = addr[0] port = addr[1] site = web.TCPSite(runner, address, port, ssl_context=ssl_ctx) - await site.start() + try: + await site.start() + except OSError as e: + if e.errno == errno.EADDRINUSE: + logging.error(f"Port {port} is already in use on address {address}. Please close the other application or use a different port with --port.") + raise SystemExit(1) + raise if not hasattr(self, 'address'): self.address = address #TODO: remove this From 025e6792ee64181ddce8a84411e0c7311e00b179 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jukka=20Sepp=C3=A4nen?= <40791699+kijai@users.noreply.github.com> Date: Sun, 3 May 2026 16:30:00 +0300 Subject: [PATCH 76/81] Batch broadcasting in JoinImageWithAlpha node (#13686) * Batch broadcasting in JoinImageWithAlpha node --- comfy_extras/nodes_compositing.py | 11 ++++------- 1 file changed, 4 insertions(+), 7 deletions(-) diff --git a/comfy_extras/nodes_compositing.py b/comfy_extras/nodes_compositing.py index 3bc9fccb3..5b4423734 100644 --- a/comfy_extras/nodes_compositing.py +++ b/comfy_extras/nodes_compositing.py @@ -202,14 +202,11 @@ class JoinImageWithAlpha(io.ComfyNode): @classmethod def execute(cls, image: torch.Tensor, alpha: torch.Tensor) -> io.NodeOutput: - batch_size = min(len(image), len(alpha)) - out_images = [] - + batch_size = max(len(image), len(alpha)) alpha = 1.0 - resize_mask(alpha, image.shape[1:]) - for i in range(batch_size): - out_images.append(torch.cat((image[i][:,:,:3], alpha[i].unsqueeze(2)), dim=2)) - - return io.NodeOutput(torch.stack(out_images)) + alpha = comfy.utils.repeat_to_batch_size(alpha, batch_size) + image = comfy.utils.repeat_to_batch_size(image, batch_size) + return io.NodeOutput(torch.cat((image[..., :3], alpha.unsqueeze(-1)), dim=-1)) class CompositingExtension(ComfyExtension): From b138133ffa43541c85b5f9ca57f449c8345ca005 Mon Sep 17 00:00:00 2001 From: Silver <65376327+silveroxides@users.noreply.github.com> Date: Sun, 3 May 2026 20:07:21 +0200 Subject: [PATCH 77/81] Enable triton comfy kitchen via cli-arg (#12730) --- comfy/cli_args.py | 1 + comfy/quant_ops.py | 12 +++++++++++- 2 files changed, 12 insertions(+), 1 deletion(-) diff --git a/comfy/cli_args.py b/comfy/cli_args.py index cef1a5e6b..d2fde8b67 100644 --- a/comfy/cli_args.py +++ b/comfy/cli_args.py @@ -91,6 +91,7 @@ parser.add_argument("--directml", type=int, nargs="?", metavar="DIRECTML_DEVICE" parser.add_argument("--oneapi-device-selector", type=str, default=None, metavar="SELECTOR_STRING", help="Sets the oneAPI device(s) this instance will use.") parser.add_argument("--supports-fp8-compute", action="store_true", help="ComfyUI will act like if the device supports fp8 compute.") 
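+# Opt-in flag: the comfy-kitchen Triton backend stays disabled unless explicitly enabled.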
+parser.add_argument("--enable-triton-backend", action="store_true", help="ComfyUI will enable the comfy-kitchen Triton backend. It is disabled by default.")
 
 class LatentPreviewMethod(enum.Enum):
     NoPreviews = "none"
diff --git a/comfy/quant_ops.py b/comfy/quant_ops.py
index 42ee08fb2..b90bcfd25 100644
--- a/comfy/quant_ops.py
+++ b/comfy/quant_ops.py
@@ -1,6 +1,8 @@
 import torch
 import logging
 
+from comfy.cli_args import args
+
 try:
     import comfy_kitchen as ck
     from comfy_kitchen.tensor import (
@@ -21,7 +23,15 @@ try:
             ck.registry.disable("cuda")
             logging.warning("WARNING: You need pytorch with cu130 or higher to use optimized CUDA operations.")
 
-    ck.registry.disable("triton")
+    if args.enable_triton_backend:
+        try:
+            import triton
+            logging.info("Found triton %s. Enabling comfy-kitchen triton backend.", triton.__version__)
+        except ImportError as e:
+            logging.error(f"Failed to import triton ({e}); the comfy-kitchen triton backend will not be available.")
+            ck.registry.disable("triton")
+    else:
+        ck.registry.disable("triton")
     for k, v in ck.list_backends().items():
         logging.info(f"Found comfy_kitchen backend {k}: {v}")
 except ImportError as e:
From cea8d0925febb4dd32e400bbbf94243f55af3371 Mon Sep 17 00:00:00 2001
From: comfyanonymous <121283862+comfyanonymous@users.noreply.github.com>
Date: Sun, 3 May 2026 13:18:27 -0700
Subject: [PATCH 78/81] Refactor LoadImageMask to use LoadImage code. (#13687)

---
 nodes.py | 66 +++++++++++++++++++++++++-------------------------------
 1 file changed, 29 insertions(+), 37 deletions(-)

diff --git a/nodes.py b/nodes.py
index 710cccffe..8f8f90cf6 100644
--- a/nodes.py
+++ b/nodes.py
@@ -1754,57 +1754,49 @@ class LoadImage:
         return True
 
 
-class LoadImageMask:
+
+class LoadImageMask(LoadImage):
     ESSENTIALS_CATEGORY = "Image Tools"
     SEARCH_ALIASES = ["import mask", "alpha mask", "channel mask"]
     _color_channels = ["alpha", "red", "green", "blue"]
+
     @classmethod
     def INPUT_TYPES(s):
-        input_dir = folder_paths.get_input_directory()
-        files = [f for f in os.listdir(input_dir) if os.path.isfile(os.path.join(input_dir, f))]
-        return {"required":
-                    {"image": (sorted(files), {"image_upload": True}),
-                     "channel": (s._color_channels, ), }
-                }
+        types = super().INPUT_TYPES()
+        return {
+            "required": {
+                **types["required"],
+                "channel": (s._color_channels, )
+            }
+        }
 
     CATEGORY = "mask"
 
-    RETURN_TYPES = ("MASK",)
-    FUNCTION = "load_image"
-    def load_image(self, image, channel):
-        image_path = folder_paths.get_annotated_filepath(image)
-        i = node_helpers.pillow(Image.open, image_path)
-        i = node_helpers.pillow(ImageOps.exif_transpose, i)
-        if i.getbands() != ("R", "G", "B", "A"):
-            if i.mode == 'I':
-                i = i.point(lambda i: i * (1 / 255))
-            i = i.convert("RGBA")
-        mask = None
+
+    FUNCTION = "load_image_mask"
+
+    def load_image_mask(self, image, channel):
+        image_tensor, mask_tensor = super().load_image(image)
         c = channel[0].upper()
-        if c in i.getbands():
-            mask = np.array(i.getchannel(c)).astype(np.float32) / 255.0
-            mask = torch.from_numpy(mask)
-            if c == 'A':
-                mask = 1. 
- mask + + if c == 'A': + return (mask_tensor,) + + channel_idx = {'R': 0, 'G': 1, 'B': 2}.get(c, 0) + + if channel_idx < image_tensor.shape[-1]: + return (image_tensor[..., channel_idx].clone(),) else: - mask = torch.zeros((64,64), dtype=torch.float32, device="cpu") - return (mask.unsqueeze(0),) + empty_mask = torch.zeros( + image_tensor.shape[:-1], + dtype=image_tensor.dtype, + device=image_tensor.device + ) + return (empty_mask,) @classmethod def IS_CHANGED(s, image, channel): - image_path = folder_paths.get_annotated_filepath(image) - m = hashlib.sha256() - with open(image_path, 'rb') as f: - m.update(f.read()) - return m.digest().hex() - - @classmethod - def VALIDATE_INPUTS(s, image): - if not folder_paths.exists_annotated_filepath(image): - return "Invalid image file: {}".format(image) - - return True + return super().IS_CHANGED(image) class LoadImageOutput(LoadImage): From 2806163f6e06465bacb1b16906cd17a8b78c9610 Mon Sep 17 00:00:00 2001 From: Jedrzej Kosinski Date: Sun, 3 May 2026 16:21:34 -0700 Subject: [PATCH 79/81] Default control_after_generate to fixed in PrimitiveInt node (#13690) --- comfy_extras/nodes_primitive.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/comfy_extras/nodes_primitive.py b/comfy_extras/nodes_primitive.py index 9c2e98758..3c8f90b19 100644 --- a/comfy_extras/nodes_primitive.py +++ b/comfy_extras/nodes_primitive.py @@ -49,7 +49,7 @@ class Int(io.ComfyNode): display_name="Int", category="utils/primitive", inputs=[ - io.Int.Input("value", min=-sys.maxsize, max=sys.maxsize, control_after_generate=True), + io.Int.Input("value", min=-sys.maxsize, max=sys.maxsize, control_after_generate=io.ControlAfterGenerate.fixed), ], outputs=[io.Int.Output()], ) From 5538f62b0b81102c382849fd90469283c725b212 Mon Sep 17 00:00:00 2001 From: Alexis Rolland Date: Mon, 4 May 2026 12:33:11 +0800 Subject: [PATCH 80/81] fix: Update ColorTransfer node ref_image to be mandatory (#13691) --- comfy_extras/nodes_post_processing.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/comfy_extras/nodes_post_processing.py b/comfy_extras/nodes_post_processing.py index c932b747a..345fdb695 100644 --- a/comfy_extras/nodes_post_processing.py +++ b/comfy_extras/nodes_post_processing.py @@ -666,12 +666,13 @@ class ColorTransfer(io.ComfyNode): def define_schema(cls): return io.Schema( node_id="ColorTransfer", + display_name="Color Transfer", category="image/postprocessing", description="Match the colors of one image to another using various algorithms.", search_aliases=["color match", "color grading", "color correction", "match colors", "color transform", "mkl", "reinhard", "histogram"], inputs=[ io.Image.Input("image_target", tooltip="Image(s) to apply the color transform to."), - io.Image.Input("image_ref", optional=True, tooltip="Reference image(s) to match colors to. If not provided, processing is skipped"), + io.Image.Input("image_ref", tooltip="Reference image(s) to match colors to."), io.Combo.Input("method", options=['reinhard_lab', 'mkl_lab', 'histogram'],), io.DynamicCombo.Input("source_stats", tooltip="per_frame: each frame matched to image_ref individually. uniform: pool stats across all source frames as baseline, match to image_ref. 
target_frame: use one chosen frame as the baseline for the transform to image_ref, applied uniformly to all frames (preserves relative differences)", From f3ea976cba8743a87efeb9fbca717309e3d65c47 Mon Sep 17 00:00:00 2001 From: Soof Golan <83900570+soof-golan@users.noreply.github.com> Date: Mon, 4 May 2026 10:01:46 +0200 Subject: [PATCH 81/81] Fix a1111 typo in extra_model_paths.yaml (#2720) --- extra_model_paths.yaml.example | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/extra_model_paths.yaml.example b/extra_model_paths.yaml.example index 34df01681..9c395c0b2 100644 --- a/extra_model_paths.yaml.example +++ b/extra_model_paths.yaml.example @@ -28,7 +28,7 @@ #config for a1111 ui #all you have to do is uncomment this (remove the #) and change the base_path to where yours is installed -#a111: +#a1111: # base_path: path/to/stable-diffusion-webui/ # checkpoints: models/Stable-diffusion # configs: models/Stable-diffusion
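
For context on the `a1111` fix above: each top-level key in extra_model_paths.yaml names an arbitrary config section, `base_path` anchors the section, and every other key maps a model folder type to one or more paths relative to that base. The sketch below illustrates that consumption logic only; the function name and the print-based registration are assumptions, not ComfyUI's actual loader.

# Illustrative sketch of how a section such as "a1111:" is consumed.
# Assumptions: PyYAML is available (it is a ComfyUI dependency) and the
# registration step is simplified to a print instead of folder_paths calls.
import os
import yaml

def load_extra_model_paths(yaml_path):
    with open(yaml_path, "r", encoding="utf-8") as f:
        config = yaml.safe_load(f) or {}
    for section_name, section in config.items():
        if not isinstance(section, dict):
            continue  # skip commented-out or empty sections
        base_path = section.pop("base_path", "")
        for folder_type, paths in section.items():
            # A value may be a single path or a multi-line block of paths.
            for rel in str(paths).splitlines():
                if rel.strip():
                    full_path = os.path.join(base_path, rel.strip())
                    print(f"[{section_name}] {folder_type} -> {full_path}")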