diff --git a/.github/workflows/tag-dispatch-cloud.yml b/.github/workflows/tag-dispatch-cloud.yml new file mode 100644 index 000000000..53a0e91d6 --- /dev/null +++ b/.github/workflows/tag-dispatch-cloud.yml @@ -0,0 +1,45 @@ +name: Tag Dispatch to Cloud + +on: + push: + tags: + - 'v*' + +jobs: + dispatch-cloud: + runs-on: ubuntu-latest + steps: + - name: Send repository dispatch to cloud + env: + DISPATCH_TOKEN: ${{ secrets.CLOUD_REPO_DISPATCH_TOKEN }} + RELEASE_TAG: ${{ github.ref_name }} + run: | + set -euo pipefail + + if [ -z "${DISPATCH_TOKEN:-}" ]; then + echo "::error::CLOUD_REPO_DISPATCH_TOKEN is required but not set." + exit 1 + fi + + RELEASE_URL="https://github.com/${{ github.repository }}/releases/tag/${RELEASE_TAG}" + + PAYLOAD="$(jq -n \ + --arg release_tag "$RELEASE_TAG" \ + --arg release_url "$RELEASE_URL" \ + '{ + event_type: "comfyui_tag_pushed", + client_payload: { + release_tag: $release_tag, + release_url: $release_url + } + }')" + + curl -fsSL \ + -X POST \ + -H "Accept: application/vnd.github+json" \ + -H "Content-Type: application/json" \ + -H "Authorization: Bearer ${DISPATCH_TOKEN}" \ + https://api.github.com/repos/Comfy-Org/cloud/dispatches \ + -d "$PAYLOAD" + + echo "✅ Dispatched ComfyUI tag ${RELEASE_TAG} to Comfy-Org/cloud" diff --git a/comfy/model_management.py b/comfy/model_management.py index 3b39d6080..95af40012 100644 --- a/comfy/model_management.py +++ b/comfy/model_management.py @@ -663,6 +663,7 @@ def minimum_inference_memory(): def free_memory(memory_required, device, keep_loaded=[], for_dynamic=False, pins_required=0, ram_required=0): cleanup_models_gc() + comfy.memory_management.extra_ram_release(max(pins_required, ram_required)) unloaded_model = [] can_unload = [] unloaded_models = [] diff --git a/comfy/model_patcher.py b/comfy/model_patcher.py index ee56f8523..e259aed63 100644 --- a/comfy/model_patcher.py +++ b/comfy/model_patcher.py @@ -31,6 +31,7 @@ import comfy.float import comfy.hooks import comfy.lora import comfy.model_management +import comfy.ops import comfy.patcher_extension import comfy.utils from comfy.comfy_types import UnetWrapperFunction @@ -856,7 +857,9 @@ class ModelPatcher: if m.comfy_patched_weights == True: continue - for param in params: + for param, param_value in params.items(): + if hasattr(m, "comfy_cast_weights") and getattr(param_value, "is_meta", False): + comfy.ops.disable_weight_init._zero_init_parameter(m, param) key = key_param_name_to_key(n, param) self.unpin_weight(key) self.patch_weight_to_device(key, device_to=device_to) diff --git a/comfy/ops.py b/comfy/ops.py index 7a9b4b84c..050f7cda0 100644 --- a/comfy/ops.py +++ b/comfy/ops.py @@ -79,14 +79,21 @@ def cast_to_input(weight, input, non_blocking=False, copy=True): return comfy.model_management.cast_to(weight, input.dtype, input.device, non_blocking=non_blocking, copy=copy) -def cast_bias_weight_with_vbar(s, dtype, device, bias_dtype, non_blocking, compute_dtype, want_requant): +def materialize_meta_param(s, param_keys): + for param_key in param_keys: + param = getattr(s, param_key, None) + if param is not None and getattr(param, "is_meta", False): + setattr(s, param_key, torch.nn.Parameter(torch.zeros(param.shape, dtype=param.dtype), requires_grad=param.requires_grad)) + +def cast_bias_weight_with_vbar(s, dtype, device, bias_dtype, non_blocking, compute_dtype, want_requant): #vbar doesn't support CPU weights, but some custom nodes have weird paths #that might switch the layer to the CPU and expect it to work. 
We have to take #a clone conservatively as we are mmapped and some SFT files are packed misaligned #If you are a custom node author reading this, please move your layer to the GPU #or declare your ModelPatcher as CPU in the first place. if comfy.model_management.is_device_cpu(device): + materialize_meta_param(s, ["weight", "bias"]) weight = s.weight.to(dtype=dtype, copy=True) if isinstance(weight, QuantizedTensor): weight = weight.dequantize() @@ -108,6 +115,7 @@ def cast_bias_weight_with_vbar(s, dtype, device, bias_dtype, non_blocking, compu xfer_dest = comfy_aimdo.torch.aimdo_to_tensor(s._v, device) if not resident: + materialize_meta_param(s, ["weight", "bias"]) cast_geometry = comfy.memory_management.tensors_to_geometries([ s.weight, s.bias ]) cast_dest = None @@ -306,6 +314,12 @@ class CastWeightBiasOp: bias_function = [] class disable_weight_init: + @staticmethod + def _zero_init_parameter(module, name): + param = getattr(module, name) + device = None if getattr(param, "is_meta", False) else param.device + setattr(module, name, torch.nn.Parameter(torch.zeros(param.shape, device=device, dtype=param.dtype), requires_grad=False)) + @staticmethod def _lazy_load_from_state_dict(module, state_dict, prefix, local_metadata, missing_keys, unexpected_keys, weight_shape, diff --git a/comfy/pinned_memory.py b/comfy/pinned_memory.py index 6f142282d..6d3ba367a 100644 --- a/comfy/pinned_memory.py +++ b/comfy/pinned_memory.py @@ -2,7 +2,6 @@ import comfy.model_management import comfy.memory_management import comfy_aimdo.host_buffer import comfy_aimdo.torch -import psutil from comfy.cli_args import args @@ -12,11 +11,6 @@ def get_pin(module): def pin_memory(module): if module.pin_failed or args.disable_pinned_memory or get_pin(module) is not None: return - #FIXME: This is a RAM cache trigger event - ram_headroom = comfy.memory_management.RAM_CACHE_HEADROOM - #we split the difference and assume half the RAM cache headroom is for us - if ram_headroom > 0 and psutil.virtual_memory().available < (ram_headroom * 0.5): - comfy.memory_management.extra_ram_release(ram_headroom) size = comfy.memory_management.vram_aligned_size([ module.weight, module.bias ]) diff --git a/comfy_api/latest/_input_impl/video_types.py b/comfy_api/latest/_input_impl/video_types.py index eb4d3701d..6ed41bba8 100644 --- a/comfy_api/latest/_input_impl/video_types.py +++ b/comfy_api/latest/_input_impl/video_types.py @@ -12,6 +12,7 @@ import numpy as np import math import torch from .._util import VideoContainer, VideoCodec, VideoComponents +import logging def container_to_output_format(container_format: str | None) -> str | None: @@ -238,32 +239,89 @@ class VideoFromFile(VideoInput): start_time = max(self._get_raw_duration() + self.__start_time, 0) else: start_time = self.__start_time + # Get video frames frames = [] + audio_frames = [] alphas = None start_pts = int(start_time / video_stream.time_base) end_pts = int((start_time + self.__duration) / video_stream.time_base) - container.seek(start_pts, stream=video_stream) - image_format = 'gbrpf32le' - for frame in container.decode(video_stream): - if alphas is None: - for comp in frame.format.components: - if comp.is_alpha: - alphas = [] - image_format = 'gbrapf32le' - break - if frame.pts < start_pts: - continue - if self.__duration and frame.pts >= end_pts: + if start_pts != 0: + container.seek(start_pts, stream=video_stream) + + image_format = 'gbrpf32le' + audio = None + + streams = [video_stream] + has_first_audio_frame = False + checked_alpha = False + + # Default to False so we 
decode until EOF if duration is 0 + video_done = False + audio_done = True + + if len(container.streams.audio): + audio_stream = container.streams.audio[-1] + streams += [audio_stream] + resampler = av.audio.resampler.AudioResampler(format='fltp') + audio_done = False + + for packet in container.demux(*streams): + if video_done and audio_done: break - img = frame.to_ndarray(format=image_format) # shape: (H, W, 4) - if alphas is None: - frames.append(torch.from_numpy(img)) - else: - frames.append(torch.from_numpy(img[..., :-1])) - alphas.append(torch.from_numpy(img[..., -1:])) + if packet.stream.type == "video": + if video_done: + continue + try: + for frame in packet.decode(): + if frame.pts < start_pts: + continue + if self.__duration and frame.pts >= end_pts: + video_done = True + break + + if not checked_alpha: + for comp in frame.format.components: + if comp.is_alpha or frame.format.name == "pal8": + alphas = [] + image_format = 'gbrapf32le' + break + checked_alpha = True + + img = frame.to_ndarray(format=image_format) # shape: (H, W, 4) + if frame.rotation != 0: + k = int(round(frame.rotation // 90)) + img = np.rot90(img, k=k, axes=(0, 1)).copy() + if alphas is None: + frames.append(torch.from_numpy(img)) + else: + frames.append(torch.from_numpy(img[..., :-1])) + alphas.append(torch.from_numpy(img[..., -1:])) + except av.error.InvalidDataError: + logging.info("pyav decode error") + + elif packet.stream.type == "audio": + if audio_done: + continue + + aframes = itertools.chain.from_iterable( + map(resampler.resample, packet.decode()) + ) + for frame in aframes: + if self.__duration and frame.time > start_time + self.__duration: + audio_done = True + break + + if not has_first_audio_frame: + offset_seconds = start_time - frame.pts * audio_stream.time_base + to_skip = max(0, int(offset_seconds * audio_stream.sample_rate)) + if to_skip < frame.samples: + has_first_audio_frame = True + audio_frames.append(frame.to_ndarray()[..., to_skip:]) + else: + audio_frames.append(frame.to_ndarray()) images = torch.stack(frames) if len(frames) > 0 else torch.zeros(0, 0, 0, 3) if alphas is not None: @@ -272,42 +330,16 @@ class VideoFromFile(VideoInput): # Get frame rate frame_rate = Fraction(video_stream.average_rate) if video_stream.average_rate else Fraction(1) - # Get audio if available - audio = None - container.seek(start_pts, stream=video_stream) - # Use last stream for consistency - if len(container.streams.audio): - audio_stream = container.streams.audio[-1] - audio_frames = [] - resample = av.audio.resampler.AudioResampler(format='fltp').resample - frames = itertools.chain.from_iterable( - map(resample, container.decode(audio_stream)) - ) + if len(audio_frames) > 0: + audio_data = np.concatenate(audio_frames, axis=1) # shape: (channels, total_samples) + if self.__duration: + audio_data = audio_data[..., :int(self.__duration * audio_stream.sample_rate)] - has_first_frame = False - for frame in frames: - offset_seconds = start_time - frame.pts * audio_stream.time_base - to_skip = max(0, int(offset_seconds * audio_stream.sample_rate)) - if to_skip < frame.samples: - has_first_frame = True - break - if has_first_frame: - audio_frames.append(frame.to_ndarray()[..., to_skip:]) - - for frame in frames: - if self.__duration and frame.time > start_time + self.__duration: - break - audio_frames.append(frame.to_ndarray()) # shape: (channels, samples) - if len(audio_frames) > 0: - audio_data = np.concatenate(audio_frames, axis=1) # shape: (channels, total_samples) - if self.__duration: - audio_data = 
audio_data[..., :int(self.__duration * audio_stream.sample_rate)] - - audio_tensor = torch.from_numpy(audio_data).unsqueeze(0) # shape: (1, channels, total_samples) - audio = AudioInput({ - "waveform": audio_tensor, - "sample_rate": int(audio_stream.sample_rate) if audio_stream.sample_rate else 1, - }) + audio_tensor = torch.from_numpy(audio_data).unsqueeze(0) # shape: (1, channels, total_samples) + audio = AudioInput({ + "waveform": audio_tensor, + "sample_rate": int(audio_stream.sample_rate) if audio_stream.sample_rate else 1, + }) metadata = container.metadata return VideoComponents(images=images, alpha=alphas, audio=audio, frame_rate=frame_rate, metadata=metadata) diff --git a/comfy_api_nodes/apis/wan.py b/comfy_api_nodes/apis/wan.py index 44b65e4f6..c64acae97 100644 --- a/comfy_api_nodes/apis/wan.py +++ b/comfy_api_nodes/apis/wan.py @@ -118,7 +118,7 @@ class Wan27ReferenceVideoInputField(BaseModel): class Wan27ReferenceVideoParametersField(BaseModel): resolution: str = Field(...) ratio: str | None = Field(None) - duration: int = Field(5, ge=2, le=10) + duration: int = Field(5, ge=2, le=15) watermark: bool = Field(False) seed: int = Field(..., ge=0, le=2147483647) @@ -157,7 +157,7 @@ class Wan27VideoEditInputField(BaseModel): class Wan27VideoEditParametersField(BaseModel): resolution: str = Field(...) ratio: str | None = Field(None) - duration: int = Field(0) + duration: int | None = Field(0) audio_setting: str = Field("auto") watermark: bool = Field(False) seed: int = Field(..., ge=0, le=2147483647) diff --git a/comfy_api_nodes/nodes_wan.py b/comfy_api_nodes/nodes_wan.py index d1470894a..7d7466fb6 100644 --- a/comfy_api_nodes/nodes_wan.py +++ b/comfy_api_nodes/nodes_wan.py @@ -1646,6 +1646,557 @@ class Wan2ReferenceVideoApi(IO.ComfyNode): return IO.NodeOutput(await download_url_to_video_output(response.output.video_url)) +class HappyHorseTextToVideoApi(IO.ComfyNode): + @classmethod + def define_schema(cls): + return IO.Schema( + node_id="HappyHorseTextToVideoApi", + display_name="HappyHorse Text to Video", + category="api node/video/Wan", + description="Generates a video based on a text prompt using the HappyHorse model.", + inputs=[ + IO.DynamicCombo.Input( + "model", + options=[ + IO.DynamicCombo.Option( + "happyhorse-1.0-t2v", + [ + IO.String.Input( + "prompt", + multiline=True, + default="", + tooltip="Prompt describing the elements and visual features. 
" + "Supports English and Chinese.", + ), + IO.Combo.Input( + "resolution", + options=["720P", "1080P"], + ), + IO.Combo.Input( + "ratio", + options=["16:9", "9:16", "1:1", "4:3", "3:4"], + ), + IO.Int.Input( + "duration", + default=5, + min=3, + max=15, + step=1, + display_mode=IO.NumberDisplay.number, + ), + ], + ), + ], + ), + IO.Int.Input( + "seed", + default=0, + min=0, + max=2147483647, + step=1, + display_mode=IO.NumberDisplay.number, + control_after_generate=True, + tooltip="Seed to use for generation.", + ), + IO.Boolean.Input( + "watermark", + default=False, + tooltip="Whether to add an AI-generated watermark to the result.", + advanced=True, + ), + ], + outputs=[ + IO.Video.Output(), + ], + hidden=[ + IO.Hidden.auth_token_comfy_org, + IO.Hidden.api_key_comfy_org, + IO.Hidden.unique_id, + ], + is_api_node=True, + price_badge=IO.PriceBadge( + depends_on=IO.PriceBadgeDepends(widgets=["model", "model.resolution", "model.duration"]), + expr=""" + ( + $res := $lookup(widgets, "model.resolution"); + $dur := $lookup(widgets, "model.duration"); + $ppsTable := { "720p": 0.14, "1080p": 0.24 }; + $pps := $lookup($ppsTable, $res); + { "type": "usd", "usd": $pps * $dur } + ) + """, + ), + ) + + @classmethod + async def execute( + cls, + model: dict, + seed: int, + watermark: bool, + ): + validate_string(model["prompt"], strip_whitespace=False, min_length=1) + initial_response = await sync_op( + cls, + ApiEndpoint( + path="/proxy/wan/api/v1/services/aigc/video-generation/video-synthesis", + method="POST", + ), + response_model=TaskCreationResponse, + data=Wan27Text2VideoTaskCreationRequest( + model=model["model"], + input=Text2VideoInputField( + prompt=model["prompt"], + negative_prompt=None, + ), + parameters=Wan27Text2VideoParametersField( + resolution=model["resolution"], + ratio=model["ratio"], + duration=model["duration"], + seed=seed, + watermark=watermark, + ), + ), + ) + if not initial_response.output: + raise Exception(f"An unknown error occurred: {initial_response.code} - {initial_response.message}") + response = await poll_op( + cls, + ApiEndpoint(path=f"/proxy/wan/api/v1/tasks/{initial_response.output.task_id}"), + response_model=VideoTaskStatusResponse, + status_extractor=lambda x: x.output.task_status, + poll_interval=7, + ) + return IO.NodeOutput(await download_url_to_video_output(response.output.video_url)) + + +class HappyHorseImageToVideoApi(IO.ComfyNode): + @classmethod + def define_schema(cls): + return IO.Schema( + node_id="HappyHorseImageToVideoApi", + display_name="HappyHorse Image to Video", + category="api node/video/Wan", + description="Generate a video from a first-frame image using the HappyHorse model.", + inputs=[ + IO.DynamicCombo.Input( + "model", + options=[ + IO.DynamicCombo.Option( + "happyhorse-1.0-i2v", + [ + IO.String.Input( + "prompt", + multiline=True, + default="", + tooltip="Prompt describing the elements and visual features. " + "Supports English and Chinese.", + ), + IO.Combo.Input( + "resolution", + options=["720P", "1080P"], + ), + IO.Int.Input( + "duration", + default=5, + min=3, + max=15, + step=1, + display_mode=IO.NumberDisplay.number, + ), + ], + ), + ], + ), + IO.Image.Input( + "first_frame", + tooltip="First frame image. 
The output aspect ratio is derived from this image.", + ), + IO.Int.Input( + "seed", + default=0, + min=0, + max=2147483647, + step=1, + display_mode=IO.NumberDisplay.number, + control_after_generate=True, + tooltip="Seed to use for generation.", + ), + IO.Boolean.Input( + "watermark", + default=False, + tooltip="Whether to add an AI-generated watermark to the result.", + advanced=True, + ), + ], + outputs=[ + IO.Video.Output(), + ], + hidden=[ + IO.Hidden.auth_token_comfy_org, + IO.Hidden.api_key_comfy_org, + IO.Hidden.unique_id, + ], + is_api_node=True, + price_badge=IO.PriceBadge( + depends_on=IO.PriceBadgeDepends(widgets=["model", "model.resolution", "model.duration"]), + expr=""" + ( + $res := $lookup(widgets, "model.resolution"); + $dur := $lookup(widgets, "model.duration"); + $ppsTable := { "720p": 0.14, "1080p": 0.24 }; + $pps := $lookup($ppsTable, $res); + { "type": "usd", "usd": $pps * $dur } + ) + """, + ), + ) + + @classmethod + async def execute( + cls, + model: dict, + first_frame: Input.Image, + seed: int, + watermark: bool, + ): + media = [ + Wan27MediaItem( + type="first_frame", + url=await upload_image_to_comfyapi(cls, image=first_frame), + ) + ] + initial_response = await sync_op( + cls, + ApiEndpoint( + path="/proxy/wan/api/v1/services/aigc/video-generation/video-synthesis", + method="POST", + ), + response_model=TaskCreationResponse, + data=Wan27ImageToVideoTaskCreationRequest( + model=model["model"], + input=Wan27ImageToVideoInputField( + prompt=model["prompt"] or None, + negative_prompt=None, + media=media, + ), + parameters=Wan27ImageToVideoParametersField( + resolution=model["resolution"], + duration=model["duration"], + seed=seed, + watermark=watermark, + ), + ), + ) + if not initial_response.output: + raise Exception(f"An unknown error occurred: {initial_response.code} - {initial_response.message}") + response = await poll_op( + cls, + ApiEndpoint(path=f"/proxy/wan/api/v1/tasks/{initial_response.output.task_id}"), + response_model=VideoTaskStatusResponse, + status_extractor=lambda x: x.output.task_status, + poll_interval=7, + ) + return IO.NodeOutput(await download_url_to_video_output(response.output.video_url)) + + +class HappyHorseVideoEditApi(IO.ComfyNode): + @classmethod + def define_schema(cls): + return IO.Schema( + node_id="HappyHorseVideoEditApi", + display_name="HappyHorse Video Edit", + category="api node/video/Wan", + description="Edit a video using text instructions or reference images with the HappyHorse model. " + "Output duration is 3-15s and matches the input video; inputs longer than 15s are truncated.", + inputs=[ + IO.DynamicCombo.Input( + "model", + options=[ + IO.DynamicCombo.Option( + "happyhorse-1.0-video-edit", + [ + IO.String.Input( + "prompt", + multiline=True, + default="", + tooltip="Editing instructions or style transfer requirements.", + ), + IO.Combo.Input( + "resolution", + options=["720P", "1080P"], + ), + IO.Combo.Input( + "ratio", + options=["16:9", "9:16", "1:1", "4:3", "3:4"], + tooltip="Aspect ratio. 
If not changed, approximates the input video ratio.", + ), + IO.Autogrow.Input( + "reference_images", + template=IO.Autogrow.TemplateNames( + IO.Image.Input("reference_image"), + names=[ + "image1", + "image2", + "image3", + "image4", + "image5", + ], + min=0, + ), + ), + ], + ), + ], + ), + IO.Video.Input( + "video", + tooltip="The video to edit.", + ), + IO.Int.Input( + "seed", + default=0, + min=0, + max=2147483647, + step=1, + display_mode=IO.NumberDisplay.number, + control_after_generate=True, + tooltip="Seed to use for generation.", + ), + IO.Boolean.Input( + "watermark", + default=False, + tooltip="Whether to add an AI-generated watermark to the result.", + advanced=True, + ), + ], + outputs=[ + IO.Video.Output(), + ], + hidden=[ + IO.Hidden.auth_token_comfy_org, + IO.Hidden.api_key_comfy_org, + IO.Hidden.unique_id, + ], + is_api_node=True, + price_badge=IO.PriceBadge( + depends_on=IO.PriceBadgeDepends(widgets=["model", "model.resolution"]), + expr=""" + ( + $res := $lookup(widgets, "model.resolution"); + $ppsTable := { "720p": 0.14, "1080p": 0.24 }; + $pps := $lookup($ppsTable, $res); + { "type": "usd", "usd": $pps, "format": { "suffix": "/second" } } + ) + """, + ), + ) + + @classmethod + async def execute( + cls, + model: dict, + video: Input.Video, + seed: int, + watermark: bool, + ): + validate_string(model["prompt"], strip_whitespace=False, min_length=1) + validate_video_duration(video, min_duration=3, max_duration=60) + media = [Wan27MediaItem(type="video", url=await upload_video_to_comfyapi(cls, video))] + reference_images = model.get("reference_images", {}) + for key in reference_images: + media.append( + Wan27MediaItem( + type="reference_image", url=await upload_image_to_comfyapi(cls, image=reference_images[key]) + ) + ) + initial_response = await sync_op( + cls, + ApiEndpoint( + path="/proxy/wan/api/v1/services/aigc/video-generation/video-synthesis", + method="POST", + ), + response_model=TaskCreationResponse, + data=Wan27VideoEditTaskCreationRequest( + model=model["model"], + input=Wan27VideoEditInputField(prompt=model["prompt"], media=media), + parameters=Wan27VideoEditParametersField( + resolution=model["resolution"], + ratio=model["ratio"], + duration=None, + watermark=watermark, + seed=seed, + ), + ), + ) + if not initial_response.output: + raise Exception(f"An unknown error occurred: {initial_response.code} - {initial_response.message}") + response = await poll_op( + cls, + ApiEndpoint(path=f"/proxy/wan/api/v1/tasks/{initial_response.output.task_id}"), + response_model=VideoTaskStatusResponse, + status_extractor=lambda x: x.output.task_status, + poll_interval=7, + ) + return IO.NodeOutput(await download_url_to_video_output(response.output.video_url)) + + +class HappyHorseReferenceVideoApi(IO.ComfyNode): + @classmethod + def define_schema(cls): + return IO.Schema( + node_id="HappyHorseReferenceVideoApi", + display_name="HappyHorse Reference to Video", + category="api node/video/Wan", + description="Generate a video featuring a person or object from reference materials with the HappyHorse " + "model. Supports single-character performances and multi-character interactions.", + inputs=[ + IO.DynamicCombo.Input( + "model", + options=[ + IO.DynamicCombo.Option( + "happyhorse-1.0-r2v", + [ + IO.String.Input( + "prompt", + multiline=True, + default="", + tooltip="Prompt describing the video. 
Use identifiers such as 'character1' and "
+                                    "'character2' to refer to the reference characters.",
+                            ),
+                            IO.Combo.Input(
+                                "resolution",
+                                options=["720P", "1080P"],
+                            ),
+                            IO.Combo.Input(
+                                "ratio",
+                                options=["16:9", "9:16", "1:1", "4:3", "3:4"],
+                            ),
+                            IO.Int.Input(
+                                "duration",
+                                default=5,
+                                min=3,
+                                max=15,
+                                step=1,
+                                display_mode=IO.NumberDisplay.number,
+                            ),
+                            IO.Autogrow.Input(
+                                "reference_images",
+                                template=IO.Autogrow.TemplateNames(
+                                    IO.Image.Input("reference_image"),
+                                    names=[
+                                        "image1",
+                                        "image2",
+                                        "image3",
+                                        "image4",
+                                        "image5",
+                                        "image6",
+                                        "image7",
+                                        "image8",
+                                        "image9",
+                                    ],
+                                    min=1,
+                                ),
+                            ),
+                        ],
+                    ),
+                ],
+            ),
+            IO.Int.Input(
+                "seed",
+                default=0,
+                min=0,
+                max=2147483647,
+                step=1,
+                display_mode=IO.NumberDisplay.number,
+                control_after_generate=True,
+                tooltip="Seed to use for generation.",
+            ),
+            IO.Boolean.Input(
+                "watermark",
+                default=False,
+                tooltip="Whether to add an AI-generated watermark to the result.",
+                advanced=True,
+            ),
+        ],
+        outputs=[
+            IO.Video.Output(),
+        ],
+        hidden=[
+            IO.Hidden.auth_token_comfy_org,
+            IO.Hidden.api_key_comfy_org,
+            IO.Hidden.unique_id,
+        ],
+        is_api_node=True,
+        price_badge=IO.PriceBadge(
+            depends_on=IO.PriceBadgeDepends(widgets=["model", "model.resolution", "model.duration"]),
+            expr="""
+                (
+                    $res := $lookup(widgets, "model.resolution");
+                    $dur := $lookup(widgets, "model.duration");
+                    $ppsTable := { "720p": 0.14, "1080p": 0.24 };
+                    $pps := $lookup($ppsTable, $res);
+                    { "type": "usd", "usd": $pps * $dur }
+                )
+            """,
+        ),
+    )
+
+    @classmethod
+    async def execute(
+        cls,
+        model: dict,
+        seed: int,
+        watermark: bool,
+    ):
+        validate_string(model["prompt"], strip_whitespace=False, min_length=1)
+        media = []
+        reference_images = model.get("reference_images", {})
+        for key in reference_images:
+            media.append(
+                Wan27MediaItem(
+                    type="reference_image",
+                    url=await upload_image_to_comfyapi(cls, image=reference_images[key]),
+                )
+            )
+        if not media:
+            raise ValueError("At least one reference image must be provided.")
+
+        initial_response = await sync_op(
+            cls,
+            ApiEndpoint(
+                path="/proxy/wan/api/v1/services/aigc/video-generation/video-synthesis",
+                method="POST",
+            ),
+            response_model=TaskCreationResponse,
+            data=Wan27ReferenceVideoTaskCreationRequest(
+                model=model["model"],
+                input=Wan27ReferenceVideoInputField(
+                    prompt=model["prompt"],
+                    negative_prompt=None,
+                    media=media,
+                ),
+                parameters=Wan27ReferenceVideoParametersField(
+                    resolution=model["resolution"],
+                    ratio=model["ratio"],
+                    duration=model["duration"],
+                    watermark=watermark,
+                    seed=seed,
+                ),
+            ),
+        )
+        if not initial_response.output:
+            raise Exception(f"An unknown error occurred: {initial_response.code} - {initial_response.message}")
+        response = await poll_op(
+            cls,
+            ApiEndpoint(path=f"/proxy/wan/api/v1/tasks/{initial_response.output.task_id}"),
+            response_model=VideoTaskStatusResponse,
+            status_extractor=lambda x: x.output.task_status,
+            poll_interval=7,
+        )
+        return IO.NodeOutput(await download_url_to_video_output(response.output.video_url))
+
+
 class WanApiExtension(ComfyExtension):
     @override
     async def get_node_list(self) -> list[type[IO.ComfyNode]]:
@@ -1660,6 +2211,10 @@ class WanApiExtension(ComfyExtension):
             Wan2VideoContinuationApi,
             Wan2VideoEditApi,
             Wan2ReferenceVideoApi,
+            HappyHorseTextToVideoApi,
+            HappyHorseImageToVideoApi,
+            HappyHorseVideoEditApi,
+            HappyHorseReferenceVideoApi,
         ]
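For context, the four HappyHorse nodes above share one request lifecycle: create a video-synthesis task, poll its status, then download the resulting video. A minimal standalone sketch of that flow follows; the host, token handling, and status strings are assumptions, and the real nodes go through the internal sync_op/poll_op helpers rather than raw HTTP:

import time
import requests

API_BASE = "https://api.example.org"  # placeholder for the Comfy proxy host (assumption)

def run_video_task(payload: dict, token: str) -> str:
    """Create a video-synthesis task, poll until it settles, return the video URL."""
    headers = {"Authorization": f"Bearer {token}"}
    created = requests.post(
        f"{API_BASE}/proxy/wan/api/v1/services/aigc/video-generation/video-synthesis",
        json=payload, headers=headers, timeout=60,
    )
    created.raise_for_status()
    task_id = created.json()["output"]["task_id"]
    while True:
        time.sleep(7)  # mirrors poll_interval=7 in the nodes above
        status = requests.get(
            f"{API_BASE}/proxy/wan/api/v1/tasks/{task_id}",
            headers=headers, timeout=60,
        ).json()
        state = status["output"]["task_status"]
        if state == "SUCCEEDED":  # terminal status strings are an assumption
            return status["output"]["video_url"]
        if state in ("FAILED", "CANCELED"):
            raise RuntimeError(f"Task {task_id} ended in state {state}")

diff --git a/comfy_execution/caching.py b/comfy_execution/caching.py
index f9c913bdb..ba1e8bc84 100644
--- a/comfy_execution/caching.py
+++ 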
b/comfy_execution/caching.py @@ -5,6 +5,7 @@ import psutil import time import torch from typing import Sequence, Mapping, Dict +from comfy.model_patcher import ModelPatcher from comfy_execution.graph import DynamicPrompt from abc import ABC, abstractmethod @@ -523,13 +524,15 @@ class RAMPressureCache(LRUCache): self.timestamps[self.cache_key_set.get_data_key(node_id)] = time.time() super().set_local(node_id, value) - def ram_release(self, target): + def ram_release(self, target, free_active=False): if psutil.virtual_memory().available >= target: return clean_list = [] for key, cache_entry in self.cache.items(): + if not free_active and self.used_generation[key] == self.generation: + continue oom_score = RAM_CACHE_OLD_WORKFLOW_OOM_MULTIPLIER ** (self.generation - self.used_generation[key]) ram_usage = RAM_CACHE_DEFAULT_RAM_USAGE @@ -542,6 +545,9 @@ class RAMPressureCache(LRUCache): scan_list_for_ram_usage(output) elif isinstance(output, torch.Tensor) and output.device.type == 'cpu': ram_usage += output.numel() * output.element_size() + elif isinstance(output, ModelPatcher) and self.used_generation[key] != self.generation: + #old ModelPatchers are the first to go + ram_usage = 1e30 scan_list_for_ram_usage(cache_entry.outputs) oom_score *= ram_usage diff --git a/comfy_extras/nodes_hunyuan3d.py b/comfy_extras/nodes_hunyuan3d.py index df0c3e4b1..fa55ead59 100644 --- a/comfy_extras/nodes_hunyuan3d.py +++ b/comfy_extras/nodes_hunyuan3d.py @@ -637,7 +637,7 @@ class SaveGLB(IO.ComfyNode): ], tooltip="Mesh or 3D file to save", ), - IO.String.Input("filename_prefix", default="mesh/ComfyUI"), + IO.String.Input("filename_prefix", default="3d/ComfyUI"), ], hidden=[IO.Hidden.prompt, IO.Hidden.extra_pnginfo] ) diff --git a/comfy_extras/nodes_mask.py b/comfy_extras/nodes_mask.py index c44602597..8ca947718 100644 --- a/comfy_extras/nodes_mask.py +++ b/comfy_extras/nodes_mask.py @@ -2,6 +2,7 @@ import numpy as np import scipy.ndimage import torch import comfy.utils +import comfy.model_management import node_helpers from typing_extensions import override from comfy_api.latest import ComfyExtension, IO, UI @@ -188,7 +189,7 @@ class SolidMask(IO.ComfyNode): @classmethod def execute(cls, value, width, height) -> IO.NodeOutput: - out = torch.full((1, height, width), value, dtype=torch.float32, device="cpu") + out = torch.full((1, height, width), value, dtype=torch.float32, device=comfy.model_management.intermediate_device()) return IO.NodeOutput(out) solid = execute # TODO: remove @@ -262,6 +263,7 @@ class MaskComposite(IO.ComfyNode): def execute(cls, destination, source, x, y, operation) -> IO.NodeOutput: output = destination.reshape((-1, destination.shape[-2], destination.shape[-1])).clone() source = source.reshape((-1, source.shape[-2], source.shape[-1])) + source = source.to(output.device) left, top = (x, y,) right, bottom = (min(left + source.shape[-1], destination.shape[-1]), min(top + source.shape[-2], destination.shape[-2])) diff --git a/comfy_extras/nodes_sd3.py b/comfy_extras/nodes_sd3.py index c43844a1a..6655c1ba7 100644 --- a/comfy_extras/nodes_sd3.py +++ b/comfy_extras/nodes_sd3.py @@ -54,7 +54,7 @@ class EmptySD3LatentImage(io.ComfyNode): @classmethod def execute(cls, width, height, batch_size=1) -> io.NodeOutput: - latent = torch.zeros([batch_size, 16, height // 8, width // 8], device=comfy.model_management.intermediate_device()) + latent = torch.zeros([batch_size, 16, height // 8, width // 8], device=comfy.model_management.intermediate_device(), dtype=comfy.model_management.intermediate_dtype()) 
return io.NodeOutput({"samples": latent, "downscale_ratio_spacial": 8}) generate = execute # TODO: remove diff --git a/comfyui_version.py b/comfyui_version.py index 2a1eb9905..53e7156e3 100644 --- a/comfyui_version.py +++ b/comfyui_version.py @@ -1,3 +1,3 @@ # This file is automatically generated by the build process when version is # updated in pyproject.toml. -__version__ = "0.19.3" +__version__ = "0.20.1" diff --git a/execution.py b/execution.py index e15eb4bda..5a6d3404c 100644 --- a/execution.py +++ b/execution.py @@ -779,7 +779,7 @@ class PromptExecutor: if self.cache_type == CacheType.RAM_PRESSURE: comfy.model_management.free_memory(0, None, pins_required=ram_headroom, ram_required=ram_headroom) - comfy.memory_management.extra_ram_release(ram_headroom) + ram_release_callback(ram_headroom, free_active=True) else: # Only execute when the while-loop ends without break # Send cached UI for intermediate output nodes that weren't executed diff --git a/nodes.py b/nodes.py index fb83da896..e73a0712e 100644 --- a/nodes.py +++ b/nodes.py @@ -32,7 +32,7 @@ import comfy.controlnet from comfy.comfy_types import IO, ComfyNodeABC, InputTypeDict, FileLocator from comfy_api.internal import register_versions, ComfyAPIWithVersion from comfy_api.version_list import supported_versions -from comfy_api.latest import io, ComfyExtension +from comfy_api.latest import io, ComfyExtension, InputImpl import comfy.clip_vision @@ -1716,6 +1716,10 @@ class LoadImage: def load_image(self, image): image_path = folder_paths.get_annotated_filepath(image) + components = InputImpl.VideoFromFile(image_path).get_components() + if components.images.shape[0] > 0: + return (components.images, 1.0 - components.alpha[..., -1] if components.alpha is not None else torch.zeros((components.images.shape[0], 64, 64), dtype=torch.float32, device="cpu")) + img = node_helpers.pillow(Image.open, image_path) output_images = [] diff --git a/pyproject.toml b/pyproject.toml index 8fa92ecbe..633dac517 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,6 @@ [project] name = "ComfyUI" -version = "0.19.3" +version = "0.20.1" readme = "README.md" license = { file = "LICENSE" } requires-python = ">=3.10" diff --git a/requirements.txt b/requirements.txt index 6c7457e03..12c5ff7a9 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,5 +1,5 @@ comfyui-frontend-package==1.42.15 -comfyui-workflow-templates==0.9.62 +comfyui-workflow-templates==0.9.63 comfyui-embedded-docs==0.4.4 torch torchsde @@ -23,7 +23,7 @@ SQLAlchemy>=2.0 filelock av>=14.2.0 comfy-kitchen>=0.2.8 -comfy-aimdo==0.2.14 +comfy-aimdo==0.3.0 requests simpleeval>=1.0.0 blake3
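Addendum: the ops.py and model_patcher.py hunks above both revolve around PyTorch meta tensors. A parameter created on the "meta" device carries shape and dtype but no storage, so it must be re-materialized (here, zero-filled) before any cast or compute touches it, which is what materialize_meta_param and _zero_init_parameter do. A minimal sketch of that pattern using only stock PyTorch; the Linear layer and the zero fill are illustrative assumptions, not the ComfyUI code path:

import torch

lin = torch.nn.Linear(4, 4, device="meta")  # parameters exist as metadata only
assert lin.weight.is_meta                   # no real storage yet

# Re-materialize each meta parameter with real zero-filled storage.
for name in ("weight", "bias"):
    param = getattr(lin, name, None)
    if param is not None and getattr(param, "is_meta", False):
        setattr(lin, name, torch.nn.Parameter(
            torch.zeros(param.shape, dtype=param.dtype),
            requires_grad=param.requires_grad,
        ))

out = lin(torch.ones(1, 4))  # safe now; with zeroed weights this returns zeros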