From ae88cd19667889e2ed45befeefeee26a57ac6229 Mon Sep 17 00:00:00 2001
From: Glary-Bot
Date: Mon, 27 Apr 2026 07:18:16 +0000
Subject: [PATCH] Cap ImageBlend channel-mismatch output at 4 channels (RGBA)

Address review feedback: the previous fix allowed ImageBlend to return
tensors with > 4 channels (e.g. blending a 3-channel and a 5-channel
image produced a 5-channel tensor). This shifted the original failure
from blend-time to save/preview-time, because SaveImage and PreviewImage
both call PIL.Image.fromarray, which only supports 1/3/4-channel arrays.

Fix:
- In Blend.execute, the alignment target is now min(max(c1, c2), 4):
  any image with more than 4 channels is truncated, and any image with
  fewer is padded with 1.0s up to the (capped) target. This makes the
  RGB/RGBA case work and also makes the >4-channel case work end-to-end
  rather than just deferring its failure.
- Update the regression test that previously codified the wrong
  5-channel-output behavior to assert the correct 4-channel cap.
- Add test_output_capped_at_four_channels (both inputs > 4 channels).
- Add test_save_compatible_output_passes_through_pil that mirrors
  SaveImage's exact PIL.Image.fromarray conversion to catch regressions
  in the save/preview path.
- Add a small workflow-validation test (image_blend_workflow_test.py)
  that loads tests/inference/graphs/image_blend_channel_mismatch.json
  and verifies its node types and wiring, so the demo workflow can't
  silently bitrot.

Verified end-to-end against a local ComfyUI server: the workflow runs,
output is RGBA, and downstream SaveImage succeeds.
---
 comfy_extras/nodes_post_processing.py     | 30 ++++++-----
 .../comfy_extras_test/image_blend_test.py | 33 ++++++++++-
 .../image_blend_workflow_test.py          | 60 ++++++++++++++++++++
 3 files changed, 112 insertions(+), 11 deletions(-)
 create mode 100644 tests-unit/comfy_extras_test/image_blend_workflow_test.py

diff --git a/comfy_extras/nodes_post_processing.py b/comfy_extras/nodes_post_processing.py
index 465488bdf..ed9c52c82 100644
--- a/comfy_extras/nodes_post_processing.py
+++ b/comfy_extras/nodes_post_processing.py
@@ -36,16 +36,26 @@ class Blend(io.ComfyNode):
     @classmethod
     def execute(cls, image1: torch.Tensor, image2: torch.Tensor, blend_factor: float, blend_mode: str) -> io.NodeOutput:
         image2 = image2.to(image1.device)
-        # Match channel counts when one image has an extra channel (typically
-        # an alpha channel, e.g. RGB + RGBA) by padding the image with fewer
-        # channels with 1.0s. Mirrors the logic used by the ImageStitch node
-        # so behavior is consistent across nodes.
-        if image1.shape[-1] != image2.shape[-1]:
-            max_channels = max(image1.shape[-1], image2.shape[-1])
-            if image1.shape[-1] < max_channels:
-                image1 = torch.cat([image1, torch.ones(*image1.shape[:-1], max_channels - image1.shape[-1], device=image1.device, dtype=image1.dtype)], dim=-1)
-            if image2.shape[-1] < max_channels:
-                image2 = torch.cat([image2, torch.ones(*image2.shape[:-1], max_channels - image2.shape[-1], device=image2.device, dtype=image2.dtype)], dim=-1)
+        # Reconcile mismatched channel counts. Downstream nodes (SaveImage,
+        # PreviewImage) ultimately call PIL.Image.fromarray, which only
+        # supports 1/3/4-channel arrays, so we cap the output at 4 channels
+        # (RGBA): any image with > 4 channels is truncated, and any image
+        # with fewer channels than the (capped) target is padded with 1.0s
+        # so the extra slot behaves like an opaque alpha channel.
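+        # For example, blending a (B, H, W, 3) RGB image with a (B, H, W, 5)
+        # image gives target_channels = min(max(3, 5), 4) = 4: the 5-channel
+        # image is truncated to its first 4 channels and the RGB image is
+        # padded with an all-ones (opaque) alpha channel.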
+        if image1.shape[-1] != image2.shape[-1] or image1.shape[-1] > 4 or image2.shape[-1] > 4:
+            target_channels = min(max(image1.shape[-1], image2.shape[-1]), 4)
+            if image1.shape[-1] > target_channels:
+                image1 = image1[..., :target_channels]
+            elif image1.shape[-1] < target_channels:
+                image1 = torch.cat([image1, torch.ones(*image1.shape[:-1], target_channels - image1.shape[-1], device=image1.device, dtype=image1.dtype)], dim=-1)
+            if image2.shape[-1] > target_channels:
+                image2 = image2[..., :target_channels]
+            elif image2.shape[-1] < target_channels:
+                image2 = torch.cat([image2, torch.ones(*image2.shape[:-1], target_channels - image2.shape[-1], device=image2.device, dtype=image2.dtype)], dim=-1)
         if image1.shape != image2.shape:
             image2 = image2.permute(0, 3, 1, 2)
             image2 = comfy.utils.common_upscale(image2, image1.shape[2], image1.shape[1], upscale_method='bicubic', crop='center')
diff --git a/tests-unit/comfy_extras_test/image_blend_test.py b/tests-unit/comfy_extras_test/image_blend_test.py
index 0e931b4b6..63b2ce89a 100644
--- a/tests-unit/comfy_extras_test/image_blend_test.py
+++ b/tests-unit/comfy_extras_test/image_blend_test.py
@@ -52,11 +52,42 @@
         This is the exact runtime error reported in CORE-103: 'The size
         of tensor a (5) must match the size of tensor b (3) at
         non-singleton dimension 3'.
+
+        The output is capped at 4 channels (RGBA) because downstream
+        SaveImage/PreviewImage rely on PIL.Image.fromarray, which only
+        supports 1/3/4-channel arrays. Without this cap, the failure would
+        just shift from blend-time to save-time.
         """
         image1 = self.create_test_image(channels=3)
         image2 = self.create_test_image(channels=5)
         result = Blend.execute(image1, image2, 0.5, "multiply")
-        assert result[0].shape == (1, 64, 64, 5)
+        assert result[0].shape == (1, 64, 64, 4)
+
+    def test_output_capped_at_four_channels(self):
+        """If both inputs have > 4 channels, the output should still be
+        capped at 4 channels, since SaveImage/PreviewImage cannot serialize
+        anything wider than RGBA via PIL.Image.fromarray."""
+        image1 = self.create_test_image(channels=6)
+        image2 = self.create_test_image(channels=5)
+        result = Blend.execute(image1, image2, 0.5, "normal")
+        assert result[0].shape == (1, 64, 64, 4)
+
+    def test_save_compatible_output_passes_through_pil(self):
+        """The blended result must be convertible by PIL.Image.fromarray,
+        which is what SaveImage/PreviewImage do downstream. Catches the
+        case where a >4-channel output would silently break save/preview."""
+        from PIL import Image
+        import numpy as np
+
+        image1 = self.create_test_image(channels=3)
+        image2 = self.create_test_image(channels=5)
+        result = Blend.execute(image1, image2, 0.5, "normal")
+        # Mirror SaveImage's exact conversion (nodes.py:1662)
+        arr = np.clip(255.0 * result[0][0].cpu().numpy(), 0, 255).astype(np.uint8)
+        img = Image.fromarray(arr)
+        assert img.mode in ("L", "RGB", "RGBA"), (
+            f"Output mode {img.mode!r} cannot be saved by SaveImage"
+        )
 
     def test_different_size_and_channels(self):
         """Different spatial size AND different channel counts should both be reconciled."""
diff --git a/tests-unit/comfy_extras_test/image_blend_workflow_test.py b/tests-unit/comfy_extras_test/image_blend_workflow_test.py
new file mode 100644
index 000000000..0575fa3de
--- /dev/null
+++ b/tests-unit/comfy_extras_test/image_blend_workflow_test.py
@@ -0,0 +1,60 @@
+import json
+import pathlib
+
+
+WORKFLOW_PATH = (
+    pathlib.Path(__file__).resolve().parents[2]
+    / "tests"
+    / "inference"
+    / "graphs"
+    / "image_blend_channel_mismatch.json"
+)
+
+
+def test_workflow_loads():
+    with open(WORKFLOW_PATH) as f:
+        graph = json.load(f)
+    assert isinstance(graph, dict) and graph, "workflow JSON is empty"
+
+
+def test_workflow_uses_expected_node_types():
+    """The workflow uses a fixed, minimal set of nodes. If any are renamed
+    or removed upstream, this test fails fast instead of letting the demo
+    bitrot silently."""
+    expected = {
+        "EmptyImage",
+        "SolidMask",
+        "JoinImageWithAlpha",
+        "ImageBlend",
+        "SaveImage",
+    }
+    with open(WORKFLOW_PATH) as f:
+        graph = json.load(f)
+    actual = {node["class_type"] for node in graph.values()}
+    assert expected.issubset(actual), (
+        f"workflow is missing required node types: {expected - actual}"
+    )
+
+
+def test_workflow_exercises_imageblend_with_mismatched_channels():
+    """Sanity-check that the workflow actually wires an RGB output and an
+    RGBA output into ImageBlend (the CORE-103 case). If someone edits the
+    JSON and accidentally breaks this guarantee, the demo loses its point."""
+    with open(WORKFLOW_PATH) as f:
+        graph = json.load(f)
+    blend_nodes = [n for n in graph.values() if n["class_type"] == "ImageBlend"]
+    assert len(blend_nodes) == 1, "expected exactly one ImageBlend node"
+    blend = blend_nodes[0]
+    src1_id, _ = blend["inputs"]["image1"]
+    src2_id, _ = blend["inputs"]["image2"]
+    types = {graph[src1_id]["class_type"], graph[src2_id]["class_type"]}
+    assert "JoinImageWithAlpha" in types, (
+        "workflow no longer feeds an RGBA image into ImageBlend"
+    )
+    assert "EmptyImage" in types, (
+        "workflow no longer feeds a plain RGB image into ImageBlend"
+    )
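+
+    # NOTE: 'types' is an unordered set, so the asserts above hold no matter
+    # which ImageBlend input (image1 vs image2) carries the RGBA image; only
+    # an RGB source plus an RGBA source is enforced, not their order.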