Merge branch 'dev' into gradio4

2024-03-24 16:26:38 -04:00
parent 40e4ca99c5 b0b90dc0d7
commit 25f636cb3a
103 changed files with 1580 additions and 721 deletions
@@ -301,7 +301,7 @@ class DDPMV1(pl.LightningModule):
        elif self.parameterization == "x0":
            target = x_start
        else:
-            raise NotImplementedError(f"Paramterization {self.parameterization} not yet supported")
+            raise NotImplementedError(f"Parameterization {self.parameterization} not yet supported")

        loss = self.get_loss(model_out, target, mean=False).mean(dim=[1, 2, 3])

@@ -880,7 +880,7 @@ class LatentDiffusionV1(DDPMV1):
    def apply_model(self, x_noisy, t, cond, return_ids=False):

        if isinstance(cond, dict):
-            # hybrid case, cond is exptected to be a dict
+            # hybrid case, cond is expected to be a dict
            pass
        else:
            if not isinstance(cond, list):
@@ -916,7 +916,7 @@ class LatentDiffusionV1(DDPMV1):
                cond_list = [{c_key: [c[:, :, :, :, i]]} for i in range(c.shape[-1])]

            elif self.cond_stage_key == 'coordinates_bbox':
-                assert 'original_image_size' in self.split_input_params, 'BoudingBoxRescaling is missing original_image_size'
+                assert 'original_image_size' in self.split_input_params, 'BoundingBoxRescaling is missing original_image_size'

                # assuming padding of unfold is always 0 and its dilation is always 1
                n_patches_per_row = int((w - ks[0]) / stride[0] + 1)
@@ -926,7 +926,7 @@ class LatentDiffusionV1(DDPMV1):
                num_downs = self.first_stage_model.encoder.num_resolutions - 1
                rescale_latent = 2 ** (num_downs)

-                # get top left postions of patches as conforming for the bbbox tokenizer, therefore we
+                # get top left positions of patches as conforming for the bbox tokenizer, therefore we
                # need to rescale the tl patch coordinates to be in between (0,1)
                tl_patch_coordinates = [(rescale_latent * stride[0] * (patch_nr % n_patches_per_row) / full_img_w,
                                         rescale_latent * stride[1] * (patch_nr // n_patches_per_row) / full_img_h)
@@ -30,7 +30,7 @@ def factorization(dimension: int, factor:int=-1) -> tuple[int, int]:
    In LoRA with Kroneckor Product, first value is a value for weight scale.
    secon value is a value for weight.

-    Becuase of non-commutative property, A⊗B ≠ B⊗A. Meaning of two matrices is slightly different.
+    Because of non-commutative property, A⊗B ≠ B⊗A. Meaning of two matrices is slightly different.

    examples)
    factor
@@ -29,7 +29,6 @@ class NetworkOnDisk:

        def read_metadata():
            metadata = sd_models.read_metadata_from_safetensors(filename)
-            metadata.pop('ssmd_cover_images', None)  # those are cover images, and they are too big to display in UI as text

            return metadata

@@ -117,6 +116,12 @@ class NetworkModule:

        if hasattr(self.sd_module, 'weight'):
            self.shape = self.sd_module.weight.shape
+        elif isinstance(self.sd_module, nn.MultiheadAttention):
+            # For now, only self-attention uses PyTorch's MHA,
+            # so assume all q/k/v/out projections have the same shape
+            self.shape = self.sd_module.out_proj.weight.shape
+        else:
+            self.shape = None

        self.ops = None
        self.extra_kwargs = {}
@@ -146,6 +151,9 @@ class NetworkModule:
        self.alpha = weights.w["alpha"].item() if "alpha" in weights.w else None
        self.scale = weights.w["scale"].item() if "scale" in weights.w else None

+        self.dora_scale = weights.w.get("dora_scale", None)
+        self.dora_norm_dims = len(self.shape) - 1
+
    def multiplier(self):
        if 'transformer' in self.sd_key[:20]:
            return self.network.te_multiplier
@@ -160,6 +168,27 @@ class NetworkModule:

        return 1.0

+    def apply_weight_decompose(self, updown, orig_weight):
+        """Return the delta that turns `orig_weight` into `(orig_weight + updown)` rescaled by `dora_scale / norm`."""
+        orig_weight = orig_weight.to(updown.dtype)  # match the device/dtype of all three operands
+        dora_scale = self.dora_scale.to(device=orig_weight.device, dtype=updown.dtype)
+        updown = updown.to(orig_weight.device)
+
+        merged_scale1 = updown + orig_weight
+        merged_scale1_norm = (  # one norm per index of axis 1, taken over all remaining axes
+            merged_scale1.transpose(0, 1)
+            .reshape(merged_scale1.shape[1], -1)
+            .norm(dim=1, keepdim=True)
+            .reshape(merged_scale1.shape[1], *[1] * self.dora_norm_dims)  # assumes dora_norm_dims == merged_scale1.dim() - 1 -- TODO confirm
+            .transpose(0, 1)  # axis 1 carries the norms again so the result broadcasts against merged_scale1
+        )
+
+        dora_merged = (
+            merged_scale1 * (dora_scale / merged_scale1_norm)
+        )
+        final_updown = dora_merged - orig_weight  # express the rescaled weight as a delta relative to orig_weight
+        return final_updown
+
    def finalize_updown(self, updown, orig_weight, output_shape, ex_bias=None):
        if self.bias is not None:
            updown = updown.reshape(self.bias.shape)
@@ -175,6 +204,9 @@ class NetworkModule:
        if ex_bias is not None:
            ex_bias = ex_bias * self.multiplier()

+        if self.dora_scale is not None:
+            updown = self.apply_weight_decompose(updown, orig_weight)
+
        return updown * self.calc_scale() * self.multiplier(), ex_bias

    def calc_updown(self, target):
@@ -36,13 +36,6 @@ class NetworkModuleOFT(network.NetworkModule):
            # self.alpha is unused
            self.dim = self.oft_blocks.shape[1] # (num_blocks, block_size, block_size)

-        # LyCORIS BOFT
-        if self.oft_blocks.dim() == 4:
-            self.is_boft = True
-        self.rescale = weights.w.get('rescale', None)
-        if self.rescale is not None:
-            self.rescale = self.rescale.reshape(-1, *[1]*(self.org_module[0].weight.dim() - 1))
-
        is_linear = type(self.sd_module) in [torch.nn.Linear, torch.nn.modules.linear.NonDynamicallyQuantizableLinear]
        is_conv = type(self.sd_module) in [torch.nn.Conv2d]
        is_other_linear = type(self.sd_module) in [torch.nn.MultiheadAttention] # unsupported
@@ -54,6 +47,13 @@ class NetworkModuleOFT(network.NetworkModule):
        elif is_other_linear:
            self.out_dim = self.sd_module.embed_dim

+        # LyCORIS BOFT
+        if self.oft_blocks.dim() == 4:
+            self.is_boft = True
+        self.rescale = weights.w.get('rescale', None)
+        if self.rescale is not None and not is_other_linear:
+            self.rescale = self.rescale.reshape(-1, *[1]*(self.org_module[0].weight.dim() - 1))
+
        self.num_blocks = self.dim
        self.block_size = self.out_dim // self.dim
        self.constraint = (0 if self.alpha is None else self.alpha) * self.out_dim
@@ -355,7 +355,7 @@ def network_apply_weights(self: Union[torch.nn.Conv2d, torch.nn.Linear, torch.nn
    """
    Applies the currently selected set of networks to the weights of torch layer self.
    If weights already have this particular set of networks applied, does nothing.
-    If not, restores orginal weights from backup and alters weights according to networks.
+    If not, restores original weights from backup and alters weights according to networks.
    """

    network_layer_name = getattr(self, 'network_layer_name', None)
@@ -429,9 +429,12 @@ def network_apply_weights(self: Union[torch.nn.Conv2d, torch.nn.Linear, torch.nn
            if isinstance(self, torch.nn.MultiheadAttention) and module_q and module_k and module_v and module_out:
                try:
                    with torch.no_grad():
-                        updown_q, _ = module_q.calc_updown(self.in_proj_weight)
-                        updown_k, _ = module_k.calc_updown(self.in_proj_weight)
-                        updown_v, _ = module_v.calc_updown(self.in_proj_weight)
+                        # Send "real" orig_weight into MHA's lora module
+                        qw, kw, vw = self.in_proj_weight.chunk(3, 0)
+                        updown_q, _ = module_q.calc_updown(qw)
+                        updown_k, _ = module_k.calc_updown(kw)
+                        updown_v, _ = module_v.calc_updown(vw)
+                        del qw, kw, vw
                        updown_qkv = torch.vstack([updown_q, updown_k, updown_v])
                        updown_out, ex_bias = module_out.calc_updown(self.out_proj.weight)

@@ -149,6 +149,8 @@ class LoraUserMetadataEditor(ui_extra_networks_user_metadata.UserMetadataEditor)

            v = random.random() * max_count
            if count > v:
+                for x in "({[]})":
+                    tag = tag.replace(x, '\\' + x)
                res.append(tag)

        return ", ".join(sorted(res))
@@ -31,7 +31,7 @@ class ExtraNetworksPageLora(ui_extra_networks.ExtraNetworksPage):
            "name": name,
            "filename": lora_on_disk.filename,
            "shorthash": lora_on_disk.shorthash,
-            "preview": self.find_preview(path),
+            "preview": self.find_preview(path) or self.find_embedded_preview(path, name, lora_on_disk.metadata),
            "description": self.find_description(path),
            "search_terms": search_terms,
            "local_preview": f"{path}.{shared.opts.samples_format}",
@@ -43,6 +43,7 @@ onUiLoaded(async() => {
    });

    function getActiveTab(elements, all = false) {
+        if (!elements.img2imgTabs) return null;
        const tabs = elements.img2imgTabs.querySelectorAll("button");

        if (all) return tabs;
@@ -57,6 +58,7 @@ onUiLoaded(async() => {
    // Get tab ID
    function getTabId(elements) {
        const activeTab = getActiveTab(elements);
+        if (!activeTab) return null; // getActiveTab() found no tab, so there is nothing to map
        return tabNameToElementId[activeTab.innerText];
    }

@@ -247,6 +249,7 @@ onUiLoaded(async() => {
    let isMoving = false;
    let mouseX, mouseY;
    let activeElement;
+    let interactedWithAltKey = false;

    const elements = Object.fromEntries(
        Object.keys(elementIDs).map(id => [
@@ -260,7 +263,7 @@ onUiLoaded(async() => {
        const targetElement = gradioApp().querySelector(elemId);

        if (!targetElement) {
-            console.log("Element not found");
+            console.log("Element not found", elemId);
            return;
        }

@@ -380,7 +383,8 @@ onUiLoaded(async() => {

        // Create tooltip
        function createTooltip() {
-            const toolTipElemnt = targetElement.querySelector(".image-container");
+            const toolTipElement =
+                targetElement.querySelector(".image-container");
            const tooltip = document.createElement("div");
            tooltip.className = "canvas-tooltip";

@@ -442,16 +446,26 @@ onUiLoaded(async() => {
            tooltip.appendChild(tooltipContent);

            // Add a hint element to the target element
-            toolTipElemnt.appendChild(tooltip);
-
-            return tooltip;
+            toolTipElement.appendChild(tooltip);
        }

        //Show tool tip if setting enable
-        const canvasTooltip = createTooltip();
+        if (hotkeysConfig.canvas_show_tooltip) {
+            createTooltip();
+        }

-        if (!hotkeysConfig.canvas_show_tooltip) {
-            canvasTooltip.style.display = "none";
+        // HACK: a visible <img> inside the target element was found to cause white canvases when zooming, so it is hidden whenever the active tab is not "img2img"; this has no other effect on the webui.
+        function fixCanvas() {
+            const activeTab = getActiveTab(elements)?.textContent.trim(); // undefined when getActiveTab() returns null
+
+            if (activeTab && activeTab !== "img2img") {
+                const img = targetElement.querySelector(`${elemId} img`);
+
+                if (img && img.style.display !== "none") {
+                    img.style.display = "none";
+                    img.style.visibility = "hidden";
+                }
+            }
        }

        // Reset the zoom level and pan position of the target element to their initial values
@@ -570,6 +584,10 @@ onUiLoaded(async() => {
            if (isModifierKey(e, hotkeysConfig.canvas_hotkey_zoom)) {
                e.preventDefault();

+                if (hotkeysConfig.canvas_hotkey_zoom === "Alt") {
+                    interactedWithAltKey = true;
+                }
+
                let zoomPosX, zoomPosY;
                let delta = 0.2;
                if (elemData[elemId].zoomLevel > 7) {
@@ -767,17 +785,29 @@ onUiLoaded(async() => {
        targetElement.addEventListener("mouseleave", handleMouseLeave);

        // Reset zoom when click on another tab
-        elements.img2imgTabs.addEventListener("click", resetZoom);
+        if (elements.img2imgTabs) {
+            elements.img2imgTabs.addEventListener("click", resetZoom);
+            elements.img2imgTabs.addEventListener("click", () => {
+                // targetElement.style.width = "";
+                if (parseInt(targetElement.style.width) > 865) {
+                    setTimeout(fitToElement, 0);
+                }
+            });
+        }

        targetElement.addEventListener("wheel", e => {
            // change zoom level
-            const operation = e.deltaY > 0 ? "-" : "+";
+            const operation = (e.deltaY || -e.wheelDelta) > 0 ? "-" : "+";
            changeZoomLevel(operation, e);

            // Handle brush size adjustment with ctrl key pressed
            if (isModifierKey(e, hotkeysConfig.canvas_hotkey_adjust)) {
                e.preventDefault();

+                if (hotkeysConfig.canvas_hotkey_adjust === "Alt") {
+                    interactedWithAltKey = true;
+                }
+
                // Increase or decrease brush size based on scroll direction
                adjustBrushSize(elemId, e.deltaY);
            }
@@ -817,6 +847,20 @@ onUiLoaded(async() => {
        document.addEventListener("keydown", handleMoveKeyDown);
        document.addEventListener("keyup", handleMoveKeyUp);

+
+        // Prevent Firefox from opening its main menu when Alt is used as a hotkey for zoom or brush size
+        function handleAltKeyUp(e) {
+            if (e.key !== "Alt" || !interactedWithAltKey) {
+                return; // not an Alt release, or Alt was not used as a canvas hotkey
+            }
+
+            e.preventDefault();
+            interactedWithAltKey = false; // cleared until a hotkey handler sets it again
+        }
+
+        document.addEventListener("keyup", handleAltKeyUp);
+
+
        // Detect zoom level and update the pan speed.
        function updatePanPosition(movementX, movementY) {
            let panSpeed = 2;
@@ -8,8 +8,8 @@ shared.options_templates.update(shared.options_section(('canvas_hotkey', "Canvas
    "canvas_hotkey_grow_brush": shared.OptionInfo("W", "Enlarge the brush size"),
    "canvas_hotkey_move": shared.OptionInfo("F", "Moving the canvas").info("To work correctly in firefox, turn off 'Automatically search the page text when typing' in the browser settings"),
    "canvas_hotkey_fullscreen": shared.OptionInfo("S", "Fullscreen Mode, maximizes the picture so that it fits into the screen and stretches it to its full width "),
-    "canvas_hotkey_reset": shared.OptionInfo("R", "Reset zoom and canvas positon"),
-    "canvas_hotkey_overlap": shared.OptionInfo("O", "Toggle overlap").info("Technical button, neededs for testing"),
+    "canvas_hotkey_reset": shared.OptionInfo("R", "Reset zoom and canvas position"),
+    "canvas_hotkey_overlap": shared.OptionInfo("O", "Toggle overlap").info("Technical button, needed for testing"),
    "canvas_show_tooltip": shared.OptionInfo(True, "Enable tooltip on the canvas"),
    "canvas_blur_prompt": shared.OptionInfo(False, "Take the focus off the prompt when working with a canvas"),
    "canvas_disabled_functions": shared.OptionInfo(["Overlap"], "Disable function that you don't use", gr.CheckboxGroup, {"choices": ["Zoom","Adjust brush size","Hotkey enlarge brush","Hotkey shrink brush","Moving canvas","Fullscreen","Reset Zoom","Overlap"]}),
@@ -1,7 +1,7 @@
 import math

 import gradio as gr
-from modules import scripts, shared, ui_components, ui_settings, infotext_utils
+from modules import scripts, shared, ui_components, ui_settings, infotext_utils, errors
 from modules.ui_components import FormColumn


@@ -42,7 +42,11 @@ class ExtraOptionsSection(scripts.Script):
                            setting_name = extra_options[index]

                            with FormColumn():
-                                comp = ui_settings.create_setting_component(setting_name)
+                                try:
+                                    comp = ui_settings.create_setting_component(setting_name)
+                                except KeyError:
+                                    errors.report(f"Can't add extra options for {setting_name} in ui")
+                                    continue

                            self.comps.append(comp)
                            self.setting_names.append(setting_name)
@@ -0,0 +1,91 @@
+from PIL import Image
+
+from modules import scripts_postprocessing, ui_components
+import gradio as gr
+
+
+def center_crop(image: Image.Image, w: int, h: int):
+    """Resize the largest centered w:h region of *image* to (w, h) with Lanczos resampling."""
+    iw, ih = image.size
+    if ih / h < iw / w:
+        # image is relatively wider than the target: keep the full height, trim left and right
+        sw = w * ih / h
+        box = (iw - sw) / 2, 0, iw - (iw - sw) / 2, ih
+    else:
+        # otherwise keep the full width, trim top and bottom
+        sh = h * iw / w
+        box = 0, (ih - sh) / 2, iw, ih - (ih - sh) / 2
+    return image.resize((w, h), Image.Resampling.LANCZOS, box)
+
+
+def multicrop_pic(image: Image.Image, mindim, maxdim, minarea, maxarea, objective, threshold):
+    """Center-crop *image* to an automatically chosen size; return None when no size qualifies.
+
+    Candidate widths and heights run from mindim to maxdim (inclusive) in steps of 64. A candidate
+    qualifies when its area lies within [minarea, maxarea] and its aspect-ratio error against the
+    image is at most threshold. With objective 'Maximize area' the largest area wins and the lower
+    error breaks ties; with any other objective the lowest error wins and the larger area breaks ties.
+    """
+    iw, ih = image.size
+
+    def err(w, h):
+        # 0 when w:h equals the image's aspect ratio, growing towards 1 as the two diverge
+        ratio = iw / ih / (w / h)
+        return 1 - (ratio if ratio < 1 else 1 / ratio)
+
+    def rank(wh):
+        by_area = (wh[0] * wh[1], -err(*wh))
+        return by_area if objective == 'Maximize area' else by_area[::-1]
+
+    candidates = (
+        (w, h)
+        for w in range(mindim, maxdim + 1, 64)
+        for h in range(mindim, maxdim + 1, 64)
+        if minarea <= w * h <= maxarea and err(w, h) <= threshold
+    )
+    wh = max(candidates, key=rank, default=None)
+    if wh is None:
+        return None
+    return center_crop(image, *wh)
+
+
+class ScriptPostprocessingAutosizedCrop(scripts_postprocessing.ScriptPostprocessing):
+    """Postprocessing script that center-crops each image to an automatically chosen size."""
+
+    name = "Auto-sized crop"
+    order = 4020
+
+    def ui(self):
+        """Build the settings accordion; the returned keys match the keyword arguments of `process`."""
+        with ui_components.InputAccordion(False, label="Auto-sized crop") as enable:
+            gr.Markdown('Each image is center-cropped with an automatically chosen width and height.')
+            with gr.Row():
+                mindim = gr.Slider(minimum=64, maximum=2048, step=8, label="Dimension lower bound", value=384, elem_id="postprocess_multicrop_mindim")
+                maxdim = gr.Slider(minimum=64, maximum=2048, step=8, label="Dimension upper bound", value=768, elem_id="postprocess_multicrop_maxdim")
+            with gr.Row():
+                minarea = gr.Slider(minimum=64 * 64, maximum=2048 * 2048, step=1, label="Area lower bound", value=64 * 64, elem_id="postprocess_multicrop_minarea")
+                maxarea = gr.Slider(minimum=64 * 64, maximum=2048 * 2048, step=1, label="Area upper bound", value=640 * 640, elem_id="postprocess_multicrop_maxarea")
+            with gr.Row():
+                objective = gr.Radio(["Maximize area", "Minimize error"], value="Maximize area", label="Resizing objective", elem_id="postprocess_multicrop_objective")
+                threshold = gr.Slider(minimum=0, maximum=1, step=0.01, label="Error threshold", value=0.1, elem_id="postprocess_multicrop_threshold")
+
+        return {
+            "enable": enable,
+            "mindim": mindim,
+            "maxdim": maxdim,
+            "minarea": minarea,
+            "maxarea": maxarea,
+            "objective": objective,
+            "threshold": threshold,
+        }
+
+    def process(self, pp: scripts_postprocessing.PostprocessedImage, enable, mindim, maxdim, minarea, maxarea, objective, threshold):
+        """Replace `pp.image` with its auto-sized crop, or print a notice when no size qualifies."""
+        if not enable:
+            return
+
+        cropped = multicrop_pic(pp.image, mindim, maxdim, minarea, maxarea, objective, threshold)
+        if cropped is not None:
+            pp.image = cropped
+        else:
+            print(f"skipped {pp.image.width}x{pp.image.height} image (can't find suitable size within error threshold)")
@@ -0,0 +1,32 @@
+from modules import scripts_postprocessing, ui_components, deepbooru, shared
+import gradio as gr
+
+
+class ScriptPostprocessingCeption(scripts_postprocessing.ScriptPostprocessing):
+    """Postprocessing script that appends Deepbooru and/or BLIP captions to an image's caption."""
+
+    name = "Caption"
+    order = 4040
+
+    def ui(self):
+        """Build the "Caption" accordion; the returned keys match the keyword arguments of `process`."""
+        with ui_components.InputAccordion(False, label="Caption") as enable:
+            option = gr.CheckboxGroup(value=["Deepbooru"], choices=["Deepbooru", "BLIP"], show_label=False)
+
+        return dict(enable=enable, option=option)
+
+    def process(self, pp: scripts_postprocessing.PostprocessedImage, enable, option):
+        """Extend `pp.caption` with the output of every captioner selected in *option*."""
+        if not enable:
+            return
+
+        parts = [pp.caption]
+
+        if "Deepbooru" in option:
+            parts.append(deepbooru.model.tag(pp.image))
+
+        if "BLIP" in option:
+            parts.append(shared.interrogator.interrogate(pp.image.convert("RGB")))
+
+        # falsy entries (e.g. an empty existing caption) are dropped before joining
+        pp.caption = ", ".join(part for part in parts if part)
@@ -0,0 +1,35 @@
+from PIL import ImageOps, Image
+
+from modules import scripts_postprocessing, ui_components
+import gradio as gr
+
+
+class ScriptPostprocessingCreateFlippedCopies(scripts_postprocessing.ScriptPostprocessing):
+    """Postprocessing script that adds mirrored and/or flipped copies of an image as extra images."""
+
+    name = "Create flipped copies"
+    order = 4030
+
+    def ui(self):
+        """Build the accordion with the flip choices; the returned keys match the keyword arguments of `process`."""
+        with ui_components.InputAccordion(False, label="Create flipped copies") as enable:
+            with gr.Row():
+                option = gr.CheckboxGroup(value=["Horizontal"], choices=["Horizontal", "Vertical", "Both"], show_label=False)
+
+        return dict(enable=enable, option=option)
+
+    def process(self, pp: scripts_postprocessing.PostprocessedImage, enable, option):
+        """Append one transformed copy of `pp.image` to `pp.extra_images` per selected choice."""
+        if not enable:
+            return
+
+        # insertion order fixes the order of the appended copies: Horizontal, Vertical, Both
+        flips = {
+            "Horizontal": ImageOps.mirror,
+            "Vertical": lambda img: img.transpose(Image.Transpose.FLIP_TOP_BOTTOM),
+            "Both": lambda img: img.transpose(Image.Transpose.FLIP_TOP_BOTTOM).transpose(Image.Transpose.FLIP_LEFT_RIGHT),
+        }
+
+        for label, flip in flips.items():
+            if label in option:
+                pp.extra_images.append(flip(pp.image))
@@ -0,0 +1,59 @@
+
+from modules import scripts_postprocessing, ui_components, errors
+import gradio as gr
+
+from modules.textual_inversion import autocrop
+
+
+class ScriptPostprocessingFocalCrop(scripts_postprocessing.ScriptPostprocessing):
+    """Postprocessing script that crops an image to the shared target size via `autocrop.crop_image`."""
+
+    name = "Auto focal point crop"
+    order = 4010
+
+    def ui(self):
+        """Build the accordion with the focal-point weights; the returned keys match the keyword arguments of `process`."""
+        with ui_components.InputAccordion(False, label="Auto focal point crop") as enable:
+            face_weight = gr.Slider(label='Focal point face weight', value=0.9, minimum=0.0, maximum=1.0, step=0.05, elem_id="postprocess_focal_crop_face_weight")
+            entropy_weight = gr.Slider(label='Focal point entropy weight', value=0.15, minimum=0.0, maximum=1.0, step=0.05, elem_id="postprocess_focal_crop_entropy_weight")
+            edges_weight = gr.Slider(label='Focal point edges weight', value=0.5, minimum=0.0, maximum=1.0, step=0.05, elem_id="postprocess_focal_crop_edges_weight")
+            debug = gr.Checkbox(label='Create debug image', elem_id="train_process_focal_crop_debug")
+
+        return dict(
+            enable=enable,
+            face_weight=face_weight,
+            entropy_weight=entropy_weight,
+            edges_weight=edges_weight,
+            debug=debug,
+        )
+
+    def process(self, pp: scripts_postprocessing.PostprocessedImage, enable, face_weight, entropy_weight, edges_weight, debug):
+        """Crop `pp.image` to the shared target size; do nothing when disabled or when no target size is set."""
+        if not enable:
+            return
+
+        if not (pp.shared.target_width and pp.shared.target_height):
+            return
+
+        try:
+            dnn_model_path = autocrop.download_and_cache_models()
+        except Exception:
+            errors.report("Unable to load face detection model for auto crop selection. Falling back to lower quality haar method.", exc_info=True)
+            dnn_model_path = None
+
+        autocrop_settings = autocrop.Settings(
+            crop_width=pp.shared.target_width,
+            crop_height=pp.shared.target_height,
+            face_points_weight=face_weight,
+            entropy_points_weight=entropy_weight,
+            corner_points_weight=edges_weight,
+            annotate_image=debug,
+            dnn_model_path=dnn_model_path,
+        )
+
+        # the first returned image becomes the result; any further ones are kept as tagged debug copies
+        cropped, *debug_images = autocrop.crop_image(pp.image, autocrop_settings)
+
+        pp.image = cropped
+        pp.extra_images = [pp.create_copy(x, nametags=["focal-crop-debug"], disable_processing=True) for x in debug_images]
+
@@ -0,0 +1,82 @@
+import math
+
+from modules import scripts_postprocessing, ui_components
+import gradio as gr
+
+
+def split_pic(image, inverse_xy, width, height, overlap_ratio):
+    """Yield overlapping tiles of the target size cut from a rescaled copy of *image*.
+
+    The image is first resized, keeping its aspect ratio, so that its width equals *width* (or, with
+    inverse_xy, its height equals *height*); tiles are then cut at evenly spaced offsets along the other axis.
+    """
+    if inverse_xy:
+        from_w, from_h = image.height, image.width
+        to_w, to_h = height, width
+    else:
+        from_w, from_h = image.width, image.height
+        to_w, to_h = width, height
+    h = from_h * to_w // from_w
+    if inverse_xy:
+        image = image.resize((h, to_w))
+    else:
+        image = image.resize((to_w, h))
+
+    split_count = math.ceil((h - to_h * overlap_ratio) / (to_h * (1.0 - overlap_ratio)))
+    # A single tile has no spacing to compute; dividing by (split_count - 1) would raise ZeroDivisionError.
+    y_step = (h - to_h) / (split_count - 1) if split_count > 1 else 0
+    for i in range(split_count):
+        y = int(y_step * i)
+        if inverse_xy:
+            splitted = image.crop((y, 0, y + to_h, to_w))
+        else:
+            splitted = image.crop((0, y, to_w, y + to_h))
+        yield splitted
+
+
+class ScriptPostprocessingSplitOversized(scripts_postprocessing.ScriptPostprocessing):
+    """Postprocessing script that splits images much longer than the target aspect ratio into tiles."""
+
+    name = "Split oversized images"
+    order = 4000
+
+    def ui(self):
+        """Build the accordion with threshold and overlap sliders; the returned keys match the keyword arguments of `process`."""
+        with ui_components.InputAccordion(False, label="Split oversized images") as enable:
+            with gr.Row():
+                split_threshold = gr.Slider(label='Threshold', value=0.5, minimum=0.0, maximum=1.0, step=0.05, elem_id="postprocess_split_threshold")
+                overlap_ratio = gr.Slider(label='Overlap ratio', value=0.2, minimum=0.0, maximum=0.9, step=0.05, elem_id="postprocess_overlap_ratio")
+
+        return {
+            "enable": enable,
+            "split_threshold": split_threshold,
+            "overlap_ratio": overlap_ratio,
+        }
+
+    def process(self, pp: scripts_postprocessing.PostprocessedImage, enable, split_threshold, overlap_ratio):
+        """Replace `pp.image` with the first tile and store copies of the remaining tiles in `pp.extra_images`."""
+        if not enable:
+            return
+
+        width = pp.shared.target_width
+        height = pp.shared.target_height
+
+        if not width or not height:
+            return
+
+        if pp.image.height > pp.image.width:
+            ratio = (pp.image.width * height) / (pp.image.height * width)
+            inverse_xy = False
+        else:
+            ratio = (pp.image.height * width) / (pp.image.width * height)
+            inverse_xy = True
+
+        # leave the image untouched unless its aspect ratio, relative to the target, is at or below the threshold
+        if ratio >= 1.0 or ratio > split_threshold:
+            return
+
+        result, *others = split_pic(pp.image, inverse_xy, width, height, overlap_ratio)
+
+        pp.image = result
+        pp.extra_images = [pp.create_copy(x) for x in others]
+
@@ -57,10 +57,14 @@ def latent_blend(settings, a, b, t):

    # NOTE: We use inplace operations wherever possible.

-    # [4][w][h] to [1][4][w][h]
-    t2 = t.unsqueeze(0)
-    # [4][w][h] to [1][1][w][h] - the [4] seem redundant.
-    t3 = t[0].unsqueeze(0).unsqueeze(0)
+    if len(t.shape) == 3:
+        # [4][w][h] to [1][4][w][h]
+        t2 = t.unsqueeze(0)
+        # [4][w][h] to [1][1][w][h] - the [4] seem redundant.
+        t3 = t[0].unsqueeze(0).unsqueeze(0)
+    else:
+        t2 = t
+        t3 = t[:, 0][:, None]

    one_minus_t2 = 1 - t2
    one_minus_t3 = 1 - t3
@@ -104,7 +108,7 @@ def latent_blend(settings, a, b, t):

 def get_modified_nmask(settings, nmask, sigma):
    """
-    Converts a negative mask representing the transparency of the original latent vectors being overlayed
+    Converts a negative mask representing the transparency of the original latent vectors being overlaid
    to a mask that is scaled according to the denoising strength for this step.

    Where:
@@ -135,7 +139,10 @@ def apply_adaptive_masks(
    from PIL import Image, ImageOps, ImageFilter

    # TODO: Bias the blending according to the latent mask, add adjustable parameter for bias control.
-    latent_mask = nmask[0].float()
+    if len(nmask.shape) == 3:
+        latent_mask = nmask[0].float()
+    else:
+        latent_mask = nmask[:, 0].float()
    # convert the original mask into a form we use to scale distances for thresholding
    mask_scalar = 1 - (torch.clamp(latent_mask, min=0, max=1) ** (settings.mask_blend_scale / 2))
    mask_scalar = (0.5 * (1 - settings.composite_mask_influence)
@@ -157,7 +164,14 @@ def apply_adaptive_masks(
                                                   percentile_min=0.25, percentile_max=0.75, min_width=1)

        # The distance at which opacity of original decreases to 50%
-        half_weighted_distance = settings.composite_difference_threshold * mask_scalar
+        if len(mask_scalar.shape) == 3:
+            if mask_scalar.shape[0] > i:
+                half_weighted_distance = settings.composite_difference_threshold * mask_scalar[i]
+            else:
+                half_weighted_distance = settings.composite_difference_threshold * mask_scalar[0]
+        else:
+            half_weighted_distance = settings.composite_difference_threshold * mask_scalar
+
        converted_mask = converted_mask / half_weighted_distance

        converted_mask = 1 / (1 + converted_mask ** settings.composite_difference_contrast)