Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
115 changes: 115 additions & 0 deletions demo/realtime-img2img/controlnet_registry.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -119,5 +119,120 @@ available_controlnets:
default_preprocessor: "feedback"
default_scale: 0.6
description: "Uses image feedback for enhanced details (SDXL)"
preprocessor_params:
image_resolution: 512

- id: "depth_xinsir_sdxl"
name: "Depth Detection (xinsir)"
model_id: "xinsir/controlnet-depth-sdxl-1.0"
default_preprocessor: "depth_tensorrt"
default_scale: 0.8
description: "Estimates depth information from images — xinsir SDXL variant"
preprocessor_params:
detect_resolution: 518
image_resolution: 512

- id: "scribble_sdxl"
name: "Scribble"
model_id: "xinsir/controlnet-scribble-sdxl-1.0"
default_preprocessor: "scribble_tensorrt"
default_scale: 0.8
description: "Produces sketch-like scribble edge conditioning (SDXL)"
preprocessor_params:
image_resolution: 512

sd21:
- id: "canny_sd21"
name: "Canny Edge Detection"
model_id: "thibaud/controlnet-sd21-canny-diffusers"
default_preprocessor: "canny"
default_scale: 0.8
description: "Detects edges and outlines in images (SD2.1)"
preprocessor_params:
low_threshold: 100
high_threshold: 200

- id: "depth_sd21"
name: "Depth Estimation"
model_id: "thibaud/controlnet-sd21-depth-diffusers"
default_preprocessor: "depth_tensorrt"
default_scale: 0.8
description: "Estimates depth from images (SD2.1)"
preprocessor_params:
detect_resolution: 518
image_resolution: 512

- id: "openpose_sd21"
name: "OpenPose"
model_id: "thibaud/controlnet-sd21-openpose-diffusers"
default_preprocessor: "pose_tensorrt"
default_scale: 0.8
description: "Detects human body pose (SD2.1)"
preprocessor_params:
detect_resolution: 640
image_resolution: 512

- id: "scribble_sd21"
name: "Scribble"
model_id: "thibaud/controlnet-sd21-scribble-diffusers"
default_preprocessor: "scribble_tensorrt"
default_scale: 0.8
description: "Generates from rough sketches (SD2.1)"
preprocessor_params:
image_resolution: 512

- id: "hed_sd21"
name: "HED Soft Edge"
model_id: "thibaud/controlnet-sd21-hed-diffusers"
default_preprocessor: "hed_tensorrt"
default_scale: 0.8
description: "Soft edge / HED boundary detection (SD2.1)"
preprocessor_params:
image_resolution: 512

- id: "normalbae_sd21"
name: "Normal Map (BAE)"
model_id: "thibaud/controlnet-sd21-normalbae-diffusers"
default_preprocessor: "normal_bae_tensorrt"
default_scale: 0.8
description: "Surface normal estimation (SD2.1)"
preprocessor_params:
image_resolution: 512

- id: "lineart_sd21"
name: "Lineart"
model_id: "thibaud/controlnet-sd21-lineart-diffusers"
default_preprocessor: "standard_lineart"
default_scale: 0.8
description: "Line-art extraction (SD2.1)"
preprocessor_params:
gaussian_sigma: 6.0
intensity_threshold: 8

- id: "zoedepth_sd21"
name: "ZoeDepth"
model_id: "thibaud/controlnet-sd21-zoedepth-diffusers"
default_preprocessor: "depth_tensorrt"
default_scale: 0.8
description: "Metric depth estimation (SD2.1)"
preprocessor_params:
detect_resolution: 518
image_resolution: 512

- id: "color_sd21"
name: "Color"
model_id: "thibaud/controlnet-sd21-color-diffusers"
default_preprocessor: "passthrough"
default_scale: 0.8
description: "Color/palette conditioning (SD2.1)"
preprocessor_params:
image_resolution: 512

- id: "ade20k_sd21"
name: "Segmentation (ADE20K)"
model_id: "thibaud/controlnet-sd21-ade20k-diffusers"
default_preprocessor: "passthrough"
default_scale: 0.8
description: "Semantic segmentation conditioning (SD2.1)"
preprocessor_params:
image_resolution: 512
19 changes: 13 additions & 6 deletions demo/realtime-img2img/routes/controlnet.py
Original file line number Diff line number Diff line change
Expand Up @@ -239,16 +239,23 @@ async def get_available_controlnets_endpoint(app_instance=Depends(get_app_instan
model_type = "sd15" # Default fallback

# Try to determine model type from pipeline config or uploaded config
if app_instance.pipeline and hasattr(app_instance.pipeline, 'config') and app_instance.pipeline.config:
model_id = app_instance.pipeline.config.get('model_id', '')
if 'sdxl' in model_id.lower() or 'xl' in model_id.lower():
if app_instance.pipeline and hasattr(app_instance.pipeline, "config") and app_instance.pipeline.config:
model_id = app_instance.pipeline.config.get("model_id", "")
ml = model_id.lower()
if "sdxl" in ml or "xl" in ml:
model_type = "sdxl"
elif "sd-turbo" in ml or "sd21" in ml or "sd2.1" in ml or "2-1" in ml or "stable-diffusion-2" in ml:
model_type = "sd21"
elif app_instance.app_state.uploaded_config:
# If no pipeline yet, try to get model type from uploaded config
model_id = app_instance.app_state.uploaded_config.get('model_id_or_path', '')
if 'sdxl' in model_id.lower() or 'xl' in model_id.lower():
model_id = app_instance.app_state.uploaded_config.get("model_id_or_path", "")
ml = model_id.lower()
if "sdxl" in ml or "xl" in ml:
model_type = "sdxl"

elif "sd-turbo" in ml or "sd21" in ml or "sd2.1" in ml or "2-1" in ml or "stable-diffusion-2" in ml:
model_type = "sd21"


# Handle case where available_controlnets dependency returns None
if available_controlnets is None:
logging.warning("get_available_controlnets: available_controlnets dependency returned None")
Expand Down
51 changes: 36 additions & 15 deletions src/streamdiffusion/preprocessing/processors/canny.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
import torch
from typing import Union
from .base import BasePreprocessor
from .category_params import EDGE_SMOOTHNESS_PARAM, apply_edge_smoothness

# TODO: provide GPU-native edge detection
class CannyPreprocessor(BasePreprocessor):
Expand All @@ -29,8 +30,9 @@ def get_preprocessor_metadata(cls):
"type": "int",
"default": 200,
"range": [1, 255],
"description": "Upper threshold for edge detection. Higher values are more selective."
}
"description": "Upper threshold for edge detection. Higher values are more selective.",
},
**EDGE_SMOOTHNESS_PARAM,
},
"use_cases": ["Line art", "Architecture", "Technical drawings", "Clean edge detection"]
}
Expand Down Expand Up @@ -60,10 +62,21 @@ def _process_core(self, image: Image.Image) -> Image.Image:
gray = cv2.cvtColor(image_np, cv2.COLOR_RGB2GRAY)
else:
gray = image_np

low_threshold = self.params.get('low_threshold', 100)
high_threshold = self.params.get('high_threshold', 200)


# Optional smoothness pre-blur (category-standard edge param).
# Applied before cv2.Canny so that coarser smoothing suppresses high-frequency
# texture, yielding sparser / softer edges without changing threshold semantics.
smoothness = float(self.params.get("smoothness", 0.0))
if smoothness > 0.0:
sigma = smoothness * 2.0
radius = max(1, int(sigma * 3.0 + 0.5))
k_size = 2 * radius + 1
gray = cv2.GaussianBlur(gray, (k_size, k_size), sigma)

low_threshold = self.params.get("low_threshold", 100)
high_threshold = self.params.get("high_threshold", 200)


edges = cv2.Canny(gray, low_threshold, high_threshold)
edges_rgb = cv2.cvtColor(edges, cv2.COLOR_GRAY2RGB)

Expand All @@ -77,18 +90,26 @@ def _process_tensor_core(self, image_tensor: torch.Tensor) -> torch.Tensor:
gray_tensor = 0.299 * image_tensor[0] + 0.587 * image_tensor[1] + 0.114 * image_tensor[2]
else:
gray_tensor = image_tensor[0] if image_tensor.shape[0] == 1 else image_tensor

gray_cpu = gray_tensor.cpu()
gray_np = (gray_cpu * 255).clamp(0, 255).to(torch.uint8).numpy()

low_threshold = self.params.get('low_threshold', 100)
high_threshold = self.params.get('high_threshold', 200)


# Optional smoothness pre-blur (category-standard edge param).
smoothness = float(self.params.get("smoothness", 0.0))
if smoothness > 0.0:
sigma = smoothness * 2.0
radius = max(1, int(sigma * 3.0 + 0.5))
k_size = 2 * radius + 1
gray_np = cv2.GaussianBlur(gray_np, (k_size, k_size), sigma)

low_threshold = self.params.get("low_threshold", 100)
high_threshold = self.params.get("high_threshold", 200)

edges = cv2.Canny(gray_np, low_threshold, high_threshold)

edges_tensor = torch.from_numpy(edges).float() / 255.0
edges_tensor = edges_tensor.to(device=self.device, dtype=self.dtype)

edges_rgb = edges_tensor.unsqueeze(0).repeat(3, 1, 1)
return edges_rgb

return edges_rgb
Loading