roboflow
diff --git a/examples/webrtc/webrtc_worker.py b/examples/webrtc/webrtc_worker.py
Lines changed: 410 additions & 38 deletions
diff --git a/inference/core/env.py b/inference/core/env.py
Lines changed: 1 addition & 1 deletion
diff --git a/inference/core/exceptions.py b/inference/core/exceptions.py
Lines changed: 4 additions & 0 deletions
diff --git a/inference/core/interfaces/http/error_handlers.py b/inference/core/interfaces/http/error_handlers.py
Lines changed: 19 additions & 0 deletions
diff --git a/inference/core/interfaces/http/http_api.py b/inference/core/interfaces/http/http_api.py
Lines changed: 2 additions & 0 deletions
diff --git a/inference/core/interfaces/stream_manager/manager_app/entities.py b/inference/core/interfaces/stream_manager/manager_app/entities.py
Lines changed: 4 additions & 4 deletions
diff --git a/inference/core/interfaces/stream_manager/manager_app/webrtc.py b/inference/core/interfaces/stream_manager/manager_app/webrtc.py
Lines changed: 3 additions & 3 deletions
diff --git a/inference/core/interfaces/webrtc_worker/entities.py b/inference/core/interfaces/webrtc_worker/entities.py
Lines changed: 43 additions & 17 deletions
diff --git a/inference/core/interfaces/webrtc_worker/modal.py b/inference/core/interfaces/webrtc_worker/modal.py
Lines changed: 43 additions & 25 deletions
@@ -699,7 +699,7 @@
 WEBRTC_MODAL_RESPONSE_TIMEOUT = int(os.getenv("WEBRTC_MODAL_RESPONSE_TIMEOUT", "60"))
 # seconds
 WEBRTC_MODAL_FUNCTION_TIME_LIMIT = int(
-    os.getenv("WEBRTC_MODAL_FUNCTION_TIME_LIMIT", "60")
+    os.getenv("WEBRTC_MODAL_FUNCTION_TIME_LIMIT", "3600")
 )
 WEBRTC_MODAL_FUNCTION_ENABLE_MEMORY_SNAPSHOT = str2bool(
     os.getenv("WEBRTC_MODAL_FUNCTION_ENABLE_MEMORY_SNAPSHOT", "True")
 
@@ -216,3 +216,7 @@ def __init__(self, message: str, inner_error: Exception):
     @property
     def inner_error(self) -> Exception:
         return self._inner_error
+
+
+class WebRTCConfigurationError(Exception):
+    pass
@@ -28,6 +28,7 @@
     RoboflowAPITimeoutError,
     RoboflowAPIUnsuccessfulRequestError,
     ServiceConfigurationError,
+    WebRTCConfigurationError,
     WorkspaceLoadError,
 )
 from inference.core.interfaces.stream_manager.api.errors import (
@@ -358,6 +359,15 @@ def wrapped_route(*args, **kwargs):
                     "inner_error_type": error.inner_error_type,
                 },
             )
+        except WebRTCConfigurationError as error:
+            logger.error("%s: %s", type(error).__name__, error)
+            resp = JSONResponse(
+                status_code=400,
+                content={
+                    "message": str(error),
+                    "error_type": "WebRTCConfigurationError",
+                },
+            )
         except Exception as error:
             logger.exception("%s: %s", type(error).__name__, error)
             resp = JSONResponse(status_code=500, content={"message": "Internal error."})
@@ -661,6 +671,15 @@ async def wrapped_route(*args, **kwargs):
                     "inner_error_type": error.inner_error_type,
                 },
             )
+        except WebRTCConfigurationError as error:
+            logger.error("%s: %s", type(error).__name__, error)
+            resp = JSONResponse(
+                status_code=400,
+                content={
+                    "message": str(error),
+                    "error_type": "WebRTCConfigurationError",
+                },
+            )
         except Exception as error:
             logger.exception("%s: %s", type(error).__name__, error)
             resp = JSONResponse(status_code=500, content={"message": "Internal error."})
 
@@ -171,6 +171,7 @@
     MissingServiceSecretError,
     RoboflowAPINotAuthorizedError,
     RoboflowAPINotNotFoundError,
+    WebRTCConfigurationError,
     WorkspaceLoadError,
 )
 from inference.core.interfaces.base import BaseInterface
@@ -1467,6 +1468,7 @@ async def initialise_webrtc_worker(
                         "RoboflowAPINotAuthorizedError": RoboflowAPINotAuthorizedError,
                         "RoboflowAPINotNotFoundError": RoboflowAPINotNotFoundError,
                         "ValidationError": ValidationError,
+                        "WebRTCConfigurationError": WebRTCConfigurationError,
                     }
                     exc = expected_exceptions.get(
                         worker_result.exception_type, Exception
 
@@ -112,8 +112,8 @@ class InitialiseWebRTCPipelinePayload(InitialisePipelinePayload):
         WEBRTC_REALTIME_PROCESSING  # this parameter controls only webrtc processing, not inference pipeline strategies
     )
     webrtc_turn_config: Optional[WebRTCTURNConfig] = None
-    stream_output: Optional[List[Optional[str]]] = Field(default_factory=list)
-    data_output: Optional[List[Optional[str]]] = Field(default_factory=list)
+    stream_output: Optional[List[str]] = Field(default_factory=list)
+    data_output: Optional[List[str]] = Field(default_factory=list)
     webcam_fps: Optional[float] = (
         None  # TODO: this parameter is now passed for both webcam and video source
     )
@@ -124,8 +124,8 @@ class InitialiseWebRTCPipelinePayload(InitialisePipelinePayload):
 
 
 class WebRTCData(BaseModel):
-    stream_output: Optional[str] = None
-    data_output: Optional[str] = None
+    stream_output: Optional[List[str]] = None
+    data_output: Optional[List[str]] = None
 
 
 class ConsumeResultsPayload(BaseModel):
 
@@ -338,7 +338,7 @@ def __init__(
         video_transform_track: VideoTransformTrack,
         asyncio_loop: asyncio.AbstractEventLoop,
         stream_output: Optional[str] = None,
-        data_output: Optional[str] = None,
+        data_output: Optional[List[str]] = None,
         *args,
         **kwargs,
     ):
@@ -347,7 +347,7 @@ def __init__(
         self.video_transform_track: VideoTransformTrack = video_transform_track
         self._consumers_signalled: bool = False
         self.stream_output: Optional[str] = stream_output
-        self.data_output: Optional[str] = data_output
+        self.data_output: Optional[List[str]] = data_output
         self.data_channel: Optional[RTCDataChannel] = None
 
 
@@ -384,7 +384,7 @@ async def init_rtc_peer_connection(
     webrtc_realtime_processing: bool = True,
     webcam_fps: Optional[float] = None,
     stream_output: Optional[str] = None,
-    data_output: Optional[str] = None,
+    data_output: Optional[List[str]] = None,
 ) -> RTCPeerConnectionWithFPS:
     relay = MediaRelay()
     video_transform_track = VideoTransformTrack(
 
@@ -1,8 +1,12 @@
-from typing import List, Literal, Optional, Union
+from enum import Enum
+from typing import Any, Dict, List, Literal, Optional, Union
 
 from pydantic import BaseModel, Field
 
-from inference.core.env import WEBRTC_REALTIME_PROCESSING
+from inference.core.env import (
+    WEBRTC_MODAL_FUNCTION_TIME_LIMIT,
+    WEBRTC_REALTIME_PROCESSING,
+)
 from inference.core.interfaces.stream_manager.manager_app.entities import (
     WebRTCOffer,
     WebRTCTURNConfig,
@@ -18,22 +22,25 @@ class WebRTCWorkerRequest(BaseModel):
     webrtc_realtime_processing: bool = (
         WEBRTC_REALTIME_PROCESSING  # when set to True, MediaRelay.subscribe will be called with buffered=False
     )
-    stream_output: Optional[List[Optional[str]]] = Field(default_factory=list)
-    data_output: Optional[List[Optional[str]]] = Field(default_factory=list)
+    stream_output: Optional[List[str]] = Field(default=None)
+    data_output: Optional[List[str]] = Field(default=None)
     declared_fps: Optional[float] = None
     rtsp_url: Optional[str] = None
-    processing_timeout: Optional[int] = 60
+    processing_timeout: Optional[int] = WEBRTC_MODAL_FUNCTION_TIME_LIMIT
     # https://modal.com/docs/guide/gpu#specifying-gpu-type
-    requested_gpu: Literal[
-        "T4",
-        "L4",
-        "A10",
-        "A100",
-        "A100-40GB",
-        "A100-80GB",
-        "L40S" "H100/H100!",
-        "H200",
-        "B200",
+    requested_gpu: Optional[
+        Literal[
+            "T4",
+            "L4",
+            "A10",
+            "A100",
+            "A100-40GB",
+            "A100-80GB",
+            "L40S",
+            "H100/H100!",
+            "H200",
+            "B200",
+        ]
     ] = "T4"
 
 
@@ -47,8 +54,15 @@ class WebRTCVideoMetadata(BaseModel):
 
 
 class WebRTCOutput(BaseModel):
-    output_name: Optional[str] = None
-    serialized_output_data: Optional[str] = None
+    """Output sent via WebRTC data channel.
+
+    serialized_output_data contains a dictionary with workflow outputs:
+    - If data_output is None or []: no data sent (only metadata)
+    - If data_output is ["*"]: all workflow outputs (excluding images, unless explicitly named)
+    - If data_output is ["field1", "field2"]: only those fields (including images if explicitly named)
+    """
+
+    serialized_output_data: Optional[Dict[str, Any]] = None
     video_metadata: Optional[WebRTCVideoMetadata] = None
     errors: List[str] = Field(default_factory=list)
 
@@ -60,3 +74,15 @@ class WebRTCWorkerResult(BaseModel):
     error_message: Optional[str] = None
     error_context: Optional[str] = None
     inner_error: Optional[str] = None
+
+
+class StreamOutputMode(str, Enum):
+    AUTO_DETECT = "auto_detect"  # None -> auto-detect first image
+    NO_VIDEO = "no_video"  # [] -> no video track
+    SPECIFIC_FIELD = "specific"  # ["field"] -> use specific field
+
+
+class DataOutputMode(str, Enum):
+    NONE = "none"  # None or [] -> no data sent
+    ALL = "all"  # ["*"] -> send all (skip images)
+    SPECIFIC = "specific"  # ["field1", "field2"] -> send only these
@@ -4,7 +4,6 @@
 from inference.core import logger
 from inference.core.env import (
     ALLOW_CUSTOM_PYTHON_EXECUTION_IN_WORKFLOWS,
-    API_KEY,
     INTERNAL_WEIGHTS_URL_SUFFIX,
     LOG_LEVEL,
     MODAL_TOKEN_ID,
@@ -68,22 +67,14 @@
         image=video_processing_image,
     )
 
-    # https://modal.com/docs/reference/modal.App#cls
-    @app.cls(
-        min_containers=WEBRTC_MODAL_FUNCTION_MIN_CONTAINERS,
-        buffer_containers=WEBRTC_MODAL_FUNCTION_BUFFER_CONTAINERS,
-        scaledown_window=WEBRTC_MODAL_FUNCTION_SCALEDOWN_WINDOW,
-        timeout=WEBRTC_MODAL_FUNCTION_TIME_LIMIT,
-        enable_memory_snapshot=WEBRTC_MODAL_FUNCTION_ENABLE_MEMORY_SNAPSHOT,
-        experimental_options=(
-            {"enable_gpu_snapshot": True}
-            if WEBRTC_MODAL_FUNCTION_ENABLE_MEMORY_SNAPSHOT
-            and WEBRTC_MODAL_FUNCTION_GPU
-            else {}
-        ),
-        gpu=WEBRTC_MODAL_FUNCTION_GPU,
-        max_inputs=WEBRTC_MODAL_FUNCTION_MAX_INPUTS,
-        env={
+    decorator_kwargs = {
+        "min_containers": WEBRTC_MODAL_FUNCTION_MIN_CONTAINERS,
+        "buffer_containers": WEBRTC_MODAL_FUNCTION_BUFFER_CONTAINERS,
+        "scaledown_window": WEBRTC_MODAL_FUNCTION_SCALEDOWN_WINDOW,
+        "timeout": WEBRTC_MODAL_FUNCTION_TIME_LIMIT,
+        "enable_memory_snapshot": WEBRTC_MODAL_FUNCTION_ENABLE_MEMORY_SNAPSHOT,
+        "max_inputs": WEBRTC_MODAL_FUNCTION_MAX_INPUTS,
+        "env": {
             "ROBOFLOW_INTERNAL_SERVICE_SECRET": ROBOFLOW_INTERNAL_SERVICE_SECRET,
             "ROBOFLOW_INTERNAL_SERVICE_NAME": WEBRTC_MODAL_ROBOFLOW_INTERNAL_SERVICE_NAME,
             "PROJECT": PROJECT,
@@ -121,14 +112,11 @@
             "WEBRTC_MODAL_IMAGE_TAG": WEBRTC_MODAL_IMAGE_TAG,
             "WEBRTC_MODAL_RTSP_PLACEHOLDER": WEBRTC_MODAL_RTSP_PLACEHOLDER,
             "WEBRTC_MODAL_RTSP_PLACEHOLDER_URL": WEBRTC_MODAL_RTSP_PLACEHOLDER_URL,
-            "ONNXRUNTIME_EXECUTION_PROVIDERS": (
-                "CUDAExecutionProvider"
-                if WEBRTC_MODAL_FUNCTION_GPU
-                else "CPUExecutionProvider"
-            ),
+            "ONNXRUNTIME_EXECUTION_PROVIDERS": "[CUDAExecutionProvider,CPUExecutionProvider]",
         },
-        volumes={MODEL_CACHE_DIR: rfcache_volume},
-    )
+        "volumes": {MODEL_CACHE_DIR: rfcache_volume},
+    }
+
     class RTCPeerConnectionModal:
         @modal.method()
         def rtc_peer_connection_modal(
@@ -149,6 +137,27 @@ def send_answer(obj: WebRTCWorkerResult):
                 )
             )
 
+    # Modal derives function name from class name
+    # https://modal.com/docs/reference/modal.App#cls
+    @app.cls(
+        **{
+            **decorator_kwargs,
+            "enable_memory_snapshot": True,
+        }
+    )
+    class RTCPeerConnectionModalCPU(RTCPeerConnectionModal):
+        pass
+
+    @app.cls(
+        **{
+            **decorator_kwargs,
+            "gpu": WEBRTC_MODAL_FUNCTION_GPU,
+            "experimental_options": {"enable_gpu_snapshot": True},
+        }
+    )
+    class RTCPeerConnectionModalGPU(RTCPeerConnectionModal):
+        pass
+
     def spawn_rtc_peer_connection_modal(
         webrtc_request: WebRTCWorkerRequest,
     ) -> WebRTCWorkerResult:
@@ -164,6 +173,12 @@ def spawn_rtc_peer_connection_modal(
         except modal.exception.NotFoundError:
             logger.info("Deploying webrtc modal app %s", WEBRTC_MODAL_APP_NAME)
             app.deploy(name=WEBRTC_MODAL_APP_NAME, client=client)
+
+        if webrtc_request.requested_gpu:
+            RTCPeerConnectionModal = RTCPeerConnectionModalGPU
+        else:
+            RTCPeerConnectionModal = RTCPeerConnectionModalCPU
+
         # https://modal.com/docs/reference/modal.Cls#from_name
         deployed_cls = modal.Cls.from_name(
             app_name=app.name,
@@ -181,7 +196,10 @@ def spawn_rtc_peer_connection_modal(
         cls_with_options = deployed_cls.with_options(
             timeout=webrtc_request.processing_timeout,
         )
-        if webrtc_request.requested_gpu != WEBRTC_MODAL_FUNCTION_GPU:
+        if (
+            webrtc_request.requested_gpu is not None
+            and webrtc_request.requested_gpu != WEBRTC_MODAL_FUNCTION_GPU
+        ):
             logger.warning(
                 "Spawning webrtc modal function with custom gpu %s",
                 webrtc_request.requested_gpu,
Original file line number	Diff line number	Diff line change
`@@ -699,7 +699,7 @@`
`699`	`699`	`WEBRTC_MODAL_RESPONSE_TIMEOUT = int(os.getenv("WEBRTC_MODAL_RESPONSE_TIMEOUT", "60"))`
`700`	`700`	`# seconds`
`701`	`701`	`WEBRTC_MODAL_FUNCTION_TIME_LIMIT = int(`
`702`		`- os.getenv("WEBRTC_MODAL_FUNCTION_TIME_LIMIT", "60")`
	`702`	`+ os.getenv("WEBRTC_MODAL_FUNCTION_TIME_LIMIT", "3600")`
`703`	`703`	`)`
`704`	`704`	`WEBRTC_MODAL_FUNCTION_ENABLE_MEMORY_SNAPSHOT = str2bool(`
`705`	`705`	`os.getenv("WEBRTC_MODAL_FUNCTION_ENABLE_MEMORY_SNAPSHOT", "True")`