Skip to content

Commit 2b96fd8

Browse files
authored
Merge branch 'main' into webrtc-python-sdk-webcam
2 parents c1395e3 + cc874ed commit 2b96fd8

File tree

15 files changed

+1344
-286
lines changed

15 files changed

+1344
-286
lines changed

examples/webrtc/webrtc_worker.py

Lines changed: 410 additions & 38 deletions
Large diffs are not rendered by default.

inference/core/env.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -699,7 +699,7 @@
699699
WEBRTC_MODAL_RESPONSE_TIMEOUT = int(os.getenv("WEBRTC_MODAL_RESPONSE_TIMEOUT", "60"))
700700
# seconds
701701
WEBRTC_MODAL_FUNCTION_TIME_LIMIT = int(
702-
os.getenv("WEBRTC_MODAL_FUNCTION_TIME_LIMIT", "60")
702+
os.getenv("WEBRTC_MODAL_FUNCTION_TIME_LIMIT", "3600")
703703
)
704704
WEBRTC_MODAL_FUNCTION_ENABLE_MEMORY_SNAPSHOT = str2bool(
705705
os.getenv("WEBRTC_MODAL_FUNCTION_ENABLE_MEMORY_SNAPSHOT", "True")

inference/core/exceptions.py

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -216,3 +216,7 @@ def __init__(self, message: str, inner_error: Exception):
216216
@property
217217
def inner_error(self) -> Exception:
218218
return self._inner_error
219+
220+
221+
class WebRTCConfigurationError(Exception):
222+
pass

inference/core/interfaces/http/error_handlers.py

Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -28,6 +28,7 @@
2828
RoboflowAPITimeoutError,
2929
RoboflowAPIUnsuccessfulRequestError,
3030
ServiceConfigurationError,
31+
WebRTCConfigurationError,
3132
WorkspaceLoadError,
3233
)
3334
from inference.core.interfaces.stream_manager.api.errors import (
@@ -358,6 +359,15 @@ def wrapped_route(*args, **kwargs):
358359
"inner_error_type": error.inner_error_type,
359360
},
360361
)
362+
except WebRTCConfigurationError as error:
363+
logger.error("%s: %s", type(error).__name__, error)
364+
resp = JSONResponse(
365+
status_code=400,
366+
content={
367+
"message": str(error),
368+
"error_type": "WebRTCConfigurationError",
369+
},
370+
)
361371
except Exception as error:
362372
logger.exception("%s: %s", type(error).__name__, error)
363373
resp = JSONResponse(status_code=500, content={"message": "Internal error."})
@@ -661,6 +671,15 @@ async def wrapped_route(*args, **kwargs):
661671
"inner_error_type": error.inner_error_type,
662672
},
663673
)
674+
except WebRTCConfigurationError as error:
675+
logger.error("%s: %s", type(error).__name__, error)
676+
resp = JSONResponse(
677+
status_code=400,
678+
content={
679+
"message": str(error),
680+
"error_type": "WebRTCConfigurationError",
681+
},
682+
)
664683
except Exception as error:
665684
logger.exception("%s: %s", type(error).__name__, error)
666685
resp = JSONResponse(status_code=500, content={"message": "Internal error."})

inference/core/interfaces/http/http_api.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -171,6 +171,7 @@
171171
MissingServiceSecretError,
172172
RoboflowAPINotAuthorizedError,
173173
RoboflowAPINotNotFoundError,
174+
WebRTCConfigurationError,
174175
WorkspaceLoadError,
175176
)
176177
from inference.core.interfaces.base import BaseInterface
@@ -1467,6 +1468,7 @@ async def initialise_webrtc_worker(
14671468
"RoboflowAPINotAuthorizedError": RoboflowAPINotAuthorizedError,
14681469
"RoboflowAPINotNotFoundError": RoboflowAPINotNotFoundError,
14691470
"ValidationError": ValidationError,
1471+
"WebRTCConfigurationError": WebRTCConfigurationError,
14701472
}
14711473
exc = expected_exceptions.get(
14721474
worker_result.exception_type, Exception

inference/core/interfaces/stream_manager/manager_app/entities.py

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -112,8 +112,8 @@ class InitialiseWebRTCPipelinePayload(InitialisePipelinePayload):
112112
WEBRTC_REALTIME_PROCESSING # this parameter controls only webrtc processing, not inference pipeline strategies
113113
)
114114
webrtc_turn_config: Optional[WebRTCTURNConfig] = None
115-
stream_output: Optional[List[Optional[str]]] = Field(default_factory=list)
116-
data_output: Optional[List[Optional[str]]] = Field(default_factory=list)
115+
stream_output: Optional[List[str]] = Field(default_factory=list)
116+
data_output: Optional[List[str]] = Field(default_factory=list)
117117
webcam_fps: Optional[float] = (
118118
None # TODO: this parameter is now passed for both webcam and video source
119119
)
@@ -124,8 +124,8 @@ class InitialiseWebRTCPipelinePayload(InitialisePipelinePayload):
124124

125125

126126
class WebRTCData(BaseModel):
127-
stream_output: Optional[str] = None
128-
data_output: Optional[str] = None
127+
stream_output: Optional[List[str]] = None
128+
data_output: Optional[List[str]] = None
129129

130130

131131
class ConsumeResultsPayload(BaseModel):

inference/core/interfaces/stream_manager/manager_app/webrtc.py

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -338,7 +338,7 @@ def __init__(
338338
video_transform_track: VideoTransformTrack,
339339
asyncio_loop: asyncio.AbstractEventLoop,
340340
stream_output: Optional[str] = None,
341-
data_output: Optional[str] = None,
341+
data_output: Optional[List[str]] = None,
342342
*args,
343343
**kwargs,
344344
):
@@ -347,7 +347,7 @@ def __init__(
347347
self.video_transform_track: VideoTransformTrack = video_transform_track
348348
self._consumers_signalled: bool = False
349349
self.stream_output: Optional[str] = stream_output
350-
self.data_output: Optional[str] = data_output
350+
self.data_output: Optional[List[str]] = data_output
351351
self.data_channel: Optional[RTCDataChannel] = None
352352

353353

@@ -384,7 +384,7 @@ async def init_rtc_peer_connection(
384384
webrtc_realtime_processing: bool = True,
385385
webcam_fps: Optional[float] = None,
386386
stream_output: Optional[str] = None,
387-
data_output: Optional[str] = None,
387+
data_output: Optional[List[str]] = None,
388388
) -> RTCPeerConnectionWithFPS:
389389
relay = MediaRelay()
390390
video_transform_track = VideoTransformTrack(

inference/core/interfaces/webrtc_worker/entities.py

Lines changed: 43 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,12 @@
1-
from typing import List, Literal, Optional, Union
1+
from enum import Enum
2+
from typing import Any, Dict, List, Literal, Optional, Union
23

34
from pydantic import BaseModel, Field
45

5-
from inference.core.env import WEBRTC_REALTIME_PROCESSING
6+
from inference.core.env import (
7+
WEBRTC_MODAL_FUNCTION_TIME_LIMIT,
8+
WEBRTC_REALTIME_PROCESSING,
9+
)
610
from inference.core.interfaces.stream_manager.manager_app.entities import (
711
WebRTCOffer,
812
WebRTCTURNConfig,
@@ -18,22 +22,25 @@ class WebRTCWorkerRequest(BaseModel):
1822
webrtc_realtime_processing: bool = (
1923
WEBRTC_REALTIME_PROCESSING # when set to True, MediaRelay.subscribe will be called with buffered=False
2024
)
21-
stream_output: Optional[List[Optional[str]]] = Field(default_factory=list)
22-
data_output: Optional[List[Optional[str]]] = Field(default_factory=list)
25+
stream_output: Optional[List[str]] = Field(default=None)
26+
data_output: Optional[List[str]] = Field(default=None)
2327
declared_fps: Optional[float] = None
2428
rtsp_url: Optional[str] = None
25-
processing_timeout: Optional[int] = 60
29+
processing_timeout: Optional[int] = WEBRTC_MODAL_FUNCTION_TIME_LIMIT
2630
# https://modal.com/docs/guide/gpu#specifying-gpu-type
27-
requested_gpu: Literal[
28-
"T4",
29-
"L4",
30-
"A10",
31-
"A100",
32-
"A100-40GB",
33-
"A100-80GB",
34-
"L40S" "H100/H100!",
35-
"H200",
36-
"B200",
31+
requested_gpu: Optional[
32+
Literal[
33+
"T4",
34+
"L4",
35+
"A10",
36+
"A100",
37+
"A100-40GB",
38+
"A100-80GB",
39+
"L40S",
40+
"H100/H100!",
41+
"H200",
42+
"B200",
43+
]
3744
] = "T4"
3845

3946

@@ -47,8 +54,15 @@ class WebRTCVideoMetadata(BaseModel):
4754

4855

4956
class WebRTCOutput(BaseModel):
50-
output_name: Optional[str] = None
51-
serialized_output_data: Optional[str] = None
57+
"""Output sent via WebRTC data channel.
58+
59+
serialized_output_data contains a dictionary with workflow outputs:
60+
- If data_output is None or []: no data sent (only metadata)
61+
- If data_output is ["*"]: all workflow outputs (excluding images, unless explicitly named)
62+
- If data_output is ["field1", "field2"]: only those fields (including images if explicitly named)
63+
"""
64+
65+
serialized_output_data: Optional[Dict[str, Any]] = None
5266
video_metadata: Optional[WebRTCVideoMetadata] = None
5367
errors: List[str] = Field(default_factory=list)
5468

@@ -60,3 +74,15 @@ class WebRTCWorkerResult(BaseModel):
6074
error_message: Optional[str] = None
6175
error_context: Optional[str] = None
6276
inner_error: Optional[str] = None
77+
78+
79+
class StreamOutputMode(str, Enum):
80+
AUTO_DETECT = "auto_detect" # None -> auto-detect first image
81+
NO_VIDEO = "no_video" # [] -> no video track
82+
SPECIFIC_FIELD = "specific" # ["field"] -> use specific field
83+
84+
85+
class DataOutputMode(str, Enum):
86+
NONE = "none" # None or [] -> no data sent
87+
ALL = "all" # ["*"] -> send all (skip images)
88+
SPECIFIC = "specific" # ["field1", "field2"] -> send only these

inference/core/interfaces/webrtc_worker/modal.py

Lines changed: 43 additions & 25 deletions
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,6 @@
44
from inference.core import logger
55
from inference.core.env import (
66
ALLOW_CUSTOM_PYTHON_EXECUTION_IN_WORKFLOWS,
7-
API_KEY,
87
INTERNAL_WEIGHTS_URL_SUFFIX,
98
LOG_LEVEL,
109
MODAL_TOKEN_ID,
@@ -68,22 +67,14 @@
6867
image=video_processing_image,
6968
)
7069

71-
# https://modal.com/docs/reference/modal.App#cls
72-
@app.cls(
73-
min_containers=WEBRTC_MODAL_FUNCTION_MIN_CONTAINERS,
74-
buffer_containers=WEBRTC_MODAL_FUNCTION_BUFFER_CONTAINERS,
75-
scaledown_window=WEBRTC_MODAL_FUNCTION_SCALEDOWN_WINDOW,
76-
timeout=WEBRTC_MODAL_FUNCTION_TIME_LIMIT,
77-
enable_memory_snapshot=WEBRTC_MODAL_FUNCTION_ENABLE_MEMORY_SNAPSHOT,
78-
experimental_options=(
79-
{"enable_gpu_snapshot": True}
80-
if WEBRTC_MODAL_FUNCTION_ENABLE_MEMORY_SNAPSHOT
81-
and WEBRTC_MODAL_FUNCTION_GPU
82-
else {}
83-
),
84-
gpu=WEBRTC_MODAL_FUNCTION_GPU,
85-
max_inputs=WEBRTC_MODAL_FUNCTION_MAX_INPUTS,
86-
env={
70+
decorator_kwargs = {
71+
"min_containers": WEBRTC_MODAL_FUNCTION_MIN_CONTAINERS,
72+
"buffer_containers": WEBRTC_MODAL_FUNCTION_BUFFER_CONTAINERS,
73+
"scaledown_window": WEBRTC_MODAL_FUNCTION_SCALEDOWN_WINDOW,
74+
"timeout": WEBRTC_MODAL_FUNCTION_TIME_LIMIT,
75+
"enable_memory_snapshot": WEBRTC_MODAL_FUNCTION_ENABLE_MEMORY_SNAPSHOT,
76+
"max_inputs": WEBRTC_MODAL_FUNCTION_MAX_INPUTS,
77+
"env": {
8778
"ROBOFLOW_INTERNAL_SERVICE_SECRET": ROBOFLOW_INTERNAL_SERVICE_SECRET,
8879
"ROBOFLOW_INTERNAL_SERVICE_NAME": WEBRTC_MODAL_ROBOFLOW_INTERNAL_SERVICE_NAME,
8980
"PROJECT": PROJECT,
@@ -121,14 +112,11 @@
121112
"WEBRTC_MODAL_IMAGE_TAG": WEBRTC_MODAL_IMAGE_TAG,
122113
"WEBRTC_MODAL_RTSP_PLACEHOLDER": WEBRTC_MODAL_RTSP_PLACEHOLDER,
123114
"WEBRTC_MODAL_RTSP_PLACEHOLDER_URL": WEBRTC_MODAL_RTSP_PLACEHOLDER_URL,
124-
"ONNXRUNTIME_EXECUTION_PROVIDERS": (
125-
"CUDAExecutionProvider"
126-
if WEBRTC_MODAL_FUNCTION_GPU
127-
else "CPUExecutionProvider"
128-
),
115+
"ONNXRUNTIME_EXECUTION_PROVIDERS": "[CUDAExecutionProvider,CPUExecutionProvider]",
129116
},
130-
volumes={MODEL_CACHE_DIR: rfcache_volume},
131-
)
117+
"volumes": {MODEL_CACHE_DIR: rfcache_volume},
118+
}
119+
132120
class RTCPeerConnectionModal:
133121
@modal.method()
134122
def rtc_peer_connection_modal(
@@ -149,6 +137,27 @@ def send_answer(obj: WebRTCWorkerResult):
149137
)
150138
)
151139

140+
# Modal derives function name from class name
141+
# https://modal.com/docs/reference/modal.App#cls
142+
@app.cls(
143+
**{
144+
**decorator_kwargs,
145+
"enable_memory_snapshot": True,
146+
}
147+
)
148+
class RTCPeerConnectionModalCPU(RTCPeerConnectionModal):
149+
pass
150+
151+
@app.cls(
152+
**{
153+
**decorator_kwargs,
154+
"gpu": WEBRTC_MODAL_FUNCTION_GPU,
155+
"experimental_options": {"enable_gpu_snapshot": True},
156+
}
157+
)
158+
class RTCPeerConnectionModalGPU(RTCPeerConnectionModal):
159+
pass
160+
152161
def spawn_rtc_peer_connection_modal(
153162
webrtc_request: WebRTCWorkerRequest,
154163
) -> WebRTCWorkerResult:
@@ -164,6 +173,12 @@ def spawn_rtc_peer_connection_modal(
164173
except modal.exception.NotFoundError:
165174
logger.info("Deploying webrtc modal app %s", WEBRTC_MODAL_APP_NAME)
166175
app.deploy(name=WEBRTC_MODAL_APP_NAME, client=client)
176+
177+
if webrtc_request.requested_gpu:
178+
RTCPeerConnectionModal = RTCPeerConnectionModalGPU
179+
else:
180+
RTCPeerConnectionModal = RTCPeerConnectionModalCPU
181+
167182
# https://modal.com/docs/reference/modal.Cls#from_name
168183
deployed_cls = modal.Cls.from_name(
169184
app_name=app.name,
@@ -181,7 +196,10 @@ def spawn_rtc_peer_connection_modal(
181196
cls_with_options = deployed_cls.with_options(
182197
timeout=webrtc_request.processing_timeout,
183198
)
184-
if webrtc_request.requested_gpu != WEBRTC_MODAL_FUNCTION_GPU:
199+
if (
200+
webrtc_request.requested_gpu is not None
201+
and webrtc_request.requested_gpu != WEBRTC_MODAL_FUNCTION_GPU
202+
):
185203
logger.warning(
186204
"Spawning webrtc modal function with custom gpu %s",
187205
webrtc_request.requested_gpu,

0 commit comments

Comments
 (0)