Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
361 commits
Select commit Hold shift + click to select a range
1b6a5ef
Record RFDETR persistent cache experiment
aseembits93 May 23, 2026
c13fe64
Record RFDETR query-index empty retest
aseembits93 May 23, 2026
c4af952
Record RFDETR limited mask allocation retest
aseembits93 May 23, 2026
2f7e3f0
Record RFDETR CUDA connections tuning
aseembits93 May 23, 2026
ea7062d
Record RFDETR graph stream priority test
aseembits93 May 23, 2026
c4d37b1
Record RFDETR caller-stream clone test
aseembits93 May 23, 2026
2d7b242
Record RFDETR NCU selector analysis
aseembits93 May 23, 2026
0dc6c17
Record RFDETR enqueue profile flag test
aseembits93 May 23, 2026
a8673b2
Record RFDETR opt2 TRT rebuild
aseembits93 May 23, 2026
5232d3e
Record RFDETR preprocessing override test
aseembits93 May 23, 2026
b457b58
Record RFDETR highest priority stream test
aseembits93 May 23, 2026
3790576
Record RFDETR clean post-stream check
aseembits93 May 23, 2026
5a6e228
Record RFDETR empty class filter test
aseembits93 May 23, 2026
f547904
Record RFDETR depth2 graph gap profile
aseembits93 May 23, 2026
14f6a88
Record RFDETR graph output pool test
aseembits93 May 23, 2026
f492522
Record RFDETR external graph input test
aseembits93 May 23, 2026
5f7403d
Record RFDETR graph input copy flag test
aseembits93 May 23, 2026
429b6f7
Record RFDETR captured output copy test
aseembits93 May 23, 2026
257b137
Record RFDETR two stage selector test
aseembits93 May 23, 2026
5e3c333
Record RFDETR TensorRT NVTX verbosity test
aseembits93 May 23, 2026
b5ee9bc
Record RFDETR TRT warmup count test
aseembits93 May 23, 2026
234ab7a
Record RFDETR postprocess stream copy test
aseembits93 May 23, 2026
240ee16
Record RFDETR accepted depth2 profile
aseembits93 May 23, 2026
7723869
Record RFDETR pooled graph input test
aseembits93 May 23, 2026
e8610d6
Record RFDETR alternate plan test
aseembits93 May 23, 2026
b7b7e87
Record RFDETR selector warp test
aseembits93 May 23, 2026
20dd422
Record RFDETR mask resize block test
aseembits93 May 23, 2026
c52c57c
Record RFDETR pinned view test
aseembits93 May 23, 2026
197f4f7
Record RFDETR caller stream wait test
aseembits93 May 23, 2026
e7aa886
Record RFDETR postprocess priority test
aseembits93 May 23, 2026
6e0a564
Record RFDETR preprocess priority test
aseembits93 May 23, 2026
82ae40f
Record RFDETR inference priority test
aseembits93 May 23, 2026
bbb591e
Record RFDETR fp16 logits test
aseembits93 May 23, 2026
8686a44
Record RFDETR D2H event sync test
aseembits93 May 23, 2026
077214b
Record RFDETR metadata array test
aseembits93 May 23, 2026
a0f88dc
Record RFDETR sigmoid buffer test
aseembits93 May 23, 2026
2704012
Record RFDETR zero aux streams test
aseembits93 May 23, 2026
d9ff98c
Record RFDETR caller graph stream test
aseembits93 May 23, 2026
3a76df9
Record fresh RFDETR accepted nsys profile
aseembits93 May 23, 2026
a4d9347
Record RFDETR int64 query index test
aseembits93 May 23, 2026
bf91807
Record RFDETR resize map test
aseembits93 May 23, 2026
5962e32
Record RFDETR fp16 mask output test
aseembits93 May 23, 2026
cbfcfa9
Record RFDETR captured input copy test
aseembits93 May 23, 2026
0bd1d1e
Record RFDETR fused buffer reuse test
aseembits93 May 23, 2026
e93835b
Record RFDETR output copy variant test
aseembits93 May 23, 2026
87745e3
Record RFDETR mask clone overlap test
aseembits93 May 23, 2026
65d0f74
Record RFDETR Triton num stages test
aseembits93 May 23, 2026
183b98b
Record RFDETR D2H copy order test
aseembits93 May 23, 2026
d662211
Record RFDETR count read test
aseembits93 May 23, 2026
2bda1d1
Record RFDETR 4x resize test
aseembits93 May 23, 2026
dede753
Record RFDETR accepted graph profile
aseembits93 May 23, 2026
b5a3fae
Record RFDETR graph pool test
aseembits93 May 23, 2026
2f27903
Record RFDETR external TRT memory test
aseembits93 May 23, 2026
5ce4435
Record RFDETR aux stream count test
aseembits93 May 23, 2026
f5700ed
Record RFDETR forward lock test
aseembits93 May 23, 2026
2164972
Record RFDETR context hit test
aseembits93 May 23, 2026
a88ae5d
Record RFDETR borrowed mask test
aseembits93 May 23, 2026
c38cffe
Record RFDETR selected mask copy test
aseembits93 May 23, 2026
e16efe6
Record RFDETR selector cap test
aseembits93 May 23, 2026
2e5ff64
Record RFDETR current graph profile
aseembits93 May 23, 2026
c6a790c
Record RFDETR small output borrow test
aseembits93 May 23, 2026
ab0f0a9
Record RFDETR accepted graph profile
aseembits93 May 23, 2026
bc1e1ed
Record RFDETR query index dtype test
aseembits93 May 23, 2026
4ff332d
Record RFDETR TensorRT enqueue profile test
aseembits93 May 23, 2026
7feb8dc
Record RFDETR TensorRT allocation strategy test
aseembits93 May 23, 2026
d4347fd
Record RFDETR producer tuning probes
aseembits93 May 23, 2026
d00d5e5
Record RFDETR adaptive fixed copy test
aseembits93 May 23, 2026
5d935e1
Record RFDETR output clone order test
aseembits93 May 23, 2026
1c367fa
Record RFDETR accepted depth-2 profile refresh
aseembits93 May 23, 2026
3a65cfb
Record RFDETR clone order rejection
aseembits93 May 23, 2026
b4e55f0
Record RFDETR graph-stream sigmoid test
aseembits93 May 23, 2026
6dc0e6a
Record RFDETR clock lock runtime probe
aseembits93 May 23, 2026
c7e6701
Record RFDETR max-clock scheduler probe
aseembits93 May 23, 2026
b91f35a
Record RFDETR max-clock clone scheduling test
aseembits93 May 23, 2026
7a85bcb
Warm RFDETR TensorRT graph capture
aseembits93 May 23, 2026
184dfa1
Record RFDETR warmup count tuning
aseembits93 May 23, 2026
33bef60
Record RFDETR warmed graph profile
aseembits93 May 23, 2026
e8bab19
Clone RFDETR graph capture result after warmup
aseembits93 May 23, 2026
98345d5
Record RFDETR capture clone removal test
aseembits93 May 23, 2026
ff49594
Record RFDETR pre-capture warmup test
aseembits93 May 23, 2026
01c9a9e
Record RFDETR warmed graph follow-up tests
aseembits93 May 23, 2026
b1f076c
Record RFDETR clock and output buffer probes
aseembits93 May 23, 2026
c6fef19
Record RFDETR engine and clock probes
aseembits93 May 23, 2026
139be6a
Record RFDETR captured postprocess probe
aseembits93 May 23, 2026
64101e8
Record RFDETR TensorRT NCU snapshot
aseembits93 May 23, 2026
dee5362
Record RFDETR version-compatible engine rebuild probes
aseembits93 May 23, 2026
31a7a25
Record RFDETR allocator backend probe
aseembits93 May 23, 2026
5a1a97b
Record RFDETR ONNX backend probe
aseembits93 May 23, 2026
715ef04
Record RFDETR Torch export source probe
aseembits93 May 23, 2026
67e96e3
Record RFDETR TensorRT cache-limit probe
aseembits93 May 23, 2026
26d3578
Record RFDETR mask allocation probe
aseembits93 May 23, 2026
f953e9c
Record RFDETR accepted refresh profile
aseembits93 May 23, 2026
55e50ec
Record RFDETR direct graph postprocess probe
aseembits93 May 23, 2026
cc8ba47
Record RFDETR CUDA connection probe
aseembits93 May 23, 2026
832cfbe
Record RFDETR graph output pool probe
aseembits93 May 23, 2026
77a7b39
Record RFDETR exact D2H copy probe
aseembits93 May 23, 2026
905bef2
Record RFDETR concurrent graph replay probe
aseembits93 May 23, 2026
276434b
Record RFDETR SV conversion probe
aseembits93 May 23, 2026
0652421
Record RFDETR profile selection probe
aseembits93 May 23, 2026
86e17f2
Record RFDETR class mapping dtype probe
aseembits93 May 23, 2026
05c0cb9
Record RFDETR compute mode probe
aseembits93 May 23, 2026
255f0f7
Record RFDETR CPU scheduling probes
aseembits93 May 23, 2026
e9195ea
Record RFDETR scheduling refresh probes
aseembits93 May 23, 2026
6b23edb
Record RFDETR Python optimize probe
aseembits93 May 23, 2026
b8d422a
Record RFDETR clean graph gap profile
aseembits93 May 23, 2026
4f9e477
Record RFDETR graph pool probe
aseembits93 May 23, 2026
bbc1c0b
Record RFDETR user graph refresh profile
aseembits93 May 23, 2026
0373175
Record RFDETR masked selector load probe
aseembits93 May 23, 2026
78d3d5d
Record RFDETR TF32 runtime probe
aseembits93 May 23, 2026
af3cb0f
Record RFDETR CUDA cache runtime probe
aseembits93 May 23, 2026
1e5db26
Record RFDETR normalization LUT probe
aseembits93 May 23, 2026
9f3c81c
Record RFDETR TensorRT layer profile
aseembits93 May 23, 2026
945b9ce
Record RFDETR cuBLAS workspace probe
aseembits93 May 23, 2026
904acd2
Record RFDETR native allocator split probe
aseembits93 May 23, 2026
e0ea047
Record RFDETR graph-bound refresh profile
aseembits93 May 23, 2026
513fce1
Record RFDETR selector mask cleanup probe
aseembits93 May 23, 2026
668aae6
Record RFDETR x-axis resize probe
aseembits93 May 23, 2026
00213e8
Record RFDETR single-image contiguous probe
aseembits93 May 23, 2026
7227ec9
Record RFDETR postprocess yield probe
aseembits93 May 23, 2026
18e326e
Record RFDETR selector index-return probe
aseembits93 May 23, 2026
18c1e70
Record RFDETR single-image postprocess probe
aseembits93 May 23, 2026
a943e8e
Record RFDETR combined output copy probe
aseembits93 May 23, 2026
a1e0aea
Record RFDETR graph spacing refresh profile
aseembits93 May 23, 2026
995615a
Record RFDETR progress print probe
aseembits93 May 23, 2026
7ab5273
Record RFDETR package metadata recheck
aseembits93 May 23, 2026
f5388eb
Record RFDETR static batch shortcut probe
aseembits93 May 23, 2026
2f70fe9
Record RFDETR CPU profile refresh
aseembits93 May 23, 2026
a4a14ec
Record RFDETR all-thread CPU profile
aseembits93 May 23, 2026
21a2a2f
Record RFDETR depth-2 graphbound profile
aseembits93 May 23, 2026
0192c1b
Record RFDETR scheduling and preprocessing probes
aseembits93 May 23, 2026
ca6fe39
Record RFDETR selector NCU snapshot
aseembits93 May 23, 2026
0e78d53
Record RFDETR host allocator probe
aseembits93 May 23, 2026
7bfafbe
Record RFDETR OpenCV thread probe
aseembits93 May 23, 2026
7d1b03f
Record RFDETR graph stream placement probe
aseembits93 May 23, 2026
f20e23d
Record RFDETR Python allocator probe
aseembits93 May 23, 2026
e5f188e
Record RFDETR depth-2 D2H copy probe
aseembits93 May 23, 2026
4510ab1
Record RFDETR graph-body visibility diagnostic
aseembits93 May 23, 2026
c5d8f81
Record RFDETR CUDA graph node profile
aseembits93 May 23, 2026
761ff10
Record RFDETR graph-node MHA NCU snapshot
aseembits93 May 23, 2026
8026ec7
Record RFDETR engine tactic visibility audit
aseembits93 May 23, 2026
c6acc59
Record RFDETR graph-node GEMM NCU snapshot
aseembits93 May 23, 2026
6534371
Record RFDETR three-aux-stream probe
aseembits93 May 23, 2026
5d93394
Record RFDETR depth-2 graphbound final profile
aseembits93 May 23, 2026
f8ab202
Record RFDETR resize block-size probe
aseembits93 May 23, 2026
d05bb4e
Tune RFDETR mask resize launch warps
aseembits93 May 23, 2026
f68060a
Record RFDETR class-name mapping probe
aseembits93 May 23, 2026
7c7d1ce
Record RFDETR one-warp resize probe
aseembits93 May 23, 2026
425da88
Record RFDETR UUID hex probe
aseembits93 May 23, 2026
9edd45a
Record RFDETR low-bubble CPU probes
aseembits93 May 23, 2026
f9a37c3
Record RFDETR TensorRT graph ceiling
aseembits93 May 23, 2026
c394a29
Record RFDETR TensorRT runtime thread sweep
aseembits93 May 23, 2026
6b3b3e9
Record RFDETR TensorRT shape inference probe
aseembits93 May 23, 2026
c9069fe
Record RFDETR TensorRT helper ceiling
aseembits93 May 23, 2026
d0bb451
Record RFDETR graph-paced depth2 profile
aseembits93 May 23, 2026
3fda211
Record RFDETR package metadata refresh
aseembits93 May 23, 2026
77ec8c8
Record RFDETR TensorRT input event probe
aseembits93 May 23, 2026
1895d5e
Record RFDETR TensorRT memory refresh probe
aseembits93 May 23, 2026
2e4de34
Record RFDETR two-warp resize NCU
aseembits93 May 23, 2026
4240cf5
Record RFDETR countless resize probe
aseembits93 May 23, 2026
746c292
Record RFDETR current graph-node profile
aseembits93 May 23, 2026
a77bfdc
Record RFDETR mask resize early-return probe
aseembits93 May 23, 2026
3e92946
Record RFDETR H1688 GEMM NCU profile
aseembits93 May 23, 2026
6e3844d
Record RFDETR Myelin GELU NCU profile
aseembits93 May 23, 2026
5727674
Record RFDETR pinned host view probe
aseembits93 May 23, 2026
24be9aa
Record RFDETR Myelin layernorm NCU profile
aseembits93 May 23, 2026
5d0251e
Record RFDETR 128x64 GEMM NCU profile
aseembits93 May 23, 2026
174735e
Record RFDETR 32x32 GEMM NCU profile
aseembits93 May 23, 2026
f846727
Record RFDETR depthwise conv NCU profile
aseembits93 May 23, 2026
d08592e
Record RFDETR fprop GEMM NCU profile
aseembits93 May 23, 2026
dc95df7
Record RFDETR 64x64 GEMM NCU profile
aseembits93 May 23, 2026
734e3a4
Record RFDETR persistent cache probe
aseembits93 May 23, 2026
a3de92f
Record RFDETR split-k GEMM NCU profile
aseembits93 May 23, 2026
a866d08
Record RFDETR engine inspector diagnostic
aseembits93 May 23, 2026
01ec14b
Record RFDETR Myelin transpose NCU profile
aseembits93 May 23, 2026
f7ce8d9
Record RFDETR 128x64x32 GEMM NCU profile
aseembits93 May 23, 2026
112a906
Record RFDETR indexed fprop NCU profile
aseembits93 May 23, 2026
abe62e7
Record RFDETR 128x128 NT GEMM NCU profile
aseembits93 May 23, 2026
839efdc
Record RFDETR 64x32 GEMM NCU profile
aseembits93 May 23, 2026
a95f6e8
Record RFDETR 128x128x64 GEMM NCU profile
aseembits93 May 23, 2026
103db31
Record RFDETR 64x32 TT GEMM NCU profile
aseembits93 May 23, 2026
f70a042
Record RFDETR CUDA connection sweep
aseembits93 May 23, 2026
dc21765
Record RFDETR runtime profiler probe
aseembits93 May 23, 2026
7b20008
Record RFDETR detailed engine rebuild probe
aseembits93 May 23, 2026
c3b1e58
Record RFDETR Myelin softmax tail NCU profile
aseembits93 May 23, 2026
cdf2f17
Record RFDETR H1688 TN GEMM NCU profile
aseembits93 May 23, 2026
e9b5b80
Record RFDETR Myelin mean tail NCU profile
aseembits93 May 23, 2026
c4f8b84
Record RFDETR depth2 graph-node nsys refresh
aseembits93 May 23, 2026
04000b6
Record RFDETR 64x32 FP32 TN GEMM NCU profile
aseembits93 May 23, 2026
b42cc4b
Record RFDETR Myelin transpose tail NCU profile
aseembits93 May 23, 2026
f3fb77a
Record RFDETR Myelin SiLU tail NCU profile
aseembits93 May 23, 2026
9b74f4f
Record RFDETR Myelin TopK NCU profile
aseembits93 May 23, 2026
e130a99
Record RFDETR unsorted TopK rebuild diagnostic
aseembits93 May 23, 2026
848824d
Record RFDETR tactic-source rebuild diagnostic
aseembits93 May 23, 2026
f03180d
Record RFDETR Myelin reshape tail NCU profile
aseembits93 May 23, 2026
36428cd
Record RFDETR 32x32 FP32 TN GEMM NCU profile
aseembits93 May 23, 2026
6134b0d
Record RFDETR 64x64 FP32 fused GEMM NCU profile
aseembits93 May 23, 2026
528cffc
Record RFDETR Myelin compare-select NCU profile
aseembits93 May 23, 2026
4de2d93
Record RFDETR 32x32 FP32 aligna2 GEMM NCU profile
aseembits93 May 24, 2026
e0516bc
Record RFDETR depth2 low-bubble nsys refresh
aseembits93 May 24, 2026
129a033
Record RFDETR ResizeVectorized NCU profile
aseembits93 May 24, 2026
a18900d
Record RFDETR Myelin add-transpose NCU profile
aseembits93 May 24, 2026
9d17f31
Record RFDETR Myelin cast NCU profile
aseembits93 May 24, 2026
f46d259
Record RFDETR smaller MHA NCU profile
aseembits93 May 24, 2026
3badca5
Record RFDETR fused 128x128 FP32 GEMM NCU profile
aseembits93 May 24, 2026
03d01f7
Record RFDETR exact Myelin layernorm NCU profile
aseembits93 May 24, 2026
637ce45
Record RFDETR exact Myelin ERF NCU profile
aseembits93 May 24, 2026
293aaee
Record RFDETR depth2 low-bubble refresh
aseembits93 May 24, 2026
2c65c4f
Record rejected RFDETR cache lookup experiment
aseembits93 May 24, 2026
9ca8997
Record RFDETR TensorRT engine inspector dump
aseembits93 May 24, 2026
35b36dc
Record RFDETR package and output allocator diagnostics
aseembits93 May 24, 2026
72c9cce
Record RFDETR depth2 profile and record-stream experiment
aseembits93 May 24, 2026
2554bd7
Record RFDETR query index allocation experiment
aseembits93 May 24, 2026
a8d2763
Record RFDETR TRT profile emission experiment
aseembits93 May 24, 2026
29b5886
Record RFDETR graph replay ceiling diagnostic
aseembits93 May 24, 2026
d410c7d
Refresh RFDETR official package metadata
aseembits93 May 24, 2026
1d945ba
Record RFDETR TensorRT graph stream imbalance
aseembits93 May 24, 2026
63db94c
Record RFDETR top GEMM scheduler profile
aseembits93 May 24, 2026
fb62e59
Record RFDETR TRT persistent cache experiment
aseembits93 May 24, 2026
6327a7f
Record RFDETR driver output copy experiment
aseembits93 May 24, 2026
503214c
Record RFDETR MHA scheduler profile
aseembits93 May 24, 2026
0446111
Record RFDETR fused GEMM scheduler profile
aseembits93 May 24, 2026
a6c7618
Record RFDETR vertical resize prototype
aseembits93 May 24, 2026
9a7790c
Record RFDETR local low-bubble nsys refresh
aseembits93 May 24, 2026
43b5751
Record RFDETR H1688 scheduler profile
aseembits93 May 24, 2026
29af3a6
Record RFDETR TRT batch constraint
aseembits93 May 24, 2026
d2c7a87
Record RFDETR graph upload diagnostic
aseembits93 May 24, 2026
891ecd3
Record RFDETR graph launch lead diagnostic
aseembits93 May 24, 2026
d5a5098
Record RFDETR TensorRT TopK scheduler profile
aseembits93 May 24, 2026
22e2328
Record RFDETR output clone overlap rejection
aseembits93 May 24, 2026
73ef03c
Record RFDETR TensorRT context knob audit
aseembits93 May 24, 2026
ecbb9ca
Record RFDETR 128x64 GEMM scheduler profile
aseembits93 May 24, 2026
e046600
Record RFDETR 32x32 GEMM scheduler profile
aseembits93 May 24, 2026
b279f81
Record RFDETR depthwise conv scheduler profile
aseembits93 May 24, 2026
3f17259
Record RFDETR TensorRT layer profile
aseembits93 May 24, 2026
72978ff
Record RFDETR fused mask resize profile
aseembits93 May 24, 2026
2a22ed9
Record RFDETR fused selector profile
aseembits93 May 24, 2026
94ae33b
Record RFDETR preprocess resize profile
aseembits93 May 24, 2026
2557ba0
Record RFDETR exact-count D2H rejection
aseembits93 May 24, 2026
b86e845
Record RFDETR split-k GEMM profile
aseembits93 May 24, 2026
42cea10
Record RFDETR continued depth-2 nsys trace
aseembits93 May 24, 2026
4be6f8b
Record RFDETR implicit GEMM scheduler profile
aseembits93 May 24, 2026
20ec002
Record RFDETR split-k 128 GEMM scheduler profile
aseembits93 May 24, 2026
5287d39
Record RFDETR 128x64x32 GEMM scheduler profile
aseembits93 May 24, 2026
83621e4
Record RFDETR indexed implicit GEMM profile
aseembits93 May 24, 2026
f309e35
Record RFDETR 128x128 NT GEMM scheduler profile
aseembits93 May 24, 2026
5a9495c
Record RFDETR 64x32 TN GEMM scheduler profile
aseembits93 May 24, 2026
efb87ff
Record RFDETR opt4 TensorRT rebuild result
aseembits93 May 24, 2026
ef956c1
Refresh RFDETR official package metadata
aseembits93 May 24, 2026
658b781
Record RFDETR sparse TensorRT rebuild result
aseembits93 May 24, 2026
00c6209
Record RFDETR aux0 TensorRT rebuild result
aseembits93 May 24, 2026
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3,231 changes: 3,231 additions & 0 deletions development/stream_interface/rfdetr_nano_seg_trt_optimization_log.md

Large diffs are not rendered by default.

110 changes: 92 additions & 18 deletions inference/core/interfaces/stream/inference_pipeline.py
Original file line number Diff line number Diff line change
Expand Up @@ -479,6 +479,7 @@ def init_with_workflow(
video_source_properties: Optional[Dict[str, float]] = None,
workflow_init_parameters: Optional[Dict[str, Any]] = None,
workflows_thread_pool_workers: int = 4,
max_inflight_workflow_batches: int = 2,
cancel_thread_pool_tasks_on_exit: bool = True,
video_metadata_input_name: str = "video_metadata",
batch_collection_timeout: Optional[float] = None,
Expand Down Expand Up @@ -547,6 +548,9 @@ def init_with_workflow(
with custom plugins.
workflows_thread_pool_workers (int): Number of workers for workflows thread pool which is used
by workflows blocks to run background tasks.
max_inflight_workflow_batches (int): Number of workflow frame batches that can be processed concurrently
by the inference pipeline. Values greater than one allow CPU-heavy workflow result processing for
one frame batch to overlap with GPU work for another frame batch, while preserving output order.
cancel_thread_pool_tasks_on_exit (bool): Flag to decide if unstarted background tasks should be
canceled at the end of InferencePipeline processing. By default, when video file ends or
pipeline is stopped, tasks that have not started will be cancelled.
Expand Down Expand Up @@ -690,6 +694,7 @@ def init_with_workflow(
batch_collection_timeout=batch_collection_timeout,
predictions_queue_size=predictions_queue_size,
decoding_buffer_size=decoding_buffer_size,
inference_thread_pool_workers=max_inflight_workflow_batches,
)

@classmethod
Expand All @@ -710,6 +715,7 @@ def init_with_custom_logic(
sink_mode: SinkMode = SinkMode.ADAPTIVE,
predictions_queue_size: int = PREDICTIONS_QUEUE_SIZE,
decoding_buffer_size: int = DEFAULT_BUFFER_SIZE,
inference_thread_pool_workers: int = 1,
) -> "InferencePipeline":
"""
This class creates the abstraction for making inferences from given workflow against video stream.
Expand Down Expand Up @@ -780,6 +786,8 @@ def init_with_custom_logic(
default value is taken from INFERENCE_PIPELINE_PREDICTIONS_QUEUE_SIZE env variable
decoding_buffer_size (int): size of video source decoding buffer
default value is taken from VIDEO_SOURCE_BUFFER_SIZE env variable
inference_thread_pool_workers (int): Number of frame batches that can be processed concurrently.
Output dispatching order is preserved.

Other ENV variables involved in low-level configuration:
* INFERENCE_PIPELINE_PREDICTIONS_QUEUE_SIZE - size of buffer for predictions that are ready for dispatching
Expand Down Expand Up @@ -826,6 +834,7 @@ def init_with_custom_logic(
on_pipeline_end=on_pipeline_end,
batch_collection_timeout=batch_collection_timeout,
sink_mode=sink_mode,
inference_thread_pool_workers=inference_thread_pool_workers,
)

def __init__(
Expand All @@ -841,6 +850,7 @@ def __init__(
max_fps: Optional[float] = None,
batch_collection_timeout: Optional[float] = None,
sink_mode: SinkMode = SinkMode.ADAPTIVE,
inference_thread_pool_workers: int = 1,
):
self._on_video_frame = on_video_frame
self._video_sources = video_sources
Expand All @@ -858,6 +868,12 @@ def __init__(
self._on_pipeline_end = on_pipeline_end
self._batch_collection_timeout = batch_collection_timeout
self._sink_mode = sink_mode
try:
self._inference_thread_pool_workers = max(
1, int(inference_thread_pool_workers)
)
except (TypeError, ValueError):
self._inference_thread_pool_workers = 1

def start(self, use_main_thread: bool = True) -> None:
self._stop = False
Expand Down Expand Up @@ -910,26 +926,52 @@ def _execute_inference(self) -> None:
status_update_handlers=self._status_update_handlers,
)
logger.info(f"Inference thread started")
inference_executor = None
try:
for video_frames in self._generate_frames():
self._watchdog.on_model_inference_started(
frames=video_frames,
)
predictions = self._on_video_frame(video_frames)
self._watchdog.on_model_prediction_ready(
frames=video_frames,
)
self._predictions_queue.put((predictions, video_frames))
send_inference_pipeline_status_update(
severity=UpdateSeverity.DEBUG,
event_type=INFERENCE_COMPLETED_EVENT,
payload={
"frames_ids": [f.frame_id for f in video_frames],
"frames_timestamps": [f.frame_timestamp for f in video_frames],
"sources_id": [f.source_id for f in video_frames],
},
status_update_handlers=self._status_update_handlers,
if self._inference_thread_pool_workers == 1:
for video_frames in self._generate_frames():
predictions, video_frames = self._run_inference_on_video_frames(
video_frames=video_frames
)
self._emit_inference_result(
predictions=predictions,
video_frames=video_frames,
)
else:
inference_executor = ThreadPoolExecutor(
max_workers=self._inference_thread_pool_workers
)
pending_predictions = {}
next_submitted_batch = 0
next_dispatched_batch = 0
for video_frames in self._generate_frames():
while (
len(pending_predictions) >= self._inference_thread_pool_workers
):
predictions, predicted_video_frames = pending_predictions.pop(
next_dispatched_batch
).result()
self._emit_inference_result(
predictions=predictions,
video_frames=predicted_video_frames,
)
next_dispatched_batch += 1
pending_predictions[next_submitted_batch] = (
inference_executor.submit(
self._run_inference_on_video_frames,
video_frames=video_frames,
)
)
next_submitted_batch += 1
while next_dispatched_batch < next_submitted_batch:
predictions, predicted_video_frames = pending_predictions.pop(
next_dispatched_batch
).result()
self._emit_inference_result(
predictions=predictions,
video_frames=predicted_video_frames,
)
next_dispatched_batch += 1

except Exception as error:
payload = {
Expand All @@ -945,6 +987,8 @@ def _execute_inference(self) -> None:
)
logger.exception(f"Encountered inference error: {error}")
finally:
if inference_executor is not None:
inference_executor.shutdown(wait=True, cancel_futures=True)
self._predictions_queue.put(None)
send_inference_pipeline_status_update(
severity=UpdateSeverity.INFO,
Expand All @@ -953,6 +997,36 @@ def _execute_inference(self) -> None:
)
logger.info(f"Inference thread finished")

def _run_inference_on_video_frames(
    self,
    video_frames: List[VideoFrame],
) -> Tuple[List[AnyPrediction], List[VideoFrame]]:
    """Run the frame handler on a single batch of frames.

    The watchdog is notified immediately before and immediately after the
    call to `self._on_video_frame`.

    Args:
        video_frames: Batch of frames handed to `self._on_video_frame`.

    Returns:
        A `(predictions, video_frames)` tuple: the handler output paired
        with the same `video_frames` list that was passed in, so the caller
        can keep results and frames together.
    """
    self._watchdog.on_model_inference_started(frames=video_frames)
    batch_predictions = self._on_video_frame(video_frames)
    self._watchdog.on_model_prediction_ready(frames=video_frames)
    return batch_predictions, video_frames

def _emit_inference_result(
    self,
    predictions: List[AnyPrediction],
    video_frames: List[VideoFrame],
) -> None:
    """Queue one batch result and send the inference-completed status update.

    Args:
        predictions: Handler output for the batch.
        video_frames: Frames the predictions were produced for.
    """
    # The result is queued first; the status update is emitted afterwards.
    self._predictions_queue.put((predictions, video_frames))
    frames_ids = [frame.frame_id for frame in video_frames]
    frames_timestamps = [frame.frame_timestamp for frame in video_frames]
    sources_id = [frame.source_id for frame in video_frames]
    status_payload = {
        "frames_ids": frames_ids,
        "frames_timestamps": frames_timestamps,
        "sources_id": sources_id,
    }
    send_inference_pipeline_status_update(
        severity=UpdateSeverity.DEBUG,
        event_type=INFERENCE_COMPLETED_EVENT,
        payload=status_payload,
        status_update_handlers=self._status_update_handlers,
    )

def _dispatch_inference_results(self) -> None:
while True:
inference_results: Optional[
Expand Down
110 changes: 108 additions & 2 deletions inference/core/interfaces/stream/model_handlers/workflows.py
Original file line number Diff line number Diff line change
@@ -1,11 +1,19 @@
from typing import List, Optional
from typing import Callable, Dict, List, Optional

from inference.core.interfaces.camera.entities import VideoFrame
from inference.core.workflows.execution_engine.core import ExecutionEngine
from inference.core.workflows.execution_engine.entities.base import VideoMetadata
from inference.core.workflows.execution_engine.entities.base import (
ImageParentMetadata,
VideoMetadata,
WorkflowImageData,
)


class WorkflowRunner:
def __init__(self) -> None:
    """Create a runner with an empty fast-path cache."""
    # Keyed by `id(execution_engine)`. A value is either the callable that
    # runs the single-step fast path, or None when no fast path was built
    # for that engine.
    empty_cache: Dict[int, Optional[Callable[[List[VideoFrame]], List[dict]]]] = {}
    self._fast_path_cache = empty_cache

def run_workflow(
self,
Expand All @@ -17,6 +25,13 @@ def run_workflow(
serialize_results: bool = False,
_is_preview: bool = False,
) -> List[dict]:
if not workflows_parameters and not serialize_results and not _is_preview:
fast_path = self._get_single_step_fast_path(
execution_engine=execution_engine,
image_input_name=image_input_name,
)
if fast_path is not None:
return fast_path(video_frames)
if workflows_parameters is None:
workflows_parameters = {}
# TODO: pass fps reflecting each stream to workflows_parameters
Expand Down Expand Up @@ -58,3 +73,94 @@ def run_workflow(
serialize_results=serialize_results,
_is_preview=_is_preview,
)

def _get_single_step_fast_path(
    self,
    execution_engine: ExecutionEngine,
    image_input_name: str,
) -> Optional[Callable[[List[VideoFrame]], List[dict]]]:
    """Return the cached single-step fast path for `execution_engine`.

    The fast path is built once per engine by
    `_build_single_step_fast_path` and memoised in `self._fast_path_cache`
    under `id(execution_engine)`. A cached `None` means "this engine does
    not qualify", so the check is not repeated on every batch.

    Because an `id()` value may be reused once the original object has been
    garbage collected, the cache entry is tied to the engine's lifetime:
    it is evicted when the engine is collected. This prevents a new engine
    from inheriting a fast path built for a different workflow and keeps
    the cache from growing without bound.

    Args:
        execution_engine: Engine whose workflow should be inspected.
        image_input_name: Name of the workflow image input; forwarded to the
            builder.

    Returns:
        The fast-path callable, or None when the engine's workflow does not
        qualify.

    NOTE(review): the cache key does not include `image_input_name`, so the
    first name seen for an engine wins. Visible callers appear to use one
    name per engine - confirm before reusing a runner with several names.
    """
    import weakref  # local import keeps this method self-contained

    cache_key = id(execution_engine)
    try:
        return self._fast_path_cache[cache_key]
    except KeyError:
        pass

    fast_path = self._build_single_step_fast_path(
        execution_engine=execution_engine,
        image_input_name=image_input_name,
    )
    try:
        # Drop the entry as soon as the engine dies, so a recycled id can
        # never resolve to a stale fast path.
        finalizer = weakref.finalize(
            execution_engine, self._fast_path_cache.pop, cache_key, None
        )
    except TypeError:
        # The engine cannot be weakly referenced, so its lifetime cannot be
        # tracked; skip caching rather than risk serving a stale entry.
        return fast_path
    # Nothing useful to clean up at interpreter shutdown.
    finalizer.atexit = False
    self._fast_path_cache[cache_key] = fast_path
    return fast_path

@staticmethod
def _build_single_step_fast_path(
    execution_engine: ExecutionEngine,
    image_input_name: str,
) -> Optional[Callable[[List[VideoFrame]], List[dict]]]:
    """Build a callable that runs a one-step workflow by calling its step directly.

    A callable is returned only when all of the following hold; otherwise
    the result is None:

    * the engine exposes `_engine._compiled_workflow`,
    * the compiled workflow has no input substitutions,
    * it has exactly one step, one declared input and one declared output,
    * the step manifest type is
      `roboflow_core/roboflow_instance_segmentation_model@v3`,
    * the manifest `images` field is the selector `$inputs.<image_input_name>`,
    * the output is named `predictions` and selects
      `$steps.<step_name>.predictions`.

    Args:
        execution_engine: Engine whose compiled workflow is inspected.
        image_input_name: Name of the workflow image input.

    Returns:
        A function taking a list of video frames and returning one
        `{"predictions": ...}` dict per step result, or None when the
        workflow does not match the shape described above.

    NOTE(review): this reads private attributes (`_engine`,
    `_compiled_workflow`) of the engine - verify against the engine
    implementation when upgrading.
    """
    engine_impl = getattr(execution_engine, "_engine", None)
    workflow = getattr(engine_impl, "_compiled_workflow", None)
    if workflow is None or workflow.input_substitutions:
        return None
    if len(workflow.steps) != 1:
        return None
    definition = workflow.workflow_definition
    if len(definition.inputs) != 1 or len(definition.outputs) != 1:
        return None

    # Exactly one step is present (checked above), so unpacking is safe.
    ((step_name, initialised_step),) = workflow.steps.items()
    manifest = initialised_step.manifest
    supported_type = "roboflow_core/roboflow_instance_segmentation_model@v3"
    if getattr(manifest, "type", None) != supported_type:
        return None
    if getattr(manifest, "images", None) != f"$inputs.{image_input_name}":
        return None

    workflow_output = definition.outputs[0]
    if not (
        workflow_output.name == "predictions"
        and workflow_output.selector == f"$steps.{step_name}.predictions"
    ):
        return None
    step = initialised_step.step

    def to_workflow_image(position: int, frame: VideoFrame) -> WorkflowImageData:
        # Wrap one video frame the way the step expects its image input.
        if frame.source_id:
            video_identifier = str(frame.source_id)
        else:
            video_identifier = "default_source"
        video_metadata = VideoMetadata(
            video_identifier=video_identifier,
            frame_number=frame.frame_id,
            frame_timestamp=frame.frame_timestamp,
            fps=frame.fps,
            measured_fps=frame.measured_fps,
            comes_from_video_file=frame.comes_from_video_file,
        )
        parent_metadata = ImageParentMetadata(
            parent_id=f"{image_input_name}.[{position}]"
        )
        return WorkflowImageData(
            parent_metadata=parent_metadata,
            workflow_root_ancestor_metadata=parent_metadata,
            numpy_image=frame.image,
            video_metadata=video_metadata,
        )

    def run_single_step_workflow(video_frames: List[VideoFrame]) -> List[dict]:
        workflow_images = [
            to_workflow_image(position, frame)
            for position, frame in enumerate(video_frames)
        ]
        # Manifest attributes are read on every call, not captured at build time.
        step_results = step.run(
            images=workflow_images,
            model_id=manifest.model_id,
            confidence_mode=manifest.confidence_mode,
            custom_confidence=manifest.custom_confidence,
            class_agnostic_nms=manifest.class_agnostic_nms,
            class_filter=manifest.class_filter,
            iou_threshold=manifest.iou_threshold,
            max_detections=manifest.max_detections,
            max_candidates=manifest.max_candidates,
            mask_decode_mode=manifest.mask_decode_mode,
            tradeoff_factor=manifest.tradeoff_factor,
            disable_active_learning=manifest.disable_active_learning,
            active_learning_target_dataset=manifest.active_learning_target_dataset,
        )
        return [
            {"predictions": step_result["predictions"]}
            for step_result in step_results
        ]

    return run_single_step_workflow
5 changes: 4 additions & 1 deletion inference/core/models/inference_models_adapters.py
Original file line number Diff line number Diff line change
Expand Up @@ -280,7 +280,10 @@ def map_inference_kwargs(self, kwargs: dict) -> dict:
disable_static_crop=kwargs.get("disable_preproc_static_crop", False),
)
kwargs["pre_processing_overrides"] = pre_processing_overrides
if "rle" in self._model.supported_mask_formats:
if (
kwargs.get("response_mask_format") == "rle"
and "rle" in self._model.supported_mask_formats
):
kwargs["mask_format"] = "rle"
return kwargs

Expand Down
10 changes: 0 additions & 10 deletions inference/core/utils/postprocess.py
Original file line number Diff line number Diff line change
Expand Up @@ -52,11 +52,6 @@ def masks2poly(masks: np.ndarray) -> List[np.ndarray]:
m_bool = np.ascontiguousarray(m_bool)
m_uint8 = m_bool.view(np.uint8)

# Quickly skip empty masks
if not np.any(m_uint8):
segments.append(np.zeros((0, 2), dtype=np.float32))
continue

segments.append(mask2poly(m_uint8))
return segments

Expand Down Expand Up @@ -88,11 +83,6 @@ def masks2multipoly(masks: np.ndarray) -> List[np.ndarray]:
m_bool = np.ascontiguousarray(m_bool)
m_uint8 = m_bool.view(np.uint8)

# Quickly skip empty masks
if not np.any(m_uint8):
segments.append([np.zeros((0, 2), dtype=np.float32)])
continue

segments.append(mask2multipoly(m_uint8))
return segments

Expand Down
5 changes: 4 additions & 1 deletion inference/core/workflows/core_steps/common/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -118,7 +118,10 @@ def convert_inference_detections_batch_to_sv_detections(
raw_predictions = filter_out_invalid_polygons(predictions=raw_predictions)
parent_ids = [d.get(PARENT_ID_KEY, "") for d in raw_predictions]
detection_ids = [
d.get(DETECTION_ID_KEY, str(uuid.uuid4())) for d in raw_predictions
d[DETECTION_ID_KEY]
if DETECTION_ID_KEY in d
else str(uuid.uuid4())
for d in raw_predictions
]
detections[DETECTION_ID_KEY] = np.array(detection_ids)
detections[PARENT_ID_KEY] = np.array(parent_ids)
Expand Down
Loading