From 0e5e41e0ed09a53676eda77be6d3b80465b1663c Mon Sep 17 00:00:00 2001 From: ChrisRackauckas-Claude Date: Sat, 4 Apr 2026 07:47:57 -0400 Subject: [PATCH 1/4] Add Polyester-based threading extension for FastBroadcast VectorOfArray (#564) Replace Base.Threads threading with Polyester @batch in FastBroadcast VectorOfArray extension. When Polyester is loaded, the 3-way extension RecursiveArrayToolsFastBroadcastPolyesterExt provides threaded broadcasting using Polyester.@batch. Without Polyester, threaded broadcasting falls back to serial to avoid issues with mixing Polyester and Base threading. Bump version to 3.54.0. Co-Authored-By: Chris Rackauckas Co-Authored-By: Claude Opus 4.6 (1M context) --- Project.toml | 8 +++- ext/RecursiveArrayToolsFastBroadcastExt.jl | 25 ++-------- ...siveArrayToolsFastBroadcastPolyesterExt.jl | 48 +++++++++++++++++++ test/interface_tests.jl | 22 +++++++++ 4 files changed, 80 insertions(+), 23 deletions(-) create mode 100644 ext/RecursiveArrayToolsFastBroadcastPolyesterExt.jl diff --git a/Project.toml b/Project.toml index 71c98244..fe154cec 100644 --- a/Project.toml +++ b/Project.toml @@ -1,7 +1,7 @@ name = "RecursiveArrayTools" uuid = "731186ca-8d62-57ce-b412-fbd966d074cd" authors = ["Chris Rackauckas "] -version = "3.53.0" +version = "3.54.0" [deps] Adapt = "79e6a3ab-5dfb-504d-930d-738a2a938a0e" @@ -25,11 +25,13 @@ SparseArrays = "2f01184e-e22b-5df5-ae63-d93ebab69eaf" Statistics = "10745b16-79ce-11e8-11f9-7d13ad32a3b2" StructArrays = "09ab397b-f2b6-538f-b94a-2f83cf4a842a" Tables = "bd369af6-aec1-5ad0-b16a-f7cc5008161c" +Polyester = "f517fe37-dbe3-4b94-8317-1923a5111588" Tracker = "9f7883ad-71c0-57eb-9f7f-b5c9e6d3789c" Zygote = "e88e6eb3-aa80-5325-afca-941959d7151f" [extensions] RecursiveArrayToolsFastBroadcastExt = "FastBroadcast" +RecursiveArrayToolsFastBroadcastPolyesterExt = ["FastBroadcast", "Polyester"] RecursiveArrayToolsForwardDiffExt = "ForwardDiff" RecursiveArrayToolsKernelAbstractionsExt = "KernelAbstractions" 
RecursiveArrayToolsMeasurementsExt = "Measurements" @@ -56,6 +58,7 @@ Measurements = "2.11" MonteCarloMeasurements = "1.2" NLsolve = "4.5" Pkg = "1" +Polyester = "0.7.16" PrecompileTools = "1.2.1" Random = "1" RecipesBase = "1.3.4" @@ -84,6 +87,7 @@ Measurements = "eff96d63-e80a-5855-80a2-b1b0885c5ab7" MonteCarloMeasurements = "0987c9cc-fe09-11e8-30f0-b96dd679fdca" NLsolve = "2774e3e8-f4cf-5e23-947b-6d7e65073b56" Pkg = "44cfe95a-1eb2-52ea-b672-e2afdf69b78f" +Polyester = "f517fe37-dbe3-4b94-8317-1923a5111588" Random = "9a3f8284-a2c9-5f02-9a11-845980a1fd5c" SafeTestsets = "1bc83da4-3b8d-516f-aca4-4fe02f6d838f" SciMLBase = "0bca4576-84f4-4d90-8ffe-ffa030f20462" @@ -97,4 +101,4 @@ Unitful = "1986cc42-f94f-5a68-af5c-568840ba703d" Zygote = "e88e6eb3-aa80-5325-afca-941959d7151f" [targets] -test = ["Aqua", "FastBroadcast", "ForwardDiff", "KernelAbstractions", "Measurements", "NLsolve", "Pkg", "Random", "SafeTestsets", "SciMLBase", "SparseArrays", "StaticArrays", "Statistics", "StructArrays", "Tables", "Test", "Unitful", "Zygote"] +test = ["Aqua", "FastBroadcast", "ForwardDiff", "KernelAbstractions", "Measurements", "NLsolve", "Pkg", "Polyester", "Random", "SafeTestsets", "SciMLBase", "SparseArrays", "StaticArrays", "Statistics", "StructArrays", "Tables", "Test", "Unitful", "Zygote"] diff --git a/ext/RecursiveArrayToolsFastBroadcastExt.jl b/ext/RecursiveArrayToolsFastBroadcastExt.jl index 0bc7dc4a..52667bd5 100644 --- a/ext/RecursiveArrayToolsFastBroadcastExt.jl +++ b/ext/RecursiveArrayToolsFastBroadcastExt.jl @@ -27,27 +27,10 @@ const AbstractVectorOfSArray = AbstractVectorOfArray{ return dst end -@inline function FastBroadcast.fast_materialize!( - ::Threaded, dst::AbstractVectorOfSArray, - bc::Broadcast.Broadcasted{S} - ) where {S} - if FastBroadcast.use_fast_broadcast(S) - Threads.@threads for i in 1:length(dst.u) - unpacked = RecursiveArrayTools.unpack_voa(bc, i) - dst.u[i] = StaticArraysCore.similar_type(dst.u[i])( - unpacked[j] - for j in eachindex(unpacked) - ) - end 
- else - Broadcast.materialize!(dst, bc) - end - return dst -end - -# Fallback for non-SArray VectorOfArray: the generic threaded path splits -# along the last axis via views, which does not correctly partition work for -# VectorOfArray. Fall back to serial broadcasting. +# Fallback for VectorOfArray: the generic threaded path splits along the last +# axis via views, which does not correctly partition work for VectorOfArray. +# Fall back to serial broadcasting. The RecursiveArrayToolsFastBroadcastPolyesterExt +# extension provides proper Polyester-based threading when Polyester is loaded. @inline function FastBroadcast.fast_materialize!( ::Threaded, dst::AbstractVectorOfArray, bc::Broadcast.Broadcasted diff --git a/ext/RecursiveArrayToolsFastBroadcastPolyesterExt.jl b/ext/RecursiveArrayToolsFastBroadcastPolyesterExt.jl new file mode 100644 index 00000000..2e6c2fc2 --- /dev/null +++ b/ext/RecursiveArrayToolsFastBroadcastPolyesterExt.jl @@ -0,0 +1,48 @@ +module RecursiveArrayToolsFastBroadcastPolyesterExt + +using RecursiveArrayTools +using FastBroadcast +using FastBroadcast: Serial, Threaded +using Polyester +using StaticArraysCore + +const AbstractVectorOfSArray = AbstractVectorOfArray{ + T, N, <:AbstractVector{<:StaticArraysCore.SArray}, +} where {T, N} + +@inline function _polyester_fast_materialize!( + dst::AbstractVectorOfSArray, + bc::Broadcast.Broadcasted{S} + ) where {S} + if FastBroadcast.use_fast_broadcast(S) + @batch for i in 1:length(dst.u) + unpacked = RecursiveArrayTools.unpack_voa(bc, i) + dst.u[i] = StaticArraysCore.similar_type(dst.u[i])( + unpacked[j] + for j in eachindex(unpacked) + ) + end + else + Broadcast.materialize!(dst, bc) + end + return dst +end + +@inline function FastBroadcast.fast_materialize!( + ::Threaded, dst::AbstractVectorOfSArray, + bc::Broadcast.Broadcasted{S} + ) where {S} + return _polyester_fast_materialize!(dst, bc) +end + +# Disambiguation: this method is more specific than both the base ext's +# (::Threaded, 
::AbstractVectorOfArray, ::Broadcasted) fallback and +# the above (::Threaded, ::AbstractVectorOfSArray, ::Broadcasted{S}). +@inline function FastBroadcast.fast_materialize!( + ::Threaded, dst::AbstractVectorOfSArray, + bc::Broadcast.Broadcasted + ) + return _polyester_fast_materialize!(dst, bc) +end + +end # module diff --git a/test/interface_tests.jl b/test/interface_tests.jl index f5b10118..a042a372 100644 --- a/test/interface_tests.jl +++ b/test/interface_tests.jl @@ -1,5 +1,6 @@ using RecursiveArrayTools, StaticArrays, Test using FastBroadcast +using Polyester using SymbolicIndexingInterface: SymbolCache t = 1:3 @@ -296,6 +297,27 @@ f3!(z, zz) @test all(x -> x == SVector(3.0, 3.0), v_t.u) end +# Test Polyester-based threaded FastBroadcast extension (issue #564) +@testset "Polyester-threaded @.. with VectorOfArray{SArray}" begin + # Verify the Polyester extension is loaded + @test Base.get_extension( + Base.PkgId(RecursiveArrayTools), + :RecursiveArrayToolsFastBroadcastPolyesterExt + ) !== nothing + + # Test basic threaded broadcast with Polyester + u_p = VectorOfArray(fill(SVector(2.0, 3.0), 3, 3)) + v_p = copy(u_p) + @.. thread = true v_p = v_p + u_p + @test all(x -> x == SVector(4.0, 6.0), v_p.u) + + # Test with larger array to exercise Polyester batching + u_large = VectorOfArray(fill(SVector(1.0, 1.0, 1.0), 100)) + v_large = VectorOfArray(fill(SVector(0.0, 0.0, 0.0), 100)) + @.. 
thread = true v_large = u_large * 2.0 + @test all(x -> x == SVector(2.0, 2.0, 2.0), v_large.u) +end + struct ImmutableVectorOfArray{T, N, A} <: AbstractVectorOfArray{T, N, A} u::A # A <: AbstractArray{<: AbstractArray{T, N - 1}} end From 8c4602ca156354c6a7de1a2acbf21ab730662dc2 Mon Sep 17 00:00:00 2001 From: ChrisRackauckas-Claude Date: Sat, 4 Apr 2026 09:55:26 -0400 Subject: [PATCH 2/4] Add informative error when threading VoA{SArray} without Polyester When Polyester is not loaded and a user requests threaded FastBroadcast on VectorOfArray{SArray}, throw an error explaining they need to load Polyester.jl. Also fix tests to use Vector-of-SVector construction (not Matrix-of-SVector) to properly exercise the SArray-specific path. Co-Authored-By: Chris Rackauckas Co-Authored-By: Claude Opus 4.6 (1M context) --- ext/RecursiveArrayToolsFastBroadcastExt.jl | 14 ++++++++++---- ...RecursiveArrayToolsFastBroadcastPolyesterExt.jl | 3 +++ src/RecursiveArrayTools.jl | 5 +++++ test/interface_tests.jl | 8 ++++---- 4 files changed, 22 insertions(+), 8 deletions(-) diff --git a/ext/RecursiveArrayToolsFastBroadcastExt.jl b/ext/RecursiveArrayToolsFastBroadcastExt.jl index 52667bd5..69130653 100644 --- a/ext/RecursiveArrayToolsFastBroadcastExt.jl +++ b/ext/RecursiveArrayToolsFastBroadcastExt.jl @@ -27,14 +27,20 @@ const AbstractVectorOfSArray = AbstractVectorOfArray{ return dst end -# Fallback for VectorOfArray: the generic threaded path splits along the last -# axis via views, which does not correctly partition work for VectorOfArray. -# Fall back to serial broadcasting. The RecursiveArrayToolsFastBroadcastPolyesterExt -# extension provides proper Polyester-based threading when Polyester is loaded. +# Fallback for non-SArray VectorOfArray: the generic threaded path splits along +# the last axis via views, which does not correctly partition work for +# VectorOfArray. Fall back to serial broadcasting. 
+# For SArray VectorOfArray, throw an informative error telling the user to +# load Polyester.jl for threaded broadcasting. @inline function FastBroadcast.fast_materialize!( ::Threaded, dst::AbstractVectorOfArray, bc::Broadcast.Broadcasted ) + if dst isa AbstractVectorOfSArray && !RecursiveArrayTools.POLYESTER_LOADED[] + error("Threaded FastBroadcast on VectorOfArray{SArray} requires Polyester.jl. " * + "Add `using Polyester` to enable threaded broadcasting, or use " * + "`@.. thread=false` for serial broadcasting.") + end return FastBroadcast.fast_materialize!(Serial(), dst, bc) end diff --git a/ext/RecursiveArrayToolsFastBroadcastPolyesterExt.jl b/ext/RecursiveArrayToolsFastBroadcastPolyesterExt.jl index 2e6c2fc2..9e9837d7 100644 --- a/ext/RecursiveArrayToolsFastBroadcastPolyesterExt.jl +++ b/ext/RecursiveArrayToolsFastBroadcastPolyesterExt.jl @@ -6,6 +6,9 @@ using FastBroadcast: Serial, Threaded using Polyester using StaticArraysCore +# Signal to the base FastBroadcast extension that Polyester threading is available. +RecursiveArrayTools.POLYESTER_LOADED[] = true + const AbstractVectorOfSArray = AbstractVectorOfArray{ T, N, <:AbstractVector{<:StaticArraysCore.SArray}, } where {T, N} diff --git a/src/RecursiveArrayTools.jl b/src/RecursiveArrayTools.jl index f6f813fc..66d7f899 100644 --- a/src/RecursiveArrayTools.jl +++ b/src/RecursiveArrayTools.jl @@ -142,6 +142,11 @@ module RecursiveArrayTools export ArrayPartition, AP, NamedArrayPartition + # Flag set to `true` by RecursiveArrayToolsFastBroadcastPolyesterExt when + # Polyester is loaded. Checked by the FastBroadcast ext to decide whether + # to throw an informative error on threaded VoA{SArray} operations. 
+ const POLYESTER_LOADED = Ref(false) + include("precompilation.jl") end # module diff --git a/test/interface_tests.jl b/test/interface_tests.jl index a042a372..6ab7d17e 100644 --- a/test/interface_tests.jl +++ b/test/interface_tests.jl @@ -305,15 +305,15 @@ end :RecursiveArrayToolsFastBroadcastPolyesterExt ) !== nothing - # Test basic threaded broadcast with Polyester - u_p = VectorOfArray(fill(SVector(2.0, 3.0), 3, 3)) + # Test basic threaded broadcast with Polyester (Vector-of-SVector storage) + u_p = VectorOfArray([SVector(2.0, 3.0) for _ in 1:9]) v_p = copy(u_p) @.. thread = true v_p = v_p + u_p @test all(x -> x == SVector(4.0, 6.0), v_p.u) # Test with larger array to exercise Polyester batching - u_large = VectorOfArray(fill(SVector(1.0, 1.0, 1.0), 100)) - v_large = VectorOfArray(fill(SVector(0.0, 0.0, 0.0), 100)) + u_large = VectorOfArray([SVector(1.0, 1.0, 1.0) for _ in 1:100]) + v_large = VectorOfArray([SVector(0.0, 0.0, 0.0) for _ in 1:100]) @.. thread = true v_large = u_large * 2.0 @test all(x -> x == SVector(2.0, 2.0, 2.0), v_large.u) end From 4ba242422b12f8527a08cd09136d3b8126f53a68 Mon Sep 17 00:00:00 2001 From: ChrisRackauckas-Claude Date: Sun, 5 Apr 2026 06:34:25 -0400 Subject: [PATCH 3/4] Remove redundant POLYESTER_LOADED flag When Polyester is loaded, RecursiveArrayToolsFastBroadcastPolyesterExt defines more-specific methods for AbstractVectorOfSArray that win dispatch, so the base ext's AbstractVectorOfArray fallback is only reached with an SArray-backed VectorOfArray when Polyester is not loaded. The flag check was redundant.
Co-Authored-By: Chris Rackauckas Co-Authored-By: Claude Opus 4.6 (1M context) --- ext/RecursiveArrayToolsFastBroadcastExt.jl | 5 ++++- ext/RecursiveArrayToolsFastBroadcastPolyesterExt.jl | 3 --- src/RecursiveArrayTools.jl | 5 ----- 3 files changed, 4 insertions(+), 9 deletions(-) diff --git a/ext/RecursiveArrayToolsFastBroadcastExt.jl b/ext/RecursiveArrayToolsFastBroadcastExt.jl index 69130653..c74b2b3e 100644 --- a/ext/RecursiveArrayToolsFastBroadcastExt.jl +++ b/ext/RecursiveArrayToolsFastBroadcastExt.jl @@ -36,7 +36,10 @@ end ::Threaded, dst::AbstractVectorOfArray, bc::Broadcast.Broadcasted ) - if dst isa AbstractVectorOfSArray && !RecursiveArrayTools.POLYESTER_LOADED[] + # When Polyester is loaded, RecursiveArrayToolsFastBroadcastPolyesterExt + # defines more-specific methods for AbstractVectorOfSArray, so reaching + # this method with an SArray VoA means Polyester is not loaded. + if dst isa AbstractVectorOfSArray error("Threaded FastBroadcast on VectorOfArray{SArray} requires Polyester.jl. " * "Add `using Polyester` to enable threaded broadcasting, or use " * "`@.. thread=false` for serial broadcasting.") diff --git a/ext/RecursiveArrayToolsFastBroadcastPolyesterExt.jl b/ext/RecursiveArrayToolsFastBroadcastPolyesterExt.jl index 9e9837d7..2e6c2fc2 100644 --- a/ext/RecursiveArrayToolsFastBroadcastPolyesterExt.jl +++ b/ext/RecursiveArrayToolsFastBroadcastPolyesterExt.jl @@ -6,9 +6,6 @@ using FastBroadcast: Serial, Threaded using Polyester using StaticArraysCore -# Signal to the base FastBroadcast extension that Polyester threading is available. 
-RecursiveArrayTools.POLYESTER_LOADED[] = true - const AbstractVectorOfSArray = AbstractVectorOfArray{ T, N, <:AbstractVector{<:StaticArraysCore.SArray}, } where {T, N} diff --git a/src/RecursiveArrayTools.jl b/src/RecursiveArrayTools.jl index 66d7f899..f6f813fc 100644 --- a/src/RecursiveArrayTools.jl +++ b/src/RecursiveArrayTools.jl @@ -142,11 +142,6 @@ module RecursiveArrayTools export ArrayPartition, AP, NamedArrayPartition - # Flag set to `true` by RecursiveArrayToolsFastBroadcastPolyesterExt when - # Polyester is loaded. Checked by the FastBroadcast ext to decide whether - # to throw an informative error on threaded VoA{SArray} operations. - const POLYESTER_LOADED = Ref(false) - include("precompilation.jl") end # module From 973db5687e135a65f8341843f15220e81a5d2a44 Mon Sep 17 00:00:00 2001 From: ChrisRackauckas-Claude Date: Sun, 5 Apr 2026 07:31:01 -0400 Subject: [PATCH 4/4] Run Tests workflow on v3-backport branch The Tests.yml workflow was only triggering on PRs to master, so PRs targeting v3-backport had no CI coverage. Add v3-backport to the branches list for both pull_request and push triggers. Co-Authored-By: Chris Rackauckas Co-Authored-By: Claude Opus 4.6 (1M context) --- .github/workflows/Tests.yml | 2 ++ 1 file changed, 2 insertions(+) diff --git a/.github/workflows/Tests.yml b/.github/workflows/Tests.yml index ac60bb49..4d4f86e1 100644 --- a/.github/workflows/Tests.yml +++ b/.github/workflows/Tests.yml @@ -4,11 +4,13 @@ on: pull_request: branches: - master + - v3-backport paths-ignore: - 'docs/**' push: branches: - master + - v3-backport paths-ignore: - 'docs/**'