From bbbdd600edb301f27438b3425ef8ec1224b7e916 Mon Sep 17 00:00:00 2001 From: bdrhill Date: Fri, 22 May 2026 00:13:38 +0200 Subject: [PATCH 1/2] fix(FiniteDiff): use fdhtype for hvp to match hessian accuracy Route hvp through fdhtype (default Val(:hcentral)) instead of the generic JVP-of-gradient fallback which used fdtype (default Val(:forward)). The forward-over-forward finite difference composition loses precision. Fixes #1012 Co-Authored-By: Claude Opus 4.5 --- .../onearg.jl | 137 ++++++++++++++++++ 1 file changed, 137 insertions(+) diff --git a/DifferentiationInterface/ext/DifferentiationInterfaceFiniteDiffExt/onearg.jl b/DifferentiationInterface/ext/DifferentiationInterfaceFiniteDiffExt/onearg.jl index 82b769ef6..c1b149b48 100644 --- a/DifferentiationInterface/ext/DifferentiationInterfaceFiniteDiffExt/onearg.jl +++ b/DifferentiationInterface/ext/DifferentiationInterfaceFiniteDiffExt/onearg.jl @@ -550,3 +550,140 @@ function DI.value_gradient_and_hessian!( ) return fc(x), grad, hess end + +## HVP + +struct FiniteDiffHVPPrep{SIG, C1, C2, RG, AG, RH, AH, H} <: DI.HVPPrep{SIG} + _sig::Val{SIG} + gradient_cache::C1 + hessian_cache::C2 + relstep_g::RG + absstep_g::AG + relstep_h::RH + absstep_h::AH + hess::H +end + +function DI.prepare_hvp_nokwarg( + strict::Val, f, backend::AutoFiniteDiff, x, tx::NTuple, contexts::Vararg{DI.Context, C} + ) where {C} + _sig = DI.signature(f, backend, x, tx, contexts...; strict) + fc = DI.fix_tail(f, map(DI.unwrap, contexts)...) + y = fc(x) + df = zero(y) .* x + gradient_cache = GradientCache(df, x, fdtype(backend)) + hessian_cache = HessianCache(x, fdhtype(backend)) + relstep_g = if isnothing(backend.relstep) + default_relstep(fdtype(backend), eltype(x)) + else + backend.relstep + end + relstep_h = if isnothing(backend.relstep) + default_relstep(fdhtype(backend), eltype(x)) + else + backend.relstep + end + absstep_g = if isnothing(backend.absstep) + relstep_g + else + backend.absstep + end + absstep_h = if isnothing(backend.absstep) + relstep_h + else + backend.absstep + end + hess = similar(x, eltype(x), (length(x), length(x))) + return FiniteDiffHVPPrep( + _sig, gradient_cache, hessian_cache, relstep_g, absstep_g, relstep_h, absstep_h, hess + ) +end + +function DI.hvp( + f, + prep::FiniteDiffHVPPrep, + backend::AutoFiniteDiff, + x, + tx::NTuple, + contexts::Vararg{DI.Context, C}, + ) where {C} + DI.check_prep(f, prep, backend, x, tx, contexts...) + (; relstep_h, absstep_h, hess) = prep + fc = DI.fix_tail(f, map(DI.unwrap, contexts)...) + finite_difference_hessian!( + hess, fc, x, prep.hessian_cache; relstep = relstep_h, absstep = absstep_h + ) + tg = map(tx) do dx + hess * dx + end + return tg +end + +function DI.hvp!( + f, + tg::NTuple, + prep::FiniteDiffHVPPrep, + backend::AutoFiniteDiff, + x, + tx::NTuple, + contexts::Vararg{DI.Context, C}, + ) where {C} + DI.check_prep(f, prep, backend, x, tx, contexts...) + (; relstep_h, absstep_h, hess) = prep + fc = DI.fix_tail(f, map(DI.unwrap, contexts)...) + finite_difference_hessian!( + hess, fc, x, prep.hessian_cache; relstep = relstep_h, absstep = absstep_h + ) + for b in eachindex(tx, tg) + mul!(tg[b], hess, tx[b]) + end + return tg +end + +function DI.gradient_and_hvp( + f, + prep::FiniteDiffHVPPrep, + backend::AutoFiniteDiff, + x, + tx::NTuple, + contexts::Vararg{DI.Context, C}, + ) where {C} + DI.check_prep(f, prep, backend, x, tx, contexts...) + (; relstep_g, absstep_g, relstep_h, absstep_h, hess) = prep + fc = DI.fix_tail(f, map(DI.unwrap, contexts)...) + grad = finite_difference_gradient( + fc, x, prep.gradient_cache; relstep = relstep_g, absstep = absstep_g + ) + finite_difference_hessian!( + hess, fc, x, prep.hessian_cache; relstep = relstep_h, absstep = absstep_h + ) + tg = map(tx) do dx + hess * dx + end + return grad, tg +end + +function DI.gradient_and_hvp!( + f, + grad, + tg::NTuple, + prep::FiniteDiffHVPPrep, + backend::AutoFiniteDiff, + x, + tx::NTuple, + contexts::Vararg{DI.Context, C}, + ) where {C} + DI.check_prep(f, prep, backend, x, tx, contexts...) + (; relstep_g, absstep_g, relstep_h, absstep_h, hess) = prep + fc = DI.fix_tail(f, map(DI.unwrap, contexts)...) + finite_difference_gradient!( + grad, fc, x, prep.gradient_cache; relstep = relstep_g, absstep = absstep_g + ) + finite_difference_hessian!( + hess, fc, x, prep.hessian_cache; relstep = relstep_h, absstep = absstep_h + ) + for b in eachindex(tx, tg) + mul!(tg[b], hess, tx[b]) + end + return grad, tg +end From fede0bd598045ff7e5cbc5837d1bb117ba95c9a8 Mon Sep 17 00:00:00 2001 From: bdrhill Date: Fri, 22 May 2026 01:04:45 +0200 Subject: [PATCH 2/2] fix(FiniteDiff): handle matrix inputs in hvp, add tests - Fix matrix input handling by using vec/reshape for proper dimensions - Enable hvp tests in test_differentiation (remove from excluded) - Add step size tests for prepare_hvp - Add explicit HVP accuracy tests for issue #1012 Co-Authored-By: Claude Opus 4.5 --- .../onearg.jl | 8 +-- .../test/Back/FiniteDiff/test.jl | 58 ++++++++++++++++++- 2 files changed, 60 insertions(+), 6 deletions(-) diff --git a/DifferentiationInterface/ext/DifferentiationInterfaceFiniteDiffExt/onearg.jl b/DifferentiationInterface/ext/DifferentiationInterfaceFiniteDiffExt/onearg.jl index c1b149b48..8c4b84031 100644 --- a/DifferentiationInterface/ext/DifferentiationInterfaceFiniteDiffExt/onearg.jl +++ b/DifferentiationInterface/ext/DifferentiationInterfaceFiniteDiffExt/onearg.jl @@ -614,7 +614,7 @@ function DI.hvp( hess, fc, x, prep.hessian_cache; relstep = relstep_h, absstep = absstep_h ) tg = map(tx) do dx - hess * dx + reshape(hess * vec(dx), size(x)) end return tg end @@ -635,7 +635,7 @@ function DI.hvp!( hess, fc, x, prep.hessian_cache; relstep = relstep_h, absstep = absstep_h ) for b in eachindex(tx, tg) - mul!(tg[b], hess, tx[b]) + mul!(vec(tg[b]), hess, vec(tx[b])) end return tg end @@ -658,7 +658,7 @@ function DI.gradient_and_hvp( hess, fc, x, prep.hessian_cache; relstep = relstep_h, absstep = absstep_h ) tg = map(tx) do dx - hess * dx + reshape(hess * vec(dx), size(x)) end return grad, tg end @@ -683,7 +683,7 @@ function DI.gradient_and_hvp!( hess, fc, x, prep.hessian_cache; relstep = relstep_h, absstep = absstep_h ) for b in eachindex(tx, tg) - mul!(tg[b], hess, tx[b]) + mul!(vec(tg[b]), hess, vec(tx[b])) end return grad, tg end diff --git a/DifferentiationInterface/test/Back/FiniteDiff/test.jl b/DifferentiationInterface/test/Back/FiniteDiff/test.jl index d2a63f03e..04f416e92 100644 --- a/DifferentiationInterface/test/Back/FiniteDiff/test.jl +++ b/DifferentiationInterface/test/Back/FiniteDiff/test.jl @@ -26,7 +26,7 @@ end use_tuples = true, include_smaller = true, ); - excluded = [:second_derivative, :hvp], + excluded = [:second_derivative], logging = LOGGING, ) @@ -43,7 +43,7 @@ end AutoFiniteDiff(; relstep = cbrt(eps(Float64)), absstep = cbrt(eps(Float64))), AutoFiniteDiff(; dir = 0.5), ]; - excluded = [:second_derivative, :hvp], + excluded = [:second_derivative], logging = LOGGING, ) end @@ -90,6 +90,11 @@ end; @test prep.absstep_h == 1000 @test prep.relstep_g == 0.1 @test prep.relstep_h == 0.1 + prep = prepare_hvp(sum, backend, [1.0], ([1.0],)) + @test prep.absstep_g == 1000 + @test prep.absstep_h == 1000 + @test prep.relstep_g == 0.1 + @test prep.relstep_h == 0.1 backend = AutoFiniteDiff(; relstep = 0.1) preps = [ @@ -110,6 +115,55 @@ end; @test prep.absstep_h == 0.1 @test prep.relstep_g == 0.1 @test prep.relstep_h == 0.1 + prep = prepare_hvp(sum, backend, [1.0], ([1.0],)) + @test prep.absstep_g == 0.1 + @test prep.absstep_h == 0.1 + @test prep.relstep_g == 0.1 + @test prep.relstep_h == 0.1 +end + +@testset "HVP accuracy (issue 1012)" begin + # hvp should match hessian * v for default AutoFiniteDiff() + # Previously, hvp used fdtype (forward) while hessian used fdhtype (central), + # causing significant accuracy differences + backend = AutoFiniteDiff() + + for (f, x, v) in [ + (x -> sum(x .^ 2), [1.0, 2.0, 3.0], [1.0, 0.0, 0.0]), + (x -> sum(x .^ 3), [1.0, 2.0, 3.0], [1.0, 0.0, 0.0]), + (x -> sum(x .^ 4), [1.0, 2.0, 3.0], [1.0, 0.0, 0.0]), + (x -> x' * [1 2; 3 4] * x, [1.0, 2.0], [1.0, 0.0]), + ] + H = hessian(f, backend, x) + Hv_direct = H * v + Hv_hvp = hvp(f, backend, x, (v,))[1] + @test Hv_hvp ≈ Hv_direct rtol = 1e-10 + end + + # Also test hvp!, gradient_and_hvp, gradient_and_hvp! + f(x) = sum(x .^ 2) + x = [1.0, 2.0, 3.0] + v = [1.0, 0.0, 0.0] + H = hessian(f, backend, x) + expected_Hv = H * v + expected_grad = [2.0, 4.0, 6.0] + + # hvp! + tg = (similar(x),) + hvp!(f, tg, backend, x, (v,)) + @test tg[1] ≈ expected_Hv rtol = 1e-10 + + # gradient_and_hvp + grad, tg = gradient_and_hvp(f, backend, x, (v,)) + @test grad ≈ expected_grad rtol = 1e-6 + @test tg[1] ≈ expected_Hv rtol = 1e-10 + + # gradient_and_hvp! + grad = similar(x) + tg = (similar(x),) + gradient_and_hvp!(f, grad, tg, backend, x, (v,)) + @test grad ≈ expected_grad rtol = 1e-6 + @test tg[1] ≈ expected_Hv rtol = 1e-10 end include("benchmark.jl")