diff --git a/docs/Project.toml b/docs/Project.toml new file mode 100644 index 0000000..a791f81 --- /dev/null +++ b/docs/Project.toml @@ -0,0 +1,10 @@ +[deps] +DualArrays = "429e4e16-f749-45f3-beec-30742fae38ce" +Documenter = "e30172f5-a6a5-5a46-863b-614d45cd2de4" + +[compat] +Documenter = "1" +DualArrays = "0.2.0" + +[sources] +DualArrays = { path = ".." } diff --git a/docs/make.jl b/docs/make.jl new file mode 100644 index 0000000..dd3f7db --- /dev/null +++ b/docs/make.jl @@ -0,0 +1,22 @@ +using Pkg + +Pkg.activate(@__DIR__) +Pkg.instantiate() + +using Documenter +using DualArrays + +# Setup for doctests in docstrings +DocMeta.setdocmeta!(DualArrays, :DocTestSetup, :(using DualArrays)) + +makedocs(; + format = Documenter.HTML( + canonical = "https://github.com/dlfivefifty/DualArrays.jl", + ), + pages = [ + "Home" => "index.md", + ], + sitename = "DualArrays.jl", +) + +deploydocs(; repo = "github.com/dlfivefifty/DualArrays.jl") \ No newline at end of file diff --git a/docs/src/index.md b/docs/src/index.md new file mode 100644 index 0000000..f8b3c8d --- /dev/null +++ b/docs/src/index.md @@ -0,0 +1,90 @@ +# DualArrays.jl + +Documentation for `DualArrays.jl`. + +## Dual + +```@docs +DualArrays.Dual +``` + +```@example +using DualArrays + +a = Dual(2.0, [1.0, 2.0, 3.0]) +b = Dual(3.0, [4.0, 5.0, 6.0]) + +a * b +``` + +## DualArray + +```@docs +DualArrays.DualArray +DualArrays.DualVector +DualArrays.DualMatrix +``` + +```@example +using DualArrays + +v = DualVector([1.0, 2.0, 3.0], [1 2 3; 4 5 6; 7 8 9]) +v[2] +``` + +## ArrayOperator + +```@docs +DualArrays.ArrayOperator +``` + +### Broadcasting with ArrayOperators + +```@eval +using DualArrays +import Base.Docs +import Markdown + +Markdown.parse(sprint(show, Docs.doc(DualArrays.ArrayOperatorBroadcastStyle))) +``` + +```@example +using DualArrays + +t = ArrayOperator{1}([1 2 3; 4 5 6; 7 8 9]) + +t .+ 1 +``` + +### Multiplication with ArrayOperators + +```@eval +using DualArrays +import Base.Docs +import Markdown + +Markdown.parse(sprint(show, Docs.doc(DualArrays._contract))) +``` + +```@example +using DualArrays + +t = ArrayOperator{1}([1 2 3; 4 5 6; 7 8 9]) + +t * t +``` + +## Utilities + +```@docs +DualArrays.jacobian +``` + +```@example +using DualArrays + +f(x) = sin.(x) .+ x .* x + +J = jacobian(f, [1.0, 2.0, 3.0]) +J.data +``` diff --git a/src/DualArrays.jl b/src/DualArrays.jl index a5d2230..000fd9f 100644 --- a/src/DualArrays.jl +++ b/src/DualArrays.jl @@ -7,6 +7,8 @@ This package provides: - `Dual`: A dual number type for storing values and their derivatives - `DualVector`: A vector of dual numbers represented with a Jacobian matrix - `DualMatrix`: A matrix of dual numbers represented with a Jacobian tensor` +- `ArrayOperator`: A data structure that stores an array equipped with an output and input dimension. + Useful for tensor computations involving higher order DualArrays Differentiation rules are mostly provided by ChainRules.jl. """ module DualArrays diff --git a/src/arithmetic.jl b/src/arithmetic.jl index dc6d16e..4217d80 100644 --- a/src/arithmetic.jl +++ b/src/arithmetic.jl @@ -3,35 +3,29 @@ # Tensor transpose transpose(t::ArrayOperator{N, M}) where {N, M} = ArrayOperator{M}(transpose(t.data)) -""" -Addition/Subtraction of DualVectors. -""" - +# Addition/Subtraction of DualVectors. for op in (:+, :-) @eval $op(x::DualVector, y::DualVector) = DualVector($op(x.value, y.value), $op(x.jacobian, y.jacobian)) @eval $op(x::DualVector, y::AbstractVector) = DualVector($op(x.value, y), x.jacobian) @eval $op(x::AbstractVector, y::DualVector) = DualVector($op(x, y.value), y.jacobian) end -""" -Matrix multiplication with a DualVector. -""" -*(x::AbstractMatrix, y::DualVector) = DualVector(x * y.value, x * y.jacobian) - -""" -this section attempts to define broadcasting rules on DualVectors for functions -that either: -- take a single real argument (function applied to each element) -- are binary operations (binary operation applied to scalar and each element) - -We use DiffRules.jl for symbolic derivatives and define our overloads accordingly. -""" +# Matrix multiplication with a DualVector. +*(x::AbstractMatrix, y::DualVector) = DualVector(x * y.value, x * y.jacobian) +### +# this section attempts to define broadcasting rules on DualVectors for functions +# that either: +# +# - take a single real argument (function applied to each element) +# - are binary operations (binary operation applied to scalar and each element) +# +# We use DiffRules.jl for symbolic derivatives and define our overloads accordingly. +### + +# Helper function: Given an n-ary function f, return its partial derivatives as a tuple of function. function diff_fn(f, n) - """ - Helper function: Given an n-ary function f, return its partial derivatives as a tuple of function. - """ syms = ntuple(_ -> gensym(), n) d = DiffRules.diffrule(:Base, f, syms...) d = d isa Tuple ? d : (d,) @@ -121,12 +115,10 @@ LinearAlgebra.dot(x::AbstractVector, y::DualVector) = Dual(dot(x, y.value), tran # solve \(x::AbstractMatrix, y::DualVector) = DualVector(x \ y.value, ArrayOperator{1}(x \ y.jacobian.data)) -""" ------------------------ -Multiplication with ArrayOperators ------------------------ -* for ArrayOperators generalises from matrix/vector + +""" +Multiplication (*) for ArrayOperators generalises from matrix/vector multiplication and uses tensor contraction provided by TensorOperations.jl. Let an (A, B) ArrayOperator denote an ArrayOperator with output dimension A and input dimension B @@ -135,21 +127,22 @@ allow multiplication between an (A, B) ArrayOperator and a (B, C) ArrayOperator, with the result being an (A, C) ArrayOperator. This generalises from: --An inner product: a row vector * a column vector -> a scalar: - (0, 1) * (1, 0) -> (0, 0) +- An inner product: a row vector * a column vector -> a scalar: + + (0, 1) * (1, 0) -> (0, 0) --An outer product: a column vector * a row vector -> a matrix: - (1, 0) * (0, 1) -> (1, 1) +- An outer product: a column vector * a row vector -> a matrix: + + (1, 0) * (0, 1) -> (1, 1) - matrix/vector multiplication: a matrix * a column vector -> a column vector: - (1, 1) * (1, 0) -> (1, 0) + + (1, 1) * (1, 0) -> (1, 0) We can treat multiplication between ArrayOperators and regular arrays as multiplication between two ArrayOperators: given an (A, B) ArrayOperator and an L-array, we can fix it as having input dimension B and infer C = L - B. We use this contraction rule to multiply the two. """ - -# Helper function to perform tensor contraction between x and y function _contract(x, y, A, B, C) x_idx = (ntuple(i -> i, A), ntuple(i -> i + A, B)) y_idx = (ntuple(i -> i, B), ntuple(i -> i + B, C)) @@ -158,6 +151,7 @@ function _contract(x, y, A, B, C) return TensorOperations.tensorcontract(x, x_idx, false, y, y_idx, false, ret_idx, 1) end + function *(x::ArrayOperator{A, B}, y::ArrayOperator{B, C}) where {A, B, C} return ArrayOperator{A}(_contract(x.data, y.data, A, B, C)) end diff --git a/src/indexing.jl b/src/indexing.jl index 0ceb7fc..b357811 100644 --- a/src/indexing.jl +++ b/src/indexing.jl @@ -4,8 +4,6 @@ using ArrayLayouts, FillArrays, LinearAlgebra, SparseArrays sparse_getindex(a...) = layout_getindex(a...) - -# TODO: should we move these? sparse_getindex(D::Diagonal, k::Integer, ::Colon) = OneElement(D.diag[k], k, size(D, 2)) sparse_getindex(D::Diagonal, ::Colon, j::Integer) = OneElement(D.diag[j], j, size(D, 1)) sparse_getindex(d::DualArray, args...) = d[args...] @@ -49,16 +47,13 @@ getindex(t::ArrayOperator, i::Vararg{Int}) = getindex(t.data, i...) # Indexing with only slices/ranges returns a similar tensor getindex(t::ArrayOperator{N, M}, i::Vararg{Union{Colon, UnitRange}}) where {N, M} = ArrayOperator{N}(sparse_getindex(t.data, i...)) -""" -Extract a single Dual number from a DualArray. -""" + +# Extract a single Dual number from a DualArray. function getindex(x::DualArray, args::Vararg{Int}) Dual(x.value[args...], x.jacobian[args, :]) end -""" -Extract a sub-DualVector from a DualVector using a range. -""" +# Extract a sub-DualVector from a DualVector using a range. function getindex(x::DualVector, y::UnitRange) newval = x.value[y] newjac = x.jacobian[y, :] diff --git a/src/types.jl b/src/types.jl index fb516a0..da914a3 100644 --- a/src/types.jl +++ b/src/types.jl @@ -1,6 +1,8 @@ # Core type definitions for DualArrays.jl """ + ArrayOperator{N, M, T, L} + This type represents a linear map from an M-array to an N-array, with a N+M=L-dimensional array as its underlying data. This can be thought of analogously to an L-tensor equipped with a contraction pattern characterised by (N, M). Specifically, the tensor has dimensions @@ -54,27 +56,28 @@ sum(t::ArrayOperator; kwargs...) = sum(t.data; kwargs...) ==(a::ArrayOperator{N, M}, b::ArrayOperator{N, M}) where {N, M} = a.data == b.data isapprox(a::ArrayOperator{N, M}, b::ArrayOperator{N, M}; kwargs...) where {N, M} = isapprox(a.data, b.data; kwargs...) -# -------------------- -# Broadcasting with ArrayOperators -# -------------------- -# -# We want broadcasting on ArrayOperators to behave -# as it would on the underlying array, but preserving the -# ArrayOperator{N, M, T, L} type with the correct type parameters. -# T and L are preserved or inferred from type promotion. -# When the highest order is an ArrayOperator, N and M of that -# ArrayOperator are preserved. We do not support binary broadcasts -# for cases where: -# -# 1. The highest order is not an ArrayOperator -# 2. We have an (N, M) ArrayOperator and an (N2, M2) ArrayOperator -# with N + M = N2 + M2 but (N, M) != (N2, M2). -# -# As inferring N and M of the resulting ArrayOperator is ambiguous. - -# We define a custom broadcast style. -# We inherit from the L-array broadcast style and require output dimension N -# as extra information. +""" +We want broadcasting on ArrayOperators to behave +as it would on the underlying array, but preserving the +ArrayOperator{N, M, T, L} type with the correct type parameters. + +T and L are preserved or inferred from type promotion. + +When the highest order is an ArrayOperator, N and M of that +ArrayOperator are preserved. We do not support binary broadcasts +for cases where: + +1. The highest order is not an ArrayOperator +2. We have an (N, M) ArrayOperator and an (N2, M2) ArrayOperator + with N + M = N2 + M2 but (N, M) != (N2, M2). + +As inferring N and M of the resulting ArrayOperator is ambiguous. + +We define a custom broadcast style. + +We inherit from the L-array broadcast style and require output dimension N +as extra information. +""" struct ArrayOperatorBroadcastStyle{L, N} <: Broadcast.AbstractArrayStyle{L} end Base.BroadcastStyle(::Type{<:ArrayOperator{N, <:Any, <:Any, L}}) where {L, N} = ArrayOperatorBroadcastStyle{L, N}() @@ -135,6 +138,19 @@ A dual number type that stores a value and its partials (derivatives). # Fields - `value::T`: The primal value - `partials::Partials`: The partial derivatives stored as an array + +# Constructors + Dual(value::T, partials::AbstractArray{S}) where {S, T} + +Lets us construct a Dual number with a value and an array of partials. +The partials are currently stored as an array due to a technicality in +the way that Julia differentiates scalars and 0-arrays, meaning that +having the partials as an ArrayOperator{0} would be incorrect. + + Dual(value::T, partials::ArrayOperator{0, M, S, L}) where {L, S, M, T} + Dual(value::T, partials::ArrayOperator{N, 0, S, L}) where {L, S, N, T} + +We nevertheless allow construction of a Dual using an ArrayOperator to ensure interoperability. """ struct Dual{T, Partials <: AbstractArray{T}} <: Real value::T @@ -160,22 +176,31 @@ Dual(value::T, partials::ArrayOperator{N, 0, S, L}) where {L, S, N, T} = Dual(va """ - DualVector{T, M <: AbstractMatrix{T}} <: AbstractVector{Dual{T}} + DualArray{T, N, A <: AbstractArray{T,N}} <: AbstractArray{Dual{T}, N} Represents a vector of dual numbers given by: - values + jacobian * [ε₁, …, εₙ] + values + jacobian * ϵ -For now the entries just return the values when indexed. +Where value is an N-array of the primals, ϵ is an +M-array of dual parts and the jacobian is an N+M array of coefficients +such that right multiplication with ϵ gives a Dual N-array. # Fields -- `value::Vector{T}`: The primal values -- `jacobian::M`: The Jacobian matrix containing partial derivatives +- `value::AbstractArray{T,N}`: The primal values +- `jacobian::ArrayOperator{N, M, T, L}`: The Jacobian tensor containing partial derivatives + +# Constructors + DualArray(value::AbstractArray, jacobian::ArrayOperator) + +Creates a DualArray with the given value and Jacobian, ensuring that the Jacobian +has a suitable shape and output dimensions (i.e that the jacobian when multiplying ϵ +returns an N-array of the correct shape). -# Constructor - DualVector(value::Vector{T}, jacobian::M) where {T, M <: AbstractMatrix{T}} + DualArray(value::AbstractArray{S, M}, jacobian::AbstractArray{T, N}) where {S, T, N, M} -Constructs a DualVector, ensuring that the vector length matches the number of rows in the Jacobian. +We can also, given the jacobian as an AbstractArray, can infer a suitable input and output order +from the value array. """ struct DualArray{T, N, A <: AbstractArray{T,N}, J <: (ArrayOperator{N, M, T, L} where {L, M})} <: AbstractArray{Dual{T}, N} value::A @@ -194,9 +219,8 @@ DualArray{T,N}(value::A, jacobian::J) where {T, N, A <: AbstractArray{T,N}, J <: DualArray{T,N}(value, jacobian) where {T, N} = DualArray{T,N}(elconvert(T, value), elconvert(T, jacobian)) -""" -Constructor that forces type compatibility -""" + +# Constructor that forces type compatibility function DualArray(value::AbstractArray, jacobian::ArrayOperator) T = promote_type(eltype(value), eltype(jacobian)) DualArray{T}(value, jacobian) @@ -207,7 +231,17 @@ function DualArray(value::AbstractArray{S, M}, jacobian::AbstractArray{T, N}) wh DualArray(value, ArrayOperator{M}(jacobian)) end +""" + DualVector{T} + +It is often convenient to work with this alias for a Dual 1-array. +""" const DualVector = DualArray{T, 1} where {T} +""" + DualVector{T} + +It is often convenient to work with this alias for a Dual 2-array. +""" const DualMatrix = DualArray{T, 2} where {T} function DualVector(value::AbstractArray, jacobian::ArrayOperator) diff --git a/src/utilities.jl b/src/utilities.jl index bea6ee4..9111077 100644 --- a/src/utilities.jl +++ b/src/utilities.jl @@ -1,8 +1,7 @@ # Miscellaneous functions for DualArrays.jl using FillArrays, BandedMatrices, LinearAlgebra -""" -Sum all elements of a DualVector, returning a single Dual number. -""" + +# Sum all elements of a DualVector, returning a single Dual number. function Base.sum(x::DualVector) n = length(x.value) Dual(sum(x.value), vec(sum(x.jacobian; dims=1))) @@ -20,19 +19,16 @@ _value(x::Number) = x _size(x::Real) = 1 _size(x::DualVector) = size(x.jacobian.data, 2) -""" -Vertically concatenate Dual numbers and DualVectors. -""" + +# Vertically concatenate Dual numbers and DualVectors. function Base.vcat(x::Union{Dual, DualVector}...) value = vcat((d.value for d in x)...) jacobian = vcat((_jacobian(d) for d in x)...) DualVector(value, jacobian) end -""" -Vertically concatenate Real numbers and DualVectors. -""" +# Vertically concatenate Real numbers and DualVectors. function Base.vcat(x::Union{Real, DualVector}...) # Avoid stack overflow if all(i -> i isa Real, x) @@ -44,17 +40,16 @@ function Base.vcat(x::Union{Real, DualVector}...) DualVector(val, jac) end -""" -Custom display method for DualVectors. -""" + +# Custom display method for DualVectors. show(io::IO, ::MIME"text/plain", x::DualArray) = (show(io, x.value); print(io, " + "); show(io, x.jacobian); print(io, "𝛜")) show(io::IO, x::DualArray) = show(io, MIME"text/plain"(), x) """ + jacobian(f::Function, x::AbstractVector, id=Eye) Utility function to compute the jacobian of a function `f` at point `x`. -Analogous to `ForwardDiff.jacobian`. `id` selects the type of (sparse) identity to use and must be either `Eye` (default) or `BandedMatrix`. """