Skip to content

Commit b9df825

Browse files
committed
add
1 parent ad5bf6e commit b9df825

20 files changed

+145
-48
lines changed

Project.toml

Lines changed: 15 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,7 @@ version = "0.5.0"
55
[deps]
66
ADTypes = "47edcb42-4c32-4615-8424-f2b9edc5f35b"
77
Accessors = "7d9f7c33-5ae7-4f3b-8dc6-eff91059b697"
8+
ChainRulesCore = "d360d2e6-b24c-11e9-a2a3-2a2ae2dbcce4"
89
DiffResults = "163ba53b-c6d8-5494-b064-1a9d43ac40c5"
910
DifferentiationInterface = "a0c0ee7d-e4b9-4e03-894e-1c5f64a51d63"
1011
Distributions = "31c24e10-a181-5473-b8eb-7969acd0382f"
@@ -20,31 +21,44 @@ StatsBase = "2913bbd2-ae8a-5f71-8c99-4fb6c76f3a91"
2021

2122
[weakdeps]
2223
Bijectors = "76274a88-744f-5084-9051-94815aaf08c4"
24+
Enzyme = "7da242da-08ed-463a-9acd-ee780be4f1d9"
25+
Mooncake = "da2b9cff-9c12-43a0-ae48-6db2b0edb7d6"
26+
ReverseDiff = "37e2e3b7-166d-5795-8a7a-e32c996b4267"
2327

2428
[extensions]
25-
AdvancedVIBijectorsExt = "Bijectors"
29+
AdvancedVIBijectorsExt = ["Bijectors", "Optimisers"]
30+
AdvancedVIEnzymeExt = ["Enzyme", "ChainRulesCore"]
31+
AdvancedVIMooncakeExt = ["Mooncake", "ChainRulesCore"]
32+
AdvancedVIReverseDiffExt = ["ReverseDiff", "ChainRulesCore"]
2633

2734
[compat]
2835
ADTypes = "1"
2936
Accessors = "0.1"
3037
Bijectors = "0.13, 0.14, 0.15"
38+
ChainRulesCore = "1"
3139
DiffResults = "1"
3240
DifferentiationInterface = "0.6, 0.7"
3341
Distributions = "0.25.111"
3442
DocStringExtensions = "0.8, 0.9"
43+
Enzyme = "0.13"
3544
FillArrays = "1.3"
3645
Functors = "0.4, 0.5"
3746
LinearAlgebra = "1"
3847
LogDensityProblems = "2"
48+
Mooncake = "0.4"
3949
Optimisers = "0.2.16, 0.3, 0.4"
4050
ProgressMeter = "1.6"
4151
Random = "1"
52+
ReverseDiff = "1"
4253
StatsBase = "0.32, 0.33, 0.34"
4354
julia = "1.10, 1.11.2"
4455

4556
[extras]
4657
Bijectors = "76274a88-744f-5084-9051-94815aaf08c4"
58+
Enzyme = "7da242da-08ed-463a-9acd-ee780be4f1d9"
59+
Mooncake = "da2b9cff-9c12-43a0-ae48-6db2b0edb7d6"
4760
Pkg = "44cfe95a-1eb2-52ea-b672-e2afdf69b78f"
61+
ReverseDiff = "37e2e3b7-166d-5795-8a7a-e32c996b4267"
4862
Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40"
4963

5064
[targets]

bench/benchmarks.jl

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -47,7 +47,6 @@ begin
4747
],
4848
(adname, adtype) in [
4949
("Zygote", AutoZygote()),
50-
("ForwardDiff", AutoForwardDiff()),
5150
("ReverseDiff", AutoReverseDiff()),
5251
("Mooncake", AutoMooncake(; config=Mooncake.Config())),
5352
# ("Enzyme", AutoEnzyme(; mode=Enzyme.set_runtime_activity(Enzyme.Reverse), function_annotation=Enzyme.Const)),

bench/normallognormal.jl

Lines changed: 8 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -12,12 +12,19 @@ function LogDensityProblems.logdensity(model::NormalLogNormal, θ)
1212
return log_density_x + log_density_y
1313
end
1414

15+
function LogDensityProblems.logdensity_and_gradient(model::NormalLogNormal, θ)
16+
return (
17+
LogDensityProblems.logdensity(model, θ),
18+
ForwardDiff.gradient(Base.Fix1(LogDensityProblems.logdensity, model), θ),
19+
)
20+
end
21+
1522
function LogDensityProblems.dimension(model::NormalLogNormal)
1623
return length(model.μ_y) + 1
1724
end
1825

1926
function LogDensityProblems.capabilities(::Type{<:NormalLogNormal})
20-
return LogDensityProblems.LogDensityOrder{0}()
27+
return LogDensityProblems.LogDensityOrder{1}()
2128
end
2229

2330
function Bijectors.bijector(model::NormalLogNormal)

bench/unconstrdist.jl

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,13 @@ function LogDensityProblems.logdensity(model::UnconstrDist, x)
77
return logpdf(model.dist, x)
88
end
99

10+
function LogDensityProblems.logdensity_and_gradient(model::UnconstrDist, θ)
11+
return (
12+
LogDensityProblems.logdensity(model, θ),
13+
ForwardDiff.gradient(Base.Fix1(LogDensityProblems.logdensity, model), θ),
14+
)
15+
end
16+
1017
function LogDensityProblems.dimension(model::UnconstrDist)
1118
return length(model.dist)
1219
end

docs/src/examples.md

Lines changed: 30 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,7 @@ Using the `LogDensityProblems` interface, the model can be defined as follows
1515

1616
```@example elboexample
1717
using LogDensityProblems
18+
using ForwardDiff
1819
1920
struct NormalLogNormal{MX,SX,MY,SY}
2021
μ_x::MX
@@ -28,15 +29,26 @@ function LogDensityProblems.logdensity(model::NormalLogNormal, θ)
2829
return logpdf(LogNormal(μ_x, σ_x), θ[1]) + logpdf(MvNormal(μ_y, Σ_y), θ[2:end])
2930
end
3031
32+
function LogDensityProblems.logdensity_and_gradient(model::NormalLogNormal, θ)
33+
return (
34+
LogDensityProblems.logdensity(model, θ),
35+
ForwardDiff.gradient(Base.Fix1(LogDensityProblems.logdensity, model), θ),
36+
)
37+
end
38+
3139
function LogDensityProblems.dimension(model::NormalLogNormal)
3240
return length(model.μ_y) + 1
3341
end
3442
3543
function LogDensityProblems.capabilities(::Type{<:NormalLogNormal})
36-
return LogDensityProblems.LogDensityOrder{0}()
44+
return LogDensityProblems.LogDensityOrder{1}()
3745
end
3846
```
3947

48+
Notice that the model supports first-order differentiation [capability](https://www.tamaspapp.eu/LogDensityProblems.jl/stable/#LogDensityProblems.capabilities).
49+
The required order of differentiation capability will vary depending on the VI algorithm.
50+
In this example, we will use `KLMinRepGradDescent`, which requires first-order capability.
51+
4052
Let's now instantiate the model
4153

4254
```@example elboexample
@@ -51,7 +63,23 @@ model = NormalLogNormal(μ_x, σ_x, μ_y, Diagonal(σ_y .^ 2));
5163
nothing
5264
```
5365

54-
Since the `y` follows a log-normal prior, its support is bounded to be the positive half-space ``\mathbb{R}_+``.
66+
Let's now load `AdvancedVI`.
67+
In addition to gradients of the target log-density, `KLMinRepGradDescent` internally uses automatic differentiation.
68+
Therefore, we have to select an AD framework to be used within `KLMinRepGradDescent`.
69+
(This does not need to be the same as the AD backend used for the first-order capability of `model`.)
70+
The selected AD framework needs to be communicated to `AdvancedVI` using the [ADTypes](https://github.com/SciML/ADTypes.jl) interface.
71+
Here, we will use `ReverseDiff`, which can be selected by later passing `ADTypes.AutoReverseDiff()`.
72+
73+
```@example elboexample
74+
using ADTypes, ReverseDiff
75+
using AdvancedVI
76+
77+
alg = KLMinRepGradDescent(AutoReverseDiff());
78+
nothing
79+
```
80+
81+
Now, `KLMinRepGradDescent` requires the variational approximation and the target log-density to have the same support.
82+
Since `y` follows a log-normal prior, its support is bounded to be the positive half-space ``\mathbb{R}_+``.
5583
Thus, we will use [Bijectors](https://github.com/TuringLang/Bijectors.jl) to match the support of our target posterior and the variational approximation.
5684

5785
```@example elboexample
@@ -70,24 +98,6 @@ binv = inverse(b)
7098
nothing
7199
```
72100

73-
Let's now load `AdvancedVI`.
74-
Since BBVI relies on automatic differentiation (AD), we need to load an AD library, *before* loading `AdvancedVI`.
75-
Also, the selected AD framework needs to be communicated to `AdvancedVI` using the [ADTypes](https://github.com/SciML/ADTypes.jl) interface.
76-
Here, we will use `ForwardDiff`, which can be selected by later passing `ADTypes.AutoForwardDiff()`.
77-
78-
```@example elboexample
79-
using Optimisers
80-
using ADTypes, ForwardDiff
81-
using AdvancedVI
82-
```
83-
84-
We now need to select 1. a variational objective, and 2. a variational family.
85-
Here, we will use the [`RepGradELBO` objective](@ref repgradelbo), which expects an object implementing the [`LogDensityProblems`](https://github.com/tpapp/LogDensityProblems.jl) interface, and the inverse bijector.
86-
87-
```@example elboexample
88-
alg = KLMinRepGradDescent(AutoForwardDiff())
89-
```
90-
91101
For the variational family, we will use the classic mean-field Gaussian family.
92102

93103
```@example elboexample

docs/src/families.md

Lines changed: 9 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -138,7 +138,7 @@ using LinearAlgebra
138138
using LogDensityProblems
139139
using Optimisers
140140
using Plots
141-
using ReverseDiff
141+
using ForwardDiff, ReverseDiff
142142
143143
struct Target{D}
144144
dist::D
@@ -148,12 +148,19 @@ function LogDensityProblems.logdensity(model::Target, θ)
148148
logpdf(model.dist, θ)
149149
end
150150
151+
function LogDensityProblems.logdensity_and_gradient(model::Target, θ)
152+
return (
153+
LogDensityProblems.logdensity(model, θ),
154+
ForwardDiff.gradient(Base.Fix1(LogDensityProblems.logdensity, model), θ),
155+
)
156+
end
157+
151158
function LogDensityProblems.dimension(model::Target)
152159
return length(model.dist)
153160
end
154161
155162
function LogDensityProblems.capabilities(::Type{<:Target})
156-
return LogDensityProblems.LogDensityOrder{0}()
163+
return LogDensityProblems.LogDensityOrder{1}()
157164
end
158165
159166
n_dims = 30

docs/src/paramspacesgd/repgradelbo.md

Lines changed: 21 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -127,7 +127,7 @@ using Plots
127127
using Random
128128
129129
using Optimisers
130-
using ADTypes, ForwardDiff
130+
using ADTypes, ForwardDiff, ReverseDiff
131131
using AdvancedVI
132132
133133
struct NormalLogNormal{MX,SX,MY,SY}
@@ -142,12 +142,19 @@ function LogDensityProblems.logdensity(model::NormalLogNormal, θ)
142142
logpdf(LogNormal(μ_x, σ_x), θ[1]) + logpdf(MvNormal(μ_y, Σ_y), θ[2:end])
143143
end
144144
145+
function LogDensityProblems.logdensity_and_gradient(model::NormalLogNormal, θ)
146+
return (
147+
LogDensityProblems.logdensity(model, θ),
148+
ForwardDiff.gradient(Base.Fix1(LogDensityProblems.logdensity, model), θ),
149+
)
150+
end
151+
145152
function LogDensityProblems.dimension(model::NormalLogNormal)
146153
length(model.μ_y) + 1
147154
end
148155
149156
function LogDensityProblems.capabilities(::Type{<:NormalLogNormal})
150-
LogDensityProblems.LogDensityOrder{0}()
157+
LogDensityProblems.LogDensityOrder{1}()
151158
end
152159
153160
n_dims = 10
@@ -185,7 +192,7 @@ binv = inverse(b)
185192
q0_trans = Bijectors.TransformedDistribution(q0, binv)
186193
187194
cfe = KLMinRepGradDescent(
188-
AutoForwardDiff(); entropy=ClosedFormEntropy(), optimizer=Adam(1e-2)
195+
AutoReverseDiff(); entropy=ClosedFormEntropy(), optimizer=Adam(1e-2)
189196
)
190197
nothing
191198
```
@@ -194,7 +201,7 @@ The repgradelbo estimator can instead be created as follows:
194201

195202
```@example repgradelbo
196203
stl = KLMinRepGradDescent(
197-
AutoForwardDiff(); entropy=StickingTheLandingEntropy(), optimizer=Adam(1e-2)
204+
AutoReverseDiff(); entropy=StickingTheLandingEntropy(), optimizer=Adam(1e-2)
198205
)
199206
nothing
200207
```
@@ -227,6 +234,15 @@ _, info_stl, _ = AdvancedVI.optimize(
227234
callback = callback,
228235
);
229236
237+
_, info_stl, _ = AdvancedVI.optimize(
238+
stl,
239+
max_iter,
240+
model,
241+
q0_trans;
242+
show_progress = false,
243+
callback = callback,
244+
);
245+
230246
t = [i.iteration for i in info_cfe]
231247
elbo_cfe = [i.elbo for i in info_cfe]
232248
elbo_stl = [i.elbo for i in info_stl]
@@ -302,7 +318,7 @@ nothing
302318

303319
```@setup repgradelbo
304320
_, info_qmc, _ = AdvancedVI.optimize(
305-
KLMinRepGradDescent(AutoForwardDiff(); n_samples=n_montecarlo, optimizer=Adam(1e-2)),
321+
KLMinRepGradDescent(AutoReverseDiff(); n_samples=n_montecarlo, optimizer=Adam(1e-2)),
306322
max_iter,
307323
model,
308324
q0_trans;

src/AdvancedVI.jl

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,7 @@ using LogDensityProblems
1818
using ADTypes
1919
using DiffResults
2020
using DifferentiationInterface
21+
using ChainRulesCore
2122

2223
using FillArrays
2324

@@ -95,6 +96,8 @@ This is an indirection for handling the type stability of `restructure`, as some
9596
"""
9697
restructure_ad_forward(::ADTypes.AbstractADType, restructure, params) = restructure(params)
9798

99+
include("mixedad_logdensity.jl")
100+
98101
# Variational Families
99102
export MvLocationScale, MeanFieldGaussian, FullRankGaussian
100103

src/algorithms/paramspacesgd/repgradelbo.jl

Lines changed: 18 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -13,7 +13,9 @@ Evidence lower-bound objective with the reparameterization gradient formulation[
1313
# Requirements
1414
- The variational approximation ``q_{\\lambda}`` implements `rand`.
1515
- The target distribution and the variational approximation have the same support.
16-
- The target `LogDensityProblems.logdensity(prob, x)` must be differentiable with respect to `x` by the selected AD backend.
16+
- The target `LogDensityProblem` must have a capability at least `LogDensityProblems.LogDensityOrder{1}()`.
17+
- Only the AD backends `ReverseDiff`, `Zygote`, `Mooncake`, and `Enzyme` are supported.
18+
- The sampling process `rand(q)` must be differentiable by the selected AD backend.
1719
1820
Depending on the options, additional requirements on ``q_{\\lambda}`` may apply.
1921
"""
@@ -26,23 +28,33 @@ function init(
2628
rng::Random.AbstractRNG,
2729
obj::RepGradELBO,
2830
adtype::ADTypes.AbstractADType,
29-
prob,
31+
prob::Prob,
3032
params,
3133
restructure,
32-
)
34+
) where {Prob}
3335
q_stop = restructure(params)
36+
capability = LogDensityProblems.capabilities(Prob)
37+
@assert adtype isa Union{<:AutoReverseDiff,<:AutoZygote,<:AutoMooncake,<:AutoEnzyme}
38+
ad_prob = if capability < LogDensityProblems.LogDensityOrder{1}()
39+
@warn "The capability of the provided log-density problem $(capability) is less than $(LogDensityProblems.LogDensityOrder{1}()). " *
40+
"Will attempt to directly differentiate through `LogDensityProblems.logdensity`. " *
41+
"If this is not intended, please supply a log-density problem with capability at least $(LogDensityProblems.LogDensityOrder{1}())"
42+
prob
43+
else
44+
MixedADLogDensityProblem(prob)
45+
end
3446
aux = (
3547
rng=rng,
3648
adtype=adtype,
3749
obj=obj,
38-
problem=prob,
50+
problem=ad_prob,
3951
restructure=restructure,
4052
q_stop=q_stop,
4153
)
4254
obj_ad_prep = AdvancedVI._prepare_gradient(
4355
estimate_repgradelbo_ad_forward, adtype, params, aux
4456
)
45-
return (obj_ad_prep=obj_ad_prep, problem=prob)
57+
return (obj_ad_prep=obj_ad_prep, problem=ad_prob)
4658
end
4759

4860
function RepGradELBO(n_samples::Int; entropy::AbstractEntropyEstimator=ClosedFormEntropy())
@@ -132,6 +144,7 @@ function estimate_gradient!(
132144
params,
133145
restructure,
134146
state,
147+
args...,
135148
)
136149
(; obj_ad_prep, problem) = state
137150
q_stop = restructure(params)

test/Project.toml

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,6 @@ Bijectors = "76274a88-744f-5084-9051-94815aaf08c4"
44
DiffResults = "163ba53b-c6d8-5494-b064-1a9d43ac40c5"
55
DifferentiationInterface = "a0c0ee7d-e4b9-4e03-894e-1c5f64a51d63"
66
Distributions = "31c24e10-a181-5473-b8eb-7969acd0382f"
7-
DistributionsAD = "ced4e74d-a319-5a8a-b0ac-84af2272839c"
87
Enzyme = "7da242da-08ed-463a-9acd-ee780be4f1d9"
98
FillArrays = "1a297f60-69ca-5386-bcde-b61e274b549b"
109
ForwardDiff = "f6369f11-7733-5829-9624-2563aa707210"
@@ -30,7 +29,6 @@ Bijectors = "0.13, 0.14, 0.15"
3029
DiffResults = "1"
3130
DifferentiationInterface = "0.6, 0.7"
3231
Distributions = "0.25.111"
33-
DistributionsAD = "0.6.45"
3432
Enzyme = "0.13, 0.14, 0.15"
3533
FillArrays = "1.6.1"
3634
ForwardDiff = "0.10.36, 1"

0 commit comments

Comments
 (0)