TuringLang
diff --git a/Diff for: ‎.github/workflows/CI.yml
+7-4 b/Diff for: ‎.github/workflows/CI.yml
+7-4
diff --git a/Diff for: ‎Project.toml
+7-31 b/Diff for: ‎Project.toml
+7-31
diff --git a/Diff for: ‎README.md
+8-6 b/Diff for: ‎README.md
+8-6
diff --git a/Diff for: ‎docs/src/api.md
+1-8 b/Diff for: ‎docs/src/api.md
+1-8
diff --git a/Diff for: ‎docs/src/example.md
+5-2 b/Diff for: ‎docs/src/example.md
+5-2
diff --git a/Diff for: ‎ext/NormalizingFlowsEnzymeExt.jl
-25 b/Diff for: ‎ext/NormalizingFlowsEnzymeExt.jl
-25
diff --git a/Diff for: ‎ext/NormalizingFlowsForwardDiffExt.jl
-28 b/Diff for: ‎ext/NormalizingFlowsForwardDiffExt.jl
-28
diff --git a/Diff for: ‎ext/NormalizingFlowsReverseDiffExt.jl
-22 b/Diff for: ‎ext/NormalizingFlowsReverseDiffExt.jl
-22
diff --git a/Diff for: ‎ext/NormalizingFlowsZygoteExt.jl
-23 b/Diff for: ‎ext/NormalizingFlowsZygoteExt.jl
-23
diff --git a/Diff for: ‎src/NormalizingFlows.jl
+15-31 b/Diff for: ‎src/NormalizingFlows.jl
+15-31
diff --git a/Diff for: ‎src/objectives.jl
+1-1 b/Diff for: ‎src/objectives.jl
+1-1
diff --git a/Diff for: ‎src/objectives/elbo.jl
+1-1 b/Diff for: ‎src/objectives/elbo.jl
+1-1
@@ -1,15 +1,18 @@
 name: CI
+
 on:
   push:
     branches:
       - main
     tags: ['*']
   pull_request:
+
 concurrency:
   # Skip intermediate builds: always.
   # Cancel intermediate builds: only if it is a pull request build.
   group: ${{ github.workflow }}-${{ github.ref }}
   cancel-in-progress: ${{ startsWith(github.ref, 'refs/pull/') }}
+
 jobs:
   test:
     name: Julia ${{ matrix.version }} - ${{ matrix.os }} - ${{ matrix.arch }} - ${{ github.event_name }}
@@ -19,17 +22,17 @@ jobs:
       matrix:
         version:
           - '1'
-          - '1.6'
+          - 'min'
         os:
           - ubuntu-latest
         arch:
           - x64
     steps:
-      - uses: actions/checkout@v3
-      - uses: julia-actions/setup-julia@v1
+      - uses: actions/checkout@v4
+      - uses: julia-actions/setup-julia@v2
         with:
           version: ${{ matrix.version }}
           arch: ${{ matrix.arch }}
-      - uses: julia-actions/cache@v1
+      - uses: julia-actions/cache@v2
       - uses: julia-actions/julia-buildpkg@v1
       - uses: julia-actions/julia-runtest@v1
@@ -1,50 +1,26 @@
 name = "NormalizingFlows"
 uuid = "50e4474d-9f12-44b7-af7a-91ab30ff6256"
-version = "0.1.1"
+version = "0.2.0"
 
 [deps]
 ADTypes = "47edcb42-4c32-4615-8424-f2b9edc5f35b"
 Bijectors = "76274a88-744f-5084-9051-94815aaf08c4"
-DiffResults = "163ba53b-c6d8-5494-b064-1a9d43ac40c5"
+DifferentiationInterface = "a0c0ee7d-e4b9-4e03-894e-1c5f64a51d63"
 Distributions = "31c24e10-a181-5473-b8eb-7969acd0382f"
 DocStringExtensions = "ffbed154-4ef7-542d-bbb7-c09d3a79fcae"
 LinearAlgebra = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e"
 Optimisers = "3bd65402-5787-11e9-1adc-39752487f4e2"
 ProgressMeter = "92933f4c-e287-5a05-a399-4b506db050ca"
 Random = "9a3f8284-a2c9-5f02-9a11-845980a1fd5c"
-Requires = "ae029012-a4dd-5104-9daa-d747884805df"
 StatsBase = "2913bbd2-ae8a-5f71-8c99-4fb6c76f3a91"
 
-[weakdeps]
-Enzyme = "7da242da-08ed-463a-9acd-ee780be4f1d9"
-ForwardDiff = "f6369f11-7733-5829-9624-2563aa707210"
-ReverseDiff = "37e2e3b7-166d-5795-8a7a-e32c996b4267"
-Zygote = "e88e6eb3-aa80-5325-afca-941959d7151f"
-
-[extensions]
-NormalizingFlowsEnzymeExt = "Enzyme"
-NormalizingFlowsForwardDiffExt = "ForwardDiff"
-NormalizingFlowsReverseDiffExt = "ReverseDiff"
-NormalizingFlowsZygoteExt = "Zygote"
-
 [compat]
-ADTypes = "0.1, 0.2, 1"
-Bijectors = "0.12.6, 0.13, 0.14"
-DiffResults = "1"
+ADTypes = "1"
+Bijectors = "0.12.6, 0.13, 0.14, 0.15"
+DifferentiationInterface = "0.6.42"
 Distributions = "0.25"
 DocStringExtensions = "0.9"
-Enzyme = "0.11, 0.12, 0.13"
-ForwardDiff = "0.10.25"
-Optimisers = "0.2.16, 0.3"
+Optimisers = "0.2.16, 0.3, 0.4"
 ProgressMeter = "1.0.0"
-Requires = "1"
-ReverseDiff = "1.14"
 StatsBase = "0.33, 0.34"
-Zygote = "0.6"
-julia = "1.6"
-
-[extras]
-Enzyme = "7da242da-08ed-463a-9acd-ee780be4f1d9"
-ForwardDiff = "f6369f11-7733-5829-9624-2563aa707210"
-ReverseDiff = "37e2e3b7-166d-5795-8a7a-e32c996b4267"
-Zygote = "e88e6eb3-aa80-5325-afca-941959d7151f"
+julia = "1.10"
@@ -4,7 +4,7 @@
 [![Build Status](https://github.com/TuringLang/NormalizingFlows.jl/actions/workflows/CI.yml/badge.svg?branch=main)](https://github.com/TuringLang/NormalizingFlows.jl/actions/workflows/CI.yml?query=branch%3Amain)
 
 
-**Last updated: 2023-Aug-23**
+**Last updated: 2025-Mar-04**
 
 A normalizing flow library for Julia.
 
@@ -21,16 +21,16 @@ See the [documentation](https://turinglang.org/NormalizingFlows.jl/dev/) for mor
 To install the package, run the following command in the Julia REPL:
 ```julia
 ]  # enter Pkg mode
-(@v1.9) pkg> add git@github.com:TuringLang/NormalizingFlows.jl.git
+(@v1.11) pkg> add NormalizingFlows
 ```
 Then simply run the following command to use the package:
 ```julia
 using NormalizingFlows
 ```
 
 ## Quick recap of normalizing flows
-Normalizing flows transform a simple reference distribution $q_0$ (sometimes known as base distribution) to 
-a complex distribution $q$ using invertible functions.
+Normalizing flows transform a simple reference distribution $q_0$ (sometimes referred to as the base distribution) 
+to a complex distribution $q$ using invertible functions.
 
 In more detail, given the base distribution, usually a standard Gaussian distribution, i.e., $q_0 = \mathcal{N}(0, I)$,
 we apply a series of parameterized invertible transformations (called flow layers), $T_{1, \theta_1}, \cdots, T_{N, \theta_N}$, yielding that
@@ -56,7 +56,7 @@ Given the feasibility of i.i.d. sampling and density evaluation, normalizing flo
 \text{Reverse KL:}\quad
 &\arg\min _{\theta} \mathbb{E}_{q_{\theta}}\left[\log q_{\theta}(Z)-\log p(Z)\right] \\
 &= \arg\min _{\theta} \mathbb{E}_{q_0}\left[\log \frac{q_\theta(T_N\circ \cdots \circ T_1(Z_0))}{p(T_N\circ \cdots \circ T_1(Z_0))}\right] \\
-&= \arg\max _{\theta} \mathbb{E}_{q_0}\left[ \log p\left(T_N \circ \cdots \circ T_1(Z_0)\right)-\log q_0(X)+\sum_{n=1}^N \log J_n\left(F_n \circ \cdots \circ F_1(X)\right)\right]
+&= \arg\max _{\theta} \mathbb{E}_{q_0}\left[ \log p\left(T_N \circ \cdots \circ T_1(Z_0)\right)-\log q_0(Z_0)+\sum_{n=1}^N \log J_n\left(T_n \circ \cdots \circ T_1(Z_0)\right)\right]
 \end{aligned}
 ```
 and 
@@ -76,10 +76,12 @@ normalizing constant.
 In contrast, forward KL minimization is typically used for **generative modeling**, 
 where one wants to learn the underlying distribution of some data.
 
-## Current status and TODOs
+## Current status and to-dos
 
 - [x] general interface development
 - [x] documentation
+- [ ] integrating [Lux.jl](https://lux.csail.mit.edu/stable/tutorials/intermediate/7_RealNVP) and [Reactant.jl](https://github.com/EnzymeAD/Reactant.jl). 
+This could potentially solve the GPU compatibility issue as well.
 - [ ] including more NF examples/Tutorials
     - WIP: [PR#11](https://github.com/TuringLang/NormalizingFlows.jl/pull/11) 
 - [ ] GPU compatibility
 
@@ -15,6 +15,7 @@ For example of Gaussian VI, we can construct the flow as follows:
 ```@julia
 using Distributions, Bijectors
 T= Float32
+@leaf MvNormal # `@leaf` is provided by Functors.jl (`using Functors`); it prevents the params in q₀ from being optimized
 q₀ = MvNormal(zeros(T, 2), ones(T, 2))
 flow = Bijectors.transformed(q₀, Bijectors.Shift(zeros(T,2)) ∘ Bijectors.Scale(ones(T, 2)))
 ```
@@ -83,11 +84,3 @@ NormalizingFlows.loglikelihood
 ```@docs
 NormalizingFlows.optimize
 ```
-
-
-## Utility Functions for Taking Gradient
-```@docs
-NormalizingFlows.grad!
-NormalizingFlows.value_and_gradient!
-```
-
@@ -36,6 +36,7 @@ Here we used the `PlanarLayer()` from `Bijectors.jl` to construct a
 
 ```julia
 using Bijectors, FunctionChains
+using Functors
 
 function create_planar_flow(n_layers::Int, q₀)
     d = length(q₀)
@@ -45,7 +46,9 @@ function create_planar_flow(n_layers::Int, q₀)
 end
 
 # create a 20-layer planar flow
-flow = create_planar_flow(20, MvNormal(zeros(Float32, 2), I))
+@leaf MvNormal # to prevent params in q₀ from being optimized
+q₀ = MvNormal(zeros(Float32, 2), I)
+flow = create_planar_flow(20, q₀)
 flow_untrained = deepcopy(flow) # keep a copy of the untrained flow for comparison
 ```
 *Notice that here the flow layers are chained together using the `fchain` function from [`FunctionChains.jl`](https://github.com/oschulz/FunctionChains.jl). 
@@ -116,4 +119,4 @@ plot!(title = "Comparison of Trained and Untrained Flow", xlabel = "X", ylabel=
 
 ## Reference 
 
-- Rezende, D. and Mohamed, S., 2015. *Variational inference with normalizing flows*. International Conference on Machine Learning  
+- Rezende, D. and Mohamed, S., 2015. *Variational inference with normalizing flows*. International Conference on Machine Learning  
@@ -4,14 +4,12 @@ using Bijectors
 using Optimisers
 using LinearAlgebra, Random, Distributions, StatsBase
 using ProgressMeter
-using ADTypes, DiffResults
+using ADTypes
+import DifferentiationInterface as DI
 
 using DocStringExtensions
 
-export train_flow, elbo, loglikelihood, value_and_gradient!
-
-using ADTypes
-using DiffResults
+export train_flow, elbo, loglikelihood
 
 """
     train_flow([rng::AbstractRNG, ]vo, flow, args...; kwargs...)
@@ -30,7 +28,13 @@ Train the given normalizing flow `flow` by calling `optimize`.
 - `optimiser::Optimisers.AbstractRule=Optimisers.ADAM()`: optimiser to compute the steps
 - `ADbackend::ADTypes.AbstractADType=ADTypes.AutoZygote()`: 
     automatic differentiation backend, currently supports
-    `ADTypes.AutoZygote()`, `ADTypes.ForwardDiff()`, and `ADTypes.ReverseDiff()`. 
+    `ADTypes.AutoZygote()`, `ADTypes.AutoForwardDiff()`, `ADTypes.AutoReverseDiff()`, 
+    `ADTypes.AutoMooncake()` and
+    `ADTypes.AutoEnzyme(;
+        mode=Enzyme.set_runtime_activity(Enzyme.Reverse),
+        function_annotation=Enzyme.Const,
+    )`.
+    When using `AutoEnzyme`, make sure to pass `mode=Enzyme.set_runtime_activity(Enzyme.Reverse)` and `function_annotation=Enzyme.Const` as shown above.
 - `kwargs...`: additional keyword arguments for `optimize` (See [`optimize`](@ref) for details)
 
 # Returns
@@ -57,13 +61,15 @@ function train_flow(
     # otherwise the compilation time for destructure will be too long
     θ_flat, re = Optimisers.destructure(flow)
 
+    loss(θ, rng, args...) = -vo(rng, re(θ), args...)
+
     # Normalizing flow training loop 
     θ_flat_trained, opt_stats, st = optimize(
-        rng,
         ADbackend,
-        vo,
+        loss,
         θ_flat,
         re,
+        rng,
         args...;
         max_iters=max_iters,
         optimiser=optimiser,
@@ -74,29 +80,7 @@ function train_flow(
     return flow_trained, opt_stats, st
 end
 
-include("train.jl")
+include("optimize.jl")
 include("objectives.jl")
 
-# optional dependencies 
-if !isdefined(Base, :get_extension) # check whether :get_extension is defined in Base
-    using Requires
-end
-
-# Question: should Exts be loaded here or in train.jl? 
-function __init__()
-    @static if !isdefined(Base, :get_extension)
-        @require ForwardDiff = "f6369f11-7733-5829-9624-2563aa707210" include(
-            "../ext/NormalizingFlowsForwardDiffExt.jl"
-        )
-        @require ReverseDiff = "37e2e3b7-166d-5795-8a7a-e32c996b4267" include(
-            "../ext/NormalizingFlowsReverseDiffExt.jl"
-        )
-        @require Enzyme = "7da242da-08ed-463a-9acd-ee780be4f1d9" include(
-            "../ext/NormalizingFlowsEnzymeExt.jl"
-        )
-        @require Zygote = "e88e6eb3-aa80-5325-afca-941959d7151f" include(
-            "../ext/NormalizingFlowsZygoteExt.jl"
-        )
-    end
-end
 end
@@ -1,2 +1,2 @@
 include("objectives/elbo.jl")
-include("objectives/loglikelihood.jl")
+include("objectives/loglikelihood.jl") # not fully tested
@@ -42,4 +42,4 @@ end
 
 function elbo(flow::Bijectors.TransformedDistribution, logp, n_samples)
     return elbo(Random.default_rng(), flow, logp, n_samples)
-end
+end
Original file line number	Diff line number	Diff line change
`@@ -1,2 +1,2 @@`
`1`	`1`	`include("objectives/elbo.jl")`
`2`		`-include("objectives/loglikelihood.jl")`
	`2`	`+include("objectives/loglikelihood.jl") # not fully tested`