Add error for missing starting value #269

Merged 5 commits on Feb 1, 2025
4 changes: 3 additions & 1 deletion Project.toml
@@ -12,6 +12,7 @@ LazyArrays = "5078a376-72f3-5289-bfd5-ec5146d43c02"
LinearAlgebra = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e"
MathOptInterface = "b8f27783-ece8-5eb3-8dc8-9495eed66fee"
MathOptSetDistances = "3b969827-a86c-476c-9527-bb6f1a8fbad5"
ParametricOptInterface = "0ce4ce61-57bf-432b-a095-efac525d185e"
SparseArrays = "2f01184e-e22b-5df5-ae63-d93ebab69eaf"

[compat]
@@ -21,5 +22,6 @@ IterativeSolvers = "0.9"
JuMP = "1"
LazyArrays = "0.21, 0.22, 1"
MathOptInterface = "1.18"
MathOptSetDistances = "0.2.7"
MathOptSetDistances = "0.2.9"
ParametricOptInterface = "0.9.0"
julia = "1.6"
71 changes: 70 additions & 1 deletion README.md
@@ -31,7 +31,76 @@ examples, tutorials, and an API reference.

## Use with JuMP

Use DiffOpt with JuMP by following this brief example:
### DiffOpt-JuMP API with `Parameters`

```julia
using JuMP, DiffOpt, HiGHS

model = Model(
    () -> DiffOpt.diff_optimizer(
        HiGHS.Optimizer;
        with_parametric_opt_interface = true,
    ),
)
set_silent(model)

p_val = 4.0
pc_val = 2.0
@variable(model, x)
@variable(model, p in Parameter(p_val))
@variable(model, pc in Parameter(pc_val))
@constraint(model, cons, pc * x >= 3 * p)
@objective(model, Min, 2x)
optimize!(model)
@show value(x) == 3 * p_val / pc_val

# the function is
# x(p, pc) = 3p / pc
# hence,
# dx/dp = 3 / pc
# dx/dpc = -3p / pc^2

# First, try forward mode AD

# differentiate w.r.t. p
direction_p = 3.0
MOI.set(model, DiffOpt.ForwardConstraintSet(), ParameterRef(p), Parameter(direction_p))
DiffOpt.forward_differentiate!(model)
@show MOI.get(model, DiffOpt.ForwardVariablePrimal(), x) == direction_p * 3 / pc_val

# update p and pc
p_val = 2.0
pc_val = 6.0
set_parameter_value(p, p_val)
set_parameter_value(pc, pc_val)
# re-optimize
optimize!(model)
# check solution
@show value(x) ≈ 3 * p_val / pc_val

# stop differentiating with respect to p
DiffOpt.empty_input_sensitivities!(model)
# differentiate w.r.t. pc
direction_pc = 10.0
MOI.set(model, DiffOpt.ForwardConstraintSet(), ParameterRef(pc), Parameter(direction_pc))
DiffOpt.forward_differentiate!(model)
@show abs(MOI.get(model, DiffOpt.ForwardVariablePrimal(), x) -
-direction_pc * 3 * p_val / pc_val^2) < 1e-5

# it is always good practice to clear previously set sensitivities
DiffOpt.empty_input_sensitivities!(model)
# Now, try reverse mode AD
direction_x = 10.0
MOI.set(model, DiffOpt.ReverseVariablePrimal(), x, direction_x)
DiffOpt.reverse_differentiate!(model)
@show MOI.get(model, DiffOpt.ReverseConstraintSet(), ParameterRef(p)) == MOI.Parameter(direction_x * 3 / pc_val)
@show abs(MOI.get(model, DiffOpt.ReverseConstraintSet(), ParameterRef(pc)).value -
-direction_x * 3 * p_val / pc_val^2) < 1e-5
```

### Low-level DiffOpt-JuMP API

A brief example:

```julia
using JuMP, DiffOpt, HiGHS
29 changes: 11 additions & 18 deletions docs/src/examples/custom-relu.jl
@@ -32,7 +32,7 @@ function matrix_relu(
@variable(model, x[1:layer_size, 1:batch_size] >= 0)
@objective(model, Min, x[:]'x[:] - 2y[:]'x[:])
optimize!(model)
return value.(x)
return Float32.(value.(x))
end

# Define the reverse differentiation rule, for the function we defined above.
@@ -42,9 +42,9 @@ function ChainRulesCore.rrule(::typeof(matrix_relu), y::Matrix{T}) where {T}
function pullback_matrix_relu(dl_dx)
## some value from the backpropagation (e.g., loss) is denoted by `l`
## so `dl_dy` is the derivative of `l` wrt `y`
x = model[:x] # load decision variable `x` into scope
dl_dy = zeros(T, size(dl_dx))
dl_dq = zeros(T, size(dl_dx))
x = model[:x]::Matrix{JuMP.VariableRef} # load decision variable `x` into scope
dl_dy = zeros(T, size(x))
dl_dq = zeros(T, size(x))
## set sensitivities
MOI.set.(model, DiffOpt.ReverseVariablePrimal(), x[:], dl_dx[:])
## compute grad
@@ -76,13 +76,13 @@ m = Flux.Chain(

N = 1000 # batch size
## Preprocessing train data
imgs = MLDatasets.MNIST.traintensor(1:N)
labels = MLDatasets.MNIST.trainlabels(1:N)
imgs = MLDatasets.MNIST(; split = :train).features[:, :, 1:N]
labels = MLDatasets.MNIST(; split = :train).targets[1:N]
train_X = float.(reshape(imgs, size(imgs, 1) * size(imgs, 2), N)) # stack images
train_Y = Flux.onehotbatch(labels, 0:9);
## Preprocessing test data
test_imgs = MLDatasets.MNIST.testtensor(1:N)
test_labels = MLDatasets.MNIST.testlabels(1:N)
test_imgs = MLDatasets.MNIST(; split = :test).features[:, :, 1:N]
test_labels = MLDatasets.MNIST(; split = :test).targets[1:N];
test_X = float.(reshape(test_imgs, size(test_imgs, 1) * size(test_imgs, 2), N))
test_Y = Flux.onehotbatch(test_labels, 0:9);

@@ -97,19 +97,12 @@ dataset = repeated((train_X, train_Y), epochs);
# ## Network training

# training loss function, Flux optimizer
custom_loss(x, y) = Flux.crossentropy(m(x), y)
opt = Flux.Adam()
evalcb = () -> @show(custom_loss(train_X, train_Y))
custom_loss(m, x, y) = Flux.crossentropy(m(x), y)
opt = Flux.setup(Flux.Adam(), m)

# Train to optimize network parameters

@time Flux.train!(
custom_loss,
Flux.params(m),
dataset,
opt,
cb = Flux.throttle(evalcb, 5),
);
@time Flux.train!(custom_loss, m, dataset, opt);

# Although our custom implementation takes time, it is able to reach similar
# accuracy to the usual ReLU function implementation.
29 changes: 11 additions & 18 deletions docs/src/examples/polyhedral_project.jl
@@ -54,7 +54,7 @@ function (polytope::Polytope{N})(
)
@objective(model, Min, dot(x - y, x - y))
optimize!(model)
return JuMP.value.(x)
return Float32.(JuMP.value.(x))
end

# The `@functor` macro from Flux implements auxiliary functions for collecting the parameters of
@@ -75,12 +75,12 @@ function ChainRulesCore.rrule(
model = direct_model(DiffOpt.diff_optimizer(Ipopt.Optimizer))
xv = polytope(y; model = model)
function pullback_matrix_projection(dl_dx)
layer_size, batch_size = size(dl_dx)
dl_dx = ChainRulesCore.unthunk(dl_dx)
## `dl_dy` is the derivative of `l` wrt `y`
x = model[:x]
x = model[:x]::Matrix{JuMP.VariableRef}
layer_size, batch_size = size(x)
## grad wrt input parameters
dl_dy = zeros(size(dl_dx))
dl_dy = zeros(size(x))
## grad wrt layer parameters
dl_dw = zero.(polytope.w)
dl_db = zero(polytope.b)
@@ -122,13 +122,13 @@ m = Flux.Chain(

M = 500 # batch size
## Preprocessing train data
imgs = MLDatasets.MNIST.traintensor(1:M)
labels = MLDatasets.MNIST.trainlabels(1:M);
imgs = MLDatasets.MNIST(; split = :train).features[:, :, 1:M]
labels = MLDatasets.MNIST(; split = :train).targets[1:M]
train_X = float.(reshape(imgs, size(imgs, 1) * size(imgs, 2), M)) # stack images
train_Y = Flux.onehotbatch(labels, 0:9);
## Preprocessing test data
test_imgs = MLDatasets.MNIST.testtensor(1:M)
test_labels = MLDatasets.MNIST.testlabels(1:M)
test_imgs = MLDatasets.MNIST(; split = :test).features[:, :, 1:M]
test_labels = MLDatasets.MNIST(; split = :test).targets[1:M]
test_X = float.(reshape(test_imgs, size(test_imgs, 1) * size(test_imgs, 2), M))
test_Y = Flux.onehotbatch(test_labels, 0:9);

@@ -142,19 +142,12 @@ dataset = repeated((train_X, train_Y), epochs);
# ## Network training

# training loss function, Flux optimizer
custom_loss(x, y) = Flux.crossentropy(m(x), y)
opt = Flux.ADAM()
evalcb = () -> @show(custom_loss(train_X, train_Y))
custom_loss(m, x, y) = Flux.crossentropy(m(x), y)
opt = Flux.setup(Flux.Adam(), m)

# Train to optimize network parameters

@time Flux.train!(
custom_loss,
Flux.params(m),
dataset,
opt,
cb = Flux.throttle(evalcb, 5),
);
@time Flux.train!(custom_loss, m, dataset, opt);

# Although our custom implementation takes time, it is able to reach similar
# accuracy to the usual ReLU function implementation.
21 changes: 8 additions & 13 deletions docs/src/manual.md
@@ -4,9 +4,9 @@
As of now, this package only works for optimization models that can be written either in convex conic form or convex quadratic form.


## Supported objectives & constraints - scheme 1
## Supported objectives & constraints - `QuadraticProgram` backend

For `QPTH`/`OPTNET` style backend, the package supports following `Function-in-Set` constraints:
For the `QuadraticProgram` backend, the package supports the following `Function-in-Set` constraints:

| MOI Function | MOI Set |
|:-------|:---------------|
@@ -26,9 +26,9 @@ and the following objective types:
| `ScalarQuadraticFunction` |


## Supported objectives & constraints - scheme 2
## Supported objectives & constraints - `ConicProgram` backend

For `DiffCP`/`CVXPY` style backend, the package supports following `Function-in-Set` constraints:
For the `ConicProgram` backend, the package supports the following `Function-in-Set` constraints:

| MOI Function | MOI Set |
|:-------|:---------------|
@@ -50,19 +50,16 @@ and the following objective types:
| `VariableIndex` |
| `ScalarAffineFunction` |

Other conic sets such as `RotatedSecondOrderCone` and `PositiveSemidefiniteConeSquare` are supported through bridges.
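
For example, a minimal sketch with a `RotatedSecondOrderCone` constraint; SCS is assumed as the conic solver, and the rotated cone is bridged to the `SecondOrderCone` it supports natively:

```julia
using JuMP, DiffOpt, SCS

model = Model(() -> DiffOpt.diff_optimizer(SCS.Optimizer))
set_silent(model)
@variable(model, x[1:2])
@variable(model, t)
@constraint(model, x .== 1.0)
# 2 * t * 1 >= ||x||^2; SCS sees this through a SecondOrderCone bridge
@constraint(model, rsoc, [t; 1.0; x] in RotatedSecondOrderCone())
@objective(model, Min, t)
optimize!(model)

# reverse-mode differentiation passes through the bridge
MOI.set(model, DiffOpt.ReverseVariablePrimal(), t, 1.0)
DiffOpt.reverse_differentiate!(model)
```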

## Creating a differentiable optimizer

## Creating a differentiable MOI optimizer

You can create a differentiable optimizer over an existing MOI solver by using the `diff_optimizer` utility.
```@docs
diff_optimizer
```
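
A minimal MOI-level sketch of the pattern, assuming HiGHS as the inner solver:

```julia
import DiffOpt
import HiGHS
import MathOptInterface as MOI

optimizer = DiffOpt.diff_optimizer(HiGHS.Optimizer)
MOI.set(optimizer, MOI.Silent(), true)

x = MOI.add_variable(optimizer)
f = MOI.ScalarAffineFunction([MOI.ScalarAffineTerm(1.0, x)], 0.0)
c = MOI.add_constraint(optimizer, f, MOI.GreaterThan(1.0))
MOI.set(optimizer, MOI.ObjectiveSense(), MOI.MIN_SENSE)
MOI.set(optimizer, MOI.ObjectiveFunction{typeof(f)}(), f)
MOI.optimize!(optimizer)

# the differentiation attributes are available on the same object
MOI.set(optimizer, DiffOpt.ReverseVariablePrimal(), x, 1.0)
DiffOpt.reverse_differentiate!(optimizer)
dc = MOI.get(optimizer, DiffOpt.ReverseConstraintFunction(), c)
```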

## Adding new sets and constraints

The DiffOpt `Optimizer` behaves similarly to other MOI Optimizers
and implements the `MOI.AbstractOptimizer` API.

## Projections on cone sets

DiffOpt requires taking projections and finding projection gradients of vectors while computing the Jacobians. For this purpose, we use [MathOptSetDistances.jl](https://github.com/matbesancon/MathOptSetDistances.jl), which is a dedicated package for computing set distances, projections, and projection gradients.
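
A short sketch of the projection utilities this relies on (treating the exact call signatures as illustrative):

```julia
import MathOptInterface as MOI
import MathOptSetDistances as MOSD

v = [1.0, -2.0, 3.0]
cone = MOI.Nonnegatives(3)

# Euclidean projection of v onto the nonnegative orthant: [1.0, 0.0, 3.0]
proj = MOSD.projection_on_set(MOSD.DefaultDistance(), v, cone)

# Jacobian of that projection, used when assembling the derivative system
dproj = MOSD.projection_gradient_on_set(MOSD.DefaultDistance(), v, cone)
```
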
@@ -104,6 +101,4 @@ In the light of above, DiffOpt differentiates program variables ``x``, ``s``, ``
- OptNet: Differentiable Optimization as a Layer in Neural Networks

### Backward Pass vector
One possible point of confusion in finding Jacobians is the role of the backward pass vector - above eqn (7), *OptNet: Differentiable Optimization as a Layer in Neural Networks*. While differentiating convex programs, it is often the case that we don't want to find the actual derivatives, rather we might be interested in computing the product of Jacobians with a *backward pass vector*, often used in backprop in machine learning/automatic differentiation. This is what happens in scheme 1 of `DiffOpt` backend.

But, for the conic system (scheme 2), we provide perturbations in conic data (`dA`, `db`, `dc`) to compute pertubations (`dx`, `dy`, `dz`) in input variables. Unlike the quadratic case, these perturbations are actual derivatives, not the product with a backward pass vector. This is an important distinction between the two schemes of differential optimization.
One possible point of confusion in finding Jacobians is the role of the backward pass vector (see the discussion above eqn (7) in *OptNet: Differentiable Optimization as a Layer in Neural Networks*). While differentiating convex programs, we often do not want the actual derivatives; rather, we are interested in the product of the Jacobians with a *backward pass vector*, as commonly used in backpropagation in machine learning/automatic differentiation. This is what happens in the `DiffOpt` backends.
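
A sketch of that reverse pass, assuming `model` is an already-solved DiffOpt model with variables `x` and a constraint `con`, and `dl_dx` is the backward pass vector:

```julia
# `dl_dx` is the backward pass vector, e.g. the gradient of a downstream loss `l`
MOI.set.(model, DiffOpt.ReverseVariablePrimal(), x, dl_dx)
DiffOpt.reverse_differentiate!(model)

# products of the backward pass vector with the Jacobians w.r.t. the problem data
dobj = MOI.get(model, DiffOpt.ReverseObjectiveFunction())
dcon = MOI.get(model, DiffOpt.ReverseConstraintFunction(), con)
```
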
12 changes: 12 additions & 0 deletions src/ConicProgram/ConicProgram.jl
@@ -183,6 +183,18 @@
)
b = model.model.constraints.constants

if any(isnan, model.y) || length(model.y) < length(b)
    error(
        "Some constraints are missing a value for the `ConstraintDualStart` attribute.",
    )
end

if any(isnan, model.s) || length(model.s) < length(b)
    error(
        "Some constraints are missing a value for the `ConstraintPrimalStart` attribute.",
    )
end

if MOI.get(model, MOI.ObjectiveSense()) == MOI.FEASIBILITY_SENSE
c = SparseArrays.spzeros(size(A, 2))
else
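
As a sketch of what these checks require, assuming `model` is a DiffOpt model with a variable `x` and a constraint index `ci`: `optimize!` normally fills in the primal/dual solution, but when it is supplied manually the start attributes must be set for every constraint, otherwise the errors above are thrown.

```julia
# assumed names: `model`, variable `x`, and constraint index `ci`
MOI.set(model, MOI.VariablePrimalStart(), x, 1.0)
MOI.set(model, MOI.ConstraintPrimalStart(), ci, 0.0)
MOI.set(model, MOI.ConstraintDualStart(), ci, 1.0)
DiffOpt.reverse_differentiate!(model)  # the checks above now find non-NaN values
```
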
5 changes: 5 additions & 0 deletions src/DiffOpt.jl
@@ -12,12 +12,14 @@ import LazyArrays
import LinearAlgebra
import MathOptInterface as MOI
import MathOptSetDistances as MOSD
import ParametricOptInterface as POI
import SparseArrays

include("utils.jl")
include("product_of_sets.jl")
include("diff_opt.jl")
include("moi_wrapper.jl")
include("parameters.jl")
include("jump_moi_overloads.jl")

include("copy_dual.jl")
@@ -40,4 +42,7 @@ end

export diff_optimizer

# TODO
# add precompilation statements

end # module
19 changes: 19 additions & 0 deletions src/bridges.jl
@@ -43,6 +43,25 @@
MOI.get(model, attr, bridge.vector_constraint),
)[1]
end

function MOI.set(
    model::MOI.ModelLike,
    attr::ForwardConstraintFunction,
    bridge::MOI.Bridges.Constraint.ScalarizeBridge,
    value,
)
    MOI.set.(model, attr, bridge.scalar_constraints, value)
    return
end

function MOI.get(
    model::MOI.ModelLike,
    attr::ReverseConstraintFunction,
    bridge::MOI.Bridges.Constraint.ScalarizeBridge,
)
    return _vectorize(MOI.get.(model, attr, bridge.scalar_constraints))
end

function MOI.get(
model::MOI.ModelLike,
attr::DiffOpt.ReverseConstraintFunction,
11 changes: 11 additions & 0 deletions src/diff_opt.jl
@@ -58,6 +58,17 @@ The output solution differentials can be queried with the attribute
"""
function forward_differentiate! end

"""
empty_input_sensitivities!(model::MOI.ModelLike)

Empty the input sensitivities of the model.
Sets to zero all of the sensitivities set by the user with methods such as:
- `MOI.set(model, DiffOpt.ReverseVariablePrimal(), variable_index, value)`
- `MOI.set(model, DiffOpt.ForwardObjectiveFunction(), expression)`
- `MOI.set(model, DiffOpt.ForwardConstraintFunction(), index, expression)`
"""
function empty_input_sensitivities! end

"""
ForwardObjectiveFunction <: MOI.AbstractModelAttribute

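A usage sketch for `empty_input_sensitivities!`, reusing the names `model`, `p`, `pc`, and `x` from the README example above:

```julia
# differentiate with respect to p
MOI.set(model, DiffOpt.ForwardConstraintSet(), ParameterRef(p), Parameter(1.0))
DiffOpt.forward_differentiate!(model)
dx_dp = MOI.get(model, DiffOpt.ForwardVariablePrimal(), x)

# clear the seed on p before differentiating with respect to pc
DiffOpt.empty_input_sensitivities!(model)
MOI.set(model, DiffOpt.ForwardConstraintSet(), ParameterRef(pc), Parameter(1.0))
DiffOpt.forward_differentiate!(model)
dx_dpc = MOI.get(model, DiffOpt.ForwardVariablePrimal(), x)
```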