From d4581e47f8a57350a5f9d6df93d73f585a4b0ff1 Mon Sep 17 00:00:00 2001 From: "Documenter.jl" Date: Wed, 8 Jan 2025 22:10:32 +0000 Subject: [PATCH] build based on d09d1b1 --- .../index.html | 298 ++--- dev/examples/autotuning-ridge/index.html | 318 ++--- dev/examples/chainrules_unit/index.html | 1046 ++++++++-------- dev/examples/custom-relu/index.html | 2 +- .../matrix-inversion-manual/index.html | 2 +- dev/examples/nearest_correlation/index.html | 18 +- dev/examples/polyhedral_project/index.html | 2 +- .../sensitivity-analysis-ridge/index.html | 1086 ++++++++--------- .../sensitivity-analysis-svm/index.html | 544 ++++----- dev/index.html | 2 +- dev/intro/index.html | 2 +- dev/manual/index.html | 2 +- dev/reference/index.html | 8 +- dev/search/index.html | 2 +- dev/search_index.js | 2 +- dev/usage/index.html | 2 +- 16 files changed, 1668 insertions(+), 1668 deletions(-) diff --git a/dev/examples/Thermal_Generation_Dispatch_Example/index.html b/dev/examples/Thermal_Generation_Dispatch_Example/index.html index f591cfdf..2eeaee69 100644 --- a/dev/examples/Thermal_Generation_Dispatch_Example/index.html +++ b/dev/examples/Thermal_Generation_Dispatch_Example/index.html @@ -116,53 +116,53 @@ ) - + - + - + - + - + - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +

Result Sensitivity Analysis:

Plots.plot(
     d,
     data_results[1, :, I+2:2*(I+1)];
@@ -173,55 +173,55 @@
 )
- + - + - + - + - + - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +

Results for the reverse context

Result Primal Values:

Plots.plot(
     d,
     data_results[2, :, 1:I+1];
@@ -232,53 +232,53 @@ 

- + - + - + - + - + - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +

Result Sensitivity Analysis:

Plots.plot(
     d,
     data_results[2, :, I+2:2*(I+1)];
@@ -289,53 +289,53 @@ 

- + - + - + - + - + - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -

This page was generated using Literate.jl.

+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +

This page was generated using Literate.jl.

diff --git a/dev/examples/autotuning-ridge/index.html b/dev/examples/autotuning-ridge/index.html index 5f9872b9..0f9c1518 100644 --- a/dev/examples/autotuning-ridge/index.html +++ b/dev/examples/autotuning-ridge/index.html @@ -71,55 +71,55 @@ Plots.title!("Normalized MSE on training and testing sets")

- + - + - + - + - + - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +

Leveraging differentiable optimization: computing the derivative of the solution

Using DiffOpt, we can compute ∂w_i/∂α, the derivative of the learned solution ̂w w.r.t. the regularization parameter.

function compute_dw_dα(model, w)
     D = length(w)
     dw_dα = zeros(D)
@@ -176,55 +176,55 @@ 

- + - + - + - + - + - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +

Visualize the convergence of α to its optimal value

Plots.plot(
     iters,
     ᾱ;
@@ -236,43 +236,43 @@ 

- + - + - + - + - + - - - - - - - - - - - - - - - - - - - - - + + + + + + + + + + + + + + + + + + + + +

Visualize the convergence of the objective function

Plots.plot(
     iters,
@@ -285,45 +285,45 @@ 

- + - + - + - + - + - - - - - - - - - - - - - - - - - - - - - - - + + + + + + + + + + + + + + + + + + + + + + +

Visualize the convergence of the derivative to zero

Plots.plot(
     iters,
@@ -336,44 +336,44 @@ 

- + - + - + - + - + - - - - - - - - - - - - - - - - - - - - - - - + + + + + + + + + + + + + + + + + + + + + + + -

This page was generated using Literate.jl.

+

This page was generated using Literate.jl.

diff --git a/dev/examples/chainrules_unit/index.html b/dev/examples/chainrules_unit/index.html index db66bb31..ee249674 100644 --- a/dev/examples/chainrules_unit/index.html +++ b/dev/examples/chainrules_unit/index.html @@ -110,539 +110,539 @@ Plots.xlims!(0.0, 3.5)

- + - + - + - + - + - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + 
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +

Forward Differentiation

Forward differentiation rule for the solution map of the unit commitment problem. It takes as arguments:

  1. the perturbations on the input parameters
  2. the differentiated function
  3. the primal values of the input parameters,

and returns a tuple (primal_output, perturbations), the main primal result and the perturbation propagated to this result:

function ChainRulesCore.frule(
     (_, Δload1_demand, Δload2_demand, Δgen_costs, Δnoload_costs),
     ::typeof(unit_commitment),
@@ -778,4 +778,4 @@ 

Invenia Technical Computing.


This page was generated using Literate.jl.

+ -0.0

The sensitivity of the generation is propagated to the sensitivity of both loads at the second time frame.

This example integrating ChainRules was designed with support from Invenia Technical Computing.


This page was generated using Literate.jl.

diff --git a/dev/examples/custom-relu/index.html b/dev/examples/custom-relu/index.html index 986efff1..55c8cf7b 100644 --- a/dev/examples/custom-relu/index.html +++ b/dev/examples/custom-relu/index.html @@ -93,4 +93,4 @@ custom_loss(train_X, train_Y) = 1.691865f0 custom_loss(train_X, train_Y) = 1.610134f0 custom_loss(train_X, train_Y) = 1.5316879f0 -109.718281 seconds (76.78 M allocations: 4.763 GiB, 1.44% gc time, 0.71% compilation time)

Although our custom implementation takes time, it is able to reach similar accuracy as the usual ReLU function implementation.

Accuracy results

Average of correct guesses

accuracy(x, y) = Statistics.mean(Flux.onecold(m(x)) .== Flux.onecold(y));

Training accuracy

accuracy(train_X, train_Y)
0.562

Test accuracy

accuracy(test_X, test_Y)
0.478

Note that the accuracy is low due to simplified training. It is possible to increase the number of samples N, the number of epochs epoch and the connectivity inner.


This page was generated using Literate.jl.

+108.495299 seconds (76.71 M allocations: 4.762 GiB, 1.47% gc time, 0.71% compilation time)

Although our custom implementation takes time, it is able to reach similar accuracy as the usual ReLU function implementation.

Accuracy results

Average of correct guesses

accuracy(x, y) = Statistics.mean(Flux.onecold(m(x)) .== Flux.onecold(y));

Training accuracy

accuracy(train_X, train_Y)
0.562

Test accuracy

accuracy(test_X, test_Y)
0.478

Note that the accuracy is low due to simplified training. It is possible to increase the number of samples N, the number of epochs epoch and the connectivity inner.


This page was generated using Literate.jl.

diff --git a/dev/examples/matrix-inversion-manual/index.html b/dev/examples/matrix-inversion-manual/index.html index 663cf31c..99d2ee80 100644 --- a/dev/examples/matrix-inversion-manual/index.html +++ b/dev/examples/matrix-inversion-manual/index.html @@ -68,4 +68,4 @@ 0.0 * index(x[1]) - 1.0, # to indicate the direction vector to get directional derivatives )

Note that 0.0 * index(x[1]) is used to make its type typeof(0.0 * index(x[1]) - 1.0) <: MOI.AbstractScalarFunction. To indicate different direction to get directional derivative, users should replace 0.0 * index(x[1]) - 1.0 as the form of dG*x - dh, where dG and dh correspond to the elements of direction vectors along G and h axes, respectively.

Compute derivatives

DiffOpt.forward_differentiate!(model)

Query derivative

dx = MOI.get.(model, DiffOpt.ForwardVariablePrimal(), x)
2-element Vector{Float64}:
  0.2500000038571342
- 0.7500000115714025

This page was generated using Literate.jl.

+ 0.7500000115714025


This page was generated using Literate.jl.

diff --git a/dev/examples/nearest_correlation/index.html b/dev/examples/nearest_correlation/index.html index 56b2c74d..5fa7cf61 100644 --- a/dev/examples/nearest_correlation/index.html +++ b/dev/examples/nearest_correlation/index.html @@ -37,12 +37,12 @@ ------------------------------------------------------------------ iter | pri res | dua res | gap | obj | scale | time (s) ------------------------------------------------------------------ - 0| 9.98e-01 3.35e+00 2.27e+01 -1.27e+01 1.00e-01 1.31e-04 - 75| 5.98e-05 2.10e-06 1.25e-04 -6.72e+00 1.00e-01 3.78e-04 + 0| 9.98e-01 3.35e+00 2.27e+01 -1.27e+01 1.00e-01 1.20e-04 + 75| 5.98e-05 2.10e-06 1.25e-04 -6.72e+00 1.00e-01 3.68e-04 ------------------------------------------------------------------ status: solved -timings: total: 3.79e-04s = setup: 4.94e-05s + solve: 3.30e-04s - lin-sys: 2.31e-05s, cones: 2.04e-04s, accel: 5.01e-06s +timings: total: 3.69e-04s = setup: 4.65e-05s + solve: 3.22e-04s + lin-sys: 2.29e-05s, cones: 2.04e-04s, accel: 3.92e-06s ------------------------------------------------------------------ objective = -6.721507 ------------------------------------------------------------------

The projection of A is:

X
3×3 Matrix{Float64}:
@@ -72,12 +72,12 @@
 ------------------------------------------------------------------
  iter | pri res | dua res |   gap   |   obj   |  scale  | time (s)
 ------------------------------------------------------------------
-     0| 1.02e+00  3.57e+00  3.24e+01 -3.42e+01  1.00e-01  1.60e-04
-    75| 2.52e-05  7.03e-07  7.14e-05 -1.74e+01  1.00e-01  4.85e-04
+     0| 1.02e+00  3.57e+00  3.24e+01 -3.42e+01  1.00e-01  1.01e-04
+    75| 2.52e-05  7.03e-07  7.14e-05 -1.74e+01  1.00e-01  4.27e-04
 ------------------------------------------------------------------
 status:  solved
-timings: total: 4.86e-04s = setup: 7.75e-05s + solve: 4.09e-04s
-	 lin-sys: 3.26e-05s, cones: 2.70e-04s, accel: 4.39e-06s
+timings: total: 4.28e-04s = setup: 4.84e-05s + solve: 3.79e-04s
+	 lin-sys: 3.17e-05s, cones: 2.66e-04s, accel: 4.22e-06s
 ------------------------------------------------------------------
 objective = -17.447239
 ------------------------------------------------------------------

The projection of A is:

X
4×4 Matrix{Float64}:
@@ -88,4 +88,4 @@
   0.101794    0.228658   -0.0880952  -0.0490976
   0.228658    0.0565823   0.158685   -0.0880952
  -0.0880952   0.158685    0.0565823   0.228658
- -0.0490976  -0.0880952   0.228658    0.101794

This page was generated using Literate.jl.

+ -0.0490976 -0.0880952 0.228658 0.101794

This page was generated using Literate.jl.

diff --git a/dev/examples/polyhedral_project/index.html b/dev/examples/polyhedral_project/index.html index 2d42d0c8..302b5399 100644 --- a/dev/examples/polyhedral_project/index.html +++ b/dev/examples/polyhedral_project/index.html @@ -119,4 +119,4 @@ custom_loss(train_X, train_Y) = 0.7472685f0 custom_loss(train_X, train_Y) = 0.65253365f0 custom_loss(train_X, train_Y) = 0.5644549f0 - 64.838840 seconds (35.89 M allocations: 4.149 GiB, 2.35% gc time, 1.40% compilation time)

Although our custom implementation takes time, it is able to reach similar accuracy as the usual ReLU function implementation.

Accuracy results

Average of correct guesses

accuracy(x, y) = Statistics.mean(Flux.onecold(m(x)) .== Flux.onecold(y));

Training accuracy

accuracy(train_X, train_Y)
0.9

Test accuracy

accuracy(test_X, test_Y)
0.744

Note that the accuracy is low due to simplified training. It is possible to increase the number of samples N, the number of epochs epoch and the connectivity inner.


This page was generated using Literate.jl.

+ 64.343004 seconds (35.89 M allocations: 4.149 GiB, 2.37% gc time, 1.43% compilation time)

Although our custom implementation takes time, it is able to reach similar accuracy as the usual ReLU function implementation.

Accuracy results

Average of correct guesses

accuracy(x, y) = Statistics.mean(Flux.onecold(m(x)) .== Flux.onecold(y));

Training accuracy

accuracy(train_X, train_Y)
0.9

Test accuracy

accuracy(test_X, test_Y)
0.744

Note that the accuracy is low due to simplified training. It is possible to increase the number of samples N, the number of epochs epoch and the connectivity inner.


This page was generated using Literate.jl.

diff --git a/dev/examples/sensitivity-analysis-ridge/index.html b/dev/examples/sensitivity-analysis-ridge/index.html index ab1445e7..a55c8024 100644 --- a/dev/examples/sensitivity-analysis-ridge/index.html +++ b/dev/examples/sensitivity-analysis-ridge/index.html @@ -52,203 +52,203 @@ p - + - + - + - + - + - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +

Differentiate

Now that we've solved the problem, we can compute the sensitivity of optimal values of the slope w with respect to perturbations in the data points (x,y).

alpha = 0.4
 model, w, b = fit_ridge(X, Y, alpha)
 ŵ = value(w)
@@ -283,195 +283,195 @@ 

Diff Plots.title!("Regression slope sensitivity with respect to x")

- + - + - + - + - + - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
p = Plots.scatter(
     X,
@@ -491,194 +491,194 @@ 

Diff Plots.title!("Regression slope sensitivity with respect to y")

- + - + - + - + - + - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + -

Note the points with less central x values induce a greater y sensitivity of the slope.


This page was generated using Literate.jl.

+

Note the points with less central x values induce a greater y sensitivity of the slope.


This page was generated using Literate.jl.

diff --git a/dev/examples/sensitivity-analysis-svm/index.html b/dev/examples/sensitivity-analysis-svm/index.html index 794ecd07..877e2534 100644 --- a/dev/examples/sensitivity-analysis-svm/index.html +++ b/dev/examples/sensitivity-analysis-svm/index.html @@ -49,155 +49,155 @@ ) - + - + - + - + - + - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +

Gradient of hyperplane wrt the data point coordinates

Now that we've solved the SVM, we can compute the sensitivity of optimal values – the separating hyperplane in our case – with respect to perturbations of the problem data – the data points – using DiffOpt.

How does a change in coordinates of the data points, X, affects the position of the hyperplane? This is achieved by finding gradients of w and b with respect to X[i].

Begin differentiating the model. analogous to varying θ in the expression:

\[y_{i} (w^T (X_{i} + \theta) + b) \ge 1 - \xi_{i}\]

∇ = zeros(N)
 for i in 1:N
     for j in 1:N
@@ -229,149 +229,149 @@ 

- + - + - + - + - + - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + -

This page was generated using Literate.jl.

+

This page was generated using Literate.jl.

diff --git a/dev/index.html b/dev/index.html index ad66a139..35101782 100644 --- a/dev/index.html +++ b/dev/index.html @@ -1,2 +1,2 @@ -Home · DiffOpt.jl

DiffOpt.jl

DiffOpt.jl is a package for differentiating convex optimization program (JuMP.jl or MathOptInterface.jl models) with respect to program parameters. Note that this package does not contain any solver. This package has two major backends, available via the reverse_differentiate! and forward_differentiate! methods, to differentiate models (quadratic or conic) with optimal solutions.

Note

Currently supports linear programs (LP), convex quadratic programs (QP) and convex conic programs (SDP, SOCP, exponential cone constraints only).

Installation

DiffOpt can be installed through the Julia package manager:

(v1.3) pkg> add https://github.com/jump-dev/DiffOpt.jl

Why are Differentiable optimization problems important?

Differentiable optimization is a promising field of convex optimization and has many potential applications in game theory, control theory and machine learning (specifically deep learning - refer this video for more). Recent work has shown how to differentiate specific subclasses of convex optimization problems. But several applications remain unexplored (refer section 8 of this really good thesis). With the help of automatic differentiation, differentiable optimization can have a significant impact on creating end-to-end differentiable systems to model neural networks, stochastic processes, or a game.

Contributing

Contributions to this package are more than welcome, if you find a bug or have any suggestions for the documentation please post it on the github issue tracker.

When contributing please note that the package follows the JuMP style guide

+Home · DiffOpt.jl

DiffOpt.jl

DiffOpt.jl is a package for differentiating convex optimization program (JuMP.jl or MathOptInterface.jl models) with respect to program parameters. Note that this package does not contain any solver. This package has two major backends, available via the reverse_differentiate! and forward_differentiate! methods, to differentiate models (quadratic or conic) with optimal solutions.

Note

Currently supports linear programs (LP), convex quadratic programs (QP) and convex conic programs (SDP, SOCP, exponential cone constraints only).

Installation

DiffOpt can be installed through the Julia package manager:

(v1.3) pkg> add https://github.com/jump-dev/DiffOpt.jl

Why are Differentiable optimization problems important?

Differentiable optimization is a promising field of convex optimization and has many potential applications in game theory, control theory and machine learning (specifically deep learning - refer this video for more). Recent work has shown how to differentiate specific subclasses of convex optimization problems. But several applications remain unexplored (refer section 8 of this really good thesis). With the help of automatic differentiation, differentiable optimization can have a significant impact on creating end-to-end differentiable systems to model neural networks, stochastic processes, or a game.

Contributing

Contributions to this package are more than welcome, if you find a bug or have any suggestions for the documentation please post it on the github issue tracker.

When contributing please note that the package follows the JuMP style guide

diff --git a/dev/intro/index.html b/dev/intro/index.html index 8b3865e3..29fda8fd 100644 --- a/dev/intro/index.html +++ b/dev/intro/index.html @@ -11,4 +11,4 @@ & \operatorname {subject\;to} & g_{i}(x; \theta)\leq 0,\quad i=1,\dots ,m\\ & & A(\theta) x = b(\theta) \end{array} -\end{split}\]

which is the input of $l(x^*(\theta))$, a loss function. Our goal is to choose the best parameter $\theta$ so that $l$ is optimized. Here, $l(x^*(\theta))$ is the objective function and $\theta$ is the decision variable. In order to apply a gradient-based strategy to this problem, we need to differentiate $l$ with respect to $\theta$.

\[\frac{\partial l(x^*(\theta))}{\partial \theta} = \frac{\partial l(x^*(\theta))}{\partial x^*(\theta)} \frac{\partial x^*(\theta)}{\partial \theta}\]

By implicit function theorem, this translates to differentiating the program data, i.e. functions $f$, $g_i(x)$ and matrices $A$, $b$, with respect to $\theta$.

This is can be achieved in two steps or passes:

  1. Forward pass - Given an initial value of $\theta$, solves the optimization problem to find $x^*(\theta)$
  2. Backward pass - Given $x^*$, differentiate and find $\frac{\partial x^*(\theta)}{\partial \theta}$
+\end{split}\]

which is the input of $l(x^*(\theta))$, a loss function. Our goal is to choose the best parameter $\theta$ so that $l$ is optimized. Here, $l(x^*(\theta))$ is the objective function and $\theta$ is the decision variable. In order to apply a gradient-based strategy to this problem, we need to differentiate $l$ with respect to $\theta$.

\[\frac{\partial l(x^*(\theta))}{\partial \theta} = \frac{\partial l(x^*(\theta))}{\partial x^*(\theta)} \frac{\partial x^*(\theta)}{\partial \theta}\]

By implicit function theorem, this translates to differentiating the program data, i.e. functions $f$, $g_i(x)$ and matrices $A$, $b$, with respect to $\theta$.

This is can be achieved in two steps or passes:

  1. Forward pass - Given an initial value of $\theta$, solves the optimization problem to find $x^*(\theta)$
  2. Backward pass - Given $x^*$, differentiate and find $\frac{\partial x^*(\theta)}{\partial \theta}$
diff --git a/dev/manual/index.html b/dev/manual/index.html index 956d612b..9ab6eeaf 100644 --- a/dev/manual/index.html +++ b/dev/manual/index.html @@ -10,4 +10,4 @@ \mbox{subject to} & A x + s = b \quad \quad & \mbox{subject to} & A^T y + c = 0 \\ & s \in \mathcal{K} & & y \in \mathcal{K}^* \end{array} -\end{split}\]

where

  • $x \in R^n$ is the primal variable, $y \in R^m$ is the dual variable, and $s \in R^m$ is the primal slack

variable

  • $\mathcal{K} \subseteq R^m$ is a closed convex cone and $\mathcal{K}^* \subseteq R^m$ is the corresponding dual cone

variable

  • $A \in R^{m \times n}$, $b \in R^m$, $c \in R^n$ are problem data

In the light of above, DiffOpt differentiates program variables $x$, $s$, $y$ w.r.t pertubations/sensivities in problem data i.e. $dA$, $db$, $dc$. This is achieved via implicit differentiation and matrix differential calculus.

Note that the primal (P) and dual (D) are self-duals of each other. Similarly, for the constraints we support, $\mathcal{K}$ is same in format as $\mathcal{K}^*$.

Reference articles

  • Differentiating Through a Cone Program - Akshay Agrawal, Shane Barratt, Stephen Boyd, Enzo Busseti, Walaa M. Moursi, 2019
  • A fast and differentiable QP solver for PyTorch. Crafted by Brandon Amos and J. Zico Kolter.
  • OptNet: Differentiable Optimization as a Layer in Neural Networks

Backward Pass vector

One possible point of confusion in finding Jacobians is the role of the backward pass vector - above eqn (7), OptNet: Differentiable Optimization as a Layer in Neural Networks. While differentiating convex programs, it is often the case that we don't want to find the actual derivatives, rather we might be interested in computing the product of Jacobians with a backward pass vector, often used in backprop in machine learning/automatic differentiation. This is what happens in scheme 1 of DiffOpt backend.

But, for the conic system (scheme 2), we provide perturbations in conic data (dA, db, dc) to compute pertubations (dx, dy, dz) in input variables. Unlike the quadratic case, these perturbations are actual derivatives, not the product with a backward pass vector. This is an important distinction between the two schemes of differential optimization.

+\end{split}\]

where

  • $x \in R^n$ is the primal variable, $y \in R^m$ is the dual variable, and $s \in R^m$ is the primal slack

variable

  • $\mathcal{K} \subseteq R^m$ is a closed convex cone and $\mathcal{K}^* \subseteq R^m$ is the corresponding dual cone

variable

  • $A \in R^{m \times n}$, $b \in R^m$, $c \in R^n$ are problem data

In the light of above, DiffOpt differentiates program variables $x$, $s$, $y$ w.r.t pertubations/sensivities in problem data i.e. $dA$, $db$, $dc$. This is achieved via implicit differentiation and matrix differential calculus.

Note that the primal (P) and dual (D) are self-duals of each other. Similarly, for the constraints we support, $\mathcal{K}$ is same in format as $\mathcal{K}^*$.

Reference articles

  • Differentiating Through a Cone Program - Akshay Agrawal, Shane Barratt, Stephen Boyd, Enzo Busseti, Walaa M. Moursi, 2019
  • A fast and differentiable QP solver for PyTorch. Crafted by Brandon Amos and J. Zico Kolter.
  • OptNet: Differentiable Optimization as a Layer in Neural Networks

Backward Pass vector

One possible point of confusion in finding Jacobians is the role of the backward pass vector - above eqn (7), OptNet: Differentiable Optimization as a Layer in Neural Networks. While differentiating convex programs, it is often the case that we don't want to find the actual derivatives, rather we might be interested in computing the product of Jacobians with a backward pass vector, often used in backprop in machine learning/automatic differentiation. This is what happens in scheme 1 of DiffOpt backend.

But, for the conic system (scheme 2), we provide perturbations in conic data (dA, db, dc) to compute pertubations (dx, dy, dz) in input variables. Unlike the quadratic case, these perturbations are actual derivatives, not the product with a backward pass vector. This is an important distinction between the two schemes of differential optimization.

diff --git a/dev/reference/index.html b/dev/reference/index.html index b7fbcadc..c5249a78 100644 --- a/dev/reference/index.html +++ b/dev/reference/index.html @@ -1,14 +1,14 @@ -Reference · DiffOpt.jl

Reference

DiffOpt.AbstractLazyScalarFunctionType
abstract type AbstractLazyScalarFunction <: MOI.AbstractScalarFunction end

Subtype of MOI.AbstractScalarFunction that is not a standard MOI scalar function but can be converted to one using standard_form.

The function can also be inspected lazily using JuMP.coefficient or quad_sym_half.

source
DiffOpt.ForwardConstraintFunctionType
ForwardConstraintFunction <: MOI.AbstractConstraintAttribute

A MOI.AbstractConstraintAttribute to set input data to forward differentiation, that is, problem input data.

For instance, if the scalar constraint of index ci contains θ * (x + 2y) <= 5θ, for the purpose of computing the derivative with respect to θ, the following should be set:

MOI.set(model, DiffOpt.ForwardConstraintFunction(), ci, 1.0 * x + 2.0 * y - 5.0)

Note that we use -5 as the ForwardConstraintFunction sets the tangent of the ConstraintFunction so we consider the expression θ * (x + 2y - 5).

source
DiffOpt.ForwardObjectiveFunctionType
ForwardObjectiveFunction <: MOI.AbstractModelAttribute

A MOI.AbstractModelAttribute to set input data to forward differentiation, that is, problem input data. The possible values are any MOI.AbstractScalarFunction. A MOI.ScalarQuadraticFunction can only be used in linearly constrained quadratic models.

For instance, if the objective contains θ * (x + 2y), for the purpose of computing the derivative with respect to θ, the following should be set:

MOI.set(model, DiffOpt.ForwardObjectiveFunction(), 1.0 * x + 2.0 * y)

where x and y are the relevant MOI.VariableIndex.

source
DiffOpt.ForwardVariablePrimalType
ForwardVariablePrimal <: MOI.AbstractVariableAttribute

A MOI.AbstractVariableAttribute to get output data from forward differentiation, that is, problem solution.

For instance, to get the tangent of the variable of index vi corresponding to the tangents given to ForwardObjectiveFunction and ForwardConstraintFunction, do the following:

MOI.get(model, DiffOpt.ForwardVariablePrimal(), vi)
source
DiffOpt.IndexMappedFunctionType
IndexMappedFunction{F<:MOI.AbstractFunction} <: AbstractLazyScalarFunction

Lazily represents the function MOI.Utilities.map_indices(index_map, DiffOpt.standard_form(func)).

source
DiffOpt.MOItoJuMPType
MOItoJuMP{F<:MOI.AbstractScalarFunction} <: JuMP.AbstractJuMPScalar

Lazily represents the function JuMP.jump_function(model, DiffOpt.standard_form(func)).

source
DiffOpt.MatrixScalarQuadraticFunctionType
struct MatrixScalarQuadraticFunction{T, VT, MT} <: MOI.AbstractScalarFunction
+Reference · DiffOpt.jl

Reference

DiffOpt.AbstractLazyScalarFunctionType
abstract type AbstractLazyScalarFunction <: MOI.AbstractScalarFunction end

Subtype of MOI.AbstractScalarFunction that is not a standard MOI scalar function but can be converted to one using standard_form.

The function can also be inspected lazily using JuMP.coefficient or quad_sym_half.

source
DiffOpt.ForwardConstraintFunctionType
ForwardConstraintFunction <: MOI.AbstractConstraintAttribute

A MOI.AbstractConstraintAttribute to set input data to forward differentiation, that is, problem input data.

For instance, if the scalar constraint of index ci contains θ * (x + 2y) <= 5θ, for the purpose of computing the derivative with respect to θ, the following should be set:

MOI.set(model, DiffOpt.ForwardConstraintFunction(), ci, 1.0 * x + 2.0 * y - 5.0)

Note that we use -5 as the ForwardConstraintFunction sets the tangent of the ConstraintFunction so we consider the expression θ * (x + 2y - 5).

source
DiffOpt.ForwardObjectiveFunctionType
ForwardObjectiveFunction <: MOI.AbstractModelAttribute

A MOI.AbstractModelAttribute to set input data to forward differentiation, that is, problem input data. The possible values are any MOI.AbstractScalarFunction. A MOI.ScalarQuadraticFunction can only be used in linearly constrained quadratic models.

For instance, if the objective contains θ * (x + 2y), for the purpose of computing the derivative with respect to θ, the following should be set:

MOI.set(model, DiffOpt.ForwardObjectiveFunction(), 1.0 * x + 2.0 * y)

where x and y are the relevant MOI.VariableIndex.

source
DiffOpt.ForwardVariablePrimalType
ForwardVariablePrimal <: MOI.AbstractVariableAttribute

A MOI.AbstractVariableAttribute to get output data from forward differentiation, that is, problem solution.

For instance, to get the tangent of the variable of index vi corresponding to the tangents given to ForwardObjectiveFunction and ForwardConstraintFunction, do the following:

MOI.get(model, DiffOpt.ForwardVariablePrimal(), vi)
source
DiffOpt.IndexMappedFunctionType
IndexMappedFunction{F<:MOI.AbstractFunction} <: AbstractLazyScalarFunction

Lazily represents the function MOI.Utilities.map_indices(index_map, DiffOpt.standard_form(func)).

source
DiffOpt.MOItoJuMPType
MOItoJuMP{F<:MOI.AbstractScalarFunction} <: JuMP.AbstractJuMPScalar

Lazily represents the function JuMP.jump_function(model, DiffOpt.standard_form(func)).

source
DiffOpt.MatrixScalarQuadraticFunctionType
struct MatrixScalarQuadraticFunction{T, VT, MT} <: MOI.AbstractScalarFunction
     affine::VectorScalarAffineFunction{T,VT}
     terms::MT
-end

Represents the function x' * terms * x / 2 + affine as an MOI.AbstractScalarFunction where x[i] = MOI.VariableIndex(i). Use standard_form to convert it to a MOI.ScalarQuadraticFunction{T}.

source
DiffOpt.MatrixVectorAffineFunctionType
MatrixVectorAffineFunction{T, VT} <: MOI.AbstractVectorFunction

Represents the function terms * x + constant as an MOI.AbstractVectorFunction where x[i] = MOI.VariableIndex(i). Use standard_form to convert it to a MOI.VectorAffineFunction{T}.

source
DiffOpt.ModelConstructorType
ModelConstructor <: MOI.AbstractOptimizerAttribute

Determines which subtype of DiffOpt.AbstractModel to use for differentiation. When set to nothing, the first one out of model.model_constructors that supports the problem is used.

source
DiffOpt.ObjectiveDualStartType
struct ObjectiveDualStart <: MOI.AbstractModelAttribute end

If the objective function had a dual, it would be -1 for the Lagrangian function to be the same. When the MOI.Bridges.Objective.SlackBridge is used, it creates a constraint. The dual of this constraint is therefore -1 as well. When setting this attribute, it allows to set the constraint dual of this constraint.

source
DiffOpt.ObjectiveFunctionAttributeType
struct ObjectiveFunctionAttribute{A,F} <: MOI.AbstractModelAttribute
+end

Represents the function x' * terms * x / 2 + affine as an MOI.AbstractScalarFunction where x[i] = MOI.VariableIndex(i). Use standard_form to convert it to a MOI.ScalarQuadraticFunction{T}.

source
DiffOpt.MatrixVectorAffineFunctionType
MatrixVectorAffineFunction{T, VT} <: MOI.AbstractVectorFunction

Represents the function terms * x + constant as an MOI.AbstractVectorFunction where x[i] = MOI.VariableIndex(i). Use standard_form to convert it to a MOI.VectorAffineFunction{T}.

source
DiffOpt.ModelConstructorType
ModelConstructor <: MOI.AbstractOptimizerAttribute

Determines which subtype of DiffOpt.AbstractModel to use for differentiation. When set to nothing, the first one out of model.model_constructors that supports the problem is used.

source
DiffOpt.ObjectiveFunctionAttributeType
struct ObjectiveFunctionAttribute{A,F} <: MOI.AbstractModelAttribute
     attr::A
-end

Objective function attribute attr for the function type F. The type F is used by a MOI.Bridges.AbstractBridgeOptimizer to keep track of its position in a chain of objective bridges.

source
DiffOpt.ObjectiveSlackGapPrimalStartType
struct ObjectiveSlackGapPrimalStart <: MOI.AbstractModelAttribute end

If the objective function had a dual, it would be -1 for the Lagrangian function to be the same. When the MOI.Bridges.Objective.SlackBridge is used, it creates a constraint. The dual of this constraint is therefore -1 as well. When setting this attribute, it allows to set the constraint dual of this constraint.

source
DiffOpt.ProductOfSetsType
ProductOfSets{T} <: MOI.Utilities.OrderedProductOfSets{T}

The MOI.Utilities.@product_of_sets macro requires knowing the list of sets at compile time. In DiffOpt however, the list depends on what the user is going to use as a set, as DiffOpt supports any set as long as it implements the required functions of MathOptSetDistances. For this type, the list of sets can be given at run-time.

source
DiffOpt.ReverseConstraintFunctionType
ReverseConstraintFunction

An MOI.AbstractConstraintAttribute to get output data to reverse differentiation, that is, problem input data.

For instance, if the following returns x + 2y + 5, it means that the tangent has coordinate 1 for the coefficient of x, coordinate 2 for the coefficient of y and 5 for the function constant. If the constraint is of the form func == constant or func <= constant, the tangent for the constant on the right-hand side is -5.

MOI.get(model, DiffOpt.ReverseConstraintFunction(), ci)
source
DiffOpt.ReverseObjectiveFunctionType
ReverseObjectiveFunction <: MOI.AbstractModelAttribute

A MOI.AbstractModelAttribute to get output data to reverse differentiation, that is, problem input data.

For instance, to get the tangent of the objective function corresponding to the tangent given to ReverseVariablePrimal, do the following:

func = MOI.get(model, DiffOpt.ReverseObjectiveFunction())

Then, to get the sensitivity of the linear term with variable x, do

JuMP.coefficient(func, x)

To get the sensitivity with respect to the quadratic term with variables x and y, do either

JuMP.coefficient(func, x, y)

or

DiffOpt.quad_sym_half(func, x, y)
Warning

These two lines are not equivalent in case x == y, see quad_sym_half for the details on the difference between these two functions.

source
DiffOpt.ReverseVariablePrimalType
ReverseVariablePrimal <: MOI.AbstractVariableAttribute

A MOI.AbstractVariableAttribute to set input data to reverse differentiation, that is, problem solution.

For instance, to set the tangent of the variable of index vi, do the following:

MOI.set(model, DiffOpt.ReverseVariablePrimal(), x)
source
DiffOpt.SparseVectorAffineFunctionType
struct SparseVectorAffineFunction{T} <: MOI.AbstractVectorFunction
+end

Objective function attribute attr for the function type F. The type F is used by a MOI.Bridges.AbstractBridgeOptimizer to keep track of its position in a chain of objective bridges.

source
DiffOpt.ProductOfSetsType
ProductOfSets{T} <: MOI.Utilities.OrderedProductOfSets{T}

The MOI.Utilities.@product_of_sets macro requires knowing the list of sets at compile time. In DiffOpt however, the list depends on what the user is going to use as a set, as DiffOpt supports any set as long as it implements the required functions of MathOptSetDistances. For this type, the list of sets can be given at run-time.

source
DiffOpt.ReverseConstraintFunctionType
ReverseConstraintFunction

An MOI.AbstractConstraintAttribute to get output data to reverse differentiation, that is, problem input data.

For instance, if the following returns x + 2y + 5, it means that the tangent has coordinate 1 for the coefficient of x, coordinate 2 for the coefficient of y and 5 for the function constant. If the constraint is of the form func == constant or func <= constant, the tangent for the constant on the right-hand side is -5.

MOI.get(model, DiffOpt.ReverseConstraintFunction(), ci)
source
DiffOpt.ReverseObjectiveFunctionType
ReverseObjectiveFunction <: MOI.AbstractModelAttribute

A MOI.AbstractModelAttribute to get output data to reverse differentiation, that is, problem input data.

For instance, to get the tangent of the objective function corresponding to the tangent given to ReverseVariablePrimal, do the following:

func = MOI.get(model, DiffOpt.ReverseObjectiveFunction())

Then, to get the sensitivity of the linear term with variable x, do

JuMP.coefficient(func, x)

To get the sensitivity with respect to the quadratic term with variables x and y, do either

JuMP.coefficient(func, x, y)

or

DiffOpt.quad_sym_half(func, x, y)
Warning

These two lines are not equivalent in case x == y, see quad_sym_half for the details on the difference between these two functions.

source
DiffOpt.ReverseVariablePrimalType
ReverseVariablePrimal <: MOI.AbstractVariableAttribute

A MOI.AbstractVariableAttribute to set input data to reverse differentiation, that is, problem solution.

For instance, to set the tangent of the variable of index vi, do the following:

MOI.set(model, DiffOpt.ReverseVariablePrimal(), x)
source
DiffOpt.SparseVectorAffineFunctionType
struct SparseVectorAffineFunction{T} <: MOI.AbstractVectorFunction
     terms::SparseArrays.SparseMatrixCSC{T,Int}
     constants::Vector{T}
 end

The vector-valued affine function $A x + b$, where:

  • $A$ is the sparse matrix given by terms
  • $b$ is the vector constants
source
DiffOpt.VectorScalarAffineFunctionType
VectorScalarAffineFunction{T, VT} <: MOI.AbstractScalarFunction

Represents the function x ⋅ terms + constant as an MOI.AbstractScalarFunction where x[i] = MOI.VariableIndex(i). Use standard_form to convert it to a MOI.ScalarAffineFunction{T}.

source
DiffOpt.DπMethod
Dπ(v::Vector{Float64}, model, cones::ProductOfSets)

Given a model, its cones, find the gradient of the projection of the vectors v of length equal to the number of rows in the conic form onto the cartesian product of the cones corresponding to these rows. For more info, refer to https://github.com/matbesancon/MathOptSetDistances.jl

source
DiffOpt.dU_from_dQ!Method
dU_from_dQ!(dQ, U)

Return the solution dU of the matrix equation dQ = dU' * U + U' * dU where dQ and U are the two argument of the function.

This function overwrites the first argument dQ to store the solution. The matrix U is not however modified.

The matrix dQ is assumed to be symmetric and the matrix U is assumed to be upper triangular.

We can exploit the structure of U here:

  • If the factorization was obtained from SVD, U would be orthogonal
  • If the factorization was obtained from Cholesky, U would be upper triangular.

The MOI bridge uses Cholesky in order to exploit sparsity so we are in the second case.

We look for an upper triangular dU as well.

We can find each column of dU by solving a triangular linear system once the previous columns have been found. Indeed, let dj be the jth column of dU. Then dU' * U = vcat(dj' * U for j in axes(U, 2)). Therefore, dQ[j, 1:j] = dj' * U[:, 1:j] + U[:, j]' * dU[:, 1:j]. So dQ[j, 1:(j-1)] - U[:, j]' * dU[:, 1:(j-1)] = dj' * U[:, 1:(j-1)] and dQ[j, j] / 2 = dj' * U[:, j].

source
DiffOpt.diff_optimizerMethod
diff_optimizer(optimizer_constructor)::Optimizer

Creates a DiffOpt.Optimizer, which is an MOI layer with an internal optimizer and other utility methods. Results (primal, dual and slack values) are obtained by querying the internal optimizer instantiated using the optimizer_constructor. These values are required for finding Jacobians with respect to problem data.

One defines a differentiable model using any solver of choice. Example:

julia> import DiffOpt, HiGHS
 
 julia> model = DiffOpt.diff_optimizer(HiGHS.Optimizer)
 julia> x = model.add_variable(model)
-julia> model.add_constraint(model, ...)
source
DiffOpt.map_rowsMethod
map_rows(f::Function, model, cones::ProductOfSets, map_mode::Union{Nested{T}, Flattened{T}})

Given a model, its cones and map_mode of type Nested (resp. Flattened), return a Vector{T} of length equal to the number of cones (resp. rows) in the conic form where the value for the index (resp. rows) corresponding to each cone is equal to f(ci, r) where ci is the corresponding constraint index in model and r is a UnitRange of the corresponding rows in the conic form.

source
DiffOpt.quad_sym_halfFunction
quad_sym_half(func, vi1::MOI.VariableIndex, vi2::MOI.VariableIndex)

Return Q[i,j] = Q[j,i] where the quadratic terms of func is represented by x' Q x / 2 for a symmetric matrix Q where x[i] = vi1 and x[j] = vi2. Note that while this is equal to JuMP.coefficient(func, vi1, vi2) if vi1 != vi2, in the case vi1 == vi2, it is rather equal to 2JuMP.coefficient(func, vi1, vi2).

source
DiffOpt.standard_formFunction
standard_form(func::AbstractLazyScalarFunction)

Converts func to a standard MOI scalar function.

standard_form(func::MOItoJuMP)

Converts func to a standard JuMP scalar function.

source
DiffOpt.ΔQ_from_ΔU!Method
ΔQ_from_ΔU!(ΔU, U)

Return the symmetric solution ΔQ of the matrix equation triu(ΔU) = 2triu(U * ΔQ) where ΔU and U are the two argument of the function.

This function overwrites the first argument ΔU to store the solution. The matrix U is not however modified.

The matrix U is assumed to be upper triangular.

We can exploit the structure of U here:

  • If the factorization was obtained from SVD, U would be orthogonal
  • If the factorization was obtained from Cholesky, U would be upper triangular.

The MOI bridge uses Cholesky in order to exploit sparsity so we are in the second case.

We can find each column of ΔQ by solving a triangular linear system.

source
DiffOpt.πMethod
π(v::Vector{Float64}, model::MOI.ModelLike, cones::ProductOfSets)

Given a model, its cones, find the projection of the vectors v of length equal to the number of rows in the conic form onto the cartesian product of the cones corresponding to these rows. For more info, refer to https://github.com/matbesancon/MathOptSetDistances.jl

source
DiffOpt.QuadraticProgram.ModelType
DiffOpt.QuadraticProgram.Model <: DiffOpt.AbstractModel

Model to differentiate quadratic programs.

For the reverse differentiation, it differentiates the optimal solution z and return product of jacobian matrices (dz / dQ, dz / dq, etc) with the backward pass vector dl / dz

The method computes the product of

  1. jacobian of problem solution z* with respect to problem parameters set with the DiffOpt.ReverseVariablePrimal
  2. a backward pass vector dl / dz, where l can be a loss function

Note that this method does not return the actual Jacobians.

For more info refer eqn(7) and eqn(8) of https://arxiv.org/pdf/1703.00443.pdf

source
DiffOpt.ConicProgram.ModelType
DiffOpt.ConicProgram.Model <: DiffOpt.AbstractModel

Model to differentiate conic programs.

The forward differentiation computes the product of the derivative (Jacobian) at the conic program parameters A, b, c to the perturbations dA, db, dc.

The reverse differentiation computes the product of the transpose of the derivative (Jacobian) at the conic program parameters A, b, c to the perturbations dx, dy, ds.

For theoretical background, refer Section 3 of Differentiating Through a Cone Program, https://arxiv.org/abs/1904.09043

source
+julia> model.add_constraint(model, ...)
source
DiffOpt.map_rowsMethod
map_rows(f::Function, model, cones::ProductOfSets, map_mode::Union{Nested{T}, Flattened{T}})

Given a model, its cones and map_mode of type Nested (resp. Flattened), return a Vector{T} of length equal to the number of cones (resp. rows) in the conic form where the value for the index (resp. rows) corresponding to each cone is equal to f(ci, r) where ci is the corresponding constraint index in model and r is a UnitRange of the corresponding rows in the conic form.

source
DiffOpt.quad_sym_halfFunction
quad_sym_half(func, vi1::MOI.VariableIndex, vi2::MOI.VariableIndex)

Return Q[i,j] = Q[j,i] where the quadratic terms of func is represented by x' Q x / 2 for a symmetric matrix Q where x[i] = vi1 and x[j] = vi2. Note that while this is equal to JuMP.coefficient(func, vi1, vi2) if vi1 != vi2, in the case vi1 == vi2, it is rather equal to 2JuMP.coefficient(func, vi1, vi2).

source
DiffOpt.standard_formFunction
standard_form(func::AbstractLazyScalarFunction)

Converts func to a standard MOI scalar function.

standard_form(func::MOItoJuMP)

Converts func to a standard JuMP scalar function.

source
DiffOpt.ΔQ_from_ΔU!Method
ΔQ_from_ΔU!(ΔU, U)

Return the symmetric solution ΔQ of the matrix equation triu(ΔU) = 2triu(U * ΔQ) where ΔU and U are the two argument of the function.

This function overwrites the first argument ΔU to store the solution. The matrix U is not however modified.

The matrix U is assumed to be upper triangular.

We can exploit the structure of U here:

  • If the factorization was obtained from SVD, U would be orthogonal
  • If the factorization was obtained from Cholesky, U would be upper triangular.

The MOI bridge uses Cholesky in order to exploit sparsity so we are in the second case.

We can find each column of ΔQ by solving a triangular linear system.

source
DiffOpt.πMethod
π(v::Vector{Float64}, model::MOI.ModelLike, cones::ProductOfSets)

Given a model, its cones, find the projection of the vectors v of length equal to the number of rows in the conic form onto the cartesian product of the cones corresponding to these rows. For more info, refer to https://github.com/matbesancon/MathOptSetDistances.jl

source
DiffOpt.QuadraticProgram.ModelType
DiffOpt.QuadraticProgram.Model <: DiffOpt.AbstractModel

Model to differentiate quadratic programs.

For the reverse differentiation, it differentiates the optimal solution z and return product of jacobian matrices (dz / dQ, dz / dq, etc) with the backward pass vector dl / dz

The method computes the product of

  1. jacobian of problem solution z* with respect to problem parameters set with the DiffOpt.ReverseVariablePrimal
  2. a backward pass vector dl / dz, where l can be a loss function

Note that this method does not return the actual Jacobians.

For more info refer eqn(7) and eqn(8) of https://arxiv.org/pdf/1703.00443.pdf

source
DiffOpt.ConicProgram.ModelType
DiffOpt.ConicProgram.Model <: DiffOpt.AbstractModel

Model to differentiate conic programs.

The forward differentiation computes the product of the derivative (Jacobian) at the conic program parameters A, b, c to the perturbations dA, db, dc.

The reverse differentiation computes the product of the transpose of the derivative (Jacobian) at the conic program parameters A, b, c to the perturbations dx, dy, ds.

For theoretical background, refer Section 3 of Differentiating Through a Cone Program, https://arxiv.org/abs/1904.09043

source
diff --git a/dev/search/index.html b/dev/search/index.html index 2e1587c3..6ab18d58 100644 --- a/dev/search/index.html +++ b/dev/search/index.html @@ -1,2 +1,2 @@ -Search · DiffOpt.jl +Search · DiffOpt.jl diff --git a/dev/search_index.js b/dev/search_index.js index 0b75600e..ae5e7c6d 100644 --- a/dev/search_index.js +++ b/dev/search_index.js @@ -1,3 +1,3 @@ var documenterSearchIndex = {"docs": -[{"location":"examples/sensitivity-analysis-ridge/","page":"Sensitivity Analysis of Ridge Regression","title":"Sensitivity Analysis of Ridge Regression","text":"EditURL = \"sensitivity-analysis-ridge.jl\"","category":"page"},{"location":"examples/sensitivity-analysis-ridge/#Sensitivity-Analysis-of-Ridge-Regression","page":"Sensitivity Analysis of Ridge Regression","title":"Sensitivity Analysis of Ridge Regression","text":"","category":"section"},{"location":"examples/sensitivity-analysis-ridge/","page":"Sensitivity Analysis of Ridge Regression","title":"Sensitivity Analysis of Ridge Regression","text":"(Image: )","category":"page"},{"location":"examples/sensitivity-analysis-ridge/","page":"Sensitivity Analysis of Ridge Regression","title":"Sensitivity Analysis of Ridge Regression","text":"This example illustrates the sensitivity analysis of data points in a Ridge Regression problem. 
The general form of the problem is given below:","category":"page"},{"location":"examples/sensitivity-analysis-ridge/","page":"Sensitivity Analysis of Ridge Regression","title":"Sensitivity Analysis of Ridge Regression","text":"beginsplit\nbeginarray ll\nmboxminimize sum_i=1^N (y_i - w x_i - b)^2 + alpha (w^2 + b^2) \nendarray\nendsplit","category":"page"},{"location":"examples/sensitivity-analysis-ridge/","page":"Sensitivity Analysis of Ridge Regression","title":"Sensitivity Analysis of Ridge Regression","text":"where","category":"page"},{"location":"examples/sensitivity-analysis-ridge/","page":"Sensitivity Analysis of Ridge Regression","title":"Sensitivity Analysis of Ridge Regression","text":"w, b are slope and intercept of the regressing line\nx, y are the N data points\nα is the regularization constant","category":"page"},{"location":"examples/sensitivity-analysis-ridge/","page":"Sensitivity Analysis of Ridge Regression","title":"Sensitivity Analysis of Ridge Regression","text":"which is equivalent to:","category":"page"},{"location":"examples/sensitivity-analysis-ridge/","page":"Sensitivity Analysis of Ridge Regression","title":"Sensitivity Analysis of Ridge Regression","text":"beginsplit\nbeginarray ll\nmboxminimize e^tope + alpha (w^2) \nmboxst e_i = y_i - w x_i - b quad quad i=1N \nendarray\nendsplit","category":"page"},{"location":"examples/sensitivity-analysis-ridge/","page":"Sensitivity Analysis of Ridge Regression","title":"Sensitivity Analysis of Ridge Regression","text":"This tutorial uses the following packages","category":"page"},{"location":"examples/sensitivity-analysis-ridge/","page":"Sensitivity Analysis of Ridge Regression","title":"Sensitivity Analysis of Ridge Regression","text":"using JuMP\nimport DiffOpt\nimport Random\nimport Ipopt\nimport Plots\nusing LinearAlgebra: dot","category":"page"},{"location":"examples/sensitivity-analysis-ridge/#Define-and-solve-the-problem","page":"Sensitivity Analysis of Ridge Regression","title":"Define and 
solve the problem","text":"","category":"section"},{"location":"examples/sensitivity-analysis-ridge/","page":"Sensitivity Analysis of Ridge Regression","title":"Sensitivity Analysis of Ridge Regression","text":"Construct a set of noisy (guassian) data points around a line.","category":"page"},{"location":"examples/sensitivity-analysis-ridge/","page":"Sensitivity Analysis of Ridge Regression","title":"Sensitivity Analysis of Ridge Regression","text":"Random.seed!(42)\n\nN = 150\n\nw = 2 * abs(randn())\nb = rand()\nX = randn(N)\nY = w * X .+ b + 0.8 * randn(N);\nnothing #hide","category":"page"},{"location":"examples/sensitivity-analysis-ridge/","page":"Sensitivity Analysis of Ridge Regression","title":"Sensitivity Analysis of Ridge Regression","text":"The helper method fit_ridge defines and solves the corresponding model. The ridge regression is modeled with quadratic programming (quadratic objective and linear constraints) and solved in generic methods of Ipopt. This is not the standard way of solving the ridge regression problem this is done here for didactic purposes.","category":"page"},{"location":"examples/sensitivity-analysis-ridge/","page":"Sensitivity Analysis of Ridge Regression","title":"Sensitivity Analysis of Ridge Regression","text":"function fit_ridge(X, Y, alpha = 0.1)\n N = length(Y)\n # Initialize a JuMP Model with Ipopt solver\n model = Model(() -> DiffOpt.diff_optimizer(Ipopt.Optimizer))\n set_silent(model)\n @variable(model, w) # angular coefficient\n @variable(model, b) # linear coefficient\n # expression defining approximation error\n @expression(model, e[i = 1:N], Y[i] - w * X[i] - b)\n # objective minimizing squared error and ridge penalty\n @objective(model, Min, 1 / N * dot(e, e) + alpha * (w^2))\n optimize!(model)\n return model, w, b # return model & variables\nend","category":"page"},{"location":"examples/sensitivity-analysis-ridge/","page":"Sensitivity Analysis of Ridge Regression","title":"Sensitivity Analysis of Ridge 
Regression","text":"Plot the data points and the fitted line for different alpha values","category":"page"},{"location":"examples/sensitivity-analysis-ridge/","page":"Sensitivity Analysis of Ridge Regression","title":"Sensitivity Analysis of Ridge Regression","text":"p = Plots.scatter(X, Y; label = nothing, legend = :topleft)\nmi, ma = minimum(X), maximum(X)\nPlots.title!(\"Fitted lines and points\")\n\nfor alpha in 0.5:0.5:1.5\n local model, w, b = fit_ridge(X, Y, alpha)\n ŵ = value(w)\n b̂ = value(b)\n Plots.plot!(\n p,\n [mi, ma],\n [mi * ŵ + b̂, ma * ŵ + b̂];\n label = \"alpha=$alpha\",\n width = 2,\n )\nend\np","category":"page"},{"location":"examples/sensitivity-analysis-ridge/#Differentiate","page":"Sensitivity Analysis of Ridge Regression","title":"Differentiate","text":"","category":"section"},{"location":"examples/sensitivity-analysis-ridge/","page":"Sensitivity Analysis of Ridge Regression","title":"Sensitivity Analysis of Ridge Regression","text":"Now that we've solved the problem, we can compute the sensitivity of optimal values of the slope w with respect to perturbations in the data points (x,y).","category":"page"},{"location":"examples/sensitivity-analysis-ridge/","page":"Sensitivity Analysis of Ridge Regression","title":"Sensitivity Analysis of Ridge Regression","text":"alpha = 0.4\nmodel, w, b = fit_ridge(X, Y, alpha)\nŵ = value(w)\nb̂ = value(b)","category":"page"},{"location":"examples/sensitivity-analysis-ridge/","page":"Sensitivity Analysis of Ridge Regression","title":"Sensitivity Analysis of Ridge Regression","text":"We first compute sensitivity of the slope with respect to a perturbation of the independent variable x.","category":"page"},{"location":"examples/sensitivity-analysis-ridge/","page":"Sensitivity Analysis of Ridge Regression","title":"Sensitivity Analysis of Ridge Regression","text":"Recalling that the points (x_i y_i) appear in the objective function as: (yi - b - w*xi)^2, the DiffOpt.ForwardObjectiveFunction attribute must be 
set accordingly, with the terms multiplying the parameter in the objective. When considering the perturbation of a parameter θ, DiffOpt.ForwardObjectiveFunction() takes in the expression in the objective that multiplies θ. If θ appears with a quadratic and a linear form: θ^2 a x + θ b y, then the expression to pass to ForwardObjectiveFunction is 2θ a x + b y.","category":"page"},{"location":"examples/sensitivity-analysis-ridge/","page":"Sensitivity Analysis of Ridge Regression","title":"Sensitivity Analysis of Ridge Regression","text":"Sensitivity with respect to x and y","category":"page"},{"location":"examples/sensitivity-analysis-ridge/","page":"Sensitivity Analysis of Ridge Regression","title":"Sensitivity Analysis of Ridge Regression","text":"∇y = zero(X)\n∇x = zero(X)\nfor i in 1:N\n MOI.set(\n model,\n DiffOpt.ForwardObjectiveFunction(),\n 2w^2 * X[i] + 2b * w - 2 * w * Y[i],\n )\n DiffOpt.forward_differentiate!(model)\n ∇x[i] = MOI.get(model, DiffOpt.ForwardVariablePrimal(), w)\n MOI.set(model, DiffOpt.ForwardObjectiveFunction(), (2Y[i] - 2b - 2w * X[i]))\n DiffOpt.forward_differentiate!(model)\n ∇y[i] = MOI.get(model, DiffOpt.ForwardVariablePrimal(), w)\nend","category":"page"},{"location":"examples/sensitivity-analysis-ridge/","page":"Sensitivity Analysis of Ridge Regression","title":"Sensitivity Analysis of Ridge Regression","text":"Visualize point sensitivities with respect to regression points.","category":"page"},{"location":"examples/sensitivity-analysis-ridge/","page":"Sensitivity Analysis of Ridge Regression","title":"Sensitivity Analysis of Ridge Regression","text":"p = Plots.scatter(\n X,\n Y;\n color = [dw < 0 ? 
:blue : :red for dw in ∇x],\n markersize = [5 * abs(dw) + 1.2 for dw in ∇x],\n label = \"\",\n)\nmi, ma = minimum(X), maximum(X)\nPlots.plot!(\n p,\n [mi, ma],\n [mi * ŵ + b̂, ma * ŵ + b̂];\n color = :blue,\n label = \"\",\n)\nPlots.title!(\"Regression slope sensitivity with respect to x\")","category":"page"},{"location":"examples/sensitivity-analysis-ridge/","page":"Sensitivity Analysis of Ridge Regression","title":"Sensitivity Analysis of Ridge Regression","text":"p = Plots.scatter(\n X,\n Y;\n color = [dw < 0 ? :blue : :red for dw in ∇y],\n markersize = [5 * abs(dw) + 1.2 for dw in ∇y],\n label = \"\",\n)\nmi, ma = minimum(X), maximum(X)\nPlots.plot!(\n p,\n [mi, ma],\n [mi * ŵ + b̂, ma * ŵ + b̂];\n color = :blue,\n label = \"\",\n)\nPlots.title!(\"Regression slope sensitivity with respect to y\")","category":"page"},{"location":"examples/sensitivity-analysis-ridge/","page":"Sensitivity Analysis of Ridge Regression","title":"Sensitivity Analysis of Ridge Regression","text":"Note the points with less central x values induce a greater y sensitivity of the slope.","category":"page"},{"location":"examples/sensitivity-analysis-ridge/","page":"Sensitivity Analysis of Ridge Regression","title":"Sensitivity Analysis of Ridge Regression","text":"","category":"page"},{"location":"examples/sensitivity-analysis-ridge/","page":"Sensitivity Analysis of Ridge Regression","title":"Sensitivity Analysis of Ridge Regression","text":"This page was generated using Literate.jl.","category":"page"},{"location":"intro/#Introduction","page":"Introduction","title":"Introduction","text":"","category":"section"},{"location":"intro/","page":"Introduction","title":"Introduction","text":"An optimization problem is the problem of finding the best solution from all feasible solutions. 
The standard form of an optimization problem is ","category":"page"},{"location":"intro/","page":"Introduction","title":"Introduction","text":"beginaligned\nunderset xoperatorname minimize f(x)operatorname subjectto g_i(x)leq 0quad i=1dots mh_j(x)=0quad j=1dots p\nendaligned","category":"page"},{"location":"intro/","page":"Introduction","title":"Introduction","text":"Note that finding solution to most of the optimization problems is computationally intractable. Here we consider a subset of those problems called convex optimization problems, which admit polynomial time solutions. The standard form of a convex optimization problem is ","category":"page"},{"location":"intro/","page":"Introduction","title":"Introduction","text":"beginaligned\nunderset xoperatorname minimize f(x)operatorname subjectto g_i(x)leq 0quad i=1dots mA x = b\nendaligned","category":"page"},{"location":"intro/","page":"Introduction","title":"Introduction","text":"where f and g_i are convex functions.","category":"page"},{"location":"intro/#Parameterized-problems","page":"Introduction","title":"Parameterized problems","text":"","category":"section"},{"location":"intro/","page":"Introduction","title":"Introduction","text":"In practice, convex optimization problems include parameters, apart from the decision variables, which determines the structure of the problem itself i.e. the objective function and constraints. Hence they affect the solution too. A general form of a parameterized convex optimization problem is ","category":"page"},{"location":"intro/","page":"Introduction","title":"Introduction","text":"beginaligned\nunderset xoperatorname minimize f(x theta)operatorname subjectto g_i(x theta)leq 0quad i=1dots mA(theta) x = b(theta)\nendaligned","category":"page"},{"location":"intro/","page":"Introduction","title":"Introduction","text":"where theta is the parameter. 
In different fields, these parameters go by different names:","category":"page"},{"location":"intro/","page":"Introduction","title":"Introduction","text":"Hyperparameters in machine learning\nRisk aversion or other backtesing parameters in financial modelling\nParameterized systems in control theory","category":"page"},{"location":"intro/#What-do-we-mean-by-differentiating-a-parameterized-optimization-program?-Why-do-we-need-it?","page":"Introduction","title":"What do we mean by differentiating a parameterized optimization program? Why do we need it?","text":"","category":"section"},{"location":"intro/","page":"Introduction","title":"Introduction","text":"Often, parameters are chosen and tuned by hand - an iterative process - and the structure of the problem is crafted manually. But it is possible to do an automatic gradient based tuning of parameters.","category":"page"},{"location":"intro/","page":"Introduction","title":"Introduction","text":"Consider solution of the parametrized optimization problem, x(theta),","category":"page"},{"location":"intro/","page":"Introduction","title":"Introduction","text":"beginsplit\nbeginarray lll\nx^*(theta)= underset xoperatorname argmin f(x theta)\n operatorname subjectto g_i(x theta)leq 0quad i=1dots m\n A(theta) x = b(theta)\nendarray\nendsplit","category":"page"},{"location":"intro/","page":"Introduction","title":"Introduction","text":"which is the input of l(x^*(theta)), a loss function. Our goal is to choose the best parameter theta so that l is optimized. Here, l(x^*(theta)) is the objective function and theta is the decision variable. 
In order to apply a gradient-based strategy to this problem, we need to differentiate l with respect to theta.","category":"page"},{"location":"intro/","page":"Introduction","title":"Introduction","text":"fracpartial l(x^*(theta))partial theta = fracpartial l(x^*(theta))partial x^*(theta) fracpartial x^*(theta)partial theta","category":"page"},{"location":"intro/","page":"Introduction","title":"Introduction","text":"By implicit function theorem, this translates to differentiating the program data, i.e. functions f, g_i(x) and matrices A, b, with respect to theta.","category":"page"},{"location":"intro/","page":"Introduction","title":"Introduction","text":"This is can be achieved in two steps or passes:","category":"page"},{"location":"intro/","page":"Introduction","title":"Introduction","text":"Forward pass - Given an initial value of theta, solves the optimization problem to find x^*(theta)\nBackward pass - Given x^*, differentiate and find fracpartial x^*(theta)partial theta","category":"page"},{"location":"manual/#Manual","page":"Manual","title":"Manual","text":"","category":"section"},{"location":"manual/","page":"Manual","title":"Manual","text":"note: Note\nAs of now, this package only works for optimization models that can be written either in convex conic form or convex quadratic form.","category":"page"},{"location":"manual/#Supported-objectives-and-constraints-scheme-1","page":"Manual","title":"Supported objectives & constraints - scheme 1","text":"","category":"section"},{"location":"manual/","page":"Manual","title":"Manual","text":"For QPTH/OPTNET style backend, the package supports following Function-in-Set constraints: ","category":"page"},{"location":"manual/","page":"Manual","title":"Manual","text":"MOI Function MOI Set\nVariableIndex GreaterThan\nVariableIndex LessThan\nVariableIndex EqualTo\nScalarAffineFunction GreaterThan\nScalarAffineFunction LessThan\nScalarAffineFunction 
EqualTo","category":"page"},{"location":"manual/","page":"Manual","title":"Manual","text":"and the following objective types: ","category":"page"},{"location":"manual/","page":"Manual","title":"Manual","text":"MOI Function\nVariableIndex\nScalarAffineFunction\nScalarQuadraticFunction","category":"page"},{"location":"manual/#Supported-objectives-and-constraints-scheme-2","page":"Manual","title":"Supported objectives & constraints - scheme 2","text":"","category":"section"},{"location":"manual/","page":"Manual","title":"Manual","text":"For DiffCP/CVXPY style backend, the package supports following Function-in-Set constraints: ","category":"page"},{"location":"manual/","page":"Manual","title":"Manual","text":"MOI Function MOI Set\nVectorOfVariables Nonnegatives\nVectorOfVariables Nonpositives\nVectorOfVariables Zeros\nVectorOfVariables SecondOrderCone\nVectorOfVariables PositiveSemidefiniteConeTriangle\nVectorAffineFunction Nonnegatives\nVectorAffineFunction Nonpositives\nVectorAffineFunction Zeros\nVectorAffineFunction SecondOrderCone\nVectorAffineFunction PositiveSemidefiniteConeTriangle","category":"page"},{"location":"manual/","page":"Manual","title":"Manual","text":"and the following objective types: ","category":"page"},{"location":"manual/","page":"Manual","title":"Manual","text":"MOI Function\nVariableIndex\nScalarAffineFunction","category":"page"},{"location":"manual/#Creating-a-differentiable-optimizer","page":"Manual","title":"Creating a differentiable optimizer","text":"","category":"section"},{"location":"manual/","page":"Manual","title":"Manual","text":"You can create a differentiable optimizer over an existing MOI solver by using the diff_optimizer utility. 
","category":"page"},{"location":"manual/","page":"Manual","title":"Manual","text":"diff_optimizer","category":"page"},{"location":"manual/#DiffOpt.diff_optimizer","page":"Manual","title":"DiffOpt.diff_optimizer","text":"diff_optimizer(optimizer_constructor)::Optimizer\n\nCreates a DiffOpt.Optimizer, which is an MOI layer with an internal optimizer and other utility methods. Results (primal, dual and slack values) are obtained by querying the internal optimizer instantiated using the optimizer_constructor. These values are required for find jacobians with respect to problem data.\n\nOne define a differentiable model by using any solver of choice. Example:\n\njulia> import DiffOpt, HiGHS\n\njulia> model = DiffOpt.diff_optimizer(HiGHS.Optimizer)\njulia> x = model.add_variable(model)\njulia> model.add_constraint(model, ...)\n\n\n\n\n\n","category":"function"},{"location":"manual/#Adding-new-sets-and-constraints","page":"Manual","title":"Adding new sets and constraints","text":"","category":"section"},{"location":"manual/","page":"Manual","title":"Manual","text":"The DiffOpt Optimizer behaves similarly to other MOI Optimizers and implements the MOI.AbstractOptimizer API.","category":"page"},{"location":"manual/#Projections-on-cone-sets","page":"Manual","title":"Projections on cone sets","text":"","category":"section"},{"location":"manual/","page":"Manual","title":"Manual","text":"DiffOpt requires taking projections and finding projection gradients of vectors while computing the jacobians. 
For this purpose, we use MathOptSetDistances.jl, which is a dedicated package for computing set distances, projections and projection gradients.","category":"page"},{"location":"manual/#Conic-problem-formulation","page":"Manual","title":"Conic problem formulation","text":"","category":"section"},{"location":"manual/","page":"Manual","title":"Manual","text":"note: Note\nAs of now, the package is using SCS geometric form for affine expressions in cones.","category":"page"},{"location":"manual/","page":"Manual","title":"Manual","text":"Consider a convex conic optimization problem in its primal (P) and dual (D) forms:","category":"page"},{"location":"manual/","page":"Manual","title":"Manual","text":"beginsplit\nbeginarray llcc\ntextbfPrimal Problem textbfDual Problem \nmboxminimize c^T x quad quad mboxminimize b^T y \nmboxsubject to A x + s = b quad quad mboxsubject to A^T y + c = 0 \n s in mathcalK y in mathcalK^*\nendarray\nendsplit","category":"page"},{"location":"manual/","page":"Manual","title":"Manual","text":"where","category":"page"},{"location":"manual/","page":"Manual","title":"Manual","text":"x in R^n is the primal variable, y in R^m is the dual variable, and s in R^m is the primal slack","category":"page"},{"location":"manual/","page":"Manual","title":"Manual","text":"variable","category":"page"},{"location":"manual/","page":"Manual","title":"Manual","text":"mathcalK subseteq R^m is a closed convex cone and mathcalK^* subseteq R^m is the corresponding dual cone","category":"page"},{"location":"manual/","page":"Manual","title":"Manual","text":"variable","category":"page"},{"location":"manual/","page":"Manual","title":"Manual","text":"A in R^m times n, b in R^m, c in R^n are problem data","category":"page"},{"location":"manual/","page":"Manual","title":"Manual","text":"In the light of above, DiffOpt differentiates program variables x, s, y w.r.t pertubations/sensivities in problem data i.e. dA, db, dc. 
This is achieved via implicit differentiation and matrix differential calculus.","category":"page"},{"location":"manual/","page":"Manual","title":"Manual","text":"Note that the primal (P) and dual (D) are self-duals of each other. Similarly, for the constraints we support, mathcalK is same in format as mathcalK^*.","category":"page"},{"location":"manual/#Reference-articles","page":"Manual","title":"Reference articles","text":"","category":"section"},{"location":"manual/","page":"Manual","title":"Manual","text":"Differentiating Through a Cone Program - Akshay Agrawal, Shane Barratt, Stephen Boyd, Enzo Busseti, Walaa M. Moursi, 2019\nA fast and differentiable QP solver for PyTorch. Crafted by Brandon Amos and J. Zico Kolter.\nOptNet: Differentiable Optimization as a Layer in Neural Networks","category":"page"},{"location":"manual/#Backward-Pass-vector","page":"Manual","title":"Backward Pass vector","text":"","category":"section"},{"location":"manual/","page":"Manual","title":"Manual","text":"One possible point of confusion in finding Jacobians is the role of the backward pass vector - above eqn (7), OptNet: Differentiable Optimization as a Layer in Neural Networks. While differentiating convex programs, it is often the case that we don't want to find the actual derivatives, rather we might be interested in computing the product of Jacobians with a backward pass vector, often used in backprop in machine learning/automatic differentiation. This is what happens in scheme 1 of DiffOpt backend.","category":"page"},{"location":"manual/","page":"Manual","title":"Manual","text":"But, for the conic system (scheme 2), we provide perturbations in conic data (dA, db, dc) to compute pertubations (dx, dy, dz) in input variables. Unlike the quadratic case, these perturbations are actual derivatives, not the product with a backward pass vector. 
This is an important distinction between the two schemes of differential optimization.","category":"page"},{"location":"examples/sensitivity-analysis-svm/","page":"Sensitivity Analysis of SVM","title":"Sensitivity Analysis of SVM","text":"EditURL = \"sensitivity-analysis-svm.jl\"","category":"page"},{"location":"examples/sensitivity-analysis-svm/#Sensitivity-Analysis-of-SVM","page":"Sensitivity Analysis of SVM","title":"Sensitivity Analysis of SVM","text":"","category":"section"},{"location":"examples/sensitivity-analysis-svm/","page":"Sensitivity Analysis of SVM","title":"Sensitivity Analysis of SVM","text":"(Image: )","category":"page"},{"location":"examples/sensitivity-analysis-svm/","page":"Sensitivity Analysis of SVM","title":"Sensitivity Analysis of SVM","text":"This notebook illustrates sensitivity analysis of data points in a Support Vector Machine (inspired from @matbesancon's SimpleSVMs.)","category":"page"},{"location":"examples/sensitivity-analysis-svm/","page":"Sensitivity Analysis of SVM","title":"Sensitivity Analysis of SVM","text":"For reference, Section 10.1 of https://online.stat.psu.edu/stat508/book/export/html/792 gives an intuitive explanation of what it means to have a sensitive hyperplane or data point. 
The general form of the SVM training problem is given below (with ell_2 regularization):","category":"page"},{"location":"examples/sensitivity-analysis-svm/","page":"Sensitivity Analysis of SVM","title":"Sensitivity Analysis of SVM","text":"beginsplit\nbeginarray ll\nmboxminimize lambdaw^2 + sum_i=1^N xi_i \nmboxst xi_i ge 0 quad quad i=1N \n y_i (w^T X_i + b) ge 1 - xi_i quad i=1N \nendarray\nendsplit","category":"page"},{"location":"examples/sensitivity-analysis-svm/","page":"Sensitivity Analysis of SVM","title":"Sensitivity Analysis of SVM","text":"where","category":"page"},{"location":"examples/sensitivity-analysis-svm/","page":"Sensitivity Analysis of SVM","title":"Sensitivity Analysis of SVM","text":"X, y are the N data points\nw is the support vector\nb determines the offset b/||w|| of the hyperplane with normal w\nξ is the soft-margin loss\nλ is the ell_2 regularization.","category":"page"},{"location":"examples/sensitivity-analysis-svm/","page":"Sensitivity Analysis of SVM","title":"Sensitivity Analysis of SVM","text":"This tutorial uses the following packages","category":"page"},{"location":"examples/sensitivity-analysis-svm/","page":"Sensitivity Analysis of SVM","title":"Sensitivity Analysis of SVM","text":"using JuMP # The mathematical programming modelling language\nimport DiffOpt # JuMP extension for differentiable optimization\nimport Ipopt # Optimization solver that handles quadratic programs\nimport LinearAlgebra\nimport Plots\nimport Random","category":"page"},{"location":"examples/sensitivity-analysis-svm/#Define-and-solve-the-SVM","page":"Sensitivity Analysis of SVM","title":"Define and solve the SVM","text":"","category":"section"},{"location":"examples/sensitivity-analysis-svm/","page":"Sensitivity Analysis of SVM","title":"Sensitivity Analysis of SVM","text":"Construct two clusters of data points.","category":"page"},{"location":"examples/sensitivity-analysis-svm/","page":"Sensitivity Analysis of SVM","title":"Sensitivity Analysis of 
SVM","text":"N = 100\nD = 2\n\nRandom.seed!(62)\nX = vcat(randn(N ÷ 2, D), randn(N ÷ 2, D) .+ [2.0, 2.0]')\ny = append!(ones(N ÷ 2), -ones(N ÷ 2))\nλ = 0.05;\nnothing #hide","category":"page"},{"location":"examples/sensitivity-analysis-svm/","page":"Sensitivity Analysis of SVM","title":"Sensitivity Analysis of SVM","text":"Let's initialize a special model that can understand sensitivities","category":"page"},{"location":"examples/sensitivity-analysis-svm/","page":"Sensitivity Analysis of SVM","title":"Sensitivity Analysis of SVM","text":"model = Model(() -> DiffOpt.diff_optimizer(Ipopt.Optimizer))\nMOI.set(model, MOI.Silent(), true)","category":"page"},{"location":"examples/sensitivity-analysis-svm/","page":"Sensitivity Analysis of SVM","title":"Sensitivity Analysis of SVM","text":"Add the variables","category":"page"},{"location":"examples/sensitivity-analysis-svm/","page":"Sensitivity Analysis of SVM","title":"Sensitivity Analysis of SVM","text":"@variable(model, ξ[1:N] >= 0)\n@variable(model, w[1:D])\n@variable(model, b);\nnothing #hide","category":"page"},{"location":"examples/sensitivity-analysis-svm/","page":"Sensitivity Analysis of SVM","title":"Sensitivity Analysis of SVM","text":"Add the constraints.","category":"page"},{"location":"examples/sensitivity-analysis-svm/","page":"Sensitivity Analysis of SVM","title":"Sensitivity Analysis of SVM","text":"@constraint(\n model,\n con[i in 1:N],\n y[i] * (LinearAlgebra.dot(X[i, :], w) + b) >= 1 - ξ[i]\n);\nnothing #hide","category":"page"},{"location":"examples/sensitivity-analysis-svm/","page":"Sensitivity Analysis of SVM","title":"Sensitivity Analysis of SVM","text":"Define the objective and solve","category":"page"},{"location":"examples/sensitivity-analysis-svm/","page":"Sensitivity Analysis of SVM","title":"Sensitivity Analysis of SVM","text":"@objective(model, Min, λ * LinearAlgebra.dot(w, w) + 
sum(ξ))\n\noptimize!(model)","category":"page"},{"location":"examples/sensitivity-analysis-svm/","page":"Sensitivity Analysis of SVM","title":"Sensitivity Analysis of SVM","text":"We can visualize the separating hyperplane.","category":"page"},{"location":"examples/sensitivity-analysis-svm/","page":"Sensitivity Analysis of SVM","title":"Sensitivity Analysis of SVM","text":"loss = objective_value(model)\n\nwv = value.(w)\n\nbv = value(b)\n\nsvm_x = [-2.0, 4.0] # arbitrary points\nsvm_y = (-bv .- wv[1] * svm_x) / wv[2]\n\np = Plots.scatter(\n X[:, 1],\n X[:, 2];\n color = [yi > 0 ? :red : :blue for yi in y],\n label = \"\",\n)\nPlots.plot!(\n p,\n svm_x,\n svm_y;\n label = \"loss = $(round(loss, digits=2))\",\n width = 3,\n)","category":"page"},{"location":"examples/sensitivity-analysis-svm/#Gradient-of-hyperplane-wrt-the-data-point-coordinates","page":"Sensitivity Analysis of SVM","title":"Gradient of hyperplane wrt the data point coordinates","text":"","category":"section"},{"location":"examples/sensitivity-analysis-svm/","page":"Sensitivity Analysis of SVM","title":"Sensitivity Analysis of SVM","text":"Now that we've solved the SVM, we can compute the sensitivity of optimal values – the separating hyperplane in our case – with respect to perturbations of the problem data – the data points – using DiffOpt.","category":"page"},{"location":"examples/sensitivity-analysis-svm/","page":"Sensitivity Analysis of SVM","title":"Sensitivity Analysis of SVM","text":"How does a change in coordinates of the data points, X, affects the position of the hyperplane? This is achieved by finding gradients of w and b with respect to X[i].","category":"page"},{"location":"examples/sensitivity-analysis-svm/","page":"Sensitivity Analysis of SVM","title":"Sensitivity Analysis of SVM","text":"Begin differentiating the model. 
analogous to varying θ in the expression:","category":"page"},{"location":"examples/sensitivity-analysis-svm/","page":"Sensitivity Analysis of SVM","title":"Sensitivity Analysis of SVM","text":"y_i (w^T (X_i + theta) + b) ge 1 - xi_i","category":"page"},{"location":"examples/sensitivity-analysis-svm/","page":"Sensitivity Analysis of SVM","title":"Sensitivity Analysis of SVM","text":"∇ = zeros(N)\nfor i in 1:N\n for j in 1:N\n if i == j\n # we consider identical perturbations on all x_i coordinates\n MOI.set(\n model,\n DiffOpt.ForwardConstraintFunction(),\n con[j],\n y[j] * sum(w),\n )\n else\n MOI.set(model, DiffOpt.ForwardConstraintFunction(), con[j], 0.0)\n end\n end\n DiffOpt.forward_differentiate!(model)\n dw = MOI.get.(model, DiffOpt.ForwardVariablePrimal(), w)\n db = MOI.get(model, DiffOpt.ForwardVariablePrimal(), b)\n ∇[i] = LinearAlgebra.norm(dw) + LinearAlgebra.norm(db)\nend","category":"page"},{"location":"examples/sensitivity-analysis-svm/","page":"Sensitivity Analysis of SVM","title":"Sensitivity Analysis of SVM","text":"We can visualize the separating hyperplane sensitivity with respect to the data points. Note that all the small numbers were converted into 1/10 of the largest value to show all the points of the set.","category":"page"},{"location":"examples/sensitivity-analysis-svm/","page":"Sensitivity Analysis of SVM","title":"Sensitivity Analysis of SVM","text":"p3 = Plots.scatter(\n X[:, 1],\n X[:, 2];\n color = [yi > 0 ? 
:red : :blue for yi in y],\n label = \"\",\n markersize = 2 * (max.(1.8∇, 0.2 * maximum(∇))),\n)\nPlots.yaxis!(p3, (-2, 4.5))\nPlots.plot!(p3, svm_x, svm_y; label = \"\", width = 3)\nPlots.title!(\"Sensitivity of the separator to data point variations\")","category":"page"},{"location":"examples/sensitivity-analysis-svm/","page":"Sensitivity Analysis of SVM","title":"Sensitivity Analysis of SVM","text":"","category":"page"},{"location":"examples/sensitivity-analysis-svm/","page":"Sensitivity Analysis of SVM","title":"Sensitivity Analysis of SVM","text":"This page was generated using Literate.jl.","category":"page"},{"location":"examples/matrix-inversion-manual/","page":"Differentiating a QP wrt a single variable","title":"Differentiating a QP wrt a single variable","text":"EditURL = \"matrix-inversion-manual.jl\"","category":"page"},{"location":"examples/matrix-inversion-manual/#Differentiating-a-QP-wrt-a-single-variable","page":"Differentiating a QP wrt a single variable","title":"Differentiating a QP wrt a single variable","text":"","category":"section"},{"location":"examples/matrix-inversion-manual/","page":"Differentiating a QP wrt a single variable","title":"Differentiating a QP wrt a single variable","text":"(Image: )","category":"page"},{"location":"examples/matrix-inversion-manual/","page":"Differentiating a QP wrt a single variable","title":"Differentiating a QP wrt a single variable","text":"Consider the quadratic program","category":"page"},{"location":"examples/matrix-inversion-manual/","page":"Differentiating a QP wrt a single variable","title":"Differentiating a QP wrt a single variable","text":"beginsplit\nbeginarray ll\nmboxminimize frac12 x^T Q x + q^T x \nmboxsubject to G x leq h x in mathcalR^2 \nendarray\nendsplit","category":"page"},{"location":"examples/matrix-inversion-manual/","page":"Differentiating a QP wrt a single variable","title":"Differentiating a QP wrt a single variable","text":"where Q, q, G are fixed and h is the single 
parameter.","category":"page"},{"location":"examples/matrix-inversion-manual/","page":"Differentiating a QP wrt a single variable","title":"Differentiating a QP wrt a single variable","text":"In this example, we'll try to differentiate the QP wrt h, by finding its jacobian by hand (using Eqn (6) of QPTH article) and compare the results:","category":"page"},{"location":"examples/matrix-inversion-manual/","page":"Differentiating a QP wrt a single variable","title":"Differentiating a QP wrt a single variable","text":"Manual compuation\nUsing JuMP and DiffOpt","category":"page"},{"location":"examples/matrix-inversion-manual/","page":"Differentiating a QP wrt a single variable","title":"Differentiating a QP wrt a single variable","text":"Assuming","category":"page"},{"location":"examples/matrix-inversion-manual/","page":"Differentiating a QP wrt a single variable","title":"Differentiating a QP wrt a single variable","text":"Q = [[4, 1], [1, 2]]\nq = [1, 1]\nG = [1, 1]","category":"page"},{"location":"examples/matrix-inversion-manual/","page":"Differentiating a QP wrt a single variable","title":"Differentiating a QP wrt a single variable","text":"and begining with a starting value of h=-1","category":"page"},{"location":"examples/matrix-inversion-manual/","page":"Differentiating a QP wrt a single variable","title":"Differentiating a QP wrt a single variable","text":"few values just for reference","category":"page"},{"location":"examples/matrix-inversion-manual/","page":"Differentiating a QP wrt a single variable","title":"Differentiating a QP wrt a single variable","text":"variable optimal value note\nx* [-0.25; -0.75] Primal optimal\n𝜆∗ -0.75 Dual optimal","category":"page"},{"location":"examples/matrix-inversion-manual/#Finding-Jacobian-using-matrix-inversion","page":"Differentiating a QP wrt a single variable","title":"Finding Jacobian using matrix inversion","text":"","category":"section"},{"location":"examples/matrix-inversion-manual/","page":"Differentiating a QP 
wrt a single variable","title":"Differentiating a QP wrt a single variable","text":"Lets formulate Eqn (6) of QPTH article for our QP. If we assume h as the only parameter and Q,q,G as fixed problem data - also note that our QP doesn't involves Ax=b constraint - then Eqn (6) reduces to","category":"page"},{"location":"examples/matrix-inversion-manual/","page":"Differentiating a QP wrt a single variable","title":"Differentiating a QP wrt a single variable","text":"begingather\n beginbmatrix\n Q G^T \n lambda^* G G x^* - h\n endbmatrix\n beginbmatrix\n dx \n d lambda\n endbmatrix\n =\n beginbmatrix\n 0 \n lambda^* dh\n endbmatrix\nendgather","category":"page"},{"location":"examples/matrix-inversion-manual/","page":"Differentiating a QP wrt a single variable","title":"Differentiating a QP wrt a single variable","text":"Now to find the jacobians $ \\frac{\\partial x}{\\partial h}, \\frac{\\partial \\lambda}{\\partial h}$ we substitute dh = I = [1] and plug in values of Q,q,G to get","category":"page"},{"location":"examples/matrix-inversion-manual/","page":"Differentiating a QP wrt a single variable","title":"Differentiating a QP wrt a single variable","text":"begingather\n beginbmatrix\n 4 1 1 \n 1 2 1 \n -075 -075 0\n endbmatrix\n beginbmatrix\n fracpartial x_1partial h \n fracpartial x_2partial h \n fracpartial lambdapartial h\n endbmatrix\n =\n beginbmatrix\n 0 \n 0 \n -075\n endbmatrix\nendgather","category":"page"},{"location":"examples/matrix-inversion-manual/","page":"Differentiating a QP wrt a single variable","title":"Differentiating a QP wrt a single variable","text":"Upon solving using matrix inversion, the jacobian is","category":"page"},{"location":"examples/matrix-inversion-manual/","page":"Differentiating a QP wrt a single variable","title":"Differentiating a QP wrt a single variable","text":"fracpartial x_1partial h = 025 fracpartial x_2partial h = 075 fracpartial lambdapartial h = 
-175","category":"page"},{"location":"examples/matrix-inversion-manual/#Finding-Jacobian-using-JuMP-and-DiffOpt","page":"Differentiating a QP wrt a single variable","title":"Finding Jacobian using JuMP and DiffOpt","text":"","category":"section"},{"location":"examples/matrix-inversion-manual/","page":"Differentiating a QP wrt a single variable","title":"Differentiating a QP wrt a single variable","text":"using JuMP\nimport DiffOpt\nimport Ipopt\n\nn = 2 # variable dimension\nm = 1; # no of inequality constraints\n\nQ = [4.0 1.0; 1.0 2.0]\nq = [1.0; 1.0]\nG = [1.0 1.0;]\nh = [-1.0;] # initial values set","category":"page"},{"location":"examples/matrix-inversion-manual/","page":"Differentiating a QP wrt a single variable","title":"Differentiating a QP wrt a single variable","text":"Initialize empty model","category":"page"},{"location":"examples/matrix-inversion-manual/","page":"Differentiating a QP wrt a single variable","title":"Differentiating a QP wrt a single variable","text":"model = Model(() -> DiffOpt.diff_optimizer(Ipopt.Optimizer))\nset_silent(model)","category":"page"},{"location":"examples/matrix-inversion-manual/","page":"Differentiating a QP wrt a single variable","title":"Differentiating a QP wrt a single variable","text":"Add the variables","category":"page"},{"location":"examples/matrix-inversion-manual/","page":"Differentiating a QP wrt a single variable","title":"Differentiating a QP wrt a single variable","text":"@variable(model, x[1:2])","category":"page"},{"location":"examples/matrix-inversion-manual/","page":"Differentiating a QP wrt a single variable","title":"Differentiating a QP wrt a single variable","text":"Add the constraints.","category":"page"},{"location":"examples/matrix-inversion-manual/","page":"Differentiating a QP wrt a single variable","title":"Differentiating a QP wrt a single variable","text":"@constraint(model, cons[j in 1:1], sum(G[j, i] * x[i] for i in 1:2) <= h[j]);\n\n@objective(\n model,\n Min,\n 1 / 2 * sum(Q[j, i] * 
x[i] * x[j] for i in 1:2, j in 1:2) +\n sum(q[i] * x[i] for i in 1:2)\n)","category":"page"},{"location":"examples/matrix-inversion-manual/","page":"Differentiating a QP wrt a single variable","title":"Differentiating a QP wrt a single variable","text":"Solve problem","category":"page"},{"location":"examples/matrix-inversion-manual/","page":"Differentiating a QP wrt a single variable","title":"Differentiating a QP wrt a single variable","text":"optimize!(model)","category":"page"},{"location":"examples/matrix-inversion-manual/","page":"Differentiating a QP wrt a single variable","title":"Differentiating a QP wrt a single variable","text":"primal solution","category":"page"},{"location":"examples/matrix-inversion-manual/","page":"Differentiating a QP wrt a single variable","title":"Differentiating a QP wrt a single variable","text":"value.(x)","category":"page"},{"location":"examples/matrix-inversion-manual/","page":"Differentiating a QP wrt a single variable","title":"Differentiating a QP wrt a single variable","text":"dual solution","category":"page"},{"location":"examples/matrix-inversion-manual/","page":"Differentiating a QP wrt a single variable","title":"Differentiating a QP wrt a single variable","text":"dual.(cons)","category":"page"},{"location":"examples/matrix-inversion-manual/","page":"Differentiating a QP wrt a single variable","title":"Differentiating a QP wrt a single variable","text":"set sensitivitity","category":"page"},{"location":"examples/matrix-inversion-manual/","page":"Differentiating a QP wrt a single variable","title":"Differentiating a QP wrt a single variable","text":"MOI.set(\n model,\n DiffOpt.ForwardConstraintFunction(),\n cons[1],\n 0.0 * index(x[1]) - 1.0, # to indicate the direction vector to get directional derivatives\n)","category":"page"},{"location":"examples/matrix-inversion-manual/","page":"Differentiating a QP wrt a single variable","title":"Differentiating a QP wrt a single variable","text":"Note that 0.0 * index(x[1]) is 
used to make its type typeof(0.0 * index(x[1]) - 1.0) <: MOI.AbstractScalarFunction. To indicate different direction to get directional derivative, users should replace 0.0 * index(x[1]) - 1.0 as the form of dG*x - dh, where dG and dh correspond to the elements of direction vectors along G and h axes, respectively.","category":"page"},{"location":"examples/matrix-inversion-manual/","page":"Differentiating a QP wrt a single variable","title":"Differentiating a QP wrt a single variable","text":"Compute derivatives","category":"page"},{"location":"examples/matrix-inversion-manual/","page":"Differentiating a QP wrt a single variable","title":"Differentiating a QP wrt a single variable","text":"DiffOpt.forward_differentiate!(model)","category":"page"},{"location":"examples/matrix-inversion-manual/","page":"Differentiating a QP wrt a single variable","title":"Differentiating a QP wrt a single variable","text":"Query derivative","category":"page"},{"location":"examples/matrix-inversion-manual/","page":"Differentiating a QP wrt a single variable","title":"Differentiating a QP wrt a single variable","text":"dx = MOI.get.(model, DiffOpt.ForwardVariablePrimal(), x)","category":"page"},{"location":"examples/matrix-inversion-manual/","page":"Differentiating a QP wrt a single variable","title":"Differentiating a QP wrt a single variable","text":"","category":"page"},{"location":"examples/matrix-inversion-manual/","page":"Differentiating a QP wrt a single variable","title":"Differentiating a QP wrt a single variable","text":"This page was generated using Literate.jl.","category":"page"},{"location":"examples/autotuning-ridge/","page":"Auto-tuning Hyperparameters","title":"Auto-tuning Hyperparameters","text":"EditURL = \"autotuning-ridge.jl\"","category":"page"},{"location":"examples/autotuning-ridge/#Auto-tuning-Hyperparameters","page":"Auto-tuning Hyperparameters","title":"Auto-tuning 
Hyperparameters","text":"","category":"section"},{"location":"examples/autotuning-ridge/","page":"Auto-tuning Hyperparameters","title":"Auto-tuning Hyperparameters","text":"(Image: )","category":"page"},{"location":"examples/autotuning-ridge/","page":"Auto-tuning Hyperparameters","title":"Auto-tuning Hyperparameters","text":"This example shows how to learn a hyperparameter in Ridge Regression using a gradient descent routine. Let the regularized regression problem be formulated as:","category":"page"},{"location":"examples/autotuning-ridge/","page":"Auto-tuning Hyperparameters","title":"Auto-tuning Hyperparameters","text":"beginequation\nmin_w quad frac12nd sum_i=1^n (w^T x_i - y_i)^2 + fracalpha2d w _2^2\nendequation","category":"page"},{"location":"examples/autotuning-ridge/","page":"Auto-tuning Hyperparameters","title":"Auto-tuning Hyperparameters","text":"where","category":"page"},{"location":"examples/autotuning-ridge/","page":"Auto-tuning Hyperparameters","title":"Auto-tuning Hyperparameters","text":"x, y are the data points\nw are the learned weights\nα is the hyperparameter acting on regularization.","category":"page"},{"location":"examples/autotuning-ridge/","page":"Auto-tuning Hyperparameters","title":"Auto-tuning Hyperparameters","text":"The main optimization model will be formulated with JuMP. 
Using the gradient of the optimal weights with respect to the regularization parameters computed with DiffOpt, we can perform a gradient descent on top of the inner model to minimize the test loss.","category":"page"},{"location":"examples/autotuning-ridge/","page":"Auto-tuning Hyperparameters","title":"Auto-tuning Hyperparameters","text":"This tutorial uses the following packages","category":"page"},{"location":"examples/autotuning-ridge/","page":"Auto-tuning Hyperparameters","title":"Auto-tuning Hyperparameters","text":"using JuMP # The mathematical programming modelling language\nimport DiffOpt # JuMP extension for differentiable optimization\nimport Ipopt # Optimization solver that handles quadratic programs\nimport LinearAlgebra\nimport Plots\nimport Random","category":"page"},{"location":"examples/autotuning-ridge/#Generating-a-noisy-regression-dataset","page":"Auto-tuning Hyperparameters","title":"Generating a noisy regression dataset","text":"","category":"section"},{"location":"examples/autotuning-ridge/","page":"Auto-tuning Hyperparameters","title":"Auto-tuning Hyperparameters","text":"Random.seed!(42)\n\nN = 100\nD = 20\nnoise = 5\n\nw_real = 10 * randn(D)\nX = 10 * randn(N, D)\ny = X * w_real + noise * randn(N)\nl = N ÷ 2 # test train split\n\nX_train = X[1:l, :]\nX_test = X[l+1:N, :]\ny_train = y[1:l]\ny_test = y[l+1:N];\nnothing #hide","category":"page"},{"location":"examples/autotuning-ridge/#Defining-the-regression-problem","page":"Auto-tuning Hyperparameters","title":"Defining the regression problem","text":"","category":"section"},{"location":"examples/autotuning-ridge/","page":"Auto-tuning Hyperparameters","title":"Auto-tuning Hyperparameters","text":"We implement the regularized regression problem as a function taking the problem data, building a JuMP model and solving it.","category":"page"},{"location":"examples/autotuning-ridge/","page":"Auto-tuning Hyperparameters","title":"Auto-tuning Hyperparameters","text":"function fit_ridge(model, X, y, 
α)\n JuMP.empty!(model)\n set_silent(model)\n N, D = size(X)\n @variable(model, w[1:D])\n @expression(model, err_term, X * w - y)\n @objective(\n model,\n Min,\n LinearAlgebra.dot(err_term, err_term) / (2 * N * D) +\n α * LinearAlgebra.dot(w, w) / (2 * D),\n )\n optimize!(model)\n @assert termination_status(model) in\n [MOI.OPTIMAL, MOI.LOCALLY_SOLVED, MOI.ALMOST_LOCALLY_SOLVED]\n return w\nend","category":"page"},{"location":"examples/autotuning-ridge/","page":"Auto-tuning Hyperparameters","title":"Auto-tuning Hyperparameters","text":"We can solve the problem for several values of α to visualize the effect of regularization on the testing and training loss.","category":"page"},{"location":"examples/autotuning-ridge/","page":"Auto-tuning Hyperparameters","title":"Auto-tuning Hyperparameters","text":"αs = 0.00:0.01:0.50\nmse_test = Float64[]\nmse_train = Float64[]\nmodel = Model(() -> DiffOpt.diff_optimizer(Ipopt.Optimizer))\n(Ntest, D) = size(X_test)\n(Ntrain, D) = size(X_train)\nfor α in αs\n w = fit_ridge(model, X_train, y_train, α)\n ŵ = value.(w)\n ŷ_test = X_test * ŵ\n ŷ_train = X_train * ŵ\n push!(mse_test, LinearAlgebra.norm(ŷ_test - y_test)^2 / (2 * Ntest * D))\n push!(\n mse_train,\n LinearAlgebra.norm(ŷ_train - y_train)^2 / (2 * Ntrain * D),\n )\nend","category":"page"},{"location":"examples/autotuning-ridge/","page":"Auto-tuning Hyperparameters","title":"Auto-tuning Hyperparameters","text":"Visualize the Mean Score Error metric","category":"page"},{"location":"examples/autotuning-ridge/","page":"Auto-tuning Hyperparameters","title":"Auto-tuning Hyperparameters","text":"Plots.plot(\n αs,\n mse_test ./ sum(mse_test);\n label = \"MSE test\",\n xaxis = \"α\",\n yaxis = \"MSE\",\n legend = (0.8, 0.2),\n width = 3,\n)\nPlots.plot!(\n αs,\n mse_train ./ sum(mse_train);\n label = \"MSE train\",\n linestyle = :dash,\n width = 3,\n)\nPlots.title!(\"Normalized MSE on training and testing 
sets\")","category":"page"},{"location":"examples/autotuning-ridge/#Leveraging-differentiable-optimization:-computing-the-derivative-of-the-solution","page":"Auto-tuning Hyperparameters","title":"Leveraging differentiable optimization: computing the derivative of the solution","text":"","category":"section"},{"location":"examples/autotuning-ridge/","page":"Auto-tuning Hyperparameters","title":"Auto-tuning Hyperparameters","text":"Using DiffOpt, we can compute ∂w_i/∂α, the derivative of the learned solution ̂w w.r.t. the regularization parameter.","category":"page"},{"location":"examples/autotuning-ridge/","page":"Auto-tuning Hyperparameters","title":"Auto-tuning Hyperparameters","text":"function compute_dw_dα(model, w)\n D = length(w)\n dw_dα = zeros(D)\n MOI.set(\n model,\n DiffOpt.ForwardObjectiveFunction(),\n LinearAlgebra.dot(w, w) / (2 * D),\n )\n DiffOpt.forward_differentiate!(model)\n for i in 1:D\n dw_dα[i] = MOI.get(model, DiffOpt.ForwardVariablePrimal(), w[i])\n end\n return dw_dα\nend","category":"page"},{"location":"examples/autotuning-ridge/","page":"Auto-tuning Hyperparameters","title":"Auto-tuning Hyperparameters","text":"Using ∂w_i/∂α computed with compute_dw_dα, we can compute the derivative of the test loss w.r.t. the parameter α by composing derivatives.","category":"page"},{"location":"examples/autotuning-ridge/","page":"Auto-tuning Hyperparameters","title":"Auto-tuning Hyperparameters","text":"function d_testloss_dα(model, X_test, y_test, w, ŵ)\n N, D = size(X_test)\n dw_dα = compute_dw_dα(model, w)\n err_term = X_test * ŵ - y_test\n return sum(eachindex(err_term)) do i\n return LinearAlgebra.dot(X_test[i, :], dw_dα) * err_term[i]\n end / (N * D)\nend","category":"page"},{"location":"examples/autotuning-ridge/","page":"Auto-tuning Hyperparameters","title":"Auto-tuning Hyperparameters","text":"We can define a meta-optimizer function performing gradient descent on the test loss w.r.t. 
the regularization parameter.","category":"page"},{"location":"examples/autotuning-ridge/","page":"Auto-tuning Hyperparameters","title":"Auto-tuning Hyperparameters","text":"function descent(α0, max_iters = 100; fixed_step = 0.01, grad_tol = 1e-3)\n α_s = Float64[]\n ∂α_s = Float64[]\n test_loss = Float64[]\n α = α0\n N, D = size(X_test)\n model = Model(() -> DiffOpt.diff_optimizer(Ipopt.Optimizer))\n for iter in 1:max_iters\n w = fit_ridge(model, X_train, y_train, α)\n ŵ = value.(w)\n err_term = X_test * ŵ - y_test\n ∂α = d_testloss_dα(model, X_test, y_test, w, ŵ)\n push!(α_s, α)\n push!(∂α_s, ∂α)\n push!(test_loss, LinearAlgebra.norm(err_term)^2 / (2 * N * D))\n α -= fixed_step * ∂α\n if abs(∂α) ≤ grad_tol\n break\n end\n end\n return α_s, ∂α_s, test_loss\nend\n\nᾱ, ∂ᾱ, msē = descent(0.10, 500)\niters = 1:length(ᾱ);\nnothing #hide","category":"page"},{"location":"examples/autotuning-ridge/","page":"Auto-tuning Hyperparameters","title":"Auto-tuning Hyperparameters","text":"Visualize gradient descent and convergence","category":"page"},{"location":"examples/autotuning-ridge/","page":"Auto-tuning Hyperparameters","title":"Auto-tuning Hyperparameters","text":"Plots.plot(\n αs,\n mse_test;\n label = \"MSE test\",\n xaxis = (\"α\"),\n legend = :topleft,\n width = 2,\n)\nPlots.plot!(ᾱ, msē; label = \"learned α\", width = 5, style = :dot)\nPlots.title!(\"Regularizer learning\")","category":"page"},{"location":"examples/autotuning-ridge/","page":"Auto-tuning Hyperparameters","title":"Auto-tuning Hyperparameters","text":"Visualize the convergence of α to its optimal value","category":"page"},{"location":"examples/autotuning-ridge/","page":"Auto-tuning Hyperparameters","title":"Auto-tuning Hyperparameters","text":"Plots.plot(\n iters,\n ᾱ;\n label = nothing,\n color = :blue,\n xaxis = (\"Iterations\"),\n legend = :bottom,\n title = \"Convergence of α\",\n)","category":"page"},{"location":"examples/autotuning-ridge/","page":"Auto-tuning Hyperparameters","title":"Auto-tuning 
Hyperparameters","text":"Visualize the convergence of the objective function","category":"page"},{"location":"examples/autotuning-ridge/","page":"Auto-tuning Hyperparameters","title":"Auto-tuning Hyperparameters","text":"Plots.plot(\n iters,\n msē;\n label = nothing,\n color = :red,\n xaxis = (\"Iterations\"),\n legend = :bottom,\n title = \"Convergence of MSE\",\n)","category":"page"},{"location":"examples/autotuning-ridge/","page":"Auto-tuning Hyperparameters","title":"Auto-tuning Hyperparameters","text":"Visualize the convergence of the derivative to zero","category":"page"},{"location":"examples/autotuning-ridge/","page":"Auto-tuning Hyperparameters","title":"Auto-tuning Hyperparameters","text":"Plots.plot(\n iters,\n ∂ᾱ;\n label = nothing,\n color = :green,\n xaxis = (\"Iterations\"),\n legend = :bottom,\n title = \"Convergence of ∂α\",\n)","category":"page"},{"location":"examples/autotuning-ridge/","page":"Auto-tuning Hyperparameters","title":"Auto-tuning Hyperparameters","text":"","category":"page"},{"location":"examples/autotuning-ridge/","page":"Auto-tuning Hyperparameters","title":"Auto-tuning Hyperparameters","text":"This page was generated using Literate.jl.","category":"page"},{"location":"examples/polyhedral_project/","page":"Polyhedral QP layer","title":"Polyhedral QP layer","text":"EditURL = \"polyhedral_project.jl\"","category":"page"},{"location":"examples/polyhedral_project/#Polyhedral-QP-layer","page":"Polyhedral QP layer","title":"Polyhedral QP layer","text":"","category":"section"},{"location":"examples/polyhedral_project/","page":"Polyhedral QP layer","title":"Polyhedral QP layer","text":"(Image: )","category":"page"},{"location":"examples/polyhedral_project/","page":"Polyhedral QP layer","title":"Polyhedral QP layer","text":"We use DiffOpt to define a custom network layer which, given an input matrix y, computes its projection onto a polytope defined by a fixed number of inequalities: a_i^T x ≥ b_i. 
A neural network is created using Flux.jl and trained on the MNIST dataset, integrating this quadratic optimization layer.","category":"page"},{"location":"examples/polyhedral_project/","page":"Polyhedral QP layer","title":"Polyhedral QP layer","text":"The QP is solved in the forward pass, and its DiffOpt derivative is used in the backward pass expressed with ChainRulesCore.rrule.","category":"page"},{"location":"examples/polyhedral_project/","page":"Polyhedral QP layer","title":"Polyhedral QP layer","text":"This example is similar to the custom ReLU layer, except that the layer is parameterized by the hyperplanes (w,b) and not a simple stateless function. This also means that ChainRulesCore.rrule must return the derivatives of the output with respect to the layer parameters to allow for backpropagation.","category":"page"},{"location":"examples/polyhedral_project/","page":"Polyhedral QP layer","title":"Polyhedral QP layer","text":"using JuMP\nimport DiffOpt\nimport Ipopt\nimport ChainRulesCore\nimport Flux\nimport MLDatasets\nimport Statistics\nusing Base.Iterators: repeated\nusing LinearAlgebra\nusing Random\n\nRandom.seed!(42)","category":"page"},{"location":"examples/polyhedral_project/#The-Polytope-representation-and-its-derivative","page":"Polyhedral QP layer","title":"The Polytope representation and its derivative","text":"","category":"section"},{"location":"examples/polyhedral_project/","page":"Polyhedral QP layer","title":"Polyhedral QP layer","text":"struct Polytope{N}\n w::NTuple{N,Vector{Float64}}\n b::Vector{Float64}\nend\n\nPolytope(w::NTuple{N}) where {N} = Polytope{N}(w, randn(N))","category":"page"},{"location":"examples/polyhedral_project/","page":"Polyhedral QP layer","title":"Polyhedral QP layer","text":"We define a \"call\" operation on the polytope, making it a so-called functor. 
Calling the polytope with a matrix y operates an Euclidean projection of this matrix onto the polytope.","category":"page"},{"location":"examples/polyhedral_project/","page":"Polyhedral QP layer","title":"Polyhedral QP layer","text":"function (polytope::Polytope{N})(\n y::AbstractMatrix;\n model = direct_model(DiffOpt.diff_optimizer(Ipopt.Optimizer)),\n) where {N}\n layer_size, batch_size = size(y)\n empty!(model)\n set_silent(model)\n @variable(model, x[1:layer_size, 1:batch_size])\n @constraint(\n model,\n greater_than_cons[idx in 1:N, sample in 1:batch_size],\n dot(polytope.w[idx], x[:, sample]) ≥ polytope.b[idx]\n )\n @objective(model, Min, dot(x - y, x - y))\n optimize!(model)\n return JuMP.value.(x)\nend","category":"page"},{"location":"examples/polyhedral_project/","page":"Polyhedral QP layer","title":"Polyhedral QP layer","text":"The @functor macro from Flux implements auxiliary functions for collecting the parameters of our custom layer and operating backpropagation.","category":"page"},{"location":"examples/polyhedral_project/","page":"Polyhedral QP layer","title":"Polyhedral QP layer","text":"Flux.@functor Polytope","category":"page"},{"location":"examples/polyhedral_project/","page":"Polyhedral QP layer","title":"Polyhedral QP layer","text":"Define the reverse differentiation rule, for the function we defined above. Flux uses ChainRules primitives to implement reverse-mode differentiation of the whole network. To learn the current layer (the polytope the layer contains), the gradient is computed with respect to the Polytope fields in a ChainRulesCore.Tangent type which is used to represent derivatives with respect to structs. 
For more details about backpropagation, visit Introduction, ChainRulesCore.jl.","category":"page"},{"location":"examples/polyhedral_project/","page":"Polyhedral QP layer","title":"Polyhedral QP layer","text":"function ChainRulesCore.rrule(\n polytope::Polytope{N},\n y::AbstractMatrix,\n) where {N}\n model = direct_model(DiffOpt.diff_optimizer(Ipopt.Optimizer))\n xv = polytope(y; model = model)\n function pullback_matrix_projection(dl_dx)\n layer_size, batch_size = size(dl_dx)\n dl_dx = ChainRulesCore.unthunk(dl_dx)\n # `dl_dy` is the derivative of `l` wrt `y`\n x = model[:x]\n # grad wrt input parameters\n dl_dy = zeros(size(dl_dx))\n # grad wrt layer parameters\n dl_dw = zero.(polytope.w)\n dl_db = zero(polytope.b)\n # set sensitivities\n MOI.set.(model, DiffOpt.ReverseVariablePrimal(), x, dl_dx)\n # compute grad\n DiffOpt.reverse_differentiate!(model)\n # compute gradient wrt objective function parameter y\n obj_expr = MOI.get(model, DiffOpt.ReverseObjectiveFunction())\n dl_dy .= -2 * JuMP.coefficient.(obj_expr, x)\n greater_than_cons = model[:greater_than_cons]\n for idx in 1:N, sample in 1:batch_size\n cons_expr = MOI.get(\n model,\n DiffOpt.ReverseConstraintFunction(),\n greater_than_cons[idx, sample],\n )\n dl_db[idx] -= JuMP.constant(cons_expr) / batch_size\n dl_dw[idx] .+=\n JuMP.coefficient.(cons_expr, x[:, sample]) / batch_size\n end\n dself = ChainRulesCore.Tangent{Polytope{N}}(; w = dl_dw, b = dl_db)\n return (dself, dl_dy)\n end\n return xv, pullback_matrix_projection\nend","category":"page"},{"location":"examples/polyhedral_project/#Define-the-Network","page":"Polyhedral QP layer","title":"Define the Network","text":"","category":"section"},{"location":"examples/polyhedral_project/","page":"Polyhedral QP layer","title":"Polyhedral QP layer","text":"layer_size = 20\nm = Flux.Chain(\n Flux.Dense(784, layer_size), # 784 being image linear dimension (28 x 28)\n Polytope((randn(layer_size), randn(layer_size), randn(layer_size))),\n Flux.Dense(layer_size, 
10), # 10 being the number of outcomes (0 to 9)\n Flux.softmax,\n)","category":"page"},{"location":"examples/polyhedral_project/#Prepare-data","page":"Polyhedral QP layer","title":"Prepare data","text":"","category":"section"},{"location":"examples/polyhedral_project/","page":"Polyhedral QP layer","title":"Polyhedral QP layer","text":"M = 500 # batch size\n# Preprocessing train data\nimgs = MLDatasets.MNIST.traintensor(1:M)\nlabels = MLDatasets.MNIST.trainlabels(1:M);\ntrain_X = float.(reshape(imgs, size(imgs, 1) * size(imgs, 2), M)) # stack images\ntrain_Y = Flux.onehotbatch(labels, 0:9);\n# Preprocessing test data\ntest_imgs = MLDatasets.MNIST.testtensor(1:M)\ntest_labels = MLDatasets.MNIST.testlabels(1:M)\ntest_X = float.(reshape(test_imgs, size(test_imgs, 1) * size(test_imgs, 2), M))\ntest_Y = Flux.onehotbatch(test_labels, 0:9);\nnothing #hide","category":"page"},{"location":"examples/polyhedral_project/","page":"Polyhedral QP layer","title":"Polyhedral QP layer","text":"Define input data The original data is repeated epochs times because Flux.train! 
only loops through the data set once","category":"page"},{"location":"examples/polyhedral_project/","page":"Polyhedral QP layer","title":"Polyhedral QP layer","text":"epochs = 50\ndataset = repeated((train_X, train_Y), epochs);\nnothing #hide","category":"page"},{"location":"examples/polyhedral_project/#Network-training","page":"Polyhedral QP layer","title":"Network training","text":"","category":"section"},{"location":"examples/polyhedral_project/","page":"Polyhedral QP layer","title":"Polyhedral QP layer","text":"training loss function, Flux optimizer","category":"page"},{"location":"examples/polyhedral_project/","page":"Polyhedral QP layer","title":"Polyhedral QP layer","text":"custom_loss(x, y) = Flux.crossentropy(m(x), y)\nopt = Flux.ADAM()\nevalcb = () -> @show(custom_loss(train_X, train_Y))","category":"page"},{"location":"examples/polyhedral_project/","page":"Polyhedral QP layer","title":"Polyhedral QP layer","text":"Train to optimize network parameters","category":"page"},{"location":"examples/polyhedral_project/","page":"Polyhedral QP layer","title":"Polyhedral QP layer","text":"@time Flux.train!(\n custom_loss,\n Flux.params(m),\n dataset,\n opt,\n cb = Flux.throttle(evalcb, 5),\n);\nnothing #hide","category":"page"},{"location":"examples/polyhedral_project/","page":"Polyhedral QP layer","title":"Polyhedral QP layer","text":"Although our custom implementation takes time, it is able to reach similar accuracy as the usual ReLU function implementation.","category":"page"},{"location":"examples/polyhedral_project/#Accuracy-results","page":"Polyhedral QP layer","title":"Accuracy results","text":"","category":"section"},{"location":"examples/polyhedral_project/","page":"Polyhedral QP layer","title":"Polyhedral QP layer","text":"Average of correct guesses","category":"page"},{"location":"examples/polyhedral_project/","page":"Polyhedral QP layer","title":"Polyhedral QP layer","text":"accuracy(x, y) = Statistics.mean(Flux.onecold(m(x)) .== 
Flux.onecold(y));\nnothing #hide","category":"page"},{"location":"examples/polyhedral_project/","page":"Polyhedral QP layer","title":"Polyhedral QP layer","text":"Training accuracy","category":"page"},{"location":"examples/polyhedral_project/","page":"Polyhedral QP layer","title":"Polyhedral QP layer","text":"accuracy(train_X, train_Y)","category":"page"},{"location":"examples/polyhedral_project/","page":"Polyhedral QP layer","title":"Polyhedral QP layer","text":"Test accuracy","category":"page"},{"location":"examples/polyhedral_project/","page":"Polyhedral QP layer","title":"Polyhedral QP layer","text":"accuracy(test_X, test_Y)","category":"page"},{"location":"examples/polyhedral_project/","page":"Polyhedral QP layer","title":"Polyhedral QP layer","text":"Note that the accuracy is low due to simplified training. It is possible to increase the number of samples N, the number of epochs epoch and the connectivity inner.","category":"page"},{"location":"examples/polyhedral_project/","page":"Polyhedral QP layer","title":"Polyhedral QP layer","text":"","category":"page"},{"location":"examples/polyhedral_project/","page":"Polyhedral QP layer","title":"Polyhedral QP layer","text":"This page was generated using Literate.jl.","category":"page"},{"location":"reference/#Reference","page":"Reference","title":"Reference","text":"","category":"section"},{"location":"reference/","page":"Reference","title":"Reference","text":"","category":"page"},{"location":"reference/","page":"Reference","title":"Reference","text":"Modules = [DiffOpt, DiffOpt.QuadraticProgram, DiffOpt.ConicProgram]","category":"page"},{"location":"reference/#DiffOpt.AbstractLazyScalarFunction","page":"Reference","title":"DiffOpt.AbstractLazyScalarFunction","text":"abstract type AbstractLazyScalarFunction <: MOI.AbstractScalarFunction end\n\nSubtype of MOI.AbstractScalarFunction that is not a standard MOI scalar function but can be converted to one using standard_form.\n\nThe function can also be inspected lazily 
using JuMP.coefficient or quad_sym_half.\n\n\n\n\n\n","category":"type"},{"location":"reference/#DiffOpt.AbstractModel","page":"Reference","title":"DiffOpt.AbstractModel","text":"abstract type AbstractModel <: MOI.ModelLike end\n\nModel supporting forward_differentiate! and reverse_differentiate!.\n\n\n\n\n\n","category":"type"},{"location":"reference/#DiffOpt.DifferentiateTimeSec","page":"Reference","title":"DiffOpt.DifferentiateTimeSec","text":"DifferentiateTimeSec()\n\nA model attribute for the total elapsed time (in seconds) for computing the differentiation information.\n\n\n\n\n\n","category":"type"},{"location":"reference/#DiffOpt.ForwardConstraintFunction","page":"Reference","title":"DiffOpt.ForwardConstraintFunction","text":"ForwardConstraintFunction <: MOI.AbstractConstraintAttribute\n\nA MOI.AbstractConstraintAttribute to set input data to forward differentiation, that is, problem input data.\n\nFor instance, if the scalar constraint of index ci contains θ * (x + 2y) <= 5θ, for the purpose of computing the derivative with respect to θ, the following should be set:\n\nMOI.set(model, DiffOpt.ForwardConstraintFunction(), ci, 1.0 * x + 2.0 * y - 5.0)\n\nNote that we use -5 as the ForwardConstraintFunction sets the tangent of the ConstraintFunction so we consider the expression θ * (x + 2y - 5).\n\n\n\n\n\n","category":"type"},{"location":"reference/#DiffOpt.ForwardObjectiveFunction","page":"Reference","title":"DiffOpt.ForwardObjectiveFunction","text":"ForwardObjectiveFunction <: MOI.AbstractModelAttribute\n\nA MOI.AbstractModelAttribute to set input data to forward differentiation, that is, problem input data. The possible values are any MOI.AbstractScalarFunction. 
A MOI.ScalarQuadraticFunction can only be used in linearly constrained quadratic models.\n\nFor instance, if the objective contains θ * (x + 2y), for the purpose of computing the derivative with respect to θ, the following should be set:\n\nMOI.set(model, DiffOpt.ForwardObjectiveFunction(), 1.0 * x + 2.0 * y)\n\nwhere x and y are the relevant MOI.VariableIndex.\n\n\n\n\n\n","category":"type"},{"location":"reference/#DiffOpt.ForwardVariablePrimal","page":"Reference","title":"DiffOpt.ForwardVariablePrimal","text":"ForwardVariablePrimal <: MOI.AbstractVariableAttribute\n\nA MOI.AbstractVariableAttribute to get output data from forward differentiation, that is, problem solution.\n\nFor instance, to get the tangent of the variable of index vi corresponding to the tangents given to ForwardObjectiveFunction and ForwardConstraintFunction, do the following:\n\nMOI.get(model, DiffOpt.ForwardVariablePrimal(), vi)\n\n\n\n\n\n","category":"type"},{"location":"reference/#DiffOpt.IndexMappedFunction","page":"Reference","title":"DiffOpt.IndexMappedFunction","text":"IndexMappedFunction{F<:MOI.AbstractFunction} <: AbstractLazyScalarFunction\n\nLazily represents the function MOI.Utilities.map_indices(index_map, DiffOpt.standard_form(func)).\n\n\n\n\n\n","category":"type"},{"location":"reference/#DiffOpt.MOItoJuMP","page":"Reference","title":"DiffOpt.MOItoJuMP","text":"MOItoJuMP{F<:MOI.AbstractScalarFunction} <: JuMP.AbstractJuMPScalar\n\nLazily represents the function JuMP.jump_function(model, DiffOpt.standard_form(func)).\n\n\n\n\n\n","category":"type"},{"location":"reference/#DiffOpt.MatrixScalarQuadraticFunction","page":"Reference","title":"DiffOpt.MatrixScalarQuadraticFunction","text":"struct MatrixScalarQuadraticFunction{T, VT, MT} <: MOI.AbstractScalarFunction\n affine::VectorScalarAffineFunction{T,VT}\n terms::MT\nend\n\nRepresents the function x' * terms * x / 2 + affine as an MOI.AbstractScalarFunction where x[i] = MOI.VariableIndex(i). 
Use standard_form to convert it to a MOI.ScalarQuadraticFunction{T}.\n\n\n\n\n\n","category":"type"},{"location":"reference/#DiffOpt.MatrixVectorAffineFunction","page":"Reference","title":"DiffOpt.MatrixVectorAffineFunction","text":"MatrixVectorAffineFunction{T, VT} <: MOI.AbstractVectorFunction\n\nRepresents the function terms * x + constant as an MOI.AbstractVectorFunction where x[i] = MOI.VariableIndex(i). Use standard_form to convert it to a MOI.VectorAffineFunction{T}.\n\n\n\n\n\n","category":"type"},{"location":"reference/#DiffOpt.ModelConstructor","page":"Reference","title":"DiffOpt.ModelConstructor","text":"ModelConstructor <: MOI.AbstractOptimizerAttribute\n\nDetermines which subtype of DiffOpt.AbstractModel to use for differentiation. When set to nothing, the first one out of model.model_constructors that support the problem is used.\n\n\n\n\n\n","category":"type"},{"location":"reference/#DiffOpt.ObjectiveDualStart","page":"Reference","title":"DiffOpt.ObjectiveDualStart","text":"struct ObjectiveDualStart <: MOI.AbstractModelAttribute end\n\nIf the objective function had a dual, it would be -1 for the Lagrangian function to be the same. When the MOI.Bridges.Objective.SlackBridge is used, it creates a constraint. The dual of this constraint is therefore -1 as well. When setting this attribute, it allows to set the constraint dual of this constraint.\n\n\n\n\n\n","category":"type"},{"location":"reference/#DiffOpt.ObjectiveFunctionAttribute","page":"Reference","title":"DiffOpt.ObjectiveFunctionAttribute","text":"struct ObjectiveFunctionAttribute{A,F} <: MOI.AbstractModelAttribute\n attr::A\nend\n\nObjective function attribute attr for the function type F. 
The type F is used by a MOI.Bridges.AbstractBridgeOptimizer to keep track of its position in a chain of objective bridges.\n\n\n\n\n\n","category":"type"},{"location":"reference/#DiffOpt.ObjectiveSlackGapPrimalStart","page":"Reference","title":"DiffOpt.ObjectiveSlackGapPrimalStart","text":"struct ObjectiveSlackGapPrimalStart <: MOI.AbstractModelAttribute end\n\nIf the objective function had a dual, it would be -1 for the Lagrangian function to be the same. When the MOI.Bridges.Objective.SlackBridge is used, it creates a constraint. The dual of this constraint is therefore -1 as well. When setting this attribute, it allows to set the constraint dual of this constraint.\n\n\n\n\n\n","category":"type"},{"location":"reference/#DiffOpt.ProductOfSets","page":"Reference","title":"DiffOpt.ProductOfSets","text":"ProductOfSets{T} <: MOI.Utilities.OrderedProductOfSets{T}\n\nThe MOI.Utilities.@product_of_sets macro requires to know the list of sets at compile time. In DiffOpt however, the list depends on what the user is going to use as set as DiffOpt supports any set as long as it implements the required function of MathOptSetDistances. For this type, the list of sets can be given a run-time.\n\n\n\n\n\n","category":"type"},{"location":"reference/#DiffOpt.ReverseConstraintFunction","page":"Reference","title":"DiffOpt.ReverseConstraintFunction","text":"ReverseConstraintFunction\n\nAn MOI.AbstractConstraintAttribute to get output data to reverse differentiation, that is, problem input data.\n\nFor instance, if the following returns x + 2y + 5, it means that the tangent has coordinate 1 for the coefficient of x, coordinate 2 for the coefficient of y and 5 for the function constant. 
If the constraint is of the form func == constant or func <= constant, the tangent for the constant on the right-hand side is -5.\n\nMOI.get(model, DiffOpt.ReverseConstraintFunction(), ci)\n\n\n\n\n\n","category":"type"},{"location":"reference/#DiffOpt.ReverseObjectiveFunction","page":"Reference","title":"DiffOpt.ReverseObjectiveFunction","text":"ReverseObjectiveFunction <: MOI.AbstractModelAttribute\n\nA MOI.AbstractModelAttribute to get output data to reverse differentiation, that is, problem input data.\n\nFor instance, to get the tangent of the objective function corresponding to the tangent given to ReverseVariablePrimal, do the following:\n\nfunc = MOI.get(model, DiffOpt.ReverseObjectiveFunction())\n\nThen, to get the sensitivity of the linear term with variable x, do\n\nJuMP.coefficient(func, x)\n\nTo get the sensitivity with respect to the quadratic term with variables x and y, do either\n\nJuMP.coefficient(func, x, y)\n\nor\n\nDiffOpt.quad_sym_half(func, x, y)\n\nwarning: Warning\nThese two lines are not equivalent in case x == y, see quad_sym_half for the details on the difference between these two functions.\n\n\n\n\n\n","category":"type"},{"location":"reference/#DiffOpt.ReverseVariablePrimal","page":"Reference","title":"DiffOpt.ReverseVariablePrimal","text":"ReverseVariablePrimal <: MOI.AbstractVariableAttribute\n\nA MOI.AbstractVariableAttribute to set input data to reverse differentiation, that is, problem solution.\n\nFor instance, to set the tangent of the variable of index vi, do the following:\n\nMOI.set(model, DiffOpt.ReverseVariablePrimal(), x)\n\n\n\n\n\n","category":"type"},{"location":"reference/#DiffOpt.SparseVectorAffineFunction","page":"Reference","title":"DiffOpt.SparseVectorAffineFunction","text":"struct SparseVectorAffineFunction{T} <: MOI.AbstractVectorFunction\n terms::SparseArrays.SparseMatrixCSC{T,Int}\n constants::Vector{T}\nend\n\nThe vector-valued affine function A x + b, where:\n\nA is the sparse matrix given by terms\nb is the 
vector constants\n\n\n\n\n\n","category":"type"},{"location":"reference/#DiffOpt.VectorScalarAffineFunction","page":"Reference","title":"DiffOpt.VectorScalarAffineFunction","text":"VectorScalarAffineFunction{T, VT} <: MOI.AbstractScalarFunction\n\nRepresents the function x ⋅ terms + constant as an MOI.AbstractScalarFunction where x[i] = MOI.VariableIndex(i). Use standard_form to convert it to a MOI.ScalarAffineFunction{T}.\n\n\n\n\n\n","category":"type"},{"location":"reference/#DiffOpt.Dπ-Union{Tuple{T}, Tuple{Vector{T}, MathOptInterface.ModelLike, DiffOpt.ProductOfSets}} where T","page":"Reference","title":"DiffOpt.Dπ","text":"Dπ(v::Vector{Float64}, model, cones::ProductOfSets)\n\nGiven a model, its cones, find the gradient of the projection of the vectors v of length equal to the number of rows in the conic form onto the cartesian product of the cones corresponding to these rows. For more info, refer to https://github.com/matbesancon/MathOptSetDistances.jl\n\n\n\n\n\n","category":"method"},{"location":"reference/#DiffOpt.add_all_model_constructors-Tuple{Any}","page":"Reference","title":"DiffOpt.add_all_model_constructors","text":"add_all_model_constructors(model)\n\nAdd all constructors of AbstractModel defined in this package to model with add_model_constructor.\n\n\n\n\n\n","category":"method"},{"location":"reference/#DiffOpt.add_model_constructor-Tuple{DiffOpt.Optimizer, Any}","page":"Reference","title":"DiffOpt.add_model_constructor","text":"addmodelconstructor(optimizer::Optimizer, model_constructor)\n\nAdd the constructor of AbstractModel for optimizer to choose from when trying to differentiate.\n\n\n\n\n\n","category":"method"},{"location":"reference/#DiffOpt.dU_from_dQ!-Tuple{Any, Any}","page":"Reference","title":"DiffOpt.dU_from_dQ!","text":"dU_from_dQ!(dQ, U)\n\nReturn the solution dU of the matrix equation dQ = dU' * U + U' * dU where dQ and U are the two argument of the function.\n\nThis function overwrites the first argument dQ to store the 
solution. The matrix U is not however modified.\n\nThe matrix dQ is assumed to be symmetric and the matrix U is assumed to be upper triangular.\n\nWe can exploit the structure of U here:\n\nIf the factorization was obtained from SVD, U would be orthogonal\nIf the factorization was obtained from Cholesky, U would be upper triangular.\n\nThe MOI bridge uses Cholesky in order to exploit sparsity so we are in the second case.\n\nWe look for an upper triangular dU as well.\n\nWe can find each column of dU by solving a triangular linear system once the previous column have been found. Indeed, let dj be the jth column of dU dU' * U = vcat(dj'U for j in axes(U, 2)) Therefore, dQ[j, 1:j] = dj'U[:, 1:j] + U[:, j]'dU[:, 1:j]SodQ[j, 1:(j-1)] - U[:, j]' * dU[:, 1:(j-1)] = dj'U[:, 1:(j-1)]anddQ[j, j] / 2 = dj'U[:, j]`\n\n\n\n\n\n","category":"method"},{"location":"reference/#DiffOpt.diff_optimizer-Tuple{Any}","page":"Reference","title":"DiffOpt.diff_optimizer","text":"diff_optimizer(optimizer_constructor)::Optimizer\n\nCreates a DiffOpt.Optimizer, which is an MOI layer with an internal optimizer and other utility methods. Results (primal, dual and slack values) are obtained by querying the internal optimizer instantiated using the optimizer_constructor. These values are required for find jacobians with respect to problem data.\n\nOne define a differentiable model by using any solver of choice. Example:\n\njulia> import DiffOpt, HiGHS\n\njulia> model = DiffOpt.diff_optimizer(HiGHS.Optimizer)\njulia> x = model.add_variable(model)\njulia> model.add_constraint(model, ...)\n\n\n\n\n\n","category":"method"},{"location":"reference/#DiffOpt.forward_differentiate!","page":"Reference","title":"DiffOpt.forward_differentiate!","text":"forward_differentiate!(model::Optimizer)\n\nWrapper method for the forward pass. 
This method will consider as input a currently solved problem and differentials with respect to problem data set with the ForwardObjectiveFunction and ForwardConstraintFunction attributes. The output solution differentials can be queried with the attribute ForwardVariablePrimal.\n\n\n\n\n\n","category":"function"},{"location":"reference/#DiffOpt.map_rows-Tuple{Function, Any, DiffOpt.ProductOfSets, Union{DiffOpt.Flattened, DiffOpt.Nested}}","page":"Reference","title":"DiffOpt.map_rows","text":"map_rows(f::Function, model, cones::ProductOfSets, map_mode::Union{Nested{T}, Flattened{T}})\n\nGiven a model, its cones and map_mode of type Nested (resp. Flattened), return a Vector{T} of length equal to the number of cones (resp. rows) in the conic form where the value for the index (resp. rows) corresponding to each cone is equal to f(ci, r) where ci is the corresponding constraint index in model and r is a UnitRange of the corresponding rows in the conic form.\n\n\n\n\n\n","category":"method"},{"location":"reference/#DiffOpt.quad_sym_half","page":"Reference","title":"DiffOpt.quad_sym_half","text":"quad_sym_half(func, vi1::MOI.VariableIndex, vi2::MOI.VariableIndex)\n\nReturn Q[i,j] = Q[j,i] where the quadratic terms of func is represented by x' Q x / 2 for a symmetric matrix Q where x[i] = vi1 and x[j] = vi2. Note that while this is equal to JuMP.coefficient(func, vi1, vi2) if vi1 != vi2, in the case vi1 == vi2, it is rather equal to 2JuMP.coefficient(func, vi1, vi2).\n\n\n\n\n\n","category":"function"},{"location":"reference/#DiffOpt.reverse_differentiate!","page":"Reference","title":"DiffOpt.reverse_differentiate!","text":"reverse_differentiate!(model::MOI.ModelLike)\n\nWrapper method for the backward pass / reverse differentiation. This method will consider as input a currently solved problem and differentials with respect to the solution set with the ReverseVariablePrimal attribute. 
The output problem data differentials can be queried with the attributes ReverseObjectiveFunction and ReverseConstraintFunction.\n\n\n\n\n\n","category":"function"},{"location":"reference/#DiffOpt.standard_form","page":"Reference","title":"DiffOpt.standard_form","text":"standard_form(func::AbstractLazyScalarFunction)\n\nConverts func to a standard MOI scalar function.\n\nstandard_form(func::MOItoJuMP)\n\nConverts func to a standard JuMP scalar function.\n\n\n\n\n\n","category":"function"},{"location":"reference/#DiffOpt.ΔQ_from_ΔU!-Tuple{Any, Any}","page":"Reference","title":"DiffOpt.ΔQ_from_ΔU!","text":"ΔQ_from_ΔU!(ΔU, U)\n\nReturn the symmetric solution ΔQ of the matrix equation triu(ΔU) = 2triu(U * ΔQ) where ΔU and U are the two argument of the function.\n\nThis function overwrites the first argument ΔU to store the solution. The matrix U is not however modified.\n\nThe matrix U is assumed to be upper triangular.\n\nWe can exploit the structure of U here:\n\nIf the factorization was obtained from SVD, U would be orthogonal\nIf the factorization was obtained from Cholesky, U would be upper triangular.\n\nThe MOI bridge uses Cholesky in order to exploit sparsity so we are in the second case.\n\nWe can find each column of ΔQ by solving a triangular linear system.\n\n\n\n\n\n","category":"method"},{"location":"reference/#DiffOpt.π-Union{Tuple{T}, Tuple{Vector{T}, MathOptInterface.ModelLike, DiffOpt.ProductOfSets}} where T","page":"Reference","title":"DiffOpt.π","text":"π(v::Vector{Float64}, model::MOI.ModelLike, cones::ProductOfSets)\n\nGiven a model, its cones, find the projection of the vectors v of length equal to the number of rows in the conic form onto the cartesian product of the cones corresponding to these rows. 
For more info, refer to https://github.com/matbesancon/MathOptSetDistances.jl\n\n\n\n\n\n","category":"method"},{"location":"reference/#DiffOpt.QuadraticProgram.LinearAlgebraSolver","page":"Reference","title":"DiffOpt.QuadraticProgram.LinearAlgebraSolver","text":"LinearAlgebraSolver\n\nOptimizer attribute for the solver to use for the linear algebra operations. Each solver must implement: solve_system(solver, LHS, RHS, iterative::Bool).\n\n\n\n\n\n","category":"type"},{"location":"reference/#DiffOpt.QuadraticProgram.Model","page":"Reference","title":"DiffOpt.QuadraticProgram.Model","text":"DiffOpt.QuadraticProgram.Model <: DiffOpt.AbstractModel\n\nModel to differentiate quadratic programs.\n\nFor the reverse differentiation, it differentiates the optimal solution z and return product of jacobian matrices (dz / dQ, dz / dq, etc) with the backward pass vector dl / dz\n\nThe method computes the product of\n\njacobian of problem solution z* with respect to problem parameters set with the DiffOpt.ReverseVariablePrimal\na backward pass vector dl / dz, where l can be a loss function\n\nNote that this method does not returns the actual jacobians.\n\nFor more info refer eqn(7) and eqn(8) of https://arxiv.org/pdf/1703.00443.pdf\n\n\n\n\n\n","category":"type"},{"location":"reference/#DiffOpt.QuadraticProgram.create_LHS_matrix","page":"Reference","title":"DiffOpt.QuadraticProgram.create_LHS_matrix","text":"create_LHS_matrix(z, λ, Q, G, h, A=nothing)\n\nInverse matrix specified on RHS of eqn(7) in https://arxiv.org/pdf/1703.00443.pdf\n\nHelper method while calling reverse_differentiate!\n\n\n\n\n\n","category":"function"},{"location":"reference/#DiffOpt.QuadraticProgram.solve_system-NTuple{4, Any}","page":"Reference","title":"DiffOpt.QuadraticProgram.solve_system","text":"Default solve_system call uses IterativeSolvers or the default linear 
solve\n\n\n\n\n\n","category":"method"},{"location":"reference/#DiffOpt.ConicProgram.Model","page":"Reference","title":"DiffOpt.ConicProgram.Model","text":"Diffopt.ConicProgram.Model <: DiffOpt.AbstractModel\n\nModel to differentiate conic programs.\n\nThe forward differentiation computes the product of the derivative (Jacobian) at the conic program parameters A, b, c to the perturbations dA, db, dc.\n\nThe reverse differentiation computes the product of the transpose of the derivative (Jacobian) at the conic program parameters A, b, c to the perturbations dx, dy, ds.\n\nFor theoretical background, refer Section 3 of Differentiating Through a Cone Program, https://arxiv.org/abs/1904.09043\n\n\n\n\n\n","category":"type"},{"location":"examples/nearest_correlation/","page":"Nearest correlation","title":"Nearest correlation","text":"EditURL = \"nearest_correlation.jl\"","category":"page"},{"location":"examples/nearest_correlation/#Nearest-correlation","page":"Nearest correlation","title":"Nearest correlation","text":"","category":"section"},{"location":"examples/nearest_correlation/","page":"Nearest correlation","title":"Nearest correlation","text":"(Image: )","category":"page"},{"location":"examples/nearest_correlation/","page":"Nearest correlation","title":"Nearest correlation","text":"This example illustrates the sensitivity analysis of the nearest correlation problem studied in [H02].","category":"page"},{"location":"examples/nearest_correlation/","page":"Nearest correlation","title":"Nearest correlation","text":"Higham, Nicholas J. Computing the nearest correlation matrix—a problem from finance. 
IMA journal of Numerical Analysis 22.3 (2002): 329-343.","category":"page"},{"location":"examples/nearest_correlation/","page":"Nearest correlation","title":"Nearest correlation","text":"using DiffOpt, JuMP, SCS, LinearAlgebra\nsolver = SCS.Optimizer\n\nfunction proj(A, dH = Diagonal(ones(size(A, 1))), H = ones(size(A)))\n n = LinearAlgebra.checksquare(A)\n model = Model(() -> DiffOpt.diff_optimizer(solver))\n @variable(model, X[1:n, 1:n] in PSDCone())\n @constraint(model, [i in 1:n], X[i, i] == 1)\n @objective(model, Min, sum((H .* (X - A)) .^ 2))\n MOI.set(\n model,\n DiffOpt.ForwardObjectiveFunction(),\n sum((dH .* (X - A)) .^ 2),\n )\n optimize!(model)\n DiffOpt.forward_differentiate!(model)\n dX = MOI.get.(model, DiffOpt.ForwardVariablePrimal(), X)\n return value.(X), dX\nend","category":"page"},{"location":"examples/nearest_correlation/","page":"Nearest correlation","title":"Nearest correlation","text":"Example from [H02, p. 334-335]:","category":"page"},{"location":"examples/nearest_correlation/","page":"Nearest correlation","title":"Nearest correlation","text":"A = LinearAlgebra.Tridiagonal(ones(2), ones(3), ones(2))","category":"page"},{"location":"examples/nearest_correlation/","page":"Nearest correlation","title":"Nearest correlation","text":"The projection is computed as follows:","category":"page"},{"location":"examples/nearest_correlation/","page":"Nearest correlation","title":"Nearest correlation","text":"X, dX = proj(A)\nnothing # hide","category":"page"},{"location":"examples/nearest_correlation/","page":"Nearest correlation","title":"Nearest correlation","text":"The projection of A is:","category":"page"},{"location":"examples/nearest_correlation/","page":"Nearest correlation","title":"Nearest correlation","text":"X","category":"page"},{"location":"examples/nearest_correlation/","page":"Nearest correlation","title":"Nearest correlation","text":"The derivative of the projection with respect to a uniform increase of the weights of the diagonal 
entries is:","category":"page"},{"location":"examples/nearest_correlation/","page":"Nearest correlation","title":"Nearest correlation","text":"dX","category":"page"},{"location":"examples/nearest_correlation/","page":"Nearest correlation","title":"Nearest correlation","text":"Example from [H02, Section 4, p. 340]:","category":"page"},{"location":"examples/nearest_correlation/","page":"Nearest correlation","title":"Nearest correlation","text":"A = LinearAlgebra.Tridiagonal(-ones(3), 2ones(4), -ones(3))","category":"page"},{"location":"examples/nearest_correlation/","page":"Nearest correlation","title":"Nearest correlation","text":"The projection is computed as follows:","category":"page"},{"location":"examples/nearest_correlation/","page":"Nearest correlation","title":"Nearest correlation","text":"X, dX = proj(A)\nnothing # hide","category":"page"},{"location":"examples/nearest_correlation/","page":"Nearest correlation","title":"Nearest correlation","text":"The projection of A is:","category":"page"},{"location":"examples/nearest_correlation/","page":"Nearest correlation","title":"Nearest correlation","text":"X","category":"page"},{"location":"examples/nearest_correlation/","page":"Nearest correlation","title":"Nearest correlation","text":"The derivative of the projection with respect to a uniform increase of the weights of the diagonal entries is:","category":"page"},{"location":"examples/nearest_correlation/","page":"Nearest correlation","title":"Nearest correlation","text":"dX","category":"page"},{"location":"examples/nearest_correlation/","page":"Nearest correlation","title":"Nearest correlation","text":"","category":"page"},{"location":"examples/nearest_correlation/","page":"Nearest correlation","title":"Nearest correlation","text":"This page was generated using Literate.jl.","category":"page"},{"location":"usage/#Usage","page":"Usage","title":"Usage","text":"","category":"section"},{"location":"usage/","page":"Usage","title":"Usage","text":"Create a 
differentiable model from existing optimizers","category":"page"},{"location":"usage/","page":"Usage","title":"Usage","text":"using JuMP\nimport DiffOpt\nimport SCS\n\nmodel = DiffOpt.diff_optimizer(SCS.Optimizer)","category":"page"},{"location":"usage/","page":"Usage","title":"Usage","text":"Update and solve the model ","category":"page"},{"location":"usage/","page":"Usage","title":"Usage","text":"x = MOI.add_variables(model, 2)\nc = MOI.add_constraint(model, ...)\n\nMOI.optimize!(model)","category":"page"},{"location":"usage/","page":"Usage","title":"Usage","text":"Finally differentiate the model (primal and dual variables specifically) to obtain product of jacobians with respect to problem parameters and a backward pass vector. Currently DiffOpt supports two backends for differentiating a model:","category":"page"},{"location":"usage/","page":"Usage","title":"Usage","text":"To differentiate Convex Quadratic Program","category":"page"},{"location":"usage/","page":"Usage","title":"Usage","text":"beginalign*\n min_x in mathbbR^n frac12 x^T Q x + q^T x \n textst A x = b qquad b in mathbbR^m \n G x leq h qquad h in mathbbR^p\nendalign*","category":"page"},{"location":"usage/","page":"Usage","title":"Usage","text":"we can use the reverse_differentiate! 
method","category":"page"},{"location":"usage/","page":"Usage","title":"Usage","text":"MOI.set.(model,\n DiffOpt.ReverseVariablePrimal(), x, ones(2))\nDiffOpt.reverse_differentiate!(model)\ngrad_obj = MOI.get(model, DiffOpt.ReverseObjectiveFunction())\ngrad_con = MOI.get.(model, DiffOpt.ReverseConstraintFunction(), c)","category":"page"},{"location":"usage/","page":"Usage","title":"Usage","text":"To differentiate convex conic program","category":"page"},{"location":"usage/","page":"Usage","title":"Usage","text":"beginalign*\n min_x in mathbbR^n c^T x \n textst A x + s = b \n b in mathbbR^m \n s in mathcalK\nendalign*","category":"page"},{"location":"usage/","page":"Usage","title":"Usage","text":"we can use the forward_differentiate! method with perturbations in matrices A, b, c:","category":"page"},{"location":"usage/","page":"Usage","title":"Usage","text":"import LinearAlgebra: ⋅\nMOI.set(model, DiffOpt.ForwardObjectiveFunction(), ones(2) ⋅ x)\nDiffOpt.forward_differentiate!(model)\ngrad_x = MOI.get.(model, DiffOpt.ForwardVariablePrimal(), x)","category":"page"},{"location":"examples/Thermal_Generation_Dispatch_Example/","page":"Thermal Generation Dispatch Example","title":"Thermal Generation Dispatch Example","text":"EditURL = \"Thermal_Generation_Dispatch_Example.jl\"","category":"page"},{"location":"examples/Thermal_Generation_Dispatch_Example/#Thermal-Generation-Dispatch-Example","page":"Thermal Generation Dispatch Example","title":"Thermal Generation Dispatch Example","text":"","category":"section"},{"location":"examples/Thermal_Generation_Dispatch_Example/","page":"Thermal Generation Dispatch Example","title":"Thermal Generation Dispatch Example","text":"(Image: )","category":"page"},{"location":"examples/Thermal_Generation_Dispatch_Example/","page":"Thermal Generation Dispatch Example","title":"Thermal Generation Dispatch Example","text":"This example illustrates the sensitivity analysis of thermal generation dispatch 
problem.","category":"page"},{"location":"examples/Thermal_Generation_Dispatch_Example/","page":"Thermal Generation Dispatch Example","title":"Thermal Generation Dispatch Example","text":"This problem can be described as the choice of thermal generation g given a demand d, a price for thermal generation c and a penalty price c_{ϕ} for any demand not attended ϕ.","category":"page"},{"location":"examples/Thermal_Generation_Dispatch_Example/","page":"Thermal Generation Dispatch Example","title":"Thermal Generation Dispatch Example","text":"beginsplit\nbeginarray ll\nmboxminimize sum_i=1^N c_i g_i + c_phi phi \nmboxst g_i ge 0 quad i=1N \n g_i le G_i quad i=1N \n sum_i=1^N g_i + phi = d\nendarray\nendsplit","category":"page"},{"location":"examples/Thermal_Generation_Dispatch_Example/","page":"Thermal Generation Dispatch Example","title":"Thermal Generation Dispatch Example","text":"where","category":"page"},{"location":"examples/Thermal_Generation_Dispatch_Example/","page":"Thermal Generation Dispatch Example","title":"Thermal Generation Dispatch Example","text":"G_{i} is the maximum possible generation for a thermal generator i","category":"page"},{"location":"examples/Thermal_Generation_Dispatch_Example/#Define-and-solve-the-Thermal-Dispatch-Problem","page":"Thermal Generation Dispatch Example","title":"Define and solve the Thermal Dispatch Problem","text":"","category":"section"},{"location":"examples/Thermal_Generation_Dispatch_Example/","page":"Thermal Generation Dispatch Example","title":"Thermal Generation Dispatch Example","text":"First, import the libraries.","category":"page"},{"location":"examples/Thermal_Generation_Dispatch_Example/","page":"Thermal Generation Dispatch Example","title":"Thermal Generation Dispatch Example","text":"using Test\nusing JuMP\nimport DiffOpt\nimport LinearAlgebra: dot\nimport HiGHS\nimport MathOptInterface as MOI\nimport Plots","category":"page"},{"location":"examples/Thermal_Generation_Dispatch_Example/","page":"Thermal 
Generation Dispatch Example","title":"Thermal Generation Dispatch Example","text":"Define the model that will be construct given a set of parameters.","category":"page"},{"location":"examples/Thermal_Generation_Dispatch_Example/","page":"Thermal Generation Dispatch Example","title":"Thermal Generation Dispatch Example","text":"function generate_model(\n d::Float64;\n g_sup::Vector{Float64},\n c_g::Vector{Float64},\n c_ϕ::Float64,\n)\n # Creation of the Model and Parameters\n model = Model(() -> DiffOpt.diff_optimizer(HiGHS.Optimizer))\n set_silent(model)\n I = length(g_sup)\n\n # Variables\n @variable(model, g[i in 1:I] >= 0.0)\n @variable(model, ϕ >= 0.0)\n\n # Constraints\n @constraint(model, limit_constraints_sup[i in 1:I], g[i] <= g_sup[i])\n @constraint(model, demand_constraint, sum(g) + ϕ == d)\n\n # Objectives\n @objective(model, Min, dot(c_g, g) + c_ϕ * ϕ)\n\n # Solve the model\n optimize!(model)\n\n # Return the solved model\n return model\nend","category":"page"},{"location":"examples/Thermal_Generation_Dispatch_Example/","page":"Thermal Generation Dispatch Example","title":"Thermal Generation Dispatch Example","text":"Define the functions that will get the primal values g and \\phi and sensitivity analysis of the demand dg/dd and d\\phi/dd from a optimized model.","category":"page"},{"location":"examples/Thermal_Generation_Dispatch_Example/","page":"Thermal Generation Dispatch Example","title":"Thermal Generation Dispatch Example","text":"function diff_forward(model::Model, ϵ::Float64 = 1.0)\n # Initialization of parameters and references to simplify the notation\n vect_ref = [model[:g]; model[:ϕ]]\n I = length(model[:g])\n\n # Get the primal solution of the model\n vect = MOI.get.(model, MOI.VariablePrimal(), vect_ref)\n\n # Pass the perturbation to the DiffOpt Framework and set the context to Forward\n constraint_equation = convert(MOI.ScalarAffineFunction{Float64}, ϵ)\n MOI.set(\n model,\n DiffOpt.ForwardConstraintFunction(),\n 
model[:demand_constraint],\n constraint_equation,\n )\n DiffOpt.forward_differentiate!(model)\n\n # Get the derivative of the model\n dvect = MOI.get.(model, DiffOpt.ForwardVariablePrimal(), vect_ref)\n\n # Return the values as a vector\n return [vect; dvect]\nend\n\nfunction diff_reverse(model::Model, ϵ::Float64 = 1.0)\n # Initialization of parameters and references to simplify the notation\n vect_ref = [model[:g]; model[:ϕ]]\n I = length(model[:g])\n\n # Get the primal solution of the model\n vect = MOI.get.(model, MOI.VariablePrimal(), vect_ref)\n\n # Set variables needed for the DiffOpt Backward Framework\n dvect = Array{Float64,1}(undef, I + 1)\n perturbation = zeros(I + 1)\n\n # Loop for each primal variable\n for i in 1:I+1\n # Set the perturbation in the Primal Variables and set the context to Backward\n perturbation[i] = ϵ\n MOI.set.(model, DiffOpt.ReverseVariablePrimal(), vect_ref, perturbation)\n DiffOpt.reverse_differentiate!(model)\n\n # Get the value of the derivative of the model\n dvect[i] = JuMP.constant(\n MOI.get(\n model,\n DiffOpt.ReverseConstraintFunction(),\n model[:demand_constraint],\n ),\n )\n perturbation[i] = 0.0\n end\n\n # Return the values as a vector\n return [vect; dvect]\nend","category":"page"},{"location":"examples/Thermal_Generation_Dispatch_Example/","page":"Thermal Generation Dispatch Example","title":"Thermal Generation Dispatch Example","text":"Initialize of Parameters","category":"page"},{"location":"examples/Thermal_Generation_Dispatch_Example/","page":"Thermal Generation Dispatch Example","title":"Thermal Generation Dispatch Example","text":"g_sup = [10.0, 20.0, 30.0]\nI = length(g_sup)\nd = 0.0:0.1:80\nd_size = length(d)\nc_g = [1.0, 3.0, 5.0]\nc_ϕ = 10.0;\nnothing #hide","category":"page"},{"location":"examples/Thermal_Generation_Dispatch_Example/","page":"Thermal Generation Dispatch Example","title":"Thermal Generation Dispatch Example","text":"Generate models for each demand 
d","category":"page"},{"location":"examples/Thermal_Generation_Dispatch_Example/","page":"Thermal Generation Dispatch Example","title":"Thermal Generation Dispatch Example","text":"models = generate_model.(d; g_sup = g_sup, c_g = c_g, c_ϕ = c_ϕ);\nnothing #hide","category":"page"},{"location":"examples/Thermal_Generation_Dispatch_Example/","page":"Thermal Generation Dispatch Example","title":"Thermal Generation Dispatch Example","text":"Get the results of models with the DiffOpt Forward and Backward context","category":"page"},{"location":"examples/Thermal_Generation_Dispatch_Example/","page":"Thermal Generation Dispatch Example","title":"Thermal Generation Dispatch Example","text":"result_forward = diff_forward.(models)\noptimize!.(models)\nresult_reverse = diff_reverse.(models);\nnothing #hide","category":"page"},{"location":"examples/Thermal_Generation_Dispatch_Example/","page":"Thermal Generation Dispatch Example","title":"Thermal Generation Dispatch Example","text":"Organization of results to plot Initialize data_results that will contain every result","category":"page"},{"location":"examples/Thermal_Generation_Dispatch_Example/","page":"Thermal Generation Dispatch Example","title":"Thermal Generation Dispatch Example","text":"data_results = Array{Float64,3}(undef, 2, d_size, 2 * (I + 1));\nnothing #hide","category":"page"},{"location":"examples/Thermal_Generation_Dispatch_Example/","page":"Thermal Generation Dispatch Example","title":"Thermal Generation Dispatch Example","text":"Populate the data_results array","category":"page"},{"location":"examples/Thermal_Generation_Dispatch_Example/","page":"Thermal Generation Dispatch Example","title":"Thermal Generation Dispatch Example","text":"for k in 1:d_size\n data_results[1, k, :] = result_forward[k]\n data_results[2, k, :] = result_reverse[k]\nend","category":"page"},{"location":"examples/Thermal_Generation_Dispatch_Example/#Results-with-Plot-graphs","page":"Thermal Generation Dispatch Example","title":"Results 
with Plot graphs","text":"","category":"section"},{"location":"examples/Thermal_Generation_Dispatch_Example/#Results-for-the-forward-context","page":"Thermal Generation Dispatch Example","title":"Results for the forward context","text":"","category":"section"},{"location":"examples/Thermal_Generation_Dispatch_Example/","page":"Thermal Generation Dispatch Example","title":"Thermal Generation Dispatch Example","text":"Result Primal Values:","category":"page"},{"location":"examples/Thermal_Generation_Dispatch_Example/","page":"Thermal Generation Dispatch Example","title":"Thermal Generation Dispatch Example","text":"Plots.plot(\n d,\n data_results[1, :, 1:I+1];\n title = \"Generation by Demand\",\n label = [\"Thermal Generation 1\" \"Thermal Generation 2\" \"Thermal Generation 3\" \"Generation Deficit\"],\n xlabel = \"Demand [unit]\",\n ylabel = \"Generation [unit]\",\n)","category":"page"},{"location":"examples/Thermal_Generation_Dispatch_Example/","page":"Thermal Generation Dispatch Example","title":"Thermal Generation Dispatch Example","text":"Result Sensitivity Analysis:","category":"page"},{"location":"examples/Thermal_Generation_Dispatch_Example/","page":"Thermal Generation Dispatch Example","title":"Thermal Generation Dispatch Example","text":"Plots.plot(\n d,\n data_results[1, :, I+2:2*(I+1)];\n title = \"Sensitivity of Generation by Demand\",\n label = [\"T. Gen. 1 Sensitivity\" \"T. Gen. 2 Sensitivity\" \"T. Gen. 3 Sensitivity\" \"Gen. 
Deficit Sensitivity\"],\n xlabel = \"Demand [unit]\",\n ylabel = \"Sensitivity [-]\",\n)","category":"page"},{"location":"examples/Thermal_Generation_Dispatch_Example/#Results-for-the-reverse-context","page":"Thermal Generation Dispatch Example","title":"Results for the reverse context","text":"","category":"section"},{"location":"examples/Thermal_Generation_Dispatch_Example/","page":"Thermal Generation Dispatch Example","title":"Thermal Generation Dispatch Example","text":"Result Primal Values:","category":"page"},{"location":"examples/Thermal_Generation_Dispatch_Example/","page":"Thermal Generation Dispatch Example","title":"Thermal Generation Dispatch Example","text":"Plots.plot(\n d,\n data_results[2, :, 1:I+1];\n title = \"Generation by Demand\",\n label = [\"Thermal Generation 1\" \"Thermal Generation 2\" \"Thermal Generation 3\" \"Generation Deficit\"],\n xlabel = \"Demand [unit]\",\n ylabel = \"Generation [unit]\",\n)","category":"page"},{"location":"examples/Thermal_Generation_Dispatch_Example/","page":"Thermal Generation Dispatch Example","title":"Thermal Generation Dispatch Example","text":"Result Sensitivity Analysis:","category":"page"},{"location":"examples/Thermal_Generation_Dispatch_Example/","page":"Thermal Generation Dispatch Example","title":"Thermal Generation Dispatch Example","text":"Plots.plot(\n d,\n data_results[2, :, I+2:2*(I+1)];\n title = \"Sensitivity of Generation by Demand\",\n label = [\"T. Gen. 1 Sensitivity\" \"T. Gen. 2 Sensitivity\" \"T. Gen. 3 Sensitivity\" \"Gen. 
Deficit Sensitivity\"],\n xlabel = \"Demand [unit]\",\n ylabel = \"Sensitivity [-]\",\n)","category":"page"},{"location":"examples/Thermal_Generation_Dispatch_Example/","page":"Thermal Generation Dispatch Example","title":"Thermal Generation Dispatch Example","text":"","category":"page"},{"location":"examples/Thermal_Generation_Dispatch_Example/","page":"Thermal Generation Dispatch Example","title":"Thermal Generation Dispatch Example","text":"This page was generated using Literate.jl.","category":"page"},{"location":"#DiffOpt.jl","page":"Home","title":"DiffOpt.jl","text":"","category":"section"},{"location":"","page":"Home","title":"Home","text":"DiffOpt.jl is a package for differentiating convex optimization program (JuMP.jl or MathOptInterface.jl models) with respect to program parameters. Note that this package does not contain any solver. This package has two major backends, available via the reverse_differentiate! and forward_differentiate! methods, to differentiate models (quadratic or conic) with optimal solutions.","category":"page"},{"location":"","page":"Home","title":"Home","text":"note: Note\nCurrently supports linear programs (LP), convex quadratic programs (QP) and convex conic programs (SDP, SOCP, exponential cone constraints only). 
","category":"page"},{"location":"#Installation","page":"Home","title":"Installation","text":"","category":"section"},{"location":"","page":"Home","title":"Home","text":"DiffOpt can be installed through the Julia package manager:","category":"page"},{"location":"","page":"Home","title":"Home","text":"(v1.3) pkg> add https://github.com/jump-dev/DiffOpt.jl","category":"page"},{"location":"#Why-are-Differentiable-optimization-problems-important?","page":"Home","title":"Why are Differentiable optimization problems important?","text":"","category":"section"},{"location":"","page":"Home","title":"Home","text":"Differentiable optimization is a promising field of convex optimization and has many potential applications in game theory, control theory and machine learning (specifically deep learning - refer this video for more). Recent work has shown how to differentiate specific subclasses of convex optimization problems. But several applications remain unexplored (refer section 8 of this really good thesis). 
With the help of automatic differentiation, differentiable optimization can have a significant impact on creating end-to-end differentiable systems to model neural networks, stochastic processes, or a game.","category":"page"},{"location":"#Contributing","page":"Home","title":"Contributing","text":"","category":"section"},{"location":"","page":"Home","title":"Home","text":"Contributions to this package are more than welcome, if you find a bug or have any suggestions for the documentation please post it on the github issue tracker.","category":"page"},{"location":"","page":"Home","title":"Home","text":"When contributing please note that the package follows the JuMP style guide","category":"page"},{"location":"examples/chainrules_unit/","page":"ChainRules integration demo: Relaxed Unit Commitment","title":"ChainRules integration demo: Relaxed Unit Commitment","text":"EditURL = \"chainrules_unit.jl\"","category":"page"},{"location":"examples/chainrules_unit/#ChainRules-integration-demo:-Relaxed-Unit-Commitment","page":"ChainRules integration demo: Relaxed Unit Commitment","title":"ChainRules integration demo: Relaxed Unit Commitment","text":"","category":"section"},{"location":"examples/chainrules_unit/","page":"ChainRules integration demo: Relaxed Unit Commitment","title":"ChainRules integration demo: Relaxed Unit Commitment","text":"(Image: )","category":"page"},{"location":"examples/chainrules_unit/","page":"ChainRules integration demo: Relaxed Unit Commitment","title":"ChainRules integration demo: Relaxed Unit Commitment","text":"In this example, we will demonstrate the integration of DiffOpt with ChainRulesCore.jl, the library allowing the definition of derivatives for functions that can then be used by automatic differentiation systems.","category":"page"},{"location":"examples/chainrules_unit/","page":"ChainRules integration demo: Relaxed Unit Commitment","title":"ChainRules integration demo: Relaxed Unit Commitment","text":"using JuMP\nimport DiffOpt\nimport 
Plots\nimport LinearAlgebra: ⋅\nimport HiGHS\nimport ChainRulesCore","category":"page"},{"location":"examples/chainrules_unit/#Unit-commitment-problem","page":"ChainRules integration demo: Relaxed Unit Commitment","title":"Unit commitment problem","text":"","category":"section"},{"location":"examples/chainrules_unit/","page":"ChainRules integration demo: Relaxed Unit Commitment","title":"ChainRules integration demo: Relaxed Unit Commitment","text":"We will consider a unit commitment problem, finding the cost-minimizing activation of generation units in a power network over multiple time periods. The considered constraints include:","category":"page"},{"location":"examples/chainrules_unit/","page":"ChainRules integration demo: Relaxed Unit Commitment","title":"ChainRules integration demo: Relaxed Unit Commitment","text":"Demand satisfaction of several loads\nRamping constraints\nGeneration limits.","category":"page"},{"location":"examples/chainrules_unit/","page":"ChainRules integration demo: Relaxed Unit Commitment","title":"ChainRules integration demo: Relaxed Unit Commitment","text":"The decisions are:","category":"page"},{"location":"examples/chainrules_unit/","page":"ChainRules integration demo: Relaxed Unit Commitment","title":"ChainRules integration demo: Relaxed Unit Commitment","text":"u_it in 01: activation of the i-th unit at time t\np_it: power output of the i-th unit at time t.","category":"page"},{"location":"examples/chainrules_unit/","page":"ChainRules integration demo: Relaxed Unit Commitment","title":"ChainRules integration demo: Relaxed Unit Commitment","text":"DiffOpt handles convex optimization problems only, we therefore relax the domain of the u_it variables to left01right.","category":"page"},{"location":"examples/chainrules_unit/#Primal-UC-problem","page":"ChainRules integration demo: Relaxed Unit Commitment","title":"Primal UC problem","text":"","category":"section"},{"location":"examples/chainrules_unit/","page":"ChainRules integration 
demo: Relaxed Unit Commitment","title":"ChainRules integration demo: Relaxed Unit Commitment","text":"ChainRules defines the differentiation of functions. The actual function that is differentiated in the context of DiffOpt is the solution map taking in input the problem parameters and returning the solution.","category":"page"},{"location":"examples/chainrules_unit/","page":"ChainRules integration demo: Relaxed Unit Commitment","title":"ChainRules integration demo: Relaxed Unit Commitment","text":"function unit_commitment(\n load1_demand,\n load2_demand,\n gen_costs,\n noload_costs;\n model = Model(HiGHS.Optimizer),\n silent = false,\n)\n MOI.set(model, MOI.Silent(), silent)\n\n # Problem data\n units = [1, 2] # Generator identifiers\n load_names = [\"Load1\", \"Load2\"] # Load identifiers\n n_periods = 4 # Number of time periods\n Pmin = Dict(1 => fill(0.5, n_periods), 2 => fill(0.5, n_periods)) # Minimum power output (pu)\n Pmax = Dict(1 => fill(3.0, n_periods), 2 => fill(3.0, n_periods)) # Maximum power output (pu)\n RR = Dict(1 => 0.25, 2 => 0.25) # Ramp rates (pu/min)\n P0 = Dict(1 => 0.0, 2 => 0.0) # Initial power output (pu)\n D = Dict(\"Load1\" => load1_demand, \"Load2\" => load2_demand) # Demand (pu)\n Cp = Dict(1 => gen_costs[1], 2 => gen_costs[2]) # Generation cost coefficient ($/pu)\n Cnl = Dict(1 => noload_costs[1], 2 => noload_costs[2]) # No-load cost ($)\n\n # Variables\n # Note: u represents the activation of generation units.\n # Would be binary in the typical UC problem, relaxed here to u ∈ [0,1]\n # for a linear relaxation.\n @variable(model, 0 <= u[g in units, t in 1:n_periods] <= 1) # Commitment\n @variable(model, p[g in units, t in 1:n_periods] >= 0) # Power output (pu)\n\n # Constraints\n\n # Energy balance\n @constraint(\n model,\n energy_balance_cons[t in 1:n_periods],\n sum(p[g, t] for g in units) == sum(D[l][t] for l in load_names),\n )\n\n # Generation limits\n @constraint(\n model,\n [g in units, t in 1:n_periods],\n Pmin[g][t] * u[g, 
t] <= p[g, t]\n )\n @constraint(\n model,\n [g in units, t in 1:n_periods],\n p[g, t] <= Pmax[g][t] * u[g, t]\n )\n\n # Ramp rates\n @constraint(\n model,\n [g in units, t in 2:n_periods],\n p[g, t] - p[g, t-1] <= 60 * RR[g]\n )\n @constraint(model, [g in units], p[g, 1] - P0[g] <= 60 * RR[g])\n @constraint(\n model,\n [g in units, t in 2:n_periods],\n p[g, t-1] - p[g, t] <= 60 * RR[g]\n )\n @constraint(model, [g in units], P0[g] - p[g, 1] <= 60 * RR[g])\n\n # Objective\n @objective(\n model,\n Min,\n sum(\n (Cp[g] * p[g, t]) + (Cnl[g] * u[g, t]) for g in units,\n t in 1:n_periods\n ),\n )\n\n optimize!(model)\n # asserting finite optimal value\n @assert termination_status(model) == MOI.OPTIMAL\n # converting to dense matrix\n return JuMP.value.(p.data)\nend\n\nm = Model(HiGHS.Optimizer)\n@show unit_commitment(\n [1.0, 1.2, 1.4, 1.6],\n [1.0, 1.2, 1.4, 1.6],\n [1000.0, 1500.0],\n [500.0, 1000.0],\n model = m,\n silent = true,\n)","category":"page"},{"location":"examples/chainrules_unit/#Perturbation-of-a-single-input-parameter","page":"ChainRules integration demo: Relaxed Unit Commitment","title":"Perturbation of a single input parameter","text":"","category":"section"},{"location":"examples/chainrules_unit/","page":"ChainRules integration demo: Relaxed Unit Commitment","title":"ChainRules integration demo: Relaxed Unit Commitment","text":"Let us vary the demand at the second time frame on both loads:","category":"page"},{"location":"examples/chainrules_unit/","page":"ChainRules integration demo: Relaxed Unit Commitment","title":"ChainRules integration demo: Relaxed Unit Commitment","text":"demand_values = 0.05:0.05:3.0\npvalues = map(demand_values) do di\n return unit_commitment(\n [1.0, di, 1.4, 1.6],\n [1.0, di, 1.4, 1.6],\n [1000.0, 1500.0],\n [500.0, 1000.0];\n silent = true,\n )\nend\npflat = [getindex.(pvalues, i) for i in eachindex(pvalues[1])];\nnothing #hide","category":"page"},{"location":"examples/chainrules_unit/","page":"ChainRules integration demo: 
Relaxed Unit Commitment","title":"ChainRules integration demo: Relaxed Unit Commitment","text":"The influence of this variation of the demand is piecewise linear on the generation at different time frames:","category":"page"},{"location":"examples/chainrules_unit/","page":"ChainRules integration demo: Relaxed Unit Commitment","title":"ChainRules integration demo: Relaxed Unit Commitment","text":"Plots.scatter(demand_values, pflat; xaxis = (\"Demand\"), yaxis = (\"Generation\"))\nPlots.title!(\"Different time frames and generators\")\nPlots.xlims!(0.0, 3.5)","category":"page"},{"location":"examples/chainrules_unit/#Forward-Differentiation","page":"ChainRules integration demo: Relaxed Unit Commitment","title":"Forward Differentiation","text":"","category":"section"},{"location":"examples/chainrules_unit/","page":"ChainRules integration demo: Relaxed Unit Commitment","title":"ChainRules integration demo: Relaxed Unit Commitment","text":"Forward differentiation rule for the solution map of the unit commitment problem. 
It takes as arguments:","category":"page"},{"location":"examples/chainrules_unit/","page":"ChainRules integration demo: Relaxed Unit Commitment","title":"ChainRules integration demo: Relaxed Unit Commitment","text":"the perturbations on the input parameters\nthe differentiated function\nthe primal values of the input parameters,","category":"page"},{"location":"examples/chainrules_unit/","page":"ChainRules integration demo: Relaxed Unit Commitment","title":"ChainRules integration demo: Relaxed Unit Commitment","text":"and returns a tuple (primal_output, perturbations), the main primal result and the perturbation propagated to this result:","category":"page"},{"location":"examples/chainrules_unit/","page":"ChainRules integration demo: Relaxed Unit Commitment","title":"ChainRules integration demo: Relaxed Unit Commitment","text":"function ChainRulesCore.frule(\n (_, Δload1_demand, Δload2_demand, Δgen_costs, Δnoload_costs),\n ::typeof(unit_commitment),\n load1_demand,\n load2_demand,\n gen_costs,\n noload_costs;\n optimizer = HiGHS.Optimizer,\n)\n # creating the UC model with a DiffOpt optimizer wrapper around HiGHS\n model = Model(() -> DiffOpt.diff_optimizer(optimizer))\n # building and solving the main model\n pv = unit_commitment(\n load1_demand,\n load2_demand,\n gen_costs,\n noload_costs;\n model = model,\n )\n energy_balance_cons = model[:energy_balance_cons]\n\n # Setting some perturbation of the energy balance constraints\n # Perturbations are set as MOI functions\n Δenergy_balance = [\n convert(MOI.ScalarAffineFunction{Float64}, d1 + d2) for\n (d1, d2) in zip(Δload1_demand, Δload2_demand)\n ]\n MOI.set.(\n model,\n DiffOpt.ForwardConstraintFunction(),\n energy_balance_cons,\n Δenergy_balance,\n )\n\n p = model[:p]\n u = model[:u]\n\n # setting the perturbation of the linear objective\n Δobj =\n sum(Δgen_costs ⋅ p[:, t] + Δnoload_costs ⋅ u[:, t] for t in size(p, 2))\n MOI.set(model, DiffOpt.ForwardObjectiveFunction(), Δobj)\n 
DiffOpt.forward_differentiate!(JuMP.backend(model))\n # querying the corresponding perturbation of the decision\n Δp = MOI.get.(model, DiffOpt.ForwardVariablePrimal(), p)\n return (pv, Δp.data)\nend","category":"page"},{"location":"examples/chainrules_unit/","page":"ChainRules integration demo: Relaxed Unit Commitment","title":"ChainRules integration demo: Relaxed Unit Commitment","text":"We can now compute the perturbation of the output powers Δpv for a perturbation of the first load demand at time 2:","category":"page"},{"location":"examples/chainrules_unit/","page":"ChainRules integration demo: Relaxed Unit Commitment","title":"ChainRules integration demo: Relaxed Unit Commitment","text":"load1_demand = [1.0, 1.0, 1.4, 1.6]\nload2_demand = [1.0, 1.0, 1.4, 1.6]\ngen_costs = [1000.0, 1500.0]\nnoload_costs = [500.0, 1000.0];\nnothing #hide","category":"page"},{"location":"examples/chainrules_unit/","page":"ChainRules integration demo: Relaxed Unit Commitment","title":"ChainRules integration demo: Relaxed Unit Commitment","text":"all input perturbations are 0 except first load at time 2","category":"page"},{"location":"examples/chainrules_unit/","page":"ChainRules integration demo: Relaxed Unit Commitment","title":"ChainRules integration demo: Relaxed Unit Commitment","text":"Δload1_demand = 0 * load1_demand\nΔload1_demand[2] = 1.0\nΔload2_demand = 0 * load2_demand\nΔgen_costs = 0 * gen_costs\nΔnoload_costs = 0 * noload_costs\n(pv, Δpv) = ChainRulesCore.frule(\n (nothing, Δload1_demand, Δload2_demand, Δgen_costs, Δnoload_costs),\n unit_commitment,\n load1_demand,\n load2_demand,\n gen_costs,\n noload_costs,\n)\n\nΔpv","category":"page"},{"location":"examples/chainrules_unit/","page":"ChainRules integration demo: Relaxed Unit Commitment","title":"ChainRules integration demo: Relaxed Unit Commitment","text":"The result matches what we observe in the previous figure: the generation of the first generator at the second time frame (third element on the 
plot).","category":"page"},{"location":"examples/chainrules_unit/#Reverse-mode-differentiation-of-the-solution-map","page":"ChainRules integration demo: Relaxed Unit Commitment","title":"Reverse-mode differentiation of the solution map","text":"","category":"section"},{"location":"examples/chainrules_unit/","page":"ChainRules integration demo: Relaxed Unit Commitment","title":"ChainRules integration demo: Relaxed Unit Commitment","text":"The rrule returns the primal and a pullback. The pullback takes a seed for the optimal solution ̄p and returns derivatives with respect to each input parameter of the function.","category":"page"},{"location":"examples/chainrules_unit/","page":"ChainRules integration demo: Relaxed Unit Commitment","title":"ChainRules integration demo: Relaxed Unit Commitment","text":"function ChainRulesCore.rrule(\n ::typeof(unit_commitment),\n load1_demand,\n load2_demand,\n gen_costs,\n noload_costs;\n optimizer = HiGHS.Optimizer,\n silent = false,\n)\n model = Model(() -> DiffOpt.diff_optimizer(optimizer))\n # solve the forward UC problem\n pv = unit_commitment(\n load1_demand,\n load2_demand,\n gen_costs,\n noload_costs;\n model = model,\n silent = silent,\n )\n function pullback_unit_commitment(pb)\n p = model[:p]\n u = model[:u]\n energy_balance_cons = model[:energy_balance_cons]\n\n MOI.set.(model, DiffOpt.ReverseVariablePrimal(), p, pb)\n DiffOpt.reverse_differentiate!(JuMP.backend(model))\n\n obj = MOI.get(model, DiffOpt.ReverseObjectiveFunction())\n\n # computing derivative wrt linear objective costs\n dgen_costs = similar(gen_costs)\n dgen_costs[1] = sum(JuMP.coefficient.(obj, p[1, :]))\n dgen_costs[2] = sum(JuMP.coefficient.(obj, p[2, :]))\n\n dnoload_costs = similar(noload_costs)\n dnoload_costs[1] = sum(JuMP.coefficient.(obj, u[1, :]))\n dnoload_costs[2] = sum(JuMP.coefficient.(obj, u[2, :]))\n\n # computing derivative wrt constraint constant\n dload1_demand =\n JuMP.constant.(\n MOI.get.(\n model,\n 
DiffOpt.ReverseConstraintFunction(),\n energy_balance_cons,\n )\n )\n dload2_demand = copy(dload1_demand)\n return (dload1_demand, dload2_demand, dgen_costs, dnoload_costs)\n end\n return (pv, pullback_unit_commitment)\nend","category":"page"},{"location":"examples/chainrules_unit/","page":"ChainRules integration demo: Relaxed Unit Commitment","title":"ChainRules integration demo: Relaxed Unit Commitment","text":"We can set a seed of one on the power of the first generator at the second time frame and zero for all other parts of the solution:","category":"page"},{"location":"examples/chainrules_unit/","page":"ChainRules integration demo: Relaxed Unit Commitment","title":"ChainRules integration demo: Relaxed Unit Commitment","text":"(pv, pullback_unit_commitment) = ChainRulesCore.rrule(\n unit_commitment,\n load1_demand,\n load2_demand,\n gen_costs,\n noload_costs;\n optimizer = HiGHS.Optimizer,\n silent = true,\n)\ndpv = 0 * pv\ndpv[1, 2] = 1\ndargs = pullback_unit_commitment(dpv)\n(dload1_demand, dload2_demand, dgen_costs, dnoload_costs) = dargs;\nnothing #hide","category":"page"},{"location":"examples/chainrules_unit/","page":"ChainRules integration demo: Relaxed Unit Commitment","title":"ChainRules integration demo: Relaxed Unit Commitment","text":"The sensitivities with respect to the load demands are:","category":"page"},{"location":"examples/chainrules_unit/","page":"ChainRules integration demo: Relaxed Unit Commitment","title":"ChainRules integration demo: Relaxed Unit Commitment","text":"dload1_demand","category":"page"},{"location":"examples/chainrules_unit/","page":"ChainRules integration demo: Relaxed Unit Commitment","title":"ChainRules integration demo: Relaxed Unit Commitment","text":"and:","category":"page"},{"location":"examples/chainrules_unit/","page":"ChainRules integration demo: Relaxed Unit Commitment","title":"ChainRules integration demo: Relaxed Unit 
Commitment","text":"dload2_demand","category":"page"},{"location":"examples/chainrules_unit/","page":"ChainRules integration demo: Relaxed Unit Commitment","title":"ChainRules integration demo: Relaxed Unit Commitment","text":"The sensitivity of the generation is propagated to the sensitivity of both loads at the second time frame.","category":"page"},{"location":"examples/chainrules_unit/","page":"ChainRules integration demo: Relaxed Unit Commitment","title":"ChainRules integration demo: Relaxed Unit Commitment","text":"This example integrating ChainRules was designed with support from Invenia Technical Computing.","category":"page"},{"location":"examples/chainrules_unit/","page":"ChainRules integration demo: Relaxed Unit Commitment","title":"ChainRules integration demo: Relaxed Unit Commitment","text":"","category":"page"},{"location":"examples/chainrules_unit/","page":"ChainRules integration demo: Relaxed Unit Commitment","title":"ChainRules integration demo: Relaxed Unit Commitment","text":"This page was generated using Literate.jl.","category":"page"},{"location":"examples/custom-relu/","page":"Custom ReLU layer","title":"Custom ReLU layer","text":"EditURL = \"custom-relu.jl\"","category":"page"},{"location":"examples/custom-relu/#Custom-ReLU-layer","page":"Custom ReLU layer","title":"Custom ReLU layer","text":"","category":"section"},{"location":"examples/custom-relu/","page":"Custom ReLU layer","title":"Custom ReLU layer","text":"(Image: )","category":"page"},{"location":"examples/custom-relu/","page":"Custom ReLU layer","title":"Custom ReLU layer","text":"We demonstrate how DiffOpt can be used to generate a simple neural network unit - the ReLU layer. 
A neural network is created using Flux.jl and trained on the MNIST dataset.","category":"page"},{"location":"examples/custom-relu/","page":"Custom ReLU layer","title":"Custom ReLU layer","text":"This tutorial uses the following packages","category":"page"},{"location":"examples/custom-relu/","page":"Custom ReLU layer","title":"Custom ReLU layer","text":"using JuMP\nimport DiffOpt\nimport Ipopt\nimport ChainRulesCore\nimport Flux\nimport MLDatasets\nimport Statistics\nimport Base.Iterators: repeated\nusing LinearAlgebra","category":"page"},{"location":"examples/custom-relu/#The-ReLU-and-its-derivative","page":"Custom ReLU layer","title":"The ReLU and its derivative","text":"","category":"section"},{"location":"examples/custom-relu/","page":"Custom ReLU layer","title":"Custom ReLU layer","text":"Define a relu through an optimization problem solved by a quadratic solver. Return the solution of the problem.","category":"page"},{"location":"examples/custom-relu/","page":"Custom ReLU layer","title":"Custom ReLU layer","text":"function matrix_relu(\n y::Matrix;\n model = Model(() -> DiffOpt.diff_optimizer(Ipopt.Optimizer)),\n)\n layer_size, batch_size = size(y)\n empty!(model)\n set_silent(model)\n @variable(model, x[1:layer_size, 1:batch_size] >= 0)\n @objective(model, Min, x[:]'x[:] - 2y[:]'x[:])\n optimize!(model)\n return value.(x)\nend","category":"page"},{"location":"examples/custom-relu/","page":"Custom ReLU layer","title":"Custom ReLU layer","text":"Define the reverse differentiation rule, for the function we defined above.","category":"page"},{"location":"examples/custom-relu/","page":"Custom ReLU layer","title":"Custom ReLU layer","text":"function ChainRulesCore.rrule(::typeof(matrix_relu), y::Matrix{T}) where {T}\n model = Model(() -> DiffOpt.diff_optimizer(Ipopt.Optimizer))\n pv = matrix_relu(y; model = model)\n function pullback_matrix_relu(dl_dx)\n # some value from the backpropagation (e.g., loss) is denoted by `l`\n # so `dl_dy` is the derivative of `l` 
wrt `y`\n x = model[:x] # load decision variable `x` into scope\n dl_dy = zeros(T, size(dl_dx))\n dl_dq = zeros(T, size(dl_dx))\n # set sensitivities\n MOI.set.(model, DiffOpt.ReverseVariablePrimal(), x[:], dl_dx[:])\n # compute grad\n DiffOpt.reverse_differentiate!(model)\n # return gradient wrt objective function parameters\n obj_exp = MOI.get(model, DiffOpt.ReverseObjectiveFunction())\n # coeff of `x` in q'x = -2y'x\n dl_dq[:] .= JuMP.coefficient.(obj_exp, x[:])\n dq_dy = -2 # dq/dy = -2\n dl_dy[:] .= dl_dq[:] * dq_dy\n return (ChainRulesCore.NoTangent(), dl_dy)\n end\n return pv, pullback_matrix_relu\nend","category":"page"},{"location":"examples/custom-relu/","page":"Custom ReLU layer","title":"Custom ReLU layer","text":"For more details about backpropagation, visit Introduction, ChainRulesCore.jl.","category":"page"},{"location":"examples/custom-relu/#Define-the-network","page":"Custom ReLU layer","title":"Define the network","text":"","category":"section"},{"location":"examples/custom-relu/","page":"Custom ReLU layer","title":"Custom ReLU layer","text":"layer_size = 10\nm = Flux.Chain(\n Flux.Dense(784, layer_size), # 784 being image linear dimension (28 x 28)\n matrix_relu,\n Flux.Dense(layer_size, 10), # 10 being the number of outcomes (0 to 9)\n Flux.softmax,\n)","category":"page"},{"location":"examples/custom-relu/#Prepare-data","page":"Custom ReLU layer","title":"Prepare data","text":"","category":"section"},{"location":"examples/custom-relu/","page":"Custom ReLU layer","title":"Custom ReLU layer","text":"N = 1000 # batch size\n# Preprocessing train data\nimgs = MLDatasets.MNIST.traintensor(1:N)\nlabels = MLDatasets.MNIST.trainlabels(1:N)\ntrain_X = float.(reshape(imgs, size(imgs, 1) * size(imgs, 2), N)) # stack images\ntrain_Y = Flux.onehotbatch(labels, 0:9);\n# Preprocessing test data\ntest_imgs = MLDatasets.MNIST.testtensor(1:N)\ntest_labels = MLDatasets.MNIST.testlabels(1:N)\ntest_X = float.(reshape(test_imgs, size(test_imgs, 1) * size(test_imgs, 
2), N))\ntest_Y = Flux.onehotbatch(test_labels, 0:9);\nnothing #hide","category":"page"},{"location":"examples/custom-relu/","page":"Custom ReLU layer","title":"Custom ReLU layer","text":"Define input data The original data is repeated epochs times because Flux.train! only loops through the data set once","category":"page"},{"location":"examples/custom-relu/","page":"Custom ReLU layer","title":"Custom ReLU layer","text":"epochs = 50 # ~1 minute (i7 8th gen with 16gb RAM)\n# epochs = 100 # leads to 77.8% in about 2 minutes\ndataset = repeated((train_X, train_Y), epochs);\nnothing #hide","category":"page"},{"location":"examples/custom-relu/#Network-training","page":"Custom ReLU layer","title":"Network training","text":"","category":"section"},{"location":"examples/custom-relu/","page":"Custom ReLU layer","title":"Custom ReLU layer","text":"training loss function, Flux optimizer","category":"page"},{"location":"examples/custom-relu/","page":"Custom ReLU layer","title":"Custom ReLU layer","text":"custom_loss(x, y) = Flux.crossentropy(m(x), y)\nopt = Flux.Adam()\nevalcb = () -> @show(custom_loss(train_X, train_Y))","category":"page"},{"location":"examples/custom-relu/","page":"Custom ReLU layer","title":"Custom ReLU layer","text":"Train to optimize network parameters","category":"page"},{"location":"examples/custom-relu/","page":"Custom ReLU layer","title":"Custom ReLU layer","text":"@time Flux.train!(\n custom_loss,\n Flux.params(m),\n dataset,\n opt,\n cb = Flux.throttle(evalcb, 5),\n);\nnothing #hide","category":"page"},{"location":"examples/custom-relu/","page":"Custom ReLU layer","title":"Custom ReLU layer","text":"Although our custom implementation takes time, it is able to reach similar accuracy as the usual ReLU function implementation.","category":"page"},{"location":"examples/custom-relu/#Accuracy-results","page":"Custom ReLU layer","title":"Accuracy results","text":"","category":"section"},{"location":"examples/custom-relu/","page":"Custom ReLU 
layer","title":"Custom ReLU layer","text":"Average of correct guesses","category":"page"},{"location":"examples/custom-relu/","page":"Custom ReLU layer","title":"Custom ReLU layer","text":"accuracy(x, y) = Statistics.mean(Flux.onecold(m(x)) .== Flux.onecold(y));\nnothing #hide","category":"page"},{"location":"examples/custom-relu/","page":"Custom ReLU layer","title":"Custom ReLU layer","text":"Training accuracy","category":"page"},{"location":"examples/custom-relu/","page":"Custom ReLU layer","title":"Custom ReLU layer","text":"accuracy(train_X, train_Y)","category":"page"},{"location":"examples/custom-relu/","page":"Custom ReLU layer","title":"Custom ReLU layer","text":"Test accuracy","category":"page"},{"location":"examples/custom-relu/","page":"Custom ReLU layer","title":"Custom ReLU layer","text":"accuracy(test_X, test_Y)","category":"page"},{"location":"examples/custom-relu/","page":"Custom ReLU layer","title":"Custom ReLU layer","text":"Note that the accuracy is low due to simplified training. 
It is possible to increase the number of samples N, the number of epochs epoch and the connectivity inner.","category":"page"},{"location":"examples/custom-relu/","page":"Custom ReLU layer","title":"Custom ReLU layer","text":"","category":"page"},{"location":"examples/custom-relu/","page":"Custom ReLU layer","title":"Custom ReLU layer","text":"This page was generated using Literate.jl.","category":"page"}] +[{"location":"examples/sensitivity-analysis-ridge/","page":"Sensitivity Analysis of Ridge Regression","title":"Sensitivity Analysis of Ridge Regression","text":"EditURL = \"sensitivity-analysis-ridge.jl\"","category":"page"},{"location":"examples/sensitivity-analysis-ridge/#Sensitivity-Analysis-of-Ridge-Regression","page":"Sensitivity Analysis of Ridge Regression","title":"Sensitivity Analysis of Ridge Regression","text":"","category":"section"},{"location":"examples/sensitivity-analysis-ridge/","page":"Sensitivity Analysis of Ridge Regression","title":"Sensitivity Analysis of Ridge Regression","text":"(Image: )","category":"page"},{"location":"examples/sensitivity-analysis-ridge/","page":"Sensitivity Analysis of Ridge Regression","title":"Sensitivity Analysis of Ridge Regression","text":"This example illustrates the sensitivity analysis of data points in a Ridge Regression problem. 
The general form of the problem is given below:","category":"page"},{"location":"examples/sensitivity-analysis-ridge/","page":"Sensitivity Analysis of Ridge Regression","title":"Sensitivity Analysis of Ridge Regression","text":"beginsplit\nbeginarray ll\nmboxminimize sum_i=1^N (y_i - w x_i - b)^2 + alpha (w^2 + b^2) \nendarray\nendsplit","category":"page"},{"location":"examples/sensitivity-analysis-ridge/","page":"Sensitivity Analysis of Ridge Regression","title":"Sensitivity Analysis of Ridge Regression","text":"where","category":"page"},{"location":"examples/sensitivity-analysis-ridge/","page":"Sensitivity Analysis of Ridge Regression","title":"Sensitivity Analysis of Ridge Regression","text":"w, b are slope and intercept of the regressing line\nx, y are the N data points\nα is the regularization constant","category":"page"},{"location":"examples/sensitivity-analysis-ridge/","page":"Sensitivity Analysis of Ridge Regression","title":"Sensitivity Analysis of Ridge Regression","text":"which is equivalent to:","category":"page"},{"location":"examples/sensitivity-analysis-ridge/","page":"Sensitivity Analysis of Ridge Regression","title":"Sensitivity Analysis of Ridge Regression","text":"beginsplit\nbeginarray ll\nmboxminimize e^tope + alpha (w^2) \nmboxst e_i = y_i - w x_i - b quad quad i=1N \nendarray\nendsplit","category":"page"},{"location":"examples/sensitivity-analysis-ridge/","page":"Sensitivity Analysis of Ridge Regression","title":"Sensitivity Analysis of Ridge Regression","text":"This tutorial uses the following packages","category":"page"},{"location":"examples/sensitivity-analysis-ridge/","page":"Sensitivity Analysis of Ridge Regression","title":"Sensitivity Analysis of Ridge Regression","text":"using JuMP\nimport DiffOpt\nimport Random\nimport Ipopt\nimport Plots\nusing LinearAlgebra: dot","category":"page"},{"location":"examples/sensitivity-analysis-ridge/#Define-and-solve-the-problem","page":"Sensitivity Analysis of Ridge Regression","title":"Define and 
solve the problem","text":"","category":"section"},{"location":"examples/sensitivity-analysis-ridge/","page":"Sensitivity Analysis of Ridge Regression","title":"Sensitivity Analysis of Ridge Regression","text":"Construct a set of noisy (guassian) data points around a line.","category":"page"},{"location":"examples/sensitivity-analysis-ridge/","page":"Sensitivity Analysis of Ridge Regression","title":"Sensitivity Analysis of Ridge Regression","text":"Random.seed!(42)\n\nN = 150\n\nw = 2 * abs(randn())\nb = rand()\nX = randn(N)\nY = w * X .+ b + 0.8 * randn(N);\nnothing #hide","category":"page"},{"location":"examples/sensitivity-analysis-ridge/","page":"Sensitivity Analysis of Ridge Regression","title":"Sensitivity Analysis of Ridge Regression","text":"The helper method fit_ridge defines and solves the corresponding model. The ridge regression is modeled with quadratic programming (quadratic objective and linear constraints) and solved in generic methods of Ipopt. This is not the standard way of solving the ridge regression problem this is done here for didactic purposes.","category":"page"},{"location":"examples/sensitivity-analysis-ridge/","page":"Sensitivity Analysis of Ridge Regression","title":"Sensitivity Analysis of Ridge Regression","text":"function fit_ridge(X, Y, alpha = 0.1)\n N = length(Y)\n # Initialize a JuMP Model with Ipopt solver\n model = Model(() -> DiffOpt.diff_optimizer(Ipopt.Optimizer))\n set_silent(model)\n @variable(model, w) # angular coefficient\n @variable(model, b) # linear coefficient\n # expression defining approximation error\n @expression(model, e[i = 1:N], Y[i] - w * X[i] - b)\n # objective minimizing squared error and ridge penalty\n @objective(model, Min, 1 / N * dot(e, e) + alpha * (w^2))\n optimize!(model)\n return model, w, b # return model & variables\nend","category":"page"},{"location":"examples/sensitivity-analysis-ridge/","page":"Sensitivity Analysis of Ridge Regression","title":"Sensitivity Analysis of Ridge 
Regression","text":"Plot the data points and the fitted line for different alpha values","category":"page"},{"location":"examples/sensitivity-analysis-ridge/","page":"Sensitivity Analysis of Ridge Regression","title":"Sensitivity Analysis of Ridge Regression","text":"p = Plots.scatter(X, Y; label = nothing, legend = :topleft)\nmi, ma = minimum(X), maximum(X)\nPlots.title!(\"Fitted lines and points\")\n\nfor alpha in 0.5:0.5:1.5\n local model, w, b = fit_ridge(X, Y, alpha)\n ŵ = value(w)\n b̂ = value(b)\n Plots.plot!(\n p,\n [mi, ma],\n [mi * ŵ + b̂, ma * ŵ + b̂];\n label = \"alpha=$alpha\",\n width = 2,\n )\nend\np","category":"page"},{"location":"examples/sensitivity-analysis-ridge/#Differentiate","page":"Sensitivity Analysis of Ridge Regression","title":"Differentiate","text":"","category":"section"},{"location":"examples/sensitivity-analysis-ridge/","page":"Sensitivity Analysis of Ridge Regression","title":"Sensitivity Analysis of Ridge Regression","text":"Now that we've solved the problem, we can compute the sensitivity of optimal values of the slope w with respect to perturbations in the data points (x,y).","category":"page"},{"location":"examples/sensitivity-analysis-ridge/","page":"Sensitivity Analysis of Ridge Regression","title":"Sensitivity Analysis of Ridge Regression","text":"alpha = 0.4\nmodel, w, b = fit_ridge(X, Y, alpha)\nŵ = value(w)\nb̂ = value(b)","category":"page"},{"location":"examples/sensitivity-analysis-ridge/","page":"Sensitivity Analysis of Ridge Regression","title":"Sensitivity Analysis of Ridge Regression","text":"We first compute sensitivity of the slope with respect to a perturbation of the independent variable x.","category":"page"},{"location":"examples/sensitivity-analysis-ridge/","page":"Sensitivity Analysis of Ridge Regression","title":"Sensitivity Analysis of Ridge Regression","text":"Recalling that the points (x_i y_i) appear in the objective function as: (yi - b - w*xi)^2, the DiffOpt.ForwardObjectiveFunction attribute must be 
set accordingly, with the terms multiplying the parameter in the objective. When considering the perturbation of a parameter θ, DiffOpt.ForwardObjectiveFunction() takes in the expression in the objective that multiplies θ. If θ appears with a quadratic and a linear form: θ^2 a x + θ b y, then the expression to pass to ForwardObjectiveFunction is 2θ a x + b y.","category":"page"},{"location":"examples/sensitivity-analysis-ridge/","page":"Sensitivity Analysis of Ridge Regression","title":"Sensitivity Analysis of Ridge Regression","text":"Sensitivity with respect to x and y","category":"page"},{"location":"examples/sensitivity-analysis-ridge/","page":"Sensitivity Analysis of Ridge Regression","title":"Sensitivity Analysis of Ridge Regression","text":"∇y = zero(X)\n∇x = zero(X)\nfor i in 1:N\n MOI.set(\n model,\n DiffOpt.ForwardObjectiveFunction(),\n 2w^2 * X[i] + 2b * w - 2 * w * Y[i],\n )\n DiffOpt.forward_differentiate!(model)\n ∇x[i] = MOI.get(model, DiffOpt.ForwardVariablePrimal(), w)\n MOI.set(model, DiffOpt.ForwardObjectiveFunction(), (2Y[i] - 2b - 2w * X[i]))\n DiffOpt.forward_differentiate!(model)\n ∇y[i] = MOI.get(model, DiffOpt.ForwardVariablePrimal(), w)\nend","category":"page"},{"location":"examples/sensitivity-analysis-ridge/","page":"Sensitivity Analysis of Ridge Regression","title":"Sensitivity Analysis of Ridge Regression","text":"Visualize point sensitivities with respect to regression points.","category":"page"},{"location":"examples/sensitivity-analysis-ridge/","page":"Sensitivity Analysis of Ridge Regression","title":"Sensitivity Analysis of Ridge Regression","text":"p = Plots.scatter(\n X,\n Y;\n color = [dw < 0 ? 
:blue : :red for dw in ∇x],\n markersize = [5 * abs(dw) + 1.2 for dw in ∇x],\n label = \"\",\n)\nmi, ma = minimum(X), maximum(X)\nPlots.plot!(\n p,\n [mi, ma],\n [mi * ŵ + b̂, ma * ŵ + b̂];\n color = :blue,\n label = \"\",\n)\nPlots.title!(\"Regression slope sensitivity with respect to x\")","category":"page"},{"location":"examples/sensitivity-analysis-ridge/","page":"Sensitivity Analysis of Ridge Regression","title":"Sensitivity Analysis of Ridge Regression","text":"p = Plots.scatter(\n X,\n Y;\n color = [dw < 0 ? :blue : :red for dw in ∇y],\n markersize = [5 * abs(dw) + 1.2 for dw in ∇y],\n label = \"\",\n)\nmi, ma = minimum(X), maximum(X)\nPlots.plot!(\n p,\n [mi, ma],\n [mi * ŵ + b̂, ma * ŵ + b̂];\n color = :blue,\n label = \"\",\n)\nPlots.title!(\"Regression slope sensitivity with respect to y\")","category":"page"},{"location":"examples/sensitivity-analysis-ridge/","page":"Sensitivity Analysis of Ridge Regression","title":"Sensitivity Analysis of Ridge Regression","text":"Note the points with less central x values induce a greater y sensitivity of the slope.","category":"page"},{"location":"examples/sensitivity-analysis-ridge/","page":"Sensitivity Analysis of Ridge Regression","title":"Sensitivity Analysis of Ridge Regression","text":"","category":"page"},{"location":"examples/sensitivity-analysis-ridge/","page":"Sensitivity Analysis of Ridge Regression","title":"Sensitivity Analysis of Ridge Regression","text":"This page was generated using Literate.jl.","category":"page"},{"location":"intro/#Introduction","page":"Introduction","title":"Introduction","text":"","category":"section"},{"location":"intro/","page":"Introduction","title":"Introduction","text":"An optimization problem is the problem of finding the best solution from all feasible solutions. 
The standard form of an optimization problem is ","category":"page"},{"location":"intro/","page":"Introduction","title":"Introduction","text":"beginaligned\nunderset xoperatorname minimize f(x)operatorname subjectto g_i(x)leq 0quad i=1dots mh_j(x)=0quad j=1dots p\nendaligned","category":"page"},{"location":"intro/","page":"Introduction","title":"Introduction","text":"Note that finding solution to most of the optimization problems is computationally intractable. Here we consider a subset of those problems called convex optimization problems, which admit polynomial time solutions. The standard form of a convex optimization problem is ","category":"page"},{"location":"intro/","page":"Introduction","title":"Introduction","text":"beginaligned\nunderset xoperatorname minimize f(x)operatorname subjectto g_i(x)leq 0quad i=1dots mA x = b\nendaligned","category":"page"},{"location":"intro/","page":"Introduction","title":"Introduction","text":"where f and g_i are convex functions.","category":"page"},{"location":"intro/#Parameterized-problems","page":"Introduction","title":"Parameterized problems","text":"","category":"section"},{"location":"intro/","page":"Introduction","title":"Introduction","text":"In practice, convex optimization problems include parameters, apart from the decision variables, which determines the structure of the problem itself i.e. the objective function and constraints. Hence they affect the solution too. A general form of a parameterized convex optimization problem is ","category":"page"},{"location":"intro/","page":"Introduction","title":"Introduction","text":"beginaligned\nunderset xoperatorname minimize f(x theta)operatorname subjectto g_i(x theta)leq 0quad i=1dots mA(theta) x = b(theta)\nendaligned","category":"page"},{"location":"intro/","page":"Introduction","title":"Introduction","text":"where theta is the parameter. 
In different fields, these parameters go by different names:","category":"page"},{"location":"intro/","page":"Introduction","title":"Introduction","text":"Hyperparameters in machine learning\nRisk aversion or other backtesing parameters in financial modelling\nParameterized systems in control theory","category":"page"},{"location":"intro/#What-do-we-mean-by-differentiating-a-parameterized-optimization-program?-Why-do-we-need-it?","page":"Introduction","title":"What do we mean by differentiating a parameterized optimization program? Why do we need it?","text":"","category":"section"},{"location":"intro/","page":"Introduction","title":"Introduction","text":"Often, parameters are chosen and tuned by hand - an iterative process - and the structure of the problem is crafted manually. But it is possible to do an automatic gradient based tuning of parameters.","category":"page"},{"location":"intro/","page":"Introduction","title":"Introduction","text":"Consider solution of the parametrized optimization problem, x(theta),","category":"page"},{"location":"intro/","page":"Introduction","title":"Introduction","text":"beginsplit\nbeginarray lll\nx^*(theta)= underset xoperatorname argmin f(x theta)\n operatorname subjectto g_i(x theta)leq 0quad i=1dots m\n A(theta) x = b(theta)\nendarray\nendsplit","category":"page"},{"location":"intro/","page":"Introduction","title":"Introduction","text":"which is the input of l(x^*(theta)), a loss function. Our goal is to choose the best parameter theta so that l is optimized. Here, l(x^*(theta)) is the objective function and theta is the decision variable. 
In order to apply a gradient-based strategy to this problem, we need to differentiate l with respect to theta.","category":"page"},{"location":"intro/","page":"Introduction","title":"Introduction","text":"fracpartial l(x^*(theta))partial theta = fracpartial l(x^*(theta))partial x^*(theta) fracpartial x^*(theta)partial theta","category":"page"},{"location":"intro/","page":"Introduction","title":"Introduction","text":"By implicit function theorem, this translates to differentiating the program data, i.e. functions f, g_i(x) and matrices A, b, with respect to theta.","category":"page"},{"location":"intro/","page":"Introduction","title":"Introduction","text":"This is can be achieved in two steps or passes:","category":"page"},{"location":"intro/","page":"Introduction","title":"Introduction","text":"Forward pass - Given an initial value of theta, solves the optimization problem to find x^*(theta)\nBackward pass - Given x^*, differentiate and find fracpartial x^*(theta)partial theta","category":"page"},{"location":"manual/#Manual","page":"Manual","title":"Manual","text":"","category":"section"},{"location":"manual/","page":"Manual","title":"Manual","text":"note: Note\nAs of now, this package only works for optimization models that can be written either in convex conic form or convex quadratic form.","category":"page"},{"location":"manual/#Supported-objectives-and-constraints-scheme-1","page":"Manual","title":"Supported objectives & constraints - scheme 1","text":"","category":"section"},{"location":"manual/","page":"Manual","title":"Manual","text":"For QPTH/OPTNET style backend, the package supports following Function-in-Set constraints: ","category":"page"},{"location":"manual/","page":"Manual","title":"Manual","text":"MOI Function MOI Set\nVariableIndex GreaterThan\nVariableIndex LessThan\nVariableIndex EqualTo\nScalarAffineFunction GreaterThan\nScalarAffineFunction LessThan\nScalarAffineFunction 
EqualTo","category":"page"},{"location":"manual/","page":"Manual","title":"Manual","text":"and the following objective types: ","category":"page"},{"location":"manual/","page":"Manual","title":"Manual","text":"MOI Function\nVariableIndex\nScalarAffineFunction\nScalarQuadraticFunction","category":"page"},{"location":"manual/#Supported-objectives-and-constraints-scheme-2","page":"Manual","title":"Supported objectives & constraints - scheme 2","text":"","category":"section"},{"location":"manual/","page":"Manual","title":"Manual","text":"For DiffCP/CVXPY style backend, the package supports following Function-in-Set constraints: ","category":"page"},{"location":"manual/","page":"Manual","title":"Manual","text":"MOI Function MOI Set\nVectorOfVariables Nonnegatives\nVectorOfVariables Nonpositives\nVectorOfVariables Zeros\nVectorOfVariables SecondOrderCone\nVectorOfVariables PositiveSemidefiniteConeTriangle\nVectorAffineFunction Nonnegatives\nVectorAffineFunction Nonpositives\nVectorAffineFunction Zeros\nVectorAffineFunction SecondOrderCone\nVectorAffineFunction PositiveSemidefiniteConeTriangle","category":"page"},{"location":"manual/","page":"Manual","title":"Manual","text":"and the following objective types: ","category":"page"},{"location":"manual/","page":"Manual","title":"Manual","text":"MOI Function\nVariableIndex\nScalarAffineFunction","category":"page"},{"location":"manual/#Creating-a-differentiable-optimizer","page":"Manual","title":"Creating a differentiable optimizer","text":"","category":"section"},{"location":"manual/","page":"Manual","title":"Manual","text":"You can create a differentiable optimizer over an existing MOI solver by using the diff_optimizer utility. 
","category":"page"},{"location":"manual/","page":"Manual","title":"Manual","text":"diff_optimizer","category":"page"},{"location":"manual/#DiffOpt.diff_optimizer","page":"Manual","title":"DiffOpt.diff_optimizer","text":"diff_optimizer(optimizer_constructor)::Optimizer\n\nCreates a DiffOpt.Optimizer, which is an MOI layer with an internal optimizer and other utility methods. Results (primal, dual and slack values) are obtained by querying the internal optimizer instantiated using the optimizer_constructor. These values are required for find jacobians with respect to problem data.\n\nOne define a differentiable model by using any solver of choice. Example:\n\njulia> import DiffOpt, HiGHS\n\njulia> model = DiffOpt.diff_optimizer(HiGHS.Optimizer)\njulia> x = model.add_variable(model)\njulia> model.add_constraint(model, ...)\n\n\n\n\n\n","category":"function"},{"location":"manual/#Adding-new-sets-and-constraints","page":"Manual","title":"Adding new sets and constraints","text":"","category":"section"},{"location":"manual/","page":"Manual","title":"Manual","text":"The DiffOpt Optimizer behaves similarly to other MOI Optimizers and implements the MOI.AbstractOptimizer API.","category":"page"},{"location":"manual/#Projections-on-cone-sets","page":"Manual","title":"Projections on cone sets","text":"","category":"section"},{"location":"manual/","page":"Manual","title":"Manual","text":"DiffOpt requires taking projections and finding projection gradients of vectors while computing the jacobians. 
For this purpose, we use MathOptSetDistances.jl, which is a dedicated package for computing set distances, projections and projection gradients.","category":"page"},{"location":"manual/#Conic-problem-formulation","page":"Manual","title":"Conic problem formulation","text":"","category":"section"},{"location":"manual/","page":"Manual","title":"Manual","text":"note: Note\nAs of now, the package is using SCS geometric form for affine expressions in cones.","category":"page"},{"location":"manual/","page":"Manual","title":"Manual","text":"Consider a convex conic optimization problem in its primal (P) and dual (D) forms:","category":"page"},{"location":"manual/","page":"Manual","title":"Manual","text":"beginsplit\nbeginarray llcc\ntextbfPrimal Problem textbfDual Problem \nmboxminimize c^T x quad quad mboxminimize b^T y \nmboxsubject to A x + s = b quad quad mboxsubject to A^T y + c = 0 \n s in mathcalK y in mathcalK^*\nendarray\nendsplit","category":"page"},{"location":"manual/","page":"Manual","title":"Manual","text":"where","category":"page"},{"location":"manual/","page":"Manual","title":"Manual","text":"x in R^n is the primal variable, y in R^m is the dual variable, and s in R^m is the primal slack","category":"page"},{"location":"manual/","page":"Manual","title":"Manual","text":"variable","category":"page"},{"location":"manual/","page":"Manual","title":"Manual","text":"mathcalK subseteq R^m is a closed convex cone and mathcalK^* subseteq R^m is the corresponding dual cone","category":"page"},{"location":"manual/","page":"Manual","title":"Manual","text":"variable","category":"page"},{"location":"manual/","page":"Manual","title":"Manual","text":"A in R^m times n, b in R^m, c in R^n are problem data","category":"page"},{"location":"manual/","page":"Manual","title":"Manual","text":"In the light of above, DiffOpt differentiates program variables x, s, y w.r.t pertubations/sensivities in problem data i.e. dA, db, dc. 
This is achieved via implicit differentiation and matrix differential calculus.","category":"page"},{"location":"manual/","page":"Manual","title":"Manual","text":"Note that the primal (P) and dual (D) are self-duals of each other. Similarly, for the constraints we support, mathcalK is same in format as mathcalK^*.","category":"page"},{"location":"manual/#Reference-articles","page":"Manual","title":"Reference articles","text":"","category":"section"},{"location":"manual/","page":"Manual","title":"Manual","text":"Differentiating Through a Cone Program - Akshay Agrawal, Shane Barratt, Stephen Boyd, Enzo Busseti, Walaa M. Moursi, 2019\nA fast and differentiable QP solver for PyTorch. Crafted by Brandon Amos and J. Zico Kolter.\nOptNet: Differentiable Optimization as a Layer in Neural Networks","category":"page"},{"location":"manual/#Backward-Pass-vector","page":"Manual","title":"Backward Pass vector","text":"","category":"section"},{"location":"manual/","page":"Manual","title":"Manual","text":"One possible point of confusion in finding Jacobians is the role of the backward pass vector - above eqn (7), OptNet: Differentiable Optimization as a Layer in Neural Networks. While differentiating convex programs, it is often the case that we don't want to find the actual derivatives, rather we might be interested in computing the product of Jacobians with a backward pass vector, often used in backprop in machine learning/automatic differentiation. This is what happens in scheme 1 of DiffOpt backend.","category":"page"},{"location":"manual/","page":"Manual","title":"Manual","text":"But, for the conic system (scheme 2), we provide perturbations in conic data (dA, db, dc) to compute pertubations (dx, dy, dz) in input variables. Unlike the quadratic case, these perturbations are actual derivatives, not the product with a backward pass vector. 
This is an important distinction between the two schemes of differential optimization.","category":"page"},{"location":"examples/sensitivity-analysis-svm/","page":"Sensitivity Analysis of SVM","title":"Sensitivity Analysis of SVM","text":"EditURL = \"sensitivity-analysis-svm.jl\"","category":"page"},{"location":"examples/sensitivity-analysis-svm/#Sensitivity-Analysis-of-SVM","page":"Sensitivity Analysis of SVM","title":"Sensitivity Analysis of SVM","text":"","category":"section"},{"location":"examples/sensitivity-analysis-svm/","page":"Sensitivity Analysis of SVM","title":"Sensitivity Analysis of SVM","text":"(Image: )","category":"page"},{"location":"examples/sensitivity-analysis-svm/","page":"Sensitivity Analysis of SVM","title":"Sensitivity Analysis of SVM","text":"This notebook illustrates sensitivity analysis of data points in a Support Vector Machine (inspired from @matbesancon's SimpleSVMs.)","category":"page"},{"location":"examples/sensitivity-analysis-svm/","page":"Sensitivity Analysis of SVM","title":"Sensitivity Analysis of SVM","text":"For reference, Section 10.1 of https://online.stat.psu.edu/stat508/book/export/html/792 gives an intuitive explanation of what it means to have a sensitive hyperplane or data point. 
The general form of the SVM training problem is given below (with ell_2 regularization):","category":"page"},{"location":"examples/sensitivity-analysis-svm/","page":"Sensitivity Analysis of SVM","title":"Sensitivity Analysis of SVM","text":"beginsplit\nbeginarray ll\nmboxminimize lambdaw^2 + sum_i=1^N xi_i \nmboxst xi_i ge 0 quad quad i=1N \n y_i (w^T X_i + b) ge 1 - xi_i quad i=1N \nendarray\nendsplit","category":"page"},{"location":"examples/sensitivity-analysis-svm/","page":"Sensitivity Analysis of SVM","title":"Sensitivity Analysis of SVM","text":"where","category":"page"},{"location":"examples/sensitivity-analysis-svm/","page":"Sensitivity Analysis of SVM","title":"Sensitivity Analysis of SVM","text":"X, y are the N data points\nw is the support vector\nb determines the offset b/||w|| of the hyperplane with normal w\nξ is the soft-margin loss\nλ is the ell_2 regularization.","category":"page"},{"location":"examples/sensitivity-analysis-svm/","page":"Sensitivity Analysis of SVM","title":"Sensitivity Analysis of SVM","text":"This tutorial uses the following packages","category":"page"},{"location":"examples/sensitivity-analysis-svm/","page":"Sensitivity Analysis of SVM","title":"Sensitivity Analysis of SVM","text":"using JuMP # The mathematical programming modelling language\nimport DiffOpt # JuMP extension for differentiable optimization\nimport Ipopt # Optimization solver that handles quadratic programs\nimport LinearAlgebra\nimport Plots\nimport Random","category":"page"},{"location":"examples/sensitivity-analysis-svm/#Define-and-solve-the-SVM","page":"Sensitivity Analysis of SVM","title":"Define and solve the SVM","text":"","category":"section"},{"location":"examples/sensitivity-analysis-svm/","page":"Sensitivity Analysis of SVM","title":"Sensitivity Analysis of SVM","text":"Construct two clusters of data points.","category":"page"},{"location":"examples/sensitivity-analysis-svm/","page":"Sensitivity Analysis of SVM","title":"Sensitivity Analysis of 
SVM","text":"N = 100\nD = 2\n\nRandom.seed!(62)\nX = vcat(randn(N ÷ 2, D), randn(N ÷ 2, D) .+ [2.0, 2.0]')\ny = append!(ones(N ÷ 2), -ones(N ÷ 2))\nλ = 0.05;\nnothing #hide","category":"page"},{"location":"examples/sensitivity-analysis-svm/","page":"Sensitivity Analysis of SVM","title":"Sensitivity Analysis of SVM","text":"Let's initialize a special model that can understand sensitivities","category":"page"},{"location":"examples/sensitivity-analysis-svm/","page":"Sensitivity Analysis of SVM","title":"Sensitivity Analysis of SVM","text":"model = Model(() -> DiffOpt.diff_optimizer(Ipopt.Optimizer))\nMOI.set(model, MOI.Silent(), true)","category":"page"},{"location":"examples/sensitivity-analysis-svm/","page":"Sensitivity Analysis of SVM","title":"Sensitivity Analysis of SVM","text":"Add the variables","category":"page"},{"location":"examples/sensitivity-analysis-svm/","page":"Sensitivity Analysis of SVM","title":"Sensitivity Analysis of SVM","text":"@variable(model, ξ[1:N] >= 0)\n@variable(model, w[1:D])\n@variable(model, b);\nnothing #hide","category":"page"},{"location":"examples/sensitivity-analysis-svm/","page":"Sensitivity Analysis of SVM","title":"Sensitivity Analysis of SVM","text":"Add the constraints.","category":"page"},{"location":"examples/sensitivity-analysis-svm/","page":"Sensitivity Analysis of SVM","title":"Sensitivity Analysis of SVM","text":"@constraint(\n model,\n con[i in 1:N],\n y[i] * (LinearAlgebra.dot(X[i, :], w) + b) >= 1 - ξ[i]\n);\nnothing #hide","category":"page"},{"location":"examples/sensitivity-analysis-svm/","page":"Sensitivity Analysis of SVM","title":"Sensitivity Analysis of SVM","text":"Define the objective and solve","category":"page"},{"location":"examples/sensitivity-analysis-svm/","page":"Sensitivity Analysis of SVM","title":"Sensitivity Analysis of SVM","text":"@objective(model, Min, λ * LinearAlgebra.dot(w, w) + 
sum(ξ))\n\noptimize!(model)","category":"page"},{"location":"examples/sensitivity-analysis-svm/","page":"Sensitivity Analysis of SVM","title":"Sensitivity Analysis of SVM","text":"We can visualize the separating hyperplane.","category":"page"},{"location":"examples/sensitivity-analysis-svm/","page":"Sensitivity Analysis of SVM","title":"Sensitivity Analysis of SVM","text":"loss = objective_value(model)\n\nwv = value.(w)\n\nbv = value(b)\n\nsvm_x = [-2.0, 4.0] # arbitrary points\nsvm_y = (-bv .- wv[1] * svm_x) / wv[2]\n\np = Plots.scatter(\n X[:, 1],\n X[:, 2];\n color = [yi > 0 ? :red : :blue for yi in y],\n label = \"\",\n)\nPlots.plot!(\n p,\n svm_x,\n svm_y;\n label = \"loss = $(round(loss, digits=2))\",\n width = 3,\n)","category":"page"},{"location":"examples/sensitivity-analysis-svm/#Gradient-of-hyperplane-wrt-the-data-point-coordinates","page":"Sensitivity Analysis of SVM","title":"Gradient of hyperplane wrt the data point coordinates","text":"","category":"section"},{"location":"examples/sensitivity-analysis-svm/","page":"Sensitivity Analysis of SVM","title":"Sensitivity Analysis of SVM","text":"Now that we've solved the SVM, we can compute the sensitivity of optimal values – the separating hyperplane in our case – with respect to perturbations of the problem data – the data points – using DiffOpt.","category":"page"},{"location":"examples/sensitivity-analysis-svm/","page":"Sensitivity Analysis of SVM","title":"Sensitivity Analysis of SVM","text":"How does a change in coordinates of the data points, X, affects the position of the hyperplane? This is achieved by finding gradients of w and b with respect to X[i].","category":"page"},{"location":"examples/sensitivity-analysis-svm/","page":"Sensitivity Analysis of SVM","title":"Sensitivity Analysis of SVM","text":"Begin differentiating the model. 
analogous to varying θ in the expression:","category":"page"},{"location":"examples/sensitivity-analysis-svm/","page":"Sensitivity Analysis of SVM","title":"Sensitivity Analysis of SVM","text":"y_i (w^T (X_i + theta) + b) ge 1 - xi_i","category":"page"},{"location":"examples/sensitivity-analysis-svm/","page":"Sensitivity Analysis of SVM","title":"Sensitivity Analysis of SVM","text":"∇ = zeros(N)\nfor i in 1:N\n for j in 1:N\n if i == j\n # we consider identical perturbations on all x_i coordinates\n MOI.set(\n model,\n DiffOpt.ForwardConstraintFunction(),\n con[j],\n y[j] * sum(w),\n )\n else\n MOI.set(model, DiffOpt.ForwardConstraintFunction(), con[j], 0.0)\n end\n end\n DiffOpt.forward_differentiate!(model)\n dw = MOI.get.(model, DiffOpt.ForwardVariablePrimal(), w)\n db = MOI.get(model, DiffOpt.ForwardVariablePrimal(), b)\n ∇[i] = LinearAlgebra.norm(dw) + LinearAlgebra.norm(db)\nend","category":"page"},{"location":"examples/sensitivity-analysis-svm/","page":"Sensitivity Analysis of SVM","title":"Sensitivity Analysis of SVM","text":"We can visualize the separating hyperplane sensitivity with respect to the data points. Note that all the small numbers were converted into 1/10 of the largest value to show all the points of the set.","category":"page"},{"location":"examples/sensitivity-analysis-svm/","page":"Sensitivity Analysis of SVM","title":"Sensitivity Analysis of SVM","text":"p3 = Plots.scatter(\n X[:, 1],\n X[:, 2];\n color = [yi > 0 ? 
:red : :blue for yi in y],\n label = \"\",\n markersize = 2 * (max.(1.8∇, 0.2 * maximum(∇))),\n)\nPlots.yaxis!(p3, (-2, 4.5))\nPlots.plot!(p3, svm_x, svm_y; label = \"\", width = 3)\nPlots.title!(\"Sensitivity of the separator to data point variations\")","category":"page"},{"location":"examples/sensitivity-analysis-svm/","page":"Sensitivity Analysis of SVM","title":"Sensitivity Analysis of SVM","text":"","category":"page"},{"location":"examples/sensitivity-analysis-svm/","page":"Sensitivity Analysis of SVM","title":"Sensitivity Analysis of SVM","text":"This page was generated using Literate.jl.","category":"page"},{"location":"examples/matrix-inversion-manual/","page":"Differentiating a QP wrt a single variable","title":"Differentiating a QP wrt a single variable","text":"EditURL = \"matrix-inversion-manual.jl\"","category":"page"},{"location":"examples/matrix-inversion-manual/#Differentiating-a-QP-wrt-a-single-variable","page":"Differentiating a QP wrt a single variable","title":"Differentiating a QP wrt a single variable","text":"","category":"section"},{"location":"examples/matrix-inversion-manual/","page":"Differentiating a QP wrt a single variable","title":"Differentiating a QP wrt a single variable","text":"(Image: )","category":"page"},{"location":"examples/matrix-inversion-manual/","page":"Differentiating a QP wrt a single variable","title":"Differentiating a QP wrt a single variable","text":"Consider the quadratic program","category":"page"},{"location":"examples/matrix-inversion-manual/","page":"Differentiating a QP wrt a single variable","title":"Differentiating a QP wrt a single variable","text":"beginsplit\nbeginarray ll\nmboxminimize frac12 x^T Q x + q^T x \nmboxsubject to G x leq h x in mathcalR^2 \nendarray\nendsplit","category":"page"},{"location":"examples/matrix-inversion-manual/","page":"Differentiating a QP wrt a single variable","title":"Differentiating a QP wrt a single variable","text":"where Q, q, G are fixed and h is the single 
parameter.","category":"page"},{"location":"examples/matrix-inversion-manual/","page":"Differentiating a QP wrt a single variable","title":"Differentiating a QP wrt a single variable","text":"In this example, we'll try to differentiate the QP wrt h, by finding its jacobian by hand (using Eqn (6) of QPTH article) and compare the results:","category":"page"},{"location":"examples/matrix-inversion-manual/","page":"Differentiating a QP wrt a single variable","title":"Differentiating a QP wrt a single variable","text":"Manual compuation\nUsing JuMP and DiffOpt","category":"page"},{"location":"examples/matrix-inversion-manual/","page":"Differentiating a QP wrt a single variable","title":"Differentiating a QP wrt a single variable","text":"Assuming","category":"page"},{"location":"examples/matrix-inversion-manual/","page":"Differentiating a QP wrt a single variable","title":"Differentiating a QP wrt a single variable","text":"Q = [[4, 1], [1, 2]]\nq = [1, 1]\nG = [1, 1]","category":"page"},{"location":"examples/matrix-inversion-manual/","page":"Differentiating a QP wrt a single variable","title":"Differentiating a QP wrt a single variable","text":"and begining with a starting value of h=-1","category":"page"},{"location":"examples/matrix-inversion-manual/","page":"Differentiating a QP wrt a single variable","title":"Differentiating a QP wrt a single variable","text":"few values just for reference","category":"page"},{"location":"examples/matrix-inversion-manual/","page":"Differentiating a QP wrt a single variable","title":"Differentiating a QP wrt a single variable","text":"variable optimal value note\nx* [-0.25; -0.75] Primal optimal\n𝜆∗ -0.75 Dual optimal","category":"page"},{"location":"examples/matrix-inversion-manual/#Finding-Jacobian-using-matrix-inversion","page":"Differentiating a QP wrt a single variable","title":"Finding Jacobian using matrix inversion","text":"","category":"section"},{"location":"examples/matrix-inversion-manual/","page":"Differentiating a QP 
wrt a single variable","title":"Differentiating a QP wrt a single variable","text":"Lets formulate Eqn (6) of QPTH article for our QP. If we assume h as the only parameter and Q,q,G as fixed problem data - also note that our QP doesn't involves Ax=b constraint - then Eqn (6) reduces to","category":"page"},{"location":"examples/matrix-inversion-manual/","page":"Differentiating a QP wrt a single variable","title":"Differentiating a QP wrt a single variable","text":"begingather\n beginbmatrix\n Q G^T \n lambda^* G G x^* - h\n endbmatrix\n beginbmatrix\n dx \n d lambda\n endbmatrix\n =\n beginbmatrix\n 0 \n lambda^* dh\n endbmatrix\nendgather","category":"page"},{"location":"examples/matrix-inversion-manual/","page":"Differentiating a QP wrt a single variable","title":"Differentiating a QP wrt a single variable","text":"Now to find the jacobians $ \\frac{\\partial x}{\\partial h}, \\frac{\\partial \\lambda}{\\partial h}$ we substitute dh = I = [1] and plug in values of Q,q,G to get","category":"page"},{"location":"examples/matrix-inversion-manual/","page":"Differentiating a QP wrt a single variable","title":"Differentiating a QP wrt a single variable","text":"begingather\n beginbmatrix\n 4 1 1 \n 1 2 1 \n -075 -075 0\n endbmatrix\n beginbmatrix\n fracpartial x_1partial h \n fracpartial x_2partial h \n fracpartial lambdapartial h\n endbmatrix\n =\n beginbmatrix\n 0 \n 0 \n -075\n endbmatrix\nendgather","category":"page"},{"location":"examples/matrix-inversion-manual/","page":"Differentiating a QP wrt a single variable","title":"Differentiating a QP wrt a single variable","text":"Upon solving using matrix inversion, the jacobian is","category":"page"},{"location":"examples/matrix-inversion-manual/","page":"Differentiating a QP wrt a single variable","title":"Differentiating a QP wrt a single variable","text":"fracpartial x_1partial h = 025 fracpartial x_2partial h = 075 fracpartial lambdapartial h = 
-175","category":"page"},{"location":"examples/matrix-inversion-manual/#Finding-Jacobian-using-JuMP-and-DiffOpt","page":"Differentiating a QP wrt a single variable","title":"Finding Jacobian using JuMP and DiffOpt","text":"","category":"section"},{"location":"examples/matrix-inversion-manual/","page":"Differentiating a QP wrt a single variable","title":"Differentiating a QP wrt a single variable","text":"using JuMP\nimport DiffOpt\nimport Ipopt\n\nn = 2 # variable dimension\nm = 1; # no of inequality constraints\n\nQ = [4.0 1.0; 1.0 2.0]\nq = [1.0; 1.0]\nG = [1.0 1.0;]\nh = [-1.0;] # initial values set","category":"page"},{"location":"examples/matrix-inversion-manual/","page":"Differentiating a QP wrt a single variable","title":"Differentiating a QP wrt a single variable","text":"Initialize empty model","category":"page"},{"location":"examples/matrix-inversion-manual/","page":"Differentiating a QP wrt a single variable","title":"Differentiating a QP wrt a single variable","text":"model = Model(() -> DiffOpt.diff_optimizer(Ipopt.Optimizer))\nset_silent(model)","category":"page"},{"location":"examples/matrix-inversion-manual/","page":"Differentiating a QP wrt a single variable","title":"Differentiating a QP wrt a single variable","text":"Add the variables","category":"page"},{"location":"examples/matrix-inversion-manual/","page":"Differentiating a QP wrt a single variable","title":"Differentiating a QP wrt a single variable","text":"@variable(model, x[1:2])","category":"page"},{"location":"examples/matrix-inversion-manual/","page":"Differentiating a QP wrt a single variable","title":"Differentiating a QP wrt a single variable","text":"Add the constraints.","category":"page"},{"location":"examples/matrix-inversion-manual/","page":"Differentiating a QP wrt a single variable","title":"Differentiating a QP wrt a single variable","text":"@constraint(model, cons[j in 1:1], sum(G[j, i] * x[i] for i in 1:2) <= h[j]);\n\n@objective(\n model,\n Min,\n 1 / 2 * sum(Q[j, i] * 
x[i] * x[j] for i in 1:2, j in 1:2) +\n sum(q[i] * x[i] for i in 1:2)\n)","category":"page"},{"location":"examples/matrix-inversion-manual/","page":"Differentiating a QP wrt a single variable","title":"Differentiating a QP wrt a single variable","text":"Solve problem","category":"page"},{"location":"examples/matrix-inversion-manual/","page":"Differentiating a QP wrt a single variable","title":"Differentiating a QP wrt a single variable","text":"optimize!(model)","category":"page"},{"location":"examples/matrix-inversion-manual/","page":"Differentiating a QP wrt a single variable","title":"Differentiating a QP wrt a single variable","text":"primal solution","category":"page"},{"location":"examples/matrix-inversion-manual/","page":"Differentiating a QP wrt a single variable","title":"Differentiating a QP wrt a single variable","text":"value.(x)","category":"page"},{"location":"examples/matrix-inversion-manual/","page":"Differentiating a QP wrt a single variable","title":"Differentiating a QP wrt a single variable","text":"dual solution","category":"page"},{"location":"examples/matrix-inversion-manual/","page":"Differentiating a QP wrt a single variable","title":"Differentiating a QP wrt a single variable","text":"dual.(cons)","category":"page"},{"location":"examples/matrix-inversion-manual/","page":"Differentiating a QP wrt a single variable","title":"Differentiating a QP wrt a single variable","text":"set sensitivitity","category":"page"},{"location":"examples/matrix-inversion-manual/","page":"Differentiating a QP wrt a single variable","title":"Differentiating a QP wrt a single variable","text":"MOI.set(\n model,\n DiffOpt.ForwardConstraintFunction(),\n cons[1],\n 0.0 * index(x[1]) - 1.0, # to indicate the direction vector to get directional derivatives\n)","category":"page"},{"location":"examples/matrix-inversion-manual/","page":"Differentiating a QP wrt a single variable","title":"Differentiating a QP wrt a single variable","text":"Note that 0.0 * index(x[1]) is 
used to make its type typeof(0.0 * index(x[1]) - 1.0) <: MOI.AbstractScalarFunction. To indicate different direction to get directional derivative, users should replace 0.0 * index(x[1]) - 1.0 as the form of dG*x - dh, where dG and dh correspond to the elements of direction vectors along G and h axes, respectively.","category":"page"},{"location":"examples/matrix-inversion-manual/","page":"Differentiating a QP wrt a single variable","title":"Differentiating a QP wrt a single variable","text":"Compute derivatives","category":"page"},{"location":"examples/matrix-inversion-manual/","page":"Differentiating a QP wrt a single variable","title":"Differentiating a QP wrt a single variable","text":"DiffOpt.forward_differentiate!(model)","category":"page"},{"location":"examples/matrix-inversion-manual/","page":"Differentiating a QP wrt a single variable","title":"Differentiating a QP wrt a single variable","text":"Query derivative","category":"page"},{"location":"examples/matrix-inversion-manual/","page":"Differentiating a QP wrt a single variable","title":"Differentiating a QP wrt a single variable","text":"dx = MOI.get.(model, DiffOpt.ForwardVariablePrimal(), x)","category":"page"},{"location":"examples/matrix-inversion-manual/","page":"Differentiating a QP wrt a single variable","title":"Differentiating a QP wrt a single variable","text":"","category":"page"},{"location":"examples/matrix-inversion-manual/","page":"Differentiating a QP wrt a single variable","title":"Differentiating a QP wrt a single variable","text":"This page was generated using Literate.jl.","category":"page"},{"location":"examples/autotuning-ridge/","page":"Auto-tuning Hyperparameters","title":"Auto-tuning Hyperparameters","text":"EditURL = \"autotuning-ridge.jl\"","category":"page"},{"location":"examples/autotuning-ridge/#Auto-tuning-Hyperparameters","page":"Auto-tuning Hyperparameters","title":"Auto-tuning 
Hyperparameters","text":"","category":"section"},{"location":"examples/autotuning-ridge/","page":"Auto-tuning Hyperparameters","title":"Auto-tuning Hyperparameters","text":"(Image: )","category":"page"},{"location":"examples/autotuning-ridge/","page":"Auto-tuning Hyperparameters","title":"Auto-tuning Hyperparameters","text":"This example shows how to learn a hyperparameter in Ridge Regression using a gradient descent routine. Let the regularized regression problem be formulated as:","category":"page"},{"location":"examples/autotuning-ridge/","page":"Auto-tuning Hyperparameters","title":"Auto-tuning Hyperparameters","text":"beginequation\nmin_w quad frac12nd sum_i=1^n (w^T x_i - y_i)^2 + fracalpha2d w _2^2\nendequation","category":"page"},{"location":"examples/autotuning-ridge/","page":"Auto-tuning Hyperparameters","title":"Auto-tuning Hyperparameters","text":"where","category":"page"},{"location":"examples/autotuning-ridge/","page":"Auto-tuning Hyperparameters","title":"Auto-tuning Hyperparameters","text":"x, y are the data points\nw are the learned weights\nα is the hyperparameter acting on regularization.","category":"page"},{"location":"examples/autotuning-ridge/","page":"Auto-tuning Hyperparameters","title":"Auto-tuning Hyperparameters","text":"The main optimization model will be formulated with JuMP. 
Using the gradient of the optimal weights with respect to the regularization parameters computed with DiffOpt, we can perform a gradient descent on top of the inner model to minimize the test loss.","category":"page"},{"location":"examples/autotuning-ridge/","page":"Auto-tuning Hyperparameters","title":"Auto-tuning Hyperparameters","text":"This tutorial uses the following packages","category":"page"},{"location":"examples/autotuning-ridge/","page":"Auto-tuning Hyperparameters","title":"Auto-tuning Hyperparameters","text":"using JuMP # The mathematical programming modelling language\nimport DiffOpt # JuMP extension for differentiable optimization\nimport Ipopt # Optimization solver that handles quadratic programs\nimport LinearAlgebra\nimport Plots\nimport Random","category":"page"},{"location":"examples/autotuning-ridge/#Generating-a-noisy-regression-dataset","page":"Auto-tuning Hyperparameters","title":"Generating a noisy regression dataset","text":"","category":"section"},{"location":"examples/autotuning-ridge/","page":"Auto-tuning Hyperparameters","title":"Auto-tuning Hyperparameters","text":"Random.seed!(42)\n\nN = 100\nD = 20\nnoise = 5\n\nw_real = 10 * randn(D)\nX = 10 * randn(N, D)\ny = X * w_real + noise * randn(N)\nl = N ÷ 2 # test train split\n\nX_train = X[1:l, :]\nX_test = X[l+1:N, :]\ny_train = y[1:l]\ny_test = y[l+1:N];\nnothing #hide","category":"page"},{"location":"examples/autotuning-ridge/#Defining-the-regression-problem","page":"Auto-tuning Hyperparameters","title":"Defining the regression problem","text":"","category":"section"},{"location":"examples/autotuning-ridge/","page":"Auto-tuning Hyperparameters","title":"Auto-tuning Hyperparameters","text":"We implement the regularized regression problem as a function taking the problem data, building a JuMP model and solving it.","category":"page"},{"location":"examples/autotuning-ridge/","page":"Auto-tuning Hyperparameters","title":"Auto-tuning Hyperparameters","text":"function fit_ridge(model, X, y, 
α)\n JuMP.empty!(model)\n set_silent(model)\n N, D = size(X)\n @variable(model, w[1:D])\n @expression(model, err_term, X * w - y)\n @objective(\n model,\n Min,\n LinearAlgebra.dot(err_term, err_term) / (2 * N * D) +\n α * LinearAlgebra.dot(w, w) / (2 * D),\n )\n optimize!(model)\n @assert termination_status(model) in\n [MOI.OPTIMAL, MOI.LOCALLY_SOLVED, MOI.ALMOST_LOCALLY_SOLVED]\n return w\nend","category":"page"},{"location":"examples/autotuning-ridge/","page":"Auto-tuning Hyperparameters","title":"Auto-tuning Hyperparameters","text":"We can solve the problem for several values of α to visualize the effect of regularization on the testing and training loss.","category":"page"},{"location":"examples/autotuning-ridge/","page":"Auto-tuning Hyperparameters","title":"Auto-tuning Hyperparameters","text":"αs = 0.00:0.01:0.50\nmse_test = Float64[]\nmse_train = Float64[]\nmodel = Model(() -> DiffOpt.diff_optimizer(Ipopt.Optimizer))\n(Ntest, D) = size(X_test)\n(Ntrain, D) = size(X_train)\nfor α in αs\n w = fit_ridge(model, X_train, y_train, α)\n ŵ = value.(w)\n ŷ_test = X_test * ŵ\n ŷ_train = X_train * ŵ\n push!(mse_test, LinearAlgebra.norm(ŷ_test - y_test)^2 / (2 * Ntest * D))\n push!(\n mse_train,\n LinearAlgebra.norm(ŷ_train - y_train)^2 / (2 * Ntrain * D),\n )\nend","category":"page"},{"location":"examples/autotuning-ridge/","page":"Auto-tuning Hyperparameters","title":"Auto-tuning Hyperparameters","text":"Visualize the Mean Score Error metric","category":"page"},{"location":"examples/autotuning-ridge/","page":"Auto-tuning Hyperparameters","title":"Auto-tuning Hyperparameters","text":"Plots.plot(\n αs,\n mse_test ./ sum(mse_test);\n label = \"MSE test\",\n xaxis = \"α\",\n yaxis = \"MSE\",\n legend = (0.8, 0.2),\n width = 3,\n)\nPlots.plot!(\n αs,\n mse_train ./ sum(mse_train);\n label = \"MSE train\",\n linestyle = :dash,\n width = 3,\n)\nPlots.title!(\"Normalized MSE on training and testing 
sets\")","category":"page"},{"location":"examples/autotuning-ridge/#Leveraging-differentiable-optimization:-computing-the-derivative-of-the-solution","page":"Auto-tuning Hyperparameters","title":"Leveraging differentiable optimization: computing the derivative of the solution","text":"","category":"section"},{"location":"examples/autotuning-ridge/","page":"Auto-tuning Hyperparameters","title":"Auto-tuning Hyperparameters","text":"Using DiffOpt, we can compute ∂w_i/∂α, the derivative of the learned solution ̂w w.r.t. the regularization parameter.","category":"page"},{"location":"examples/autotuning-ridge/","page":"Auto-tuning Hyperparameters","title":"Auto-tuning Hyperparameters","text":"function compute_dw_dα(model, w)\n D = length(w)\n dw_dα = zeros(D)\n MOI.set(\n model,\n DiffOpt.ForwardObjectiveFunction(),\n LinearAlgebra.dot(w, w) / (2 * D),\n )\n DiffOpt.forward_differentiate!(model)\n for i in 1:D\n dw_dα[i] = MOI.get(model, DiffOpt.ForwardVariablePrimal(), w[i])\n end\n return dw_dα\nend","category":"page"},{"location":"examples/autotuning-ridge/","page":"Auto-tuning Hyperparameters","title":"Auto-tuning Hyperparameters","text":"Using ∂w_i/∂α computed with compute_dw_dα, we can compute the derivative of the test loss w.r.t. the parameter α by composing derivatives.","category":"page"},{"location":"examples/autotuning-ridge/","page":"Auto-tuning Hyperparameters","title":"Auto-tuning Hyperparameters","text":"function d_testloss_dα(model, X_test, y_test, w, ŵ)\n N, D = size(X_test)\n dw_dα = compute_dw_dα(model, w)\n err_term = X_test * ŵ - y_test\n return sum(eachindex(err_term)) do i\n return LinearAlgebra.dot(X_test[i, :], dw_dα) * err_term[i]\n end / (N * D)\nend","category":"page"},{"location":"examples/autotuning-ridge/","page":"Auto-tuning Hyperparameters","title":"Auto-tuning Hyperparameters","text":"We can define a meta-optimizer function performing gradient descent on the test loss w.r.t. 
the regularization parameter.","category":"page"},{"location":"examples/autotuning-ridge/","page":"Auto-tuning Hyperparameters","title":"Auto-tuning Hyperparameters","text":"function descent(α0, max_iters = 100; fixed_step = 0.01, grad_tol = 1e-3)\n α_s = Float64[]\n ∂α_s = Float64[]\n test_loss = Float64[]\n α = α0\n N, D = size(X_test)\n model = Model(() -> DiffOpt.diff_optimizer(Ipopt.Optimizer))\n for iter in 1:max_iters\n w = fit_ridge(model, X_train, y_train, α)\n ŵ = value.(w)\n err_term = X_test * ŵ - y_test\n ∂α = d_testloss_dα(model, X_test, y_test, w, ŵ)\n push!(α_s, α)\n push!(∂α_s, ∂α)\n push!(test_loss, LinearAlgebra.norm(err_term)^2 / (2 * N * D))\n α -= fixed_step * ∂α\n if abs(∂α) ≤ grad_tol\n break\n end\n end\n return α_s, ∂α_s, test_loss\nend\n\nᾱ, ∂ᾱ, msē = descent(0.10, 500)\niters = 1:length(ᾱ);\nnothing #hide","category":"page"},{"location":"examples/autotuning-ridge/","page":"Auto-tuning Hyperparameters","title":"Auto-tuning Hyperparameters","text":"Visualize gradient descent and convergence","category":"page"},{"location":"examples/autotuning-ridge/","page":"Auto-tuning Hyperparameters","title":"Auto-tuning Hyperparameters","text":"Plots.plot(\n αs,\n mse_test;\n label = \"MSE test\",\n xaxis = (\"α\"),\n legend = :topleft,\n width = 2,\n)\nPlots.plot!(ᾱ, msē; label = \"learned α\", width = 5, style = :dot)\nPlots.title!(\"Regularizer learning\")","category":"page"},{"location":"examples/autotuning-ridge/","page":"Auto-tuning Hyperparameters","title":"Auto-tuning Hyperparameters","text":"Visualize the convergence of α to its optimal value","category":"page"},{"location":"examples/autotuning-ridge/","page":"Auto-tuning Hyperparameters","title":"Auto-tuning Hyperparameters","text":"Plots.plot(\n iters,\n ᾱ;\n label = nothing,\n color = :blue,\n xaxis = (\"Iterations\"),\n legend = :bottom,\n title = \"Convergence of α\",\n)","category":"page"},{"location":"examples/autotuning-ridge/","page":"Auto-tuning Hyperparameters","title":"Auto-tuning 
Hyperparameters","text":"Visualize the convergence of the objective function","category":"page"},{"location":"examples/autotuning-ridge/","page":"Auto-tuning Hyperparameters","title":"Auto-tuning Hyperparameters","text":"Plots.plot(\n iters,\n msē;\n label = nothing,\n color = :red,\n xaxis = (\"Iterations\"),\n legend = :bottom,\n title = \"Convergence of MSE\",\n)","category":"page"},{"location":"examples/autotuning-ridge/","page":"Auto-tuning Hyperparameters","title":"Auto-tuning Hyperparameters","text":"Visualize the convergence of the derivative to zero","category":"page"},{"location":"examples/autotuning-ridge/","page":"Auto-tuning Hyperparameters","title":"Auto-tuning Hyperparameters","text":"Plots.plot(\n iters,\n ∂ᾱ;\n label = nothing,\n color = :green,\n xaxis = (\"Iterations\"),\n legend = :bottom,\n title = \"Convergence of ∂α\",\n)","category":"page"},{"location":"examples/autotuning-ridge/","page":"Auto-tuning Hyperparameters","title":"Auto-tuning Hyperparameters","text":"","category":"page"},{"location":"examples/autotuning-ridge/","page":"Auto-tuning Hyperparameters","title":"Auto-tuning Hyperparameters","text":"This page was generated using Literate.jl.","category":"page"},{"location":"examples/polyhedral_project/","page":"Polyhedral QP layer","title":"Polyhedral QP layer","text":"EditURL = \"polyhedral_project.jl\"","category":"page"},{"location":"examples/polyhedral_project/#Polyhedral-QP-layer","page":"Polyhedral QP layer","title":"Polyhedral QP layer","text":"","category":"section"},{"location":"examples/polyhedral_project/","page":"Polyhedral QP layer","title":"Polyhedral QP layer","text":"(Image: )","category":"page"},{"location":"examples/polyhedral_project/","page":"Polyhedral QP layer","title":"Polyhedral QP layer","text":"We use DiffOpt to define a custom network layer which, given an input matrix y, computes its projection onto a polytope defined by a fixed number of inequalities: a_i^T x ≥ b_i. 
A neural network is created using Flux.jl and trained on the MNIST dataset, integrating this quadratic optimization layer.","category":"page"},{"location":"examples/polyhedral_project/","page":"Polyhedral QP layer","title":"Polyhedral QP layer","text":"The QP is solved in the forward pass, and its DiffOpt derivative is used in the backward pass expressed with ChainRulesCore.rrule.","category":"page"},{"location":"examples/polyhedral_project/","page":"Polyhedral QP layer","title":"Polyhedral QP layer","text":"This example is similar to the custom ReLU layer, except that the layer is parameterized by the hyperplanes (w,b) and not a simple stateless function. This also means that ChainRulesCore.rrule must return the derivatives of the output with respect to the layer parameters to allow for backpropagation.","category":"page"},{"location":"examples/polyhedral_project/","page":"Polyhedral QP layer","title":"Polyhedral QP layer","text":"using JuMP\nimport DiffOpt\nimport Ipopt\nimport ChainRulesCore\nimport Flux\nimport MLDatasets\nimport Statistics\nusing Base.Iterators: repeated\nusing LinearAlgebra\nusing Random\n\nRandom.seed!(42)","category":"page"},{"location":"examples/polyhedral_project/#The-Polytope-representation-and-its-derivative","page":"Polyhedral QP layer","title":"The Polytope representation and its derivative","text":"","category":"section"},{"location":"examples/polyhedral_project/","page":"Polyhedral QP layer","title":"Polyhedral QP layer","text":"struct Polytope{N}\n w::NTuple{N,Vector{Float64}}\n b::Vector{Float64}\nend\n\nPolytope(w::NTuple{N}) where {N} = Polytope{N}(w, randn(N))","category":"page"},{"location":"examples/polyhedral_project/","page":"Polyhedral QP layer","title":"Polyhedral QP layer","text":"We define a \"call\" operation on the polytope, making it a so-called functor. 
Calling the polytope with a matrix y operates an Euclidean projection of this matrix onto the polytope.","category":"page"},{"location":"examples/polyhedral_project/","page":"Polyhedral QP layer","title":"Polyhedral QP layer","text":"function (polytope::Polytope{N})(\n y::AbstractMatrix;\n model = direct_model(DiffOpt.diff_optimizer(Ipopt.Optimizer)),\n) where {N}\n layer_size, batch_size = size(y)\n empty!(model)\n set_silent(model)\n @variable(model, x[1:layer_size, 1:batch_size])\n @constraint(\n model,\n greater_than_cons[idx in 1:N, sample in 1:batch_size],\n dot(polytope.w[idx], x[:, sample]) ≥ polytope.b[idx]\n )\n @objective(model, Min, dot(x - y, x - y))\n optimize!(model)\n return JuMP.value.(x)\nend","category":"page"},{"location":"examples/polyhedral_project/","page":"Polyhedral QP layer","title":"Polyhedral QP layer","text":"The @functor macro from Flux implements auxiliary functions for collecting the parameters of our custom layer and operating backpropagation.","category":"page"},{"location":"examples/polyhedral_project/","page":"Polyhedral QP layer","title":"Polyhedral QP layer","text":"Flux.@functor Polytope","category":"page"},{"location":"examples/polyhedral_project/","page":"Polyhedral QP layer","title":"Polyhedral QP layer","text":"Define the reverse differentiation rule, for the function we defined above. Flux uses ChainRules primitives to implement reverse-mode differentiation of the whole network. To learn the current layer (the polytope the layer contains), the gradient is computed with respect to the Polytope fields in a ChainRulesCore.Tangent type which is used to represent derivatives with respect to structs. 
For more details about backpropagation, visit Introduction, ChainRulesCore.jl.","category":"page"},{"location":"examples/polyhedral_project/","page":"Polyhedral QP layer","title":"Polyhedral QP layer","text":"function ChainRulesCore.rrule(\n polytope::Polytope{N},\n y::AbstractMatrix,\n) where {N}\n model = direct_model(DiffOpt.diff_optimizer(Ipopt.Optimizer))\n xv = polytope(y; model = model)\n function pullback_matrix_projection(dl_dx)\n layer_size, batch_size = size(dl_dx)\n dl_dx = ChainRulesCore.unthunk(dl_dx)\n # `dl_dy` is the derivative of `l` wrt `y`\n x = model[:x]\n # grad wrt input parameters\n dl_dy = zeros(size(dl_dx))\n # grad wrt layer parameters\n dl_dw = zero.(polytope.w)\n dl_db = zero(polytope.b)\n # set sensitivities\n MOI.set.(model, DiffOpt.ReverseVariablePrimal(), x, dl_dx)\n # compute grad\n DiffOpt.reverse_differentiate!(model)\n # compute gradient wrt objective function parameter y\n obj_expr = MOI.get(model, DiffOpt.ReverseObjectiveFunction())\n dl_dy .= -2 * JuMP.coefficient.(obj_expr, x)\n greater_than_cons = model[:greater_than_cons]\n for idx in 1:N, sample in 1:batch_size\n cons_expr = MOI.get(\n model,\n DiffOpt.ReverseConstraintFunction(),\n greater_than_cons[idx, sample],\n )\n dl_db[idx] -= JuMP.constant(cons_expr) / batch_size\n dl_dw[idx] .+=\n JuMP.coefficient.(cons_expr, x[:, sample]) / batch_size\n end\n dself = ChainRulesCore.Tangent{Polytope{N}}(; w = dl_dw, b = dl_db)\n return (dself, dl_dy)\n end\n return xv, pullback_matrix_projection\nend","category":"page"},{"location":"examples/polyhedral_project/#Define-the-Network","page":"Polyhedral QP layer","title":"Define the Network","text":"","category":"section"},{"location":"examples/polyhedral_project/","page":"Polyhedral QP layer","title":"Polyhedral QP layer","text":"layer_size = 20\nm = Flux.Chain(\n Flux.Dense(784, layer_size), # 784 being image linear dimension (28 x 28)\n Polytope((randn(layer_size), randn(layer_size), randn(layer_size))),\n Flux.Dense(layer_size, 
10), # 10 being the number of outcomes (0 to 9)\n Flux.softmax,\n)","category":"page"},{"location":"examples/polyhedral_project/#Prepare-data","page":"Polyhedral QP layer","title":"Prepare data","text":"","category":"section"},{"location":"examples/polyhedral_project/","page":"Polyhedral QP layer","title":"Polyhedral QP layer","text":"M = 500 # batch size\n# Preprocessing train data\nimgs = MLDatasets.MNIST.traintensor(1:M)\nlabels = MLDatasets.MNIST.trainlabels(1:M);\ntrain_X = float.(reshape(imgs, size(imgs, 1) * size(imgs, 2), M)) # stack images\ntrain_Y = Flux.onehotbatch(labels, 0:9);\n# Preprocessing test data\ntest_imgs = MLDatasets.MNIST.testtensor(1:M)\ntest_labels = MLDatasets.MNIST.testlabels(1:M)\ntest_X = float.(reshape(test_imgs, size(test_imgs, 1) * size(test_imgs, 2), M))\ntest_Y = Flux.onehotbatch(test_labels, 0:9);\nnothing #hide","category":"page"},{"location":"examples/polyhedral_project/","page":"Polyhedral QP layer","title":"Polyhedral QP layer","text":"Define input data The original data is repeated epochs times because Flux.train! 
only loops through the data set once","category":"page"},{"location":"examples/polyhedral_project/","page":"Polyhedral QP layer","title":"Polyhedral QP layer","text":"epochs = 50\ndataset = repeated((train_X, train_Y), epochs);\nnothing #hide","category":"page"},{"location":"examples/polyhedral_project/#Network-training","page":"Polyhedral QP layer","title":"Network training","text":"","category":"section"},{"location":"examples/polyhedral_project/","page":"Polyhedral QP layer","title":"Polyhedral QP layer","text":"training loss function, Flux optimizer","category":"page"},{"location":"examples/polyhedral_project/","page":"Polyhedral QP layer","title":"Polyhedral QP layer","text":"custom_loss(x, y) = Flux.crossentropy(m(x), y)\nopt = Flux.ADAM()\nevalcb = () -> @show(custom_loss(train_X, train_Y))","category":"page"},{"location":"examples/polyhedral_project/","page":"Polyhedral QP layer","title":"Polyhedral QP layer","text":"Train to optimize network parameters","category":"page"},{"location":"examples/polyhedral_project/","page":"Polyhedral QP layer","title":"Polyhedral QP layer","text":"@time Flux.train!(\n custom_loss,\n Flux.params(m),\n dataset,\n opt,\n cb = Flux.throttle(evalcb, 5),\n);\nnothing #hide","category":"page"},{"location":"examples/polyhedral_project/","page":"Polyhedral QP layer","title":"Polyhedral QP layer","text":"Although our custom implementation takes time, it is able to reach similar accuracy as the usual ReLU function implementation.","category":"page"},{"location":"examples/polyhedral_project/#Accuracy-results","page":"Polyhedral QP layer","title":"Accuracy results","text":"","category":"section"},{"location":"examples/polyhedral_project/","page":"Polyhedral QP layer","title":"Polyhedral QP layer","text":"Average of correct guesses","category":"page"},{"location":"examples/polyhedral_project/","page":"Polyhedral QP layer","title":"Polyhedral QP layer","text":"accuracy(x, y) = Statistics.mean(Flux.onecold(m(x)) .== 
Flux.onecold(y));\nnothing #hide","category":"page"},{"location":"examples/polyhedral_project/","page":"Polyhedral QP layer","title":"Polyhedral QP layer","text":"Training accuracy","category":"page"},{"location":"examples/polyhedral_project/","page":"Polyhedral QP layer","title":"Polyhedral QP layer","text":"accuracy(train_X, train_Y)","category":"page"},{"location":"examples/polyhedral_project/","page":"Polyhedral QP layer","title":"Polyhedral QP layer","text":"Test accuracy","category":"page"},{"location":"examples/polyhedral_project/","page":"Polyhedral QP layer","title":"Polyhedral QP layer","text":"accuracy(test_X, test_Y)","category":"page"},{"location":"examples/polyhedral_project/","page":"Polyhedral QP layer","title":"Polyhedral QP layer","text":"Note that the accuracy is low due to simplified training. It is possible to increase the number of samples N, the number of epochs epoch and the connectivity inner.","category":"page"},{"location":"examples/polyhedral_project/","page":"Polyhedral QP layer","title":"Polyhedral QP layer","text":"","category":"page"},{"location":"examples/polyhedral_project/","page":"Polyhedral QP layer","title":"Polyhedral QP layer","text":"This page was generated using Literate.jl.","category":"page"},{"location":"reference/#Reference","page":"Reference","title":"Reference","text":"","category":"section"},{"location":"reference/","page":"Reference","title":"Reference","text":"","category":"page"},{"location":"reference/","page":"Reference","title":"Reference","text":"Modules = [DiffOpt, DiffOpt.QuadraticProgram, DiffOpt.ConicProgram]","category":"page"},{"location":"reference/#DiffOpt.AbstractLazyScalarFunction","page":"Reference","title":"DiffOpt.AbstractLazyScalarFunction","text":"abstract type AbstractLazyScalarFunction <: MOI.AbstractScalarFunction end\n\nSubtype of MOI.AbstractScalarFunction that is not a standard MOI scalar function but can be converted to one using standard_form.\n\nThe function can also be inspected lazily 
using JuMP.coefficient or quad_sym_half.\n\n\n\n\n\n","category":"type"},{"location":"reference/#DiffOpt.AbstractModel","page":"Reference","title":"DiffOpt.AbstractModel","text":"abstract type AbstractModel <: MOI.ModelLike end\n\nModel supporting forward_differentiate! and reverse_differentiate!.\n\n\n\n\n\n","category":"type"},{"location":"reference/#DiffOpt.DifferentiateTimeSec","page":"Reference","title":"DiffOpt.DifferentiateTimeSec","text":"DifferentiateTimeSec()\n\nA model attribute for the total elapsed time (in seconds) for computing the differentiation information.\n\n\n\n\n\n","category":"type"},{"location":"reference/#DiffOpt.ForwardConstraintFunction","page":"Reference","title":"DiffOpt.ForwardConstraintFunction","text":"ForwardConstraintFunction <: MOI.AbstractConstraintAttribute\n\nA MOI.AbstractConstraintAttribute to set input data to forward differentiation, that is, problem input data.\n\nFor instance, if the scalar constraint of index ci contains θ * (x + 2y) <= 5θ, for the purpose of computing the derivative with respect to θ, the following should be set:\n\nMOI.set(model, DiffOpt.ForwardConstraintFunction(), ci, 1.0 * x + 2.0 * y - 5.0)\n\nNote that we use -5 as the ForwardConstraintFunction sets the tangent of the ConstraintFunction so we consider the expression θ * (x + 2y - 5).\n\n\n\n\n\n","category":"type"},{"location":"reference/#DiffOpt.ForwardObjectiveFunction","page":"Reference","title":"DiffOpt.ForwardObjectiveFunction","text":"ForwardObjectiveFunction <: MOI.AbstractModelAttribute\n\nA MOI.AbstractModelAttribute to set input data to forward differentiation, that is, problem input data. The possible values are any MOI.AbstractScalarFunction. 
A MOI.ScalarQuadraticFunction can only be used in linearly constrained quadratic models.\n\nFor instance, if the objective contains θ * (x + 2y), for the purpose of computing the derivative with respect to θ, the following should be set:\n\nMOI.set(model, DiffOpt.ForwardObjectiveFunction(), 1.0 * x + 2.0 * y)\n\nwhere x and y are the relevant MOI.VariableIndex.\n\n\n\n\n\n","category":"type"},{"location":"reference/#DiffOpt.ForwardVariablePrimal","page":"Reference","title":"DiffOpt.ForwardVariablePrimal","text":"ForwardVariablePrimal <: MOI.AbstractVariableAttribute\n\nA MOI.AbstractVariableAttribute to get output data from forward differentiation, that is, problem solution.\n\nFor instance, to get the tangent of the variable of index vi corresponding to the tangents given to ForwardObjectiveFunction and ForwardConstraintFunction, do the following:\n\nMOI.get(model, DiffOpt.ForwardVariablePrimal(), vi)\n\n\n\n\n\n","category":"type"},{"location":"reference/#DiffOpt.IndexMappedFunction","page":"Reference","title":"DiffOpt.IndexMappedFunction","text":"IndexMappedFunction{F<:MOI.AbstractFunction} <: AbstractLazyScalarFunction\n\nLazily represents the function MOI.Utilities.map_indices(index_map, DiffOpt.standard_form(func)).\n\n\n\n\n\n","category":"type"},{"location":"reference/#DiffOpt.MOItoJuMP","page":"Reference","title":"DiffOpt.MOItoJuMP","text":"MOItoJuMP{F<:MOI.AbstractScalarFunction} <: JuMP.AbstractJuMPScalar\n\nLazily represents the function JuMP.jump_function(model, DiffOpt.standard_form(func)).\n\n\n\n\n\n","category":"type"},{"location":"reference/#DiffOpt.MatrixScalarQuadraticFunction","page":"Reference","title":"DiffOpt.MatrixScalarQuadraticFunction","text":"struct MatrixScalarQuadraticFunction{T, VT, MT} <: MOI.AbstractScalarFunction\n affine::VectorScalarAffineFunction{T,VT}\n terms::MT\nend\n\nRepresents the function x' * terms * x / 2 + affine as an MOI.AbstractScalarFunction where x[i] = MOI.VariableIndex(i). 
Use standard_form to convert it to a MOI.ScalarQuadraticFunction{T}.\n\n\n\n\n\n","category":"type"},{"location":"reference/#DiffOpt.MatrixVectorAffineFunction","page":"Reference","title":"DiffOpt.MatrixVectorAffineFunction","text":"MatrixVectorAffineFunction{T, VT} <: MOI.AbstractVectorFunction\n\nRepresents the function terms * x + constant as an MOI.AbstractVectorFunction where x[i] = MOI.VariableIndex(i). Use standard_form to convert it to a MOI.VectorAffineFunction{T}.\n\n\n\n\n\n","category":"type"},{"location":"reference/#DiffOpt.ModelConstructor","page":"Reference","title":"DiffOpt.ModelConstructor","text":"ModelConstructor <: MOI.AbstractOptimizerAttribute\n\nDetermines which subtype of DiffOpt.AbstractModel to use for differentiation. When set to nothing, the first one out of model.model_constructors that support the problem is used.\n\n\n\n\n\n","category":"type"},{"location":"reference/#DiffOpt.ObjectiveFunctionAttribute","page":"Reference","title":"DiffOpt.ObjectiveFunctionAttribute","text":"struct ObjectiveFunctionAttribute{A,F} <: MOI.AbstractModelAttribute\n attr::A\nend\n\nObjective function attribute attr for the function type F. The type F is used by a MOI.Bridges.AbstractBridgeOptimizer to keep track of its position in a chain of objective bridges.\n\n\n\n\n\n","category":"type"},{"location":"reference/#DiffOpt.ProductOfSets","page":"Reference","title":"DiffOpt.ProductOfSets","text":"ProductOfSets{T} <: MOI.Utilities.OrderedProductOfSets{T}\n\nThe MOI.Utilities.@product_of_sets macro requires to know the list of sets at compile time. In DiffOpt however, the list depends on what the user is going to use as set as DiffOpt supports any set as long as it implements the required function of MathOptSetDistances. 
For this type, the list of sets can be given a run-time.\n\n\n\n\n\n","category":"type"},{"location":"reference/#DiffOpt.ReverseConstraintFunction","page":"Reference","title":"DiffOpt.ReverseConstraintFunction","text":"ReverseConstraintFunction\n\nAn MOI.AbstractConstraintAttribute to get output data to reverse differentiation, that is, problem input data.\n\nFor instance, if the following returns x + 2y + 5, it means that the tangent has coordinate 1 for the coefficient of x, coordinate 2 for the coefficient of y and 5 for the function constant. If the constraint is of the form func == constant or func <= constant, the tangent for the constant on the right-hand side is -5.\n\nMOI.get(model, DiffOpt.ReverseConstraintFunction(), ci)\n\n\n\n\n\n","category":"type"},{"location":"reference/#DiffOpt.ReverseObjectiveFunction","page":"Reference","title":"DiffOpt.ReverseObjectiveFunction","text":"ReverseObjectiveFunction <: MOI.AbstractModelAttribute\n\nA MOI.AbstractModelAttribute to get output data to reverse differentiation, that is, problem input data.\n\nFor instance, to get the tangent of the objective function corresponding to the tangent given to ReverseVariablePrimal, do the following:\n\nfunc = MOI.get(model, DiffOpt.ReverseObjectiveFunction())\n\nThen, to get the sensitivity of the linear term with variable x, do\n\nJuMP.coefficient(func, x)\n\nTo get the sensitivity with respect to the quadratic term with variables x and y, do either\n\nJuMP.coefficient(func, x, y)\n\nor\n\nDiffOpt.quad_sym_half(func, x, y)\n\nwarning: Warning\nThese two lines are not equivalent in case x == y, see quad_sym_half for the details on the difference between these two functions.\n\n\n\n\n\n","category":"type"},{"location":"reference/#DiffOpt.ReverseVariablePrimal","page":"Reference","title":"DiffOpt.ReverseVariablePrimal","text":"ReverseVariablePrimal <: MOI.AbstractVariableAttribute\n\nA MOI.AbstractVariableAttribute to set input data to reverse differentiation, that is, problem 
solution.\n\nFor instance, to set the tangent of the variable of index vi, do the following:\n\nMOI.set(model, DiffOpt.ReverseVariablePrimal(), x)\n\n\n\n\n\n","category":"type"},{"location":"reference/#DiffOpt.SparseVectorAffineFunction","page":"Reference","title":"DiffOpt.SparseVectorAffineFunction","text":"struct SparseVectorAffineFunction{T} <: MOI.AbstractVectorFunction\n terms::SparseArrays.SparseMatrixCSC{T,Int}\n constants::Vector{T}\nend\n\nThe vector-valued affine function A x + b, where:\n\nA is the sparse matrix given by terms\nb is the vector constants\n\n\n\n\n\n","category":"type"},{"location":"reference/#DiffOpt.VectorScalarAffineFunction","page":"Reference","title":"DiffOpt.VectorScalarAffineFunction","text":"VectorScalarAffineFunction{T, VT} <: MOI.AbstractScalarFunction\n\nRepresents the function x ⋅ terms + constant as an MOI.AbstractScalarFunction where x[i] = MOI.VariableIndex(i). Use standard_form to convert it to a MOI.ScalarAffineFunction{T}.\n\n\n\n\n\n","category":"type"},{"location":"reference/#DiffOpt.Dπ-Union{Tuple{T}, Tuple{Vector{T}, MathOptInterface.ModelLike, DiffOpt.ProductOfSets}} where T","page":"Reference","title":"DiffOpt.Dπ","text":"Dπ(v::Vector{Float64}, model, cones::ProductOfSets)\n\nGiven a model, its cones, find the gradient of the projection of the vectors v of length equal to the number of rows in the conic form onto the cartesian product of the cones corresponding to these rows. 
For more info, refer to https://github.com/matbesancon/MathOptSetDistances.jl\n\n\n\n\n\n","category":"method"},{"location":"reference/#DiffOpt.add_all_model_constructors-Tuple{Any}","page":"Reference","title":"DiffOpt.add_all_model_constructors","text":"add_all_model_constructors(model)\n\nAdd all constructors of AbstractModel defined in this package to model with add_model_constructor.\n\n\n\n\n\n","category":"method"},{"location":"reference/#DiffOpt.add_model_constructor-Tuple{DiffOpt.Optimizer, Any}","page":"Reference","title":"DiffOpt.add_model_constructor","text":"addmodelconstructor(optimizer::Optimizer, model_constructor)\n\nAdd the constructor of AbstractModel for optimizer to choose from when trying to differentiate.\n\n\n\n\n\n","category":"method"},{"location":"reference/#DiffOpt.dU_from_dQ!-Tuple{Any, Any}","page":"Reference","title":"DiffOpt.dU_from_dQ!","text":"dU_from_dQ!(dQ, U)\n\nReturn the solution dU of the matrix equation dQ = dU' * U + U' * dU where dQ and U are the two argument of the function.\n\nThis function overwrites the first argument dQ to store the solution. The matrix U is not however modified.\n\nThe matrix dQ is assumed to be symmetric and the matrix U is assumed to be upper triangular.\n\nWe can exploit the structure of U here:\n\nIf the factorization was obtained from SVD, U would be orthogonal\nIf the factorization was obtained from Cholesky, U would be upper triangular.\n\nThe MOI bridge uses Cholesky in order to exploit sparsity so we are in the second case.\n\nWe look for an upper triangular dU as well.\n\nWe can find each column of dU by solving a triangular linear system once the previous column have been found. 
Indeed, let dj be the jth column of dU dU' * U = vcat(dj'U for j in axes(U, 2)) Therefore, dQ[j, 1:j] = dj'U[:, 1:j] + U[:, j]'dU[:, 1:j]SodQ[j, 1:(j-1)] - U[:, j]' * dU[:, 1:(j-1)] = dj'U[:, 1:(j-1)]anddQ[j, j] / 2 = dj'U[:, j]`\n\n\n\n\n\n","category":"method"},{"location":"reference/#DiffOpt.diff_optimizer-Tuple{Any}","page":"Reference","title":"DiffOpt.diff_optimizer","text":"diff_optimizer(optimizer_constructor)::Optimizer\n\nCreates a DiffOpt.Optimizer, which is an MOI layer with an internal optimizer and other utility methods. Results (primal, dual and slack values) are obtained by querying the internal optimizer instantiated using the optimizer_constructor. These values are required for find jacobians with respect to problem data.\n\nOne define a differentiable model by using any solver of choice. Example:\n\njulia> import DiffOpt, HiGHS\n\njulia> model = DiffOpt.diff_optimizer(HiGHS.Optimizer)\njulia> x = model.add_variable(model)\njulia> model.add_constraint(model, ...)\n\n\n\n\n\n","category":"method"},{"location":"reference/#DiffOpt.forward_differentiate!","page":"Reference","title":"DiffOpt.forward_differentiate!","text":"forward_differentiate!(model::Optimizer)\n\nWrapper method for the forward pass. This method will consider as input a currently solved problem and differentials with respect to problem data set with the ForwardObjectiveFunction and ForwardConstraintFunction attributes. The output solution differentials can be queried with the attribute ForwardVariablePrimal.\n\n\n\n\n\n","category":"function"},{"location":"reference/#DiffOpt.map_rows-Tuple{Function, Any, DiffOpt.ProductOfSets, Union{DiffOpt.Flattened, DiffOpt.Nested}}","page":"Reference","title":"DiffOpt.map_rows","text":"map_rows(f::Function, model, cones::ProductOfSets, map_mode::Union{Nested{T}, Flattened{T}})\n\nGiven a model, its cones and map_mode of type Nested (resp. Flattened), return a Vector{T} of length equal to the number of cones (resp. 
rows) in the conic form where the value for the index (resp. rows) corresponding to each cone is equal to f(ci, r) where ci is the corresponding constraint index in model and r is a UnitRange of the corresponding rows in the conic form.\n\n\n\n\n\n","category":"method"},{"location":"reference/#DiffOpt.quad_sym_half","page":"Reference","title":"DiffOpt.quad_sym_half","text":"quad_sym_half(func, vi1::MOI.VariableIndex, vi2::MOI.VariableIndex)\n\nReturn Q[i,j] = Q[j,i] where the quadratic terms of func is represented by x' Q x / 2 for a symmetric matrix Q where x[i] = vi1 and x[j] = vi2. Note that while this is equal to JuMP.coefficient(func, vi1, vi2) if vi1 != vi2, in the case vi1 == vi2, it is rather equal to 2JuMP.coefficient(func, vi1, vi2).\n\n\n\n\n\n","category":"function"},{"location":"reference/#DiffOpt.reverse_differentiate!","page":"Reference","title":"DiffOpt.reverse_differentiate!","text":"reverse_differentiate!(model::MOI.ModelLike)\n\nWrapper method for the backward pass / reverse differentiation. This method will consider as input a currently solved problem and differentials with respect to the solution set with the ReverseVariablePrimal attribute. 
The output problem data differentials can be queried with the attributes ReverseObjectiveFunction and ReverseConstraintFunction.\n\n\n\n\n\n","category":"function"},{"location":"reference/#DiffOpt.standard_form","page":"Reference","title":"DiffOpt.standard_form","text":"standard_form(func::AbstractLazyScalarFunction)\n\nConverts func to a standard MOI scalar function.\n\nstandard_form(func::MOItoJuMP)\n\nConverts func to a standard JuMP scalar function.\n\n\n\n\n\n","category":"function"},{"location":"reference/#DiffOpt.ΔQ_from_ΔU!-Tuple{Any, Any}","page":"Reference","title":"DiffOpt.ΔQ_from_ΔU!","text":"ΔQ_from_ΔU!(ΔU, U)\n\nReturn the symmetric solution ΔQ of the matrix equation triu(ΔU) = 2triu(U * ΔQ) where ΔU and U are the two argument of the function.\n\nThis function overwrites the first argument ΔU to store the solution. The matrix U is not however modified.\n\nThe matrix U is assumed to be upper triangular.\n\nWe can exploit the structure of U here:\n\nIf the factorization was obtained from SVD, U would be orthogonal\nIf the factorization was obtained from Cholesky, U would be upper triangular.\n\nThe MOI bridge uses Cholesky in order to exploit sparsity so we are in the second case.\n\nWe can find each column of ΔQ by solving a triangular linear system.\n\n\n\n\n\n","category":"method"},{"location":"reference/#DiffOpt.π-Union{Tuple{T}, Tuple{Vector{T}, MathOptInterface.ModelLike, DiffOpt.ProductOfSets}} where T","page":"Reference","title":"DiffOpt.π","text":"π(v::Vector{Float64}, model::MOI.ModelLike, cones::ProductOfSets)\n\nGiven a model, its cones, find the projection of the vectors v of length equal to the number of rows in the conic form onto the cartesian product of the cones corresponding to these rows. 
For more info, refer to https://github.com/matbesancon/MathOptSetDistances.jl\n\n\n\n\n\n","category":"method"},{"location":"reference/#DiffOpt.QuadraticProgram.LinearAlgebraSolver","page":"Reference","title":"DiffOpt.QuadraticProgram.LinearAlgebraSolver","text":"LinearAlgebraSolver\n\nOptimizer attribute for the solver to use for the linear algebra operations. Each solver must implement: solve_system(solver, LHS, RHS, iterative::Bool).\n\n\n\n\n\n","category":"type"},{"location":"reference/#DiffOpt.QuadraticProgram.Model","page":"Reference","title":"DiffOpt.QuadraticProgram.Model","text":"DiffOpt.QuadraticProgram.Model <: DiffOpt.AbstractModel\n\nModel to differentiate quadratic programs.\n\nFor the reverse differentiation, it differentiates the optimal solution z and return product of jacobian matrices (dz / dQ, dz / dq, etc) with the backward pass vector dl / dz\n\nThe method computes the product of\n\njacobian of problem solution z* with respect to problem parameters set with the DiffOpt.ReverseVariablePrimal\na backward pass vector dl / dz, where l can be a loss function\n\nNote that this method does not returns the actual jacobians.\n\nFor more info refer eqn(7) and eqn(8) of https://arxiv.org/pdf/1703.00443.pdf\n\n\n\n\n\n","category":"type"},{"location":"reference/#DiffOpt.QuadraticProgram.create_LHS_matrix","page":"Reference","title":"DiffOpt.QuadraticProgram.create_LHS_matrix","text":"create_LHS_matrix(z, λ, Q, G, h, A=nothing)\n\nInverse matrix specified on RHS of eqn(7) in https://arxiv.org/pdf/1703.00443.pdf\n\nHelper method while calling reverse_differentiate!\n\n\n\n\n\n","category":"function"},{"location":"reference/#DiffOpt.QuadraticProgram.solve_system-NTuple{4, Any}","page":"Reference","title":"DiffOpt.QuadraticProgram.solve_system","text":"Default solve_system call uses IterativeSolvers or the default linear 
solve\n\n\n\n\n\n","category":"method"},{"location":"reference/#DiffOpt.ConicProgram.Model","page":"Reference","title":"DiffOpt.ConicProgram.Model","text":"Diffopt.ConicProgram.Model <: DiffOpt.AbstractModel\n\nModel to differentiate conic programs.\n\nThe forward differentiation computes the product of the derivative (Jacobian) at the conic program parameters A, b, c to the perturbations dA, db, dc.\n\nThe reverse differentiation computes the product of the transpose of the derivative (Jacobian) at the conic program parameters A, b, c to the perturbations dx, dy, ds.\n\nFor theoretical background, refer Section 3 of Differentiating Through a Cone Program, https://arxiv.org/abs/1904.09043\n\n\n\n\n\n","category":"type"},{"location":"examples/nearest_correlation/","page":"Nearest correlation","title":"Nearest correlation","text":"EditURL = \"nearest_correlation.jl\"","category":"page"},{"location":"examples/nearest_correlation/#Nearest-correlation","page":"Nearest correlation","title":"Nearest correlation","text":"","category":"section"},{"location":"examples/nearest_correlation/","page":"Nearest correlation","title":"Nearest correlation","text":"(Image: )","category":"page"},{"location":"examples/nearest_correlation/","page":"Nearest correlation","title":"Nearest correlation","text":"This example illustrates the sensitivity analysis of the nearest correlation problem studied in [H02].","category":"page"},{"location":"examples/nearest_correlation/","page":"Nearest correlation","title":"Nearest correlation","text":"Higham, Nicholas J. Computing the nearest correlation matrix—a problem from finance. 
IMA journal of Numerical Analysis 22.3 (2002): 329-343.","category":"page"},{"location":"examples/nearest_correlation/","page":"Nearest correlation","title":"Nearest correlation","text":"using DiffOpt, JuMP, SCS, LinearAlgebra\nsolver = SCS.Optimizer\n\nfunction proj(A, dH = Diagonal(ones(size(A, 1))), H = ones(size(A)))\n n = LinearAlgebra.checksquare(A)\n model = Model(() -> DiffOpt.diff_optimizer(solver))\n @variable(model, X[1:n, 1:n] in PSDCone())\n @constraint(model, [i in 1:n], X[i, i] == 1)\n @objective(model, Min, sum((H .* (X - A)) .^ 2))\n MOI.set(\n model,\n DiffOpt.ForwardObjectiveFunction(),\n sum((dH .* (X - A)) .^ 2),\n )\n optimize!(model)\n DiffOpt.forward_differentiate!(model)\n dX = MOI.get.(model, DiffOpt.ForwardVariablePrimal(), X)\n return value.(X), dX\nend","category":"page"},{"location":"examples/nearest_correlation/","page":"Nearest correlation","title":"Nearest correlation","text":"Example from [H02, p. 334-335]:","category":"page"},{"location":"examples/nearest_correlation/","page":"Nearest correlation","title":"Nearest correlation","text":"A = LinearAlgebra.Tridiagonal(ones(2), ones(3), ones(2))","category":"page"},{"location":"examples/nearest_correlation/","page":"Nearest correlation","title":"Nearest correlation","text":"The projection is computed as follows:","category":"page"},{"location":"examples/nearest_correlation/","page":"Nearest correlation","title":"Nearest correlation","text":"X, dX = proj(A)\nnothing # hide","category":"page"},{"location":"examples/nearest_correlation/","page":"Nearest correlation","title":"Nearest correlation","text":"The projection of A is:","category":"page"},{"location":"examples/nearest_correlation/","page":"Nearest correlation","title":"Nearest correlation","text":"X","category":"page"},{"location":"examples/nearest_correlation/","page":"Nearest correlation","title":"Nearest correlation","text":"The derivative of the projection with respect to a uniform increase of the weights of the diagonal 
entries is:","category":"page"},{"location":"examples/nearest_correlation/","page":"Nearest correlation","title":"Nearest correlation","text":"dX","category":"page"},{"location":"examples/nearest_correlation/","page":"Nearest correlation","title":"Nearest correlation","text":"Example from [H02, Section 4, p. 340]:","category":"page"},{"location":"examples/nearest_correlation/","page":"Nearest correlation","title":"Nearest correlation","text":"A = LinearAlgebra.Tridiagonal(-ones(3), 2ones(4), -ones(3))","category":"page"},{"location":"examples/nearest_correlation/","page":"Nearest correlation","title":"Nearest correlation","text":"The projection is computed as follows:","category":"page"},{"location":"examples/nearest_correlation/","page":"Nearest correlation","title":"Nearest correlation","text":"X, dX = proj(A)\nnothing # hide","category":"page"},{"location":"examples/nearest_correlation/","page":"Nearest correlation","title":"Nearest correlation","text":"The projection of A is:","category":"page"},{"location":"examples/nearest_correlation/","page":"Nearest correlation","title":"Nearest correlation","text":"X","category":"page"},{"location":"examples/nearest_correlation/","page":"Nearest correlation","title":"Nearest correlation","text":"The derivative of the projection with respect to a uniform increase of the weights of the diagonal entries is:","category":"page"},{"location":"examples/nearest_correlation/","page":"Nearest correlation","title":"Nearest correlation","text":"dX","category":"page"},{"location":"examples/nearest_correlation/","page":"Nearest correlation","title":"Nearest correlation","text":"","category":"page"},{"location":"examples/nearest_correlation/","page":"Nearest correlation","title":"Nearest correlation","text":"This page was generated using Literate.jl.","category":"page"},{"location":"usage/#Usage","page":"Usage","title":"Usage","text":"","category":"section"},{"location":"usage/","page":"Usage","title":"Usage","text":"Create a 
differentiable model from existing optimizers","category":"page"},{"location":"usage/","page":"Usage","title":"Usage","text":"using JuMP\nimport DiffOpt\nimport SCS\n\nmodel = DiffOpt.diff_optimizer(SCS.Optimizer)","category":"page"},{"location":"usage/","page":"Usage","title":"Usage","text":"Update and solve the model ","category":"page"},{"location":"usage/","page":"Usage","title":"Usage","text":"x = MOI.add_variables(model, 2)\nc = MOI.add_constraint(model, ...)\n\nMOI.optimize!(model)","category":"page"},{"location":"usage/","page":"Usage","title":"Usage","text":"Finally differentiate the model (primal and dual variables specifically) to obtain product of jacobians with respect to problem parameters and a backward pass vector. Currently DiffOpt supports two backends for differentiating a model:","category":"page"},{"location":"usage/","page":"Usage","title":"Usage","text":"To differentiate Convex Quadratic Program","category":"page"},{"location":"usage/","page":"Usage","title":"Usage","text":"beginalign*\n min_x in mathbbR^n frac12 x^T Q x + q^T x \n textst A x = b qquad b in mathbbR^m \n G x leq h qquad h in mathbbR^p\nendalign*","category":"page"},{"location":"usage/","page":"Usage","title":"Usage","text":"we can use the reverse_differentiate! 
method","category":"page"},{"location":"usage/","page":"Usage","title":"Usage","text":"MOI.set.(model,\n DiffOpt.ReverseVariablePrimal(), x, ones(2))\nDiffOpt.reverse_differentiate!(model)\ngrad_obj = MOI.get(model, DiffOpt.ReverseObjectiveFunction())\ngrad_con = MOI.get.(model, DiffOpt.ReverseConstraintFunction(), c)","category":"page"},{"location":"usage/","page":"Usage","title":"Usage","text":"To differentiate convex conic program","category":"page"},{"location":"usage/","page":"Usage","title":"Usage","text":"beginalign*\n min_x in mathbbR^n c^T x \n textst A x + s = b \n b in mathbbR^m \n s in mathcalK\nendalign*","category":"page"},{"location":"usage/","page":"Usage","title":"Usage","text":"we can use the forward_differentiate! method with perturbations in matrices A, b, c:","category":"page"},{"location":"usage/","page":"Usage","title":"Usage","text":"import LinearAlgebra: ⋅\nMOI.set(model, DiffOpt.ForwardObjectiveFunction(), ones(2) ⋅ x)\nDiffOpt.forward_differentiate!(model)\ngrad_x = MOI.get.(model, DiffOpt.ForwardVariablePrimal(), x)","category":"page"},{"location":"examples/Thermal_Generation_Dispatch_Example/","page":"Thermal Generation Dispatch Example","title":"Thermal Generation Dispatch Example","text":"EditURL = \"Thermal_Generation_Dispatch_Example.jl\"","category":"page"},{"location":"examples/Thermal_Generation_Dispatch_Example/#Thermal-Generation-Dispatch-Example","page":"Thermal Generation Dispatch Example","title":"Thermal Generation Dispatch Example","text":"","category":"section"},{"location":"examples/Thermal_Generation_Dispatch_Example/","page":"Thermal Generation Dispatch Example","title":"Thermal Generation Dispatch Example","text":"(Image: )","category":"page"},{"location":"examples/Thermal_Generation_Dispatch_Example/","page":"Thermal Generation Dispatch Example","title":"Thermal Generation Dispatch Example","text":"This example illustrates the sensitivity analysis of thermal generation dispatch 
problem.","category":"page"},{"location":"examples/Thermal_Generation_Dispatch_Example/","page":"Thermal Generation Dispatch Example","title":"Thermal Generation Dispatch Example","text":"This problem can be described as the choice of thermal generation g given a demand d, a price for thermal generation c and a penalty price c_{ϕ} for any demand not attended ϕ.","category":"page"},{"location":"examples/Thermal_Generation_Dispatch_Example/","page":"Thermal Generation Dispatch Example","title":"Thermal Generation Dispatch Example","text":"beginsplit\nbeginarray ll\nmboxminimize sum_i=1^N c_i g_i + c_phi phi \nmboxst g_i ge 0 quad i=1N \n g_i le G_i quad i=1N \n sum_i=1^N g_i + phi = d\nendarray\nendsplit","category":"page"},{"location":"examples/Thermal_Generation_Dispatch_Example/","page":"Thermal Generation Dispatch Example","title":"Thermal Generation Dispatch Example","text":"where","category":"page"},{"location":"examples/Thermal_Generation_Dispatch_Example/","page":"Thermal Generation Dispatch Example","title":"Thermal Generation Dispatch Example","text":"G_{i} is the maximum possible generation for a thermal generator i","category":"page"},{"location":"examples/Thermal_Generation_Dispatch_Example/#Define-and-solve-the-Thermal-Dispatch-Problem","page":"Thermal Generation Dispatch Example","title":"Define and solve the Thermal Dispatch Problem","text":"","category":"section"},{"location":"examples/Thermal_Generation_Dispatch_Example/","page":"Thermal Generation Dispatch Example","title":"Thermal Generation Dispatch Example","text":"First, import the libraries.","category":"page"},{"location":"examples/Thermal_Generation_Dispatch_Example/","page":"Thermal Generation Dispatch Example","title":"Thermal Generation Dispatch Example","text":"using Test\nusing JuMP\nimport DiffOpt\nimport LinearAlgebra: dot\nimport HiGHS\nimport MathOptInterface as MOI\nimport Plots","category":"page"},{"location":"examples/Thermal_Generation_Dispatch_Example/","page":"Thermal 
Generation Dispatch Example","title":"Thermal Generation Dispatch Example","text":"Define the model that will be construct given a set of parameters.","category":"page"},{"location":"examples/Thermal_Generation_Dispatch_Example/","page":"Thermal Generation Dispatch Example","title":"Thermal Generation Dispatch Example","text":"function generate_model(\n d::Float64;\n g_sup::Vector{Float64},\n c_g::Vector{Float64},\n c_ϕ::Float64,\n)\n # Creation of the Model and Parameters\n model = Model(() -> DiffOpt.diff_optimizer(HiGHS.Optimizer))\n set_silent(model)\n I = length(g_sup)\n\n # Variables\n @variable(model, g[i in 1:I] >= 0.0)\n @variable(model, ϕ >= 0.0)\n\n # Constraints\n @constraint(model, limit_constraints_sup[i in 1:I], g[i] <= g_sup[i])\n @constraint(model, demand_constraint, sum(g) + ϕ == d)\n\n # Objectives\n @objective(model, Min, dot(c_g, g) + c_ϕ * ϕ)\n\n # Solve the model\n optimize!(model)\n\n # Return the solved model\n return model\nend","category":"page"},{"location":"examples/Thermal_Generation_Dispatch_Example/","page":"Thermal Generation Dispatch Example","title":"Thermal Generation Dispatch Example","text":"Define the functions that will get the primal values g and \\phi and sensitivity analysis of the demand dg/dd and d\\phi/dd from a optimized model.","category":"page"},{"location":"examples/Thermal_Generation_Dispatch_Example/","page":"Thermal Generation Dispatch Example","title":"Thermal Generation Dispatch Example","text":"function diff_forward(model::Model, ϵ::Float64 = 1.0)\n # Initialization of parameters and references to simplify the notation\n vect_ref = [model[:g]; model[:ϕ]]\n I = length(model[:g])\n\n # Get the primal solution of the model\n vect = MOI.get.(model, MOI.VariablePrimal(), vect_ref)\n\n # Pass the perturbation to the DiffOpt Framework and set the context to Forward\n constraint_equation = convert(MOI.ScalarAffineFunction{Float64}, ϵ)\n MOI.set(\n model,\n DiffOpt.ForwardConstraintFunction(),\n 
model[:demand_constraint],\n constraint_equation,\n )\n DiffOpt.forward_differentiate!(model)\n\n # Get the derivative of the model\n dvect = MOI.get.(model, DiffOpt.ForwardVariablePrimal(), vect_ref)\n\n # Return the values as a vector\n return [vect; dvect]\nend\n\nfunction diff_reverse(model::Model, ϵ::Float64 = 1.0)\n # Initialization of parameters and references to simplify the notation\n vect_ref = [model[:g]; model[:ϕ]]\n I = length(model[:g])\n\n # Get the primal solution of the model\n vect = MOI.get.(model, MOI.VariablePrimal(), vect_ref)\n\n # Set variables needed for the DiffOpt Backward Framework\n dvect = Array{Float64,1}(undef, I + 1)\n perturbation = zeros(I + 1)\n\n # Loop for each primal variable\n for i in 1:I+1\n # Set the perturbation in the Primal Variables and set the context to Backward\n perturbation[i] = ϵ\n MOI.set.(model, DiffOpt.ReverseVariablePrimal(), vect_ref, perturbation)\n DiffOpt.reverse_differentiate!(model)\n\n # Get the value of the derivative of the model\n dvect[i] = JuMP.constant(\n MOI.get(\n model,\n DiffOpt.ReverseConstraintFunction(),\n model[:demand_constraint],\n ),\n )\n perturbation[i] = 0.0\n end\n\n # Return the values as a vector\n return [vect; dvect]\nend","category":"page"},{"location":"examples/Thermal_Generation_Dispatch_Example/","page":"Thermal Generation Dispatch Example","title":"Thermal Generation Dispatch Example","text":"Initialize of Parameters","category":"page"},{"location":"examples/Thermal_Generation_Dispatch_Example/","page":"Thermal Generation Dispatch Example","title":"Thermal Generation Dispatch Example","text":"g_sup = [10.0, 20.0, 30.0]\nI = length(g_sup)\nd = 0.0:0.1:80\nd_size = length(d)\nc_g = [1.0, 3.0, 5.0]\nc_ϕ = 10.0;\nnothing #hide","category":"page"},{"location":"examples/Thermal_Generation_Dispatch_Example/","page":"Thermal Generation Dispatch Example","title":"Thermal Generation Dispatch Example","text":"Generate models for each demand 
d","category":"page"},{"location":"examples/Thermal_Generation_Dispatch_Example/","page":"Thermal Generation Dispatch Example","title":"Thermal Generation Dispatch Example","text":"models = generate_model.(d; g_sup = g_sup, c_g = c_g, c_ϕ = c_ϕ);\nnothing #hide","category":"page"},{"location":"examples/Thermal_Generation_Dispatch_Example/","page":"Thermal Generation Dispatch Example","title":"Thermal Generation Dispatch Example","text":"Get the results of models with the DiffOpt Forward and Backward context","category":"page"},{"location":"examples/Thermal_Generation_Dispatch_Example/","page":"Thermal Generation Dispatch Example","title":"Thermal Generation Dispatch Example","text":"result_forward = diff_forward.(models)\noptimize!.(models)\nresult_reverse = diff_reverse.(models);\nnothing #hide","category":"page"},{"location":"examples/Thermal_Generation_Dispatch_Example/","page":"Thermal Generation Dispatch Example","title":"Thermal Generation Dispatch Example","text":"Organization of results to plot Initialize data_results that will contain every result","category":"page"},{"location":"examples/Thermal_Generation_Dispatch_Example/","page":"Thermal Generation Dispatch Example","title":"Thermal Generation Dispatch Example","text":"data_results = Array{Float64,3}(undef, 2, d_size, 2 * (I + 1));\nnothing #hide","category":"page"},{"location":"examples/Thermal_Generation_Dispatch_Example/","page":"Thermal Generation Dispatch Example","title":"Thermal Generation Dispatch Example","text":"Populate the data_results array","category":"page"},{"location":"examples/Thermal_Generation_Dispatch_Example/","page":"Thermal Generation Dispatch Example","title":"Thermal Generation Dispatch Example","text":"for k in 1:d_size\n data_results[1, k, :] = result_forward[k]\n data_results[2, k, :] = result_reverse[k]\nend","category":"page"},{"location":"examples/Thermal_Generation_Dispatch_Example/#Results-with-Plot-graphs","page":"Thermal Generation Dispatch Example","title":"Results 
with Plot graphs","text":"","category":"section"},{"location":"examples/Thermal_Generation_Dispatch_Example/#Results-for-the-forward-context","page":"Thermal Generation Dispatch Example","title":"Results for the forward context","text":"","category":"section"},{"location":"examples/Thermal_Generation_Dispatch_Example/","page":"Thermal Generation Dispatch Example","title":"Thermal Generation Dispatch Example","text":"Result Primal Values:","category":"page"},{"location":"examples/Thermal_Generation_Dispatch_Example/","page":"Thermal Generation Dispatch Example","title":"Thermal Generation Dispatch Example","text":"Plots.plot(\n d,\n data_results[1, :, 1:I+1];\n title = \"Generation by Demand\",\n label = [\"Thermal Generation 1\" \"Thermal Generation 2\" \"Thermal Generation 3\" \"Generation Deficit\"],\n xlabel = \"Demand [unit]\",\n ylabel = \"Generation [unit]\",\n)","category":"page"},{"location":"examples/Thermal_Generation_Dispatch_Example/","page":"Thermal Generation Dispatch Example","title":"Thermal Generation Dispatch Example","text":"Result Sensitivity Analysis:","category":"page"},{"location":"examples/Thermal_Generation_Dispatch_Example/","page":"Thermal Generation Dispatch Example","title":"Thermal Generation Dispatch Example","text":"Plots.plot(\n d,\n data_results[1, :, I+2:2*(I+1)];\n title = \"Sensitivity of Generation by Demand\",\n label = [\"T. Gen. 1 Sensitivity\" \"T. Gen. 2 Sensitivity\" \"T. Gen. 3 Sensitivity\" \"Gen. 
Deficit Sensitivity\"],\n xlabel = \"Demand [unit]\",\n ylabel = \"Sensitivity [-]\",\n)","category":"page"},{"location":"examples/Thermal_Generation_Dispatch_Example/#Results-for-the-reverse-context","page":"Thermal Generation Dispatch Example","title":"Results for the reverse context","text":"","category":"section"},{"location":"examples/Thermal_Generation_Dispatch_Example/","page":"Thermal Generation Dispatch Example","title":"Thermal Generation Dispatch Example","text":"Result Primal Values:","category":"page"},{"location":"examples/Thermal_Generation_Dispatch_Example/","page":"Thermal Generation Dispatch Example","title":"Thermal Generation Dispatch Example","text":"Plots.plot(\n d,\n data_results[2, :, 1:I+1];\n title = \"Generation by Demand\",\n label = [\"Thermal Generation 1\" \"Thermal Generation 2\" \"Thermal Generation 3\" \"Generation Deficit\"],\n xlabel = \"Demand [unit]\",\n ylabel = \"Generation [unit]\",\n)","category":"page"},{"location":"examples/Thermal_Generation_Dispatch_Example/","page":"Thermal Generation Dispatch Example","title":"Thermal Generation Dispatch Example","text":"Result Sensitivity Analysis:","category":"page"},{"location":"examples/Thermal_Generation_Dispatch_Example/","page":"Thermal Generation Dispatch Example","title":"Thermal Generation Dispatch Example","text":"Plots.plot(\n d,\n data_results[2, :, I+2:2*(I+1)];\n title = \"Sensitivity of Generation by Demand\",\n label = [\"T. Gen. 1 Sensitivity\" \"T. Gen. 2 Sensitivity\" \"T. Gen. 3 Sensitivity\" \"Gen. 
Deficit Sensitivity\"],\n xlabel = \"Demand [unit]\",\n ylabel = \"Sensitivity [-]\",\n)","category":"page"},{"location":"examples/Thermal_Generation_Dispatch_Example/","page":"Thermal Generation Dispatch Example","title":"Thermal Generation Dispatch Example","text":"","category":"page"},{"location":"examples/Thermal_Generation_Dispatch_Example/","page":"Thermal Generation Dispatch Example","title":"Thermal Generation Dispatch Example","text":"This page was generated using Literate.jl.","category":"page"},{"location":"#DiffOpt.jl","page":"Home","title":"DiffOpt.jl","text":"","category":"section"},{"location":"","page":"Home","title":"Home","text":"DiffOpt.jl is a package for differentiating convex optimization program (JuMP.jl or MathOptInterface.jl models) with respect to program parameters. Note that this package does not contain any solver. This package has two major backends, available via the reverse_differentiate! and forward_differentiate! methods, to differentiate models (quadratic or conic) with optimal solutions.","category":"page"},{"location":"","page":"Home","title":"Home","text":"note: Note\nCurrently supports linear programs (LP), convex quadratic programs (QP) and convex conic programs (SDP, SOCP, exponential cone constraints only). 
","category":"page"},{"location":"#Installation","page":"Home","title":"Installation","text":"","category":"section"},{"location":"","page":"Home","title":"Home","text":"DiffOpt can be installed through the Julia package manager:","category":"page"},{"location":"","page":"Home","title":"Home","text":"(v1.3) pkg> add https://github.com/jump-dev/DiffOpt.jl","category":"page"},{"location":"#Why-are-Differentiable-optimization-problems-important?","page":"Home","title":"Why are Differentiable optimization problems important?","text":"","category":"section"},{"location":"","page":"Home","title":"Home","text":"Differentiable optimization is a promising field of convex optimization and has many potential applications in game theory, control theory and machine learning (specifically deep learning - refer this video for more). Recent work has shown how to differentiate specific subclasses of convex optimization problems. But several applications remain unexplored (refer section 8 of this really good thesis). 
With the help of automatic differentiation, differentiable optimization can have a significant impact on creating end-to-end differentiable systems to model neural networks, stochastic processes, or a game.","category":"page"},{"location":"#Contributing","page":"Home","title":"Contributing","text":"","category":"section"},{"location":"","page":"Home","title":"Home","text":"Contributions to this package are more than welcome, if you find a bug or have any suggestions for the documentation please post it on the github issue tracker.","category":"page"},{"location":"","page":"Home","title":"Home","text":"When contributing please note that the package follows the JuMP style guide","category":"page"},{"location":"examples/chainrules_unit/","page":"ChainRules integration demo: Relaxed Unit Commitment","title":"ChainRules integration demo: Relaxed Unit Commitment","text":"EditURL = \"chainrules_unit.jl\"","category":"page"},{"location":"examples/chainrules_unit/#ChainRules-integration-demo:-Relaxed-Unit-Commitment","page":"ChainRules integration demo: Relaxed Unit Commitment","title":"ChainRules integration demo: Relaxed Unit Commitment","text":"","category":"section"},{"location":"examples/chainrules_unit/","page":"ChainRules integration demo: Relaxed Unit Commitment","title":"ChainRules integration demo: Relaxed Unit Commitment","text":"(Image: )","category":"page"},{"location":"examples/chainrules_unit/","page":"ChainRules integration demo: Relaxed Unit Commitment","title":"ChainRules integration demo: Relaxed Unit Commitment","text":"In this example, we will demonstrate the integration of DiffOpt with ChainRulesCore.jl, the library allowing the definition of derivatives for functions that can then be used by automatic differentiation systems.","category":"page"},{"location":"examples/chainrules_unit/","page":"ChainRules integration demo: Relaxed Unit Commitment","title":"ChainRules integration demo: Relaxed Unit Commitment","text":"using JuMP\nimport DiffOpt\nimport 
Plots\nimport LinearAlgebra: ⋅\nimport HiGHS\nimport ChainRulesCore","category":"page"},{"location":"examples/chainrules_unit/#Unit-commitment-problem","page":"ChainRules integration demo: Relaxed Unit Commitment","title":"Unit commitment problem","text":"","category":"section"},{"location":"examples/chainrules_unit/","page":"ChainRules integration demo: Relaxed Unit Commitment","title":"ChainRules integration demo: Relaxed Unit Commitment","text":"We will consider a unit commitment problem, finding the cost-minimizing activation of generation units in a power network over multiple time periods. The considered constraints include:","category":"page"},{"location":"examples/chainrules_unit/","page":"ChainRules integration demo: Relaxed Unit Commitment","title":"ChainRules integration demo: Relaxed Unit Commitment","text":"Demand satisfaction of several loads\nRamping constraints\nGeneration limits.","category":"page"},{"location":"examples/chainrules_unit/","page":"ChainRules integration demo: Relaxed Unit Commitment","title":"ChainRules integration demo: Relaxed Unit Commitment","text":"The decisions are:","category":"page"},{"location":"examples/chainrules_unit/","page":"ChainRules integration demo: Relaxed Unit Commitment","title":"ChainRules integration demo: Relaxed Unit Commitment","text":"u_it in 01: activation of the i-th unit at time t\np_it: power output of the i-th unit at time t.","category":"page"},{"location":"examples/chainrules_unit/","page":"ChainRules integration demo: Relaxed Unit Commitment","title":"ChainRules integration demo: Relaxed Unit Commitment","text":"DiffOpt handles convex optimization problems only, we therefore relax the domain of the u_it variables to left01right.","category":"page"},{"location":"examples/chainrules_unit/#Primal-UC-problem","page":"ChainRules integration demo: Relaxed Unit Commitment","title":"Primal UC problem","text":"","category":"section"},{"location":"examples/chainrules_unit/","page":"ChainRules integration 
demo: Relaxed Unit Commitment","title":"ChainRules integration demo: Relaxed Unit Commitment","text":"ChainRules defines the differentiation of functions. The actual function that is differentiated in the context of DiffOpt is the solution map taking in input the problem parameters and returning the solution.","category":"page"},{"location":"examples/chainrules_unit/","page":"ChainRules integration demo: Relaxed Unit Commitment","title":"ChainRules integration demo: Relaxed Unit Commitment","text":"function unit_commitment(\n load1_demand,\n load2_demand,\n gen_costs,\n noload_costs;\n model = Model(HiGHS.Optimizer),\n silent = false,\n)\n MOI.set(model, MOI.Silent(), silent)\n\n # Problem data\n units = [1, 2] # Generator identifiers\n load_names = [\"Load1\", \"Load2\"] # Load identifiers\n n_periods = 4 # Number of time periods\n Pmin = Dict(1 => fill(0.5, n_periods), 2 => fill(0.5, n_periods)) # Minimum power output (pu)\n Pmax = Dict(1 => fill(3.0, n_periods), 2 => fill(3.0, n_periods)) # Maximum power output (pu)\n RR = Dict(1 => 0.25, 2 => 0.25) # Ramp rates (pu/min)\n P0 = Dict(1 => 0.0, 2 => 0.0) # Initial power output (pu)\n D = Dict(\"Load1\" => load1_demand, \"Load2\" => load2_demand) # Demand (pu)\n Cp = Dict(1 => gen_costs[1], 2 => gen_costs[2]) # Generation cost coefficient ($/pu)\n Cnl = Dict(1 => noload_costs[1], 2 => noload_costs[2]) # No-load cost ($)\n\n # Variables\n # Note: u represents the activation of generation units.\n # Would be binary in the typical UC problem, relaxed here to u ∈ [0,1]\n # for a linear relaxation.\n @variable(model, 0 <= u[g in units, t in 1:n_periods] <= 1) # Commitment\n @variable(model, p[g in units, t in 1:n_periods] >= 0) # Power output (pu)\n\n # Constraints\n\n # Energy balance\n @constraint(\n model,\n energy_balance_cons[t in 1:n_periods],\n sum(p[g, t] for g in units) == sum(D[l][t] for l in load_names),\n )\n\n # Generation limits\n @constraint(\n model,\n [g in units, t in 1:n_periods],\n Pmin[g][t] * u[g, 
t] <= p[g, t]\n )\n @constraint(\n model,\n [g in units, t in 1:n_periods],\n p[g, t] <= Pmax[g][t] * u[g, t]\n )\n\n # Ramp rates\n @constraint(\n model,\n [g in units, t in 2:n_periods],\n p[g, t] - p[g, t-1] <= 60 * RR[g]\n )\n @constraint(model, [g in units], p[g, 1] - P0[g] <= 60 * RR[g])\n @constraint(\n model,\n [g in units, t in 2:n_periods],\n p[g, t-1] - p[g, t] <= 60 * RR[g]\n )\n @constraint(model, [g in units], P0[g] - p[g, 1] <= 60 * RR[g])\n\n # Objective\n @objective(\n model,\n Min,\n sum(\n (Cp[g] * p[g, t]) + (Cnl[g] * u[g, t]) for g in units,\n t in 1:n_periods\n ),\n )\n\n optimize!(model)\n # asserting finite optimal value\n @assert termination_status(model) == MOI.OPTIMAL\n # converting to dense matrix\n return JuMP.value.(p.data)\nend\n\nm = Model(HiGHS.Optimizer)\n@show unit_commitment(\n [1.0, 1.2, 1.4, 1.6],\n [1.0, 1.2, 1.4, 1.6],\n [1000.0, 1500.0],\n [500.0, 1000.0],\n model = m,\n silent = true,\n)","category":"page"},{"location":"examples/chainrules_unit/#Perturbation-of-a-single-input-parameter","page":"ChainRules integration demo: Relaxed Unit Commitment","title":"Perturbation of a single input parameter","text":"","category":"section"},{"location":"examples/chainrules_unit/","page":"ChainRules integration demo: Relaxed Unit Commitment","title":"ChainRules integration demo: Relaxed Unit Commitment","text":"Let us vary the demand at the second time frame on both loads:","category":"page"},{"location":"examples/chainrules_unit/","page":"ChainRules integration demo: Relaxed Unit Commitment","title":"ChainRules integration demo: Relaxed Unit Commitment","text":"demand_values = 0.05:0.05:3.0\npvalues = map(demand_values) do di\n return unit_commitment(\n [1.0, di, 1.4, 1.6],\n [1.0, di, 1.4, 1.6],\n [1000.0, 1500.0],\n [500.0, 1000.0];\n silent = true,\n )\nend\npflat = [getindex.(pvalues, i) for i in eachindex(pvalues[1])];\nnothing #hide","category":"page"},{"location":"examples/chainrules_unit/","page":"ChainRules integration demo: 
Relaxed Unit Commitment","title":"ChainRules integration demo: Relaxed Unit Commitment","text":"The influence of this variation of the demand is piecewise linear on the generation at different time frames:","category":"page"},{"location":"examples/chainrules_unit/","page":"ChainRules integration demo: Relaxed Unit Commitment","title":"ChainRules integration demo: Relaxed Unit Commitment","text":"Plots.scatter(demand_values, pflat; xaxis = (\"Demand\"), yaxis = (\"Generation\"))\nPlots.title!(\"Different time frames and generators\")\nPlots.xlims!(0.0, 3.5)","category":"page"},{"location":"examples/chainrules_unit/#Forward-Differentiation","page":"ChainRules integration demo: Relaxed Unit Commitment","title":"Forward Differentiation","text":"","category":"section"},{"location":"examples/chainrules_unit/","page":"ChainRules integration demo: Relaxed Unit Commitment","title":"ChainRules integration demo: Relaxed Unit Commitment","text":"Forward differentiation rule for the solution map of the unit commitment problem. 
It takes as arguments:","category":"page"},{"location":"examples/chainrules_unit/","page":"ChainRules integration demo: Relaxed Unit Commitment","title":"ChainRules integration demo: Relaxed Unit Commitment","text":"the perturbations on the input parameters\nthe differentiated function\nthe primal values of the input parameters,","category":"page"},{"location":"examples/chainrules_unit/","page":"ChainRules integration demo: Relaxed Unit Commitment","title":"ChainRules integration demo: Relaxed Unit Commitment","text":"and returns a tuple (primal_output, perturbations), the main primal result and the perturbation propagated to this result:","category":"page"},{"location":"examples/chainrules_unit/","page":"ChainRules integration demo: Relaxed Unit Commitment","title":"ChainRules integration demo: Relaxed Unit Commitment","text":"function ChainRulesCore.frule(\n (_, Δload1_demand, Δload2_demand, Δgen_costs, Δnoload_costs),\n ::typeof(unit_commitment),\n load1_demand,\n load2_demand,\n gen_costs,\n noload_costs;\n optimizer = HiGHS.Optimizer,\n)\n # creating the UC model with a DiffOpt optimizer wrapper around HiGHS\n model = Model(() -> DiffOpt.diff_optimizer(optimizer))\n # building and solving the main model\n pv = unit_commitment(\n load1_demand,\n load2_demand,\n gen_costs,\n noload_costs;\n model = model,\n )\n energy_balance_cons = model[:energy_balance_cons]\n\n # Setting some perturbation of the energy balance constraints\n # Perturbations are set as MOI functions\n Δenergy_balance = [\n convert(MOI.ScalarAffineFunction{Float64}, d1 + d2) for\n (d1, d2) in zip(Δload1_demand, Δload2_demand)\n ]\n MOI.set.(\n model,\n DiffOpt.ForwardConstraintFunction(),\n energy_balance_cons,\n Δenergy_balance,\n )\n\n p = model[:p]\n u = model[:u]\n\n # setting the perturbation of the linear objective\n Δobj =\n sum(Δgen_costs ⋅ p[:, t] + Δnoload_costs ⋅ u[:, t] for t in size(p, 2))\n MOI.set(model, DiffOpt.ForwardObjectiveFunction(), Δobj)\n 
DiffOpt.forward_differentiate!(JuMP.backend(model))\n # querying the corresponding perturbation of the decision\n Δp = MOI.get.(model, DiffOpt.ForwardVariablePrimal(), p)\n return (pv, Δp.data)\nend","category":"page"},{"location":"examples/chainrules_unit/","page":"ChainRules integration demo: Relaxed Unit Commitment","title":"ChainRules integration demo: Relaxed Unit Commitment","text":"We can now compute the perturbation of the output powers Δpv for a perturbation of the first load demand at time 2:","category":"page"},{"location":"examples/chainrules_unit/","page":"ChainRules integration demo: Relaxed Unit Commitment","title":"ChainRules integration demo: Relaxed Unit Commitment","text":"load1_demand = [1.0, 1.0, 1.4, 1.6]\nload2_demand = [1.0, 1.0, 1.4, 1.6]\ngen_costs = [1000.0, 1500.0]\nnoload_costs = [500.0, 1000.0];\nnothing #hide","category":"page"},{"location":"examples/chainrules_unit/","page":"ChainRules integration demo: Relaxed Unit Commitment","title":"ChainRules integration demo: Relaxed Unit Commitment","text":"all input perturbations are 0 except first load at time 2","category":"page"},{"location":"examples/chainrules_unit/","page":"ChainRules integration demo: Relaxed Unit Commitment","title":"ChainRules integration demo: Relaxed Unit Commitment","text":"Δload1_demand = 0 * load1_demand\nΔload1_demand[2] = 1.0\nΔload2_demand = 0 * load2_demand\nΔgen_costs = 0 * gen_costs\nΔnoload_costs = 0 * noload_costs\n(pv, Δpv) = ChainRulesCore.frule(\n (nothing, Δload1_demand, Δload2_demand, Δgen_costs, Δnoload_costs),\n unit_commitment,\n load1_demand,\n load2_demand,\n gen_costs,\n noload_costs,\n)\n\nΔpv","category":"page"},{"location":"examples/chainrules_unit/","page":"ChainRules integration demo: Relaxed Unit Commitment","title":"ChainRules integration demo: Relaxed Unit Commitment","text":"The result matches what we observe in the previous figure: the generation of the first generator at the second time frame (third element on the 
plot).","category":"page"},{"location":"examples/chainrules_unit/#Reverse-mode-differentiation-of-the-solution-map","page":"ChainRules integration demo: Relaxed Unit Commitment","title":"Reverse-mode differentiation of the solution map","text":"","category":"section"},{"location":"examples/chainrules_unit/","page":"ChainRules integration demo: Relaxed Unit Commitment","title":"ChainRules integration demo: Relaxed Unit Commitment","text":"The rrule returns the primal and a pullback. The pullback takes a seed for the optimal solution ̄p and returns derivatives with respect to each input parameter of the function.","category":"page"},{"location":"examples/chainrules_unit/","page":"ChainRules integration demo: Relaxed Unit Commitment","title":"ChainRules integration demo: Relaxed Unit Commitment","text":"function ChainRulesCore.rrule(\n ::typeof(unit_commitment),\n load1_demand,\n load2_demand,\n gen_costs,\n noload_costs;\n optimizer = HiGHS.Optimizer,\n silent = false,\n)\n model = Model(() -> DiffOpt.diff_optimizer(optimizer))\n # solve the forward UC problem\n pv = unit_commitment(\n load1_demand,\n load2_demand,\n gen_costs,\n noload_costs;\n model = model,\n silent = silent,\n )\n function pullback_unit_commitment(pb)\n p = model[:p]\n u = model[:u]\n energy_balance_cons = model[:energy_balance_cons]\n\n MOI.set.(model, DiffOpt.ReverseVariablePrimal(), p, pb)\n DiffOpt.reverse_differentiate!(JuMP.backend(model))\n\n obj = MOI.get(model, DiffOpt.ReverseObjectiveFunction())\n\n # computing derivative wrt linear objective costs\n dgen_costs = similar(gen_costs)\n dgen_costs[1] = sum(JuMP.coefficient.(obj, p[1, :]))\n dgen_costs[2] = sum(JuMP.coefficient.(obj, p[2, :]))\n\n dnoload_costs = similar(noload_costs)\n dnoload_costs[1] = sum(JuMP.coefficient.(obj, u[1, :]))\n dnoload_costs[2] = sum(JuMP.coefficient.(obj, u[2, :]))\n\n # computing derivative wrt constraint constant\n dload1_demand =\n JuMP.constant.(\n MOI.get.(\n model,\n 
DiffOpt.ReverseConstraintFunction(),\n energy_balance_cons,\n )\n )\n dload2_demand = copy(dload1_demand)\n return (dload1_demand, dload2_demand, dgen_costs, dnoload_costs)\n end\n return (pv, pullback_unit_commitment)\nend","category":"page"},{"location":"examples/chainrules_unit/","page":"ChainRules integration demo: Relaxed Unit Commitment","title":"ChainRules integration demo: Relaxed Unit Commitment","text":"We can set a seed of one on the power of the first generator at the second time frame and zero for all other parts of the solution:","category":"page"},{"location":"examples/chainrules_unit/","page":"ChainRules integration demo: Relaxed Unit Commitment","title":"ChainRules integration demo: Relaxed Unit Commitment","text":"(pv, pullback_unit_commitment) = ChainRulesCore.rrule(\n unit_commitment,\n load1_demand,\n load2_demand,\n gen_costs,\n noload_costs;\n optimizer = HiGHS.Optimizer,\n silent = true,\n)\ndpv = 0 * pv\ndpv[1, 2] = 1\ndargs = pullback_unit_commitment(dpv)\n(dload1_demand, dload2_demand, dgen_costs, dnoload_costs) = dargs;\nnothing #hide","category":"page"},{"location":"examples/chainrules_unit/","page":"ChainRules integration demo: Relaxed Unit Commitment","title":"ChainRules integration demo: Relaxed Unit Commitment","text":"The sensitivities with respect to the load demands are:","category":"page"},{"location":"examples/chainrules_unit/","page":"ChainRules integration demo: Relaxed Unit Commitment","title":"ChainRules integration demo: Relaxed Unit Commitment","text":"dload1_demand","category":"page"},{"location":"examples/chainrules_unit/","page":"ChainRules integration demo: Relaxed Unit Commitment","title":"ChainRules integration demo: Relaxed Unit Commitment","text":"and:","category":"page"},{"location":"examples/chainrules_unit/","page":"ChainRules integration demo: Relaxed Unit Commitment","title":"ChainRules integration demo: Relaxed Unit 
Commitment","text":"dload2_demand","category":"page"},{"location":"examples/chainrules_unit/","page":"ChainRules integration demo: Relaxed Unit Commitment","title":"ChainRules integration demo: Relaxed Unit Commitment","text":"The sensitivity of the generation is propagated to the sensitivity of both loads at the second time frame.","category":"page"},{"location":"examples/chainrules_unit/","page":"ChainRules integration demo: Relaxed Unit Commitment","title":"ChainRules integration demo: Relaxed Unit Commitment","text":"This example integrating ChainRules was designed with support from Invenia Technical Computing.","category":"page"},{"location":"examples/chainrules_unit/","page":"ChainRules integration demo: Relaxed Unit Commitment","title":"ChainRules integration demo: Relaxed Unit Commitment","text":"","category":"page"},{"location":"examples/chainrules_unit/","page":"ChainRules integration demo: Relaxed Unit Commitment","title":"ChainRules integration demo: Relaxed Unit Commitment","text":"This page was generated using Literate.jl.","category":"page"},{"location":"examples/custom-relu/","page":"Custom ReLU layer","title":"Custom ReLU layer","text":"EditURL = \"custom-relu.jl\"","category":"page"},{"location":"examples/custom-relu/#Custom-ReLU-layer","page":"Custom ReLU layer","title":"Custom ReLU layer","text":"","category":"section"},{"location":"examples/custom-relu/","page":"Custom ReLU layer","title":"Custom ReLU layer","text":"(Image: )","category":"page"},{"location":"examples/custom-relu/","page":"Custom ReLU layer","title":"Custom ReLU layer","text":"We demonstrate how DiffOpt can be used to generate a simple neural network unit - the ReLU layer. 
A neural network is created using Flux.jl and trained on the MNIST dataset.","category":"page"},{"location":"examples/custom-relu/","page":"Custom ReLU layer","title":"Custom ReLU layer","text":"This tutorial uses the following packages","category":"page"},{"location":"examples/custom-relu/","page":"Custom ReLU layer","title":"Custom ReLU layer","text":"using JuMP\nimport DiffOpt\nimport Ipopt\nimport ChainRulesCore\nimport Flux\nimport MLDatasets\nimport Statistics\nimport Base.Iterators: repeated\nusing LinearAlgebra","category":"page"},{"location":"examples/custom-relu/#The-ReLU-and-its-derivative","page":"Custom ReLU layer","title":"The ReLU and its derivative","text":"","category":"section"},{"location":"examples/custom-relu/","page":"Custom ReLU layer","title":"Custom ReLU layer","text":"Define a relu through an optimization problem solved by a quadratic solver. Return the solution of the problem.","category":"page"},{"location":"examples/custom-relu/","page":"Custom ReLU layer","title":"Custom ReLU layer","text":"function matrix_relu(\n y::Matrix;\n model = Model(() -> DiffOpt.diff_optimizer(Ipopt.Optimizer)),\n)\n layer_size, batch_size = size(y)\n empty!(model)\n set_silent(model)\n @variable(model, x[1:layer_size, 1:batch_size] >= 0)\n @objective(model, Min, x[:]'x[:] - 2y[:]'x[:])\n optimize!(model)\n return value.(x)\nend","category":"page"},{"location":"examples/custom-relu/","page":"Custom ReLU layer","title":"Custom ReLU layer","text":"Define the reverse differentiation rule, for the function we defined above.","category":"page"},{"location":"examples/custom-relu/","page":"Custom ReLU layer","title":"Custom ReLU layer","text":"function ChainRulesCore.rrule(::typeof(matrix_relu), y::Matrix{T}) where {T}\n model = Model(() -> DiffOpt.diff_optimizer(Ipopt.Optimizer))\n pv = matrix_relu(y; model = model)\n function pullback_matrix_relu(dl_dx)\n # some value from the backpropagation (e.g., loss) is denoted by `l`\n # so `dl_dy` is the derivative of `l` 
wrt `y`\n x = model[:x] # load decision variable `x` into scope\n dl_dy = zeros(T, size(dl_dx))\n dl_dq = zeros(T, size(dl_dx))\n # set sensitivities\n MOI.set.(model, DiffOpt.ReverseVariablePrimal(), x[:], dl_dx[:])\n # compute grad\n DiffOpt.reverse_differentiate!(model)\n # return gradient wrt objective function parameters\n obj_exp = MOI.get(model, DiffOpt.ReverseObjectiveFunction())\n # coeff of `x` in q'x = -2y'x\n dl_dq[:] .= JuMP.coefficient.(obj_exp, x[:])\n dq_dy = -2 # dq/dy = -2\n dl_dy[:] .= dl_dq[:] * dq_dy\n return (ChainRulesCore.NoTangent(), dl_dy)\n end\n return pv, pullback_matrix_relu\nend","category":"page"},{"location":"examples/custom-relu/","page":"Custom ReLU layer","title":"Custom ReLU layer","text":"For more details about backpropagation, visit Introduction, ChainRulesCore.jl.","category":"page"},{"location":"examples/custom-relu/#Define-the-network","page":"Custom ReLU layer","title":"Define the network","text":"","category":"section"},{"location":"examples/custom-relu/","page":"Custom ReLU layer","title":"Custom ReLU layer","text":"layer_size = 10\nm = Flux.Chain(\n Flux.Dense(784, layer_size), # 784 being image linear dimension (28 x 28)\n matrix_relu,\n Flux.Dense(layer_size, 10), # 10 being the number of outcomes (0 to 9)\n Flux.softmax,\n)","category":"page"},{"location":"examples/custom-relu/#Prepare-data","page":"Custom ReLU layer","title":"Prepare data","text":"","category":"section"},{"location":"examples/custom-relu/","page":"Custom ReLU layer","title":"Custom ReLU layer","text":"N = 1000 # batch size\n# Preprocessing train data\nimgs = MLDatasets.MNIST.traintensor(1:N)\nlabels = MLDatasets.MNIST.trainlabels(1:N)\ntrain_X = float.(reshape(imgs, size(imgs, 1) * size(imgs, 2), N)) # stack images\ntrain_Y = Flux.onehotbatch(labels, 0:9);\n# Preprocessing test data\ntest_imgs = MLDatasets.MNIST.testtensor(1:N)\ntest_labels = MLDatasets.MNIST.testlabels(1:N)\ntest_X = float.(reshape(test_imgs, size(test_imgs, 1) * size(test_imgs, 
2), N))\ntest_Y = Flux.onehotbatch(test_labels, 0:9);\nnothing #hide","category":"page"},{"location":"examples/custom-relu/","page":"Custom ReLU layer","title":"Custom ReLU layer","text":"Define input data The original data is repeated epochs times because Flux.train! only loops through the data set once","category":"page"},{"location":"examples/custom-relu/","page":"Custom ReLU layer","title":"Custom ReLU layer","text":"epochs = 50 # ~1 minute (i7 8th gen with 16gb RAM)\n# epochs = 100 # leads to 77.8% in about 2 minutes\ndataset = repeated((train_X, train_Y), epochs);\nnothing #hide","category":"page"},{"location":"examples/custom-relu/#Network-training","page":"Custom ReLU layer","title":"Network training","text":"","category":"section"},{"location":"examples/custom-relu/","page":"Custom ReLU layer","title":"Custom ReLU layer","text":"training loss function, Flux optimizer","category":"page"},{"location":"examples/custom-relu/","page":"Custom ReLU layer","title":"Custom ReLU layer","text":"custom_loss(x, y) = Flux.crossentropy(m(x), y)\nopt = Flux.Adam()\nevalcb = () -> @show(custom_loss(train_X, train_Y))","category":"page"},{"location":"examples/custom-relu/","page":"Custom ReLU layer","title":"Custom ReLU layer","text":"Train to optimize network parameters","category":"page"},{"location":"examples/custom-relu/","page":"Custom ReLU layer","title":"Custom ReLU layer","text":"@time Flux.train!(\n custom_loss,\n Flux.params(m),\n dataset,\n opt,\n cb = Flux.throttle(evalcb, 5),\n);\nnothing #hide","category":"page"},{"location":"examples/custom-relu/","page":"Custom ReLU layer","title":"Custom ReLU layer","text":"Although our custom implementation takes time, it is able to reach similar accuracy as the usual ReLU function implementation.","category":"page"},{"location":"examples/custom-relu/#Accuracy-results","page":"Custom ReLU layer","title":"Accuracy results","text":"","category":"section"},{"location":"examples/custom-relu/","page":"Custom ReLU 
layer","title":"Custom ReLU layer","text":"Average of correct guesses","category":"page"},{"location":"examples/custom-relu/","page":"Custom ReLU layer","title":"Custom ReLU layer","text":"accuracy(x, y) = Statistics.mean(Flux.onecold(m(x)) .== Flux.onecold(y));\nnothing #hide","category":"page"},{"location":"examples/custom-relu/","page":"Custom ReLU layer","title":"Custom ReLU layer","text":"Training accuracy","category":"page"},{"location":"examples/custom-relu/","page":"Custom ReLU layer","title":"Custom ReLU layer","text":"accuracy(train_X, train_Y)","category":"page"},{"location":"examples/custom-relu/","page":"Custom ReLU layer","title":"Custom ReLU layer","text":"Test accuracy","category":"page"},{"location":"examples/custom-relu/","page":"Custom ReLU layer","title":"Custom ReLU layer","text":"accuracy(test_X, test_Y)","category":"page"},{"location":"examples/custom-relu/","page":"Custom ReLU layer","title":"Custom ReLU layer","text":"Note that the accuracy is low due to simplified training. It is possible to increase the number of samples N, the number of epochs epoch and the connectivity inner.","category":"page"},{"location":"examples/custom-relu/","page":"Custom ReLU layer","title":"Custom ReLU layer","text":"","category":"page"},{"location":"examples/custom-relu/","page":"Custom ReLU layer","title":"Custom ReLU layer","text":"This page was generated using Literate.jl.","category":"page"}] } diff --git a/dev/usage/index.html b/dev/usage/index.html index a8791f17..2027b5a5 100644 --- a/dev/usage/index.html +++ b/dev/usage/index.html @@ -22,4 +22,4 @@ \end{align*}\]

we can use the forward_differentiate! method with perturbations in matrices A, b, c:

import LinearAlgebra: ⋅
 MOI.set(model, DiffOpt.ForwardObjectiveFunction(), ones(2) ⋅ x)
 DiffOpt.forward_differentiate!(model)
-grad_x = MOI.get.(model, DiffOpt.ForwardVariablePrimal(), x)
+grad_x = MOI.get.(model, DiffOpt.ForwardVariablePrimal(), x)