From 02a19b777a635054cf29b75ee587577e2b6c2617 Mon Sep 17 00:00:00 2001 From: Pietro Monticone <38562595+pitmonticone@users.noreply.github.com> Date: Tue, 21 Mar 2023 19:04:11 +0100 Subject: [PATCH 1/2] Format code and add checking workflow --- .JuliaFormatter.jl | 7 + .JuliaFormatter.toml | 1 + .github/workflows/FormatCheck.yml | 34 +++++ dev/logo/logo.jl | 38 ++--- docs/make.jl | 29 ++-- docs/setup_docs.jl | 4 +- src/AlgorithmicRecourseDynamics.jl | 7 +- src/base.jl | 223 +++++++++++++++++++---------- src/data/Data.jl | 4 +- src/data/functions.jl | 4 +- src/data/utils.jl | 3 +- src/evaluation/Evaluation.jl | 15 +- src/evaluation/domain_shifts.jl | 25 +++- src/evaluation/kernels.jl | 11 +- src/evaluation/mmd.jl | 47 +++--- src/evaluation/model_shifts.jl | 28 ++-- src/experiments/Experiments.jl | 2 +- src/experiments/functions.jl | 83 ++++++----- src/experiments/utils.jl | 13 +- src/models/Models.jl | 2 +- src/models/deep_ensemble.jl | 11 +- src/models/mlp.jl | 10 +- src/post_processing.jl | 91 +++++++++--- test/runtests.jl | 6 +- 24 files changed, 450 insertions(+), 248 deletions(-) create mode 100644 .JuliaFormatter.jl create mode 100644 .JuliaFormatter.toml create mode 100644 .github/workflows/FormatCheck.yml diff --git a/.JuliaFormatter.jl b/.JuliaFormatter.jl new file mode 100644 index 0000000..88e7e4b --- /dev/null +++ b/.JuliaFormatter.jl @@ -0,0 +1,7 @@ +using Pkg # Load package manager +Pkg.add("JuliaFormatter") # Install JuliaFormatter + +using JuliaFormatter # Load JuliaFormatter +format("."; verbose=true) # Format all files + +Pkg.rm("JuliaFormatter") # Remove JuliaFormatter diff --git a/.JuliaFormatter.toml b/.JuliaFormatter.toml new file mode 100644 index 0000000..323237b --- /dev/null +++ b/.JuliaFormatter.toml @@ -0,0 +1 @@ +style = "blue" diff --git a/.github/workflows/FormatCheck.yml b/.github/workflows/FormatCheck.yml new file mode 100644 index 0000000..e576a1b --- /dev/null +++ b/.github/workflows/FormatCheck.yml @@ -0,0 +1,34 @@ +name: Format Check + +on: + push: + branches: + - 'main' + - 'release-' + tags: '*' + pull_request: + +jobs: + build: + runs-on: ubuntu-latest + steps: + - uses: julia-actions/setup-julia@latest + with: + version: 1 + - uses: actions/checkout@v1 + - name: Install JuliaFormatter + run: | + using Pkg + Pkg.add("JuliaFormatter") + shell: julia --color=yes {0} + - name: Format code + run: | + using JuliaFormatter + format("."; verbose=true) + shell: julia --color=yes {0} + - name: Suggest formatting changes + uses: reviewdog/action-suggester@v1 + if: github.event_name == 'pull_request' + with: + tool_name: JuliaFormatter + fail_on_error: true \ No newline at end of file diff --git a/dev/logo/logo.jl b/dev/logo/logo.jl index 353aef8..aff0fc0 100644 --- a/dev/logo/logo.jl +++ b/dev/logo/logo.jl @@ -20,8 +20,7 @@ const julia_colors = Dict( function get_data(N=1000, xmax=2) X, ys = make_blobs( - N, 2; - centers=2, as_table=false, center_box=(-xmax => xmax), cluster_std=0.1 + N, 2; centers=2, as_table=false, center_box=(-xmax => xmax), cluster_std=0.1 ) ys .= ys .== 2 X = X' @@ -31,7 +30,7 @@ end function logo_picture(; ndots=3, frame_size=500, - ms=frame_size // 10, + ms=frame_size//10, mcolor=(:red, :green, :purple), margin=0.1, fun=f(x) = x * cos(x), @@ -43,7 +42,7 @@ function logo_picture(; gt_stroke_size=5, interval_color=julia_colors[:blue], interval_alpha=0.2, - seed=2022 + seed=2022, ) # Setup @@ -52,8 +51,8 @@ function logo_picture(; Random.seed!(seed) # Data - x, y = get_data(xmax=xmax, noise=noise, fun=fun) - train, test = partition(eachindex(y), 0.4, 0.4, shuffle=true) + x, y = get_data(; xmax=xmax, noise=noise, fun=fun) + train, test = partition(eachindex(y), 0.4, 0.4; shuffle=true) xtrue = range(-xmax, xmax, ntrue) ytrue = fun.(xtrue) @@ -61,16 +60,18 @@ function logo_picture(; Model = @load LinearRegressor pkg = MLJLinearModels degree_polynomial = 5 polynomial_features(x, degree::Int) = reduce(hcat, map(i -> x .^ i, 1:degree)) + #! format: off pipe = (x -> MLJBase.table(polynomial_features(x, degree_polynomial))) |> Model() + #! format: on conf_model = conformal_model(pipe; coverage=0.95) mach = machine(conf_model, x, y) - fit!(mach, rows=train) + fit!(mach; rows=train) yhat = predict(mach, x[test]) y_lb = [y[1] for y in yhat] y_ub = [y[2] for y in yhat] # Logo - idx = sample(test, ndots, replace=false) + idx = sample(test, ndots; replace=false) xplot, yplot = (x[idx], y[idx]) _scale = (frame_size / (2 * maximum(x))) * (1 - margin) @@ -78,15 +79,15 @@ function logo_picture(; setline(gt_stroke_size) sethue(gt_color) true_points = [Point((_scale .* (x, y))...) for (x, y) in zip(xtrue, ytrue)] - poly(true_points[1:(end-1)], action=:stroke) + poly(true_points[1:(end - 1)]; action=:stroke) # Data data_plot = zip(xplot, yplot) - for i = 1:length(data_plot) + for i in 1:length(data_plot) _x, _y = _scale .* collect(data_plot)[i] color_idx = i % n_mcolor == 0 ? n_mcolor : i % n_mcolor sethue(mcolor[color_idx]...) - circle(Point(_x, _y), ms, action=:fill) + circle(Point(_x, _y), ms; action=:fill) end # Prediction interval: @@ -99,17 +100,16 @@ function logo_picture(; Point((_scale .* (x, y))...) for (x, y) in zip(x[test][_order_ub], y_ub[_order_ub]) ] setcolor(sethue(interval_color)..., interval_alpha) - poly(vcat(lb, ub), action=:fill) - + return poly(vcat(lb, ub); action=:fill) end function draw_small_logo(filename="docs/src/assets/logo.svg"; width=500) frame_size = width Drawing(frame_size, frame_size, filename) origin() - logo_picture(frame_size=frame_size) + logo_picture(; frame_size=frame_size) finish() - preview() + return preview() end function draw_wide_logo_new( @@ -120,7 +120,7 @@ function draw_wide_logo_new( font_fill="transparent", font_color=Luxor.julia_blue, bg_color="transparent", - picture_kwargs... + picture_kwargs..., ) # Setup: @@ -142,7 +142,7 @@ function draw_wide_logo_new( # Picture: @layer begin translate(cells[1]) - logo_picture( + logo_picture(; frame_size=height, margin=0.1, ms=ms, @@ -162,7 +162,7 @@ function draw_wide_logo_new( translate(pos) setline(Int(round(gt_stroke_size / 5))) sethue(font_fill) - textoutlines(strs[n], O, :path, valign=:middle, halign=:center) + textoutlines(strs[n], O, :path; valign=:middle, halign=:center) sethue(font_color) strokepath() end @@ -170,7 +170,7 @@ function draw_wide_logo_new( end finish() - preview() + return preview() end draw_wide_logo_new() diff --git a/docs/make.jl b/docs/make.jl index 3031f23..7e036a7 100644 --- a/docs/make.jl +++ b/docs/make.jl @@ -6,25 +6,22 @@ ex_meta = quote using AlgorithmicRecourseDynamics end -DocMeta.setdocmeta!(AlgorithmicRecourseDynamics, :DocTestSetup, ex_meta; recursive = true) +DocMeta.setdocmeta!(AlgorithmicRecourseDynamics, :DocTestSetup, ex_meta; recursive=true) makedocs(; - modules = [AlgorithmicRecourseDynamics], - authors = "Patrick Altmeyer", - repo = "https://github.com/juliatrustworthyai/AlgorithmicRecourseDynamics.jl/blob/{commit}{path}#{line}", - sitename = "AlgorithmicRecourseDynamics.jl", - format = Documenter.HTML(; - prettyurls = get(ENV, "CI", "false") == "true", - canonical = "https://juliatrustworthyai.github.io/AlgorithmicRecourseDynamics.jl", - edit_link = "main", - assets = String[], + modules=[AlgorithmicRecourseDynamics], + authors="Patrick Altmeyer", + repo="https://github.com/juliatrustworthyai/AlgorithmicRecourseDynamics.jl/blob/{commit}{path}#{line}", + sitename="AlgorithmicRecourseDynamics.jl", + format=Documenter.HTML(; + prettyurls=get(ENV, "CI", "false") == "true", + canonical="https://juliatrustworthyai.github.io/AlgorithmicRecourseDynamics.jl", + edit_link="main", + assets=String[], ), - pages = [ - "🏠 Home" => "index.md", - "🧐 Reference" => "_reference.md", - ], + pages=["🏠 Home" => "index.md", "🧐 Reference" => "_reference.md"], ) -deploydocs(; - repo = "github.com/JuliaTrustworthyAI/AlgorithmicRecourseDynamics.jl", devbranch = "main" +deploydocs(; + repo="github.com/JuliaTrustworthyAI/AlgorithmicRecourseDynamics.jl", devbranch="main" ) diff --git a/docs/setup_docs.jl b/docs/setup_docs.jl index 1007c62..e7df95c 100644 --- a/docs/setup_docs.jl +++ b/docs/setup_docs.jl @@ -1,5 +1,4 @@ setup_docs = quote - using Pkg Pkg.activate("docs") @@ -16,5 +15,4 @@ setup_docs = quote Random.seed!(2023) theme(:wong) - -end \ No newline at end of file +end diff --git a/src/AlgorithmicRecourseDynamics.jl b/src/AlgorithmicRecourseDynamics.jl index 489bcd8..c454c43 100755 --- a/src/AlgorithmicRecourseDynamics.jl +++ b/src/AlgorithmicRecourseDynamics.jl @@ -4,7 +4,7 @@ using CounterfactualExplanations using Logging function is_logging(io) - isa(io, Base.TTY) == false || (get(ENV, "CI", nothing) == "true") + return isa(io, Base.TTY) == false || (get(ENV, "CI", nothing) == "true") end # Load modules: @@ -23,9 +23,10 @@ using .Evaluation export evaluate_system include("base.jl") -export run_experiment, set_up_experiment, run_experiments, set_up_experiments, ExperimentResults +export run_experiment, + set_up_experiment, run_experiments, set_up_experiments, ExperimentResults include("post_processing.jl") export kable -end \ No newline at end of file +end diff --git a/src/base.jl b/src/base.jl index a0350d5..eeb981f 100644 --- a/src/base.jl +++ b/src/base.jl @@ -2,7 +2,13 @@ using CSV using DataFrames using .Evaluation: evaluate_system -using .Experiments: Experiment, FixedParameters, RecourseSystem, set_up_system_grid!, update_experiment!, choose_individuals +using .Experiments: + Experiment, + FixedParameters, + RecourseSystem, + set_up_system_grid!, + update_experiment!, + choose_individuals using Flux using LinearAlgebra using ProgressMeter: Progress, next! @@ -12,12 +18,20 @@ using Statistics using StatsBase function collect_output( - experiment::Experiment, recourse_system::RecourseSystem, chosen_individuals::Union{Nothing,AbstractArray}, k::Int, n::Int, m::Int; - n_bootstrap=1, n_samples=1000 + experiment::Experiment, + recourse_system::RecourseSystem, + chosen_individuals::Union{Nothing,AbstractArray}, + k::Int, + n::Int, + m::Int; + n_bootstrap=1, + n_samples=1000, ) # Evaluate: - output = evaluate_system(recourse_system, experiment, n=n_bootstrap, n_samples=n_samples) + output = evaluate_system( + recourse_system, experiment; n=n_bootstrap, n_samples=n_samples + ) # Add additional information: output.k .= k @@ -25,7 +39,11 @@ function collect_output( output.model .= collect(experiment.system_identifiers)[m][1] output.generator .= collect(experiment.system_identifiers)[m][2] output.n_individuals .= isnothing(chosen_individuals) ? 0 : length(chosen_individuals) - output.pct_total .= isnothing(chosen_individuals) ? 0 : length(chosen_individuals) / size(experiment.train_data.y, 2) + output.pct_total .= if isnothing(chosen_individuals) + 0 + else + length(chosen_individuals) / size(experiment.train_data.y, 2) + end # Add recourse measures: if n > 0 @@ -44,7 +62,13 @@ end A wrapper function that runs the experiment for endogenous models shifts. """ function run!( - experiment::Experiment; evaluate_every=10, n_bootstrap=1, n_samples=1000, forward=false, show_progress=!is_logging(stderr), fixed_parameters... + experiment::Experiment; + evaluate_every=10, + n_bootstrap=1, + n_samples=1000, + forward=false, + show_progress=!is_logging(stderr), + fixed_parameters..., ) # Load fixed hyperparameters: @@ -66,37 +90,68 @@ function run!( # Pre-allocate memory: output = [DataFrame() for i in 1:M] - p_fold = Progress(K; desc="Progress on folds:", showspeed=true, enabled=show_progress, output=stderr, color=:yellow) + p_fold = Progress( + K; + desc="Progress on folds:", + showspeed=true, + enabled=show_progress, + output=stderr, + color=:yellow, + ) @info "Running experiment ..." for k in 1:K recourse_systems = experiment.recourse_systems[k] # Initial evaluation: for m in 1:M - output_initial = collect_output(experiment, recourse_systems[m], nothing, k, 0, m, n_bootstrap=nothing) - output[m] = vcat(output[m], output_initial, cols=:union) + output_initial = collect_output( + experiment, recourse_systems[m], nothing, k, 0, m; n_bootstrap=nothing + ) + output[m] = vcat(output[m], output_initial; cols=:union) end # Recursion over N rounds: chosen_individuals = zeros(size(recourse_systems)) - p_round = Progress(N; desc="Progress on round:", showspeed=true, enabled=show_progress, output=stderr, color=:green) + p_round = Progress( + N; + desc="Progress on round:", + showspeed=true, + enabled=show_progress, + output=stderr, + color=:green, + ) for n in 1:N # Choose individuals that shall receive recourse: - chosen_individuals_n = choose_individuals(experiment, recourse_systems; intersect_=intersect_) - chosen_individuals = map((x, y) -> union(x, y), chosen_individuals, chosen_individuals_n) + chosen_individuals_n = choose_individuals( + experiment, recourse_systems; intersect_=intersect_ + ) + chosen_individuals = map( + (x, y) -> union(x, y), chosen_individuals, chosen_individuals_n + ) Threads.@threads for m in 1:M recourse_system = recourse_systems[m] chosen_individuals_m = chosen_individuals_n[m] recourse_system.chosen_individuals = chosen_individuals[m] # Update experiment with_logger(NullLogger()) do - AlgorithmicRecourseDynamics.update_experiment!(experiment, recourse_system, chosen_individuals_m) + AlgorithmicRecourseDynamics.update_experiment!( + experiment, recourse_system, chosen_individuals_m + ) end # Evaluate: if n % evaluate_every == 0 - output_checkpoint = collect_output(experiment, recourse_system, chosen_individuals[m], k, n, m, n_bootstrap=n_bootstrap, n_samples=n_samples) - output[m] = vcat(output[m], output_checkpoint, cols=:union) + output_checkpoint = collect_output( + experiment, + recourse_system, + chosen_individuals[m], + k, + n, + m; + n_bootstrap=n_bootstrap, + n_samples=n_samples, + ) + output[m] = vcat(output[m], output_checkpoint; cols=:union) end end - next!(p_round, showvalues=[(:Fold, "$k/$K"), (:Round, "$n/$N")]) + next!(p_round; showvalues=[(:Fold, "$k/$K"), (:Round, "$n/$N")]) end next!(p_fold) end @@ -105,7 +160,6 @@ function run!( output = reduce(vcat, output) return output - end """ @@ -128,17 +182,17 @@ function set_up_experiment( generators::Dict{Symbol,<:CounterfactualExplanations.Generators.AbstractGenerator}; target::Int=1, num_counterfactuals::Int=5, - kwargs... + kwargs..., ) - - experiment = Experiment(data_train, data_test, target, models, deepcopy(generators), num_counterfactuals) + experiment = Experiment( + data_train, data_test, target, models, deepcopy(generators), num_counterfactuals + ) # Sanity check: @info "Initial model scores:" println(experiment.initial_model_scores) return experiment - end """ @@ -162,13 +216,15 @@ function set_up_experiment( target::Int=1, num_counterfactuals::Int=5, pre_train_models::Union{Nothing,Int}=100, - kwargs... + kwargs..., ) - available_models = [:LogisticRegression, :FluxModel, :FluxEnsemble] @assert all(map(model -> model in available_models, models)) "`models` can only be $(available_models)" - models = Dict([(model, getfield(AlgorithmicRecourseDynamics.Models, model)(data; model_params...)) for model in models]) + models = Dict([ + (model, getfield(AlgorithmicRecourseDynamics.Models, model)(data; model_params...)) + for model in models + ]) # Data: data_train, data_test = Data.train_test_split(data) @@ -177,21 +233,23 @@ function set_up_experiment( if !isnothing(pre_train_models) for (key, model) in models @info "Training $key" - CounterfactualExplanations.Models.train(model, data_train; n_epochs=pre_train_models, kwargs...) + CounterfactualExplanations.Models.train( + model, data_train; n_epochs=pre_train_models, kwargs... + ) end end - experiment = Experiment(data_train, data_test, target, models, deepcopy(generators), num_counterfactuals) + experiment = Experiment( + data_train, data_test, target, models, deepcopy(generators), num_counterfactuals + ) # Sanity check: @info "Initial model scores:" println(experiment.initial_model_scores) return experiment - end - """ function set_up_experiments( catalogue::Dict{Symbol, CounterfactualData}, @@ -207,21 +265,28 @@ Sets up multiple experiments. """ function set_up_experiments( catalogue::Dict{Symbol,CounterfactualData}, - models::Union{Dict{Symbol,<:CounterfactualExplanations.Models.AbstractFittedModel},Vector{Symbol}}, + models::Union{ + Dict{Symbol,<:CounterfactualExplanations.Models.AbstractFittedModel},Vector{Symbol} + }, generators::Dict{Symbol,<:CounterfactualExplanations.Generators.AbstractGenerator}; target::Int=1, num_counterfactuals::Int=5, pre_train_models::Union{Nothing,Int}=100, - kwargs... + kwargs..., ) - set_up_single(data) = set_up_experiment( - data, models, generators; - target=target, num_counterfactuals=num_counterfactuals, - pre_train_models=pre_train_models, - kwargs... - ) + function set_up_single(data) + return set_up_experiment( + data, + models, + generators; + target=target, + num_counterfactuals=num_counterfactuals, + pre_train_models=pre_train_models, + kwargs..., + ) + end - experiments = Dict{Symbol, Experiment}() + experiments = Dict{Symbol,Experiment}() for (key, data) in catalogue @info "Setting up $(key)" experiments[key] = set_up_single(data) @@ -250,9 +315,8 @@ function run_experiment( evaluate_every::Int=2, save_path::Union{Nothing,String}=nothing, save_name::Union{Nothing,String}=nothing, - kwargs... + kwargs..., ) - exp_name = isnothing(save_name) ? "unnamed" : save_name @info "Starting experiment: $exp_name" @@ -275,7 +339,6 @@ function run_experiment( Serialization.serialize(joinpath(save_path, "results.jls"), results) @info "Saved experiment: $exp_name" - end return results @@ -299,7 +362,9 @@ Sets up one experiment for the provided data, models and generators and then run """ function run_experiment( data::CounterfactualData, - models::Union{Dict{Symbol,<:CounterfactualExplanations.Models.AbstractFittedModel},Vector{Symbol}}, + models::Union{ + Dict{Symbol,<:CounterfactualExplanations.Models.AbstractFittedModel},Vector{Symbol} + }, generators::Dict{Symbol,<:CounterfactualExplanations.Generators.AbstractGenerator}; target::Int=1, num_counterfactuals::Int=5, @@ -307,12 +372,15 @@ function run_experiment( pre_train_models::Union{Nothing,Int}=100, save_path::Union{Nothing,String}=nothing, save_name::Union{Nothing,String}=nothing, - kwargs... + kwargs..., ) - experiment = set_up_experiment( - data, models, generators; - target=target, num_counterfactuals=num_counterfactuals, pre_train_models=pre_train_models + data, + models, + generators; + target=target, + num_counterfactuals=num_counterfactuals, + pre_train_models=pre_train_models, ) exp_name = isnothing(save_name) ? "unnamed" : save_name @@ -337,11 +405,9 @@ function run_experiment( Serialization.serialize(joinpath(save_path, "results.jls"), results) @info "Saved experiment: $exp_name" - end return results - end """ @@ -360,26 +426,29 @@ function run_experiments( save_path::Union{Nothing,String}=nothing, save_name_suffix::String="", create_copy::Bool=true, - kwargs... + kwargs..., ) - if create_copy experiments = deepcopy(experiments) end - run_single(experiment, name) = run_experiment( - experiment; - evaluate_every=evaluate_every, - save_path=save_path, - save_name=name, - kwargs... - ) + function run_single(experiment, name) + return run_experiment( + experiment; + evaluate_every=evaluate_every, + save_path=save_path, + save_name=name, + kwargs..., + ) + end save_name_suffix = save_name_suffix != "" ? "_$save_name_suffix" : save_name_suffix - output = Dict(name => run_single(experiment, "$(string(name))$(save_name_suffix)") for (name, experiment) in experiments) + output = Dict( + name => run_single(experiment, "$(string(name))$(save_name_suffix)") for + (name, experiment) in experiments + ) return output - end """ @@ -399,7 +468,9 @@ Sets up and runs experiments for multiple data sets. """ function run_experiments( catalogue::Dict{Symbol,CounterfactualData}, - models::Union{Dict{Symbol,<:CounterfactualExplanations.Models.AbstractFittedModel},Vector{Symbol}}, + models::Union{ + Dict{Symbol,<:CounterfactualExplanations.Models.AbstractFittedModel},Vector{Symbol} + }, generators::Dict{Symbol,<:CounterfactualExplanations.Generators.AbstractGenerator}; target::Int=1, num_counterfactuals::Int=5, @@ -407,24 +478,28 @@ function run_experiments( pre_train_models::Union{Nothing,Int}=100, save_path::Union{Nothing,String}=nothing, save_name_suffix::String="", - kwargs... + kwargs..., ) - - run_single(data, save_name) = run_experiment( - data, models, generators; - target=target, num_counterfactuals=num_counterfactuals, - evaluate_every=evaluate_every, - pre_train_models=pre_train_models, - save_path=save_path, - save_name=save_name, - kwargs... - ) + function run_single(data, save_name) + return run_experiment( + data, + models, + generators; + target=target, + num_counterfactuals=num_counterfactuals, + evaluate_every=evaluate_every, + pre_train_models=pre_train_models, + save_path=save_path, + save_name=save_name, + kwargs..., + ) + end save_name_suffix = save_name_suffix != "" ? "_$save_name_suffix" : save_name_suffix - output = Dict(name => run_single(data, "$(string(name))$(save_name_suffix)") for (name, data) in catalogue) + output = Dict( + name => run_single(data, "$(string(name))$(save_name_suffix)") for + (name, data) in catalogue + ) return output end - - - diff --git a/src/data/Data.jl b/src/data/Data.jl index ff88e08..a913309 100644 --- a/src/data/Data.jl +++ b/src/data/Data.jl @@ -6,5 +6,5 @@ using CounterfactualExplanations include("utils.jl") include("functions.jl") export download_data_sets - -end \ No newline at end of file + +end diff --git a/src/data/functions.jl b/src/data/functions.jl index cefa769..86a47b4 100644 --- a/src/data/functions.jl +++ b/src/data/functions.jl @@ -1,7 +1,7 @@ using StatsBase function scale(X, dim) - dt = fit(ZScoreTransform, X, dim=dim) + dt = fit(ZScoreTransform, X; dim=dim) X_scaled = StatsBase.transform(dt, X) return X_scaled, dt end @@ -9,4 +9,4 @@ end function rescale(X, dt) X_rescaled = StatsBase.reconstruct(dt, X) return X_rescaled -end \ No newline at end of file +end diff --git a/src/data/utils.jl b/src/data/utils.jl index 48bdced..88c319b 100644 --- a/src/data/utils.jl +++ b/src/data/utils.jl @@ -4,7 +4,6 @@ using Flux using StatsBase function Base.hcat(data::CounterfactualData, more_data::CounterfactualData) - data = deepcopy(data) more_data = deepcopy(more_data) @@ -15,4 +14,4 @@ function Base.hcat(data::CounterfactualData, more_data::CounterfactualData) data.y = hcat(data.y, more_data.y) return data -end \ No newline at end of file +end diff --git a/src/evaluation/Evaluation.jl b/src/evaluation/Evaluation.jl index 9186b66..e580ef1 100644 --- a/src/evaluation/Evaluation.jl +++ b/src/evaluation/Evaluation.jl @@ -19,8 +19,13 @@ include("domain_shifts.jl") include("model_shifts.jl") using DataFrames -function evaluate_system(recourse_system::RecourseSystem, experiment::Experiment; to_dataframe=true, n=1000, n_samples=1000) - +function evaluate_system( + recourse_system::RecourseSystem, + experiment::Experiment; + to_dataframe=true, + n=1000, + n_samples=1000, +) metrics = [ mmd_domain(experiment, recourse_system; n=n), perturbation(experiment, recourse_system), @@ -28,11 +33,11 @@ function evaluate_system(recourse_system::RecourseSystem, experiment::Experiment mmd_model(experiment, recourse_system; n=n, grid_search=true, n_samples=n_samples), disagreement(experiment, recourse_system), decisiveness(experiment, recourse_system), - model_performance(experiment, recourse_system) + model_performance(experiment, recourse_system), ] if to_dataframe - metrics = reduce(vcat,map(DataFrame, metrics)) + metrics = reduce(vcat, map(DataFrame, metrics)) end return metrics @@ -40,4 +45,4 @@ end export evaluate_system -end \ No newline at end of file +end diff --git a/src/evaluation/domain_shifts.jl b/src/evaluation/domain_shifts.jl index d3560b0..a7cccac 100644 --- a/src/evaluation/domain_shifts.jl +++ b/src/evaluation/domain_shifts.jl @@ -17,17 +17,31 @@ function DataFrames.DataFrame(metric::DomainMetric) return df end - """ mmd_domain(experiment::Experiment, recourse_system::RecourseSystem, n=1000; target_only::Bool=true, kwargs...) Calculates MMD for the input data. """ -function mmd_domain(experiment::Experiment, recourse_system::RecourseSystem; n=1000, n_samples=1000, target_only::Bool=true, kwargs...) +function mmd_domain( + experiment::Experiment, + recourse_system::RecourseSystem; + n=1000, + n_samples=1000, + target_only::Bool=true, + kwargs..., +) X, y = CounterfactualExplanations.DataPreprocessing.unpack_data(experiment.data) - new_X, new_y = CounterfactualExplanations.DataPreprocessing.unpack_data(recourse_system.data) + new_X, new_y = CounterfactualExplanations.DataPreprocessing.unpack_data( + recourse_system.data + ) if target_only - value, p_value = mmd(X[:, vec(y .== experiment.target)], new_X[:, vec(new_y .== experiment.target)], n_samples; compute_p=n, kwargs...) + value, p_value = mmd( + X[:, vec(y .== experiment.target)], + new_X[:, vec(new_y .== experiment.target)], + n_samples; + compute_p=n, + kwargs..., + ) else value, p_value = mmd(X, new_X, n_samples; compute_p=n, kwargs...) end @@ -36,6 +50,3 @@ function mmd_domain(experiment::Experiment, recourse_system::RecourseSystem; n=1 return metric end - - - diff --git a/src/evaluation/kernels.jl b/src/evaluation/kernels.jl index 9e259e9..b82d18f 100644 --- a/src/evaluation/kernels.jl +++ b/src/evaluation/kernels.jl @@ -1,16 +1,15 @@ using KernelFunctions function kernelsum(k::KernelFunctions.Kernel, x::AbstractMatrix, y::AbstractMatrix) - m = size(x,2) - n = size(y,2) - sum(kernelmatrix(k,x,y)) / (m * n) + m = size(x, 2) + n = size(y, 2) + return sum(kernelmatrix(k, x, y)) / (m * n) end LinearAlgebra function kernelsum(k::KernelFunctions.Kernel, x::AbstractMatrix) - l = size(x,2) - (sum(kernelmatrix(k,x,x)) - tr(kernelmatrix(k,x,x)))/(l^2 - l) + l = size(x, 2) + return (sum(kernelmatrix(k, x, x)) - tr(kernelmatrix(k, x, x))) / (l^2 - l) end kernelsum(k::KernelFunctions.Kernel, x::AbstractVector) = zero(eltype(x)) - diff --git a/src/evaluation/mmd.jl b/src/evaluation/mmd.jl index 9229bde..438c4b7 100644 --- a/src/evaluation/mmd.jl +++ b/src/evaluation/mmd.jl @@ -11,22 +11,24 @@ struct MMD{K<:KernelFunctions.Kernel} <: PreMetric kernel::K end - function (m::MMD)(x::AbstractArray, y::AbstractArray) - xx = kernelsum(m.kernel, x) yy = kernelsum(m.kernel, y) xy = kernelsum(m.kernel, x, y) - xx + yy - 2xy + return xx + yy - 2xy end - """ mmd(KernelFunctions.Kernel(γ), x, y) mmd(KernelFunctions.Kernel(γ), x, y, n) MMD with Gaussian kernel of bandwidth `γ` using at most `n` samples """ -function mmd(x::AbstractArray, y::AbstractArray, k::KernelFunctions.Kernel=default_kernel; compute_p::Union{Nothing,Int}=1000) +function mmd( + x::AbstractArray, + y::AbstractArray, + k::KernelFunctions.Kernel=default_kernel; + compute_p::Union{Nothing,Int}=1000, +) mmd_ = MMD(k)(x, y) if !isnothing(compute_p) mmd_null = mmd_null_dist(x, y, k; l=compute_p) @@ -36,9 +38,15 @@ function mmd(x::AbstractArray, y::AbstractArray, k::KernelFunctions.Kernel=defau end return mmd_, p_val end -function mmd(x::AbstractArray, y::AbstractArray, n::Int, k::KernelFunctions.Kernel=default_kernel; compute_p::Union{Nothing,Int}=1000) +function mmd( + x::AbstractArray, + y::AbstractArray, + n::Int, + k::KernelFunctions.Kernel=default_kernel; + compute_p::Union{Nothing,Int}=1000, +) n = minimum([size(x, 2), n]) - mmd(samplecolumns(x, n), samplecolumns(y, n), k; compute_p=compute_p) + return mmd(samplecolumns(x, n), samplecolumns(y, n), k; compute_p=compute_p) end using Random: shuffle @@ -48,15 +56,16 @@ using Random: shuffle Calculates the MMD for a set of permutations of samples from the two distributions to measure whether the shift should be considered significant. This works under the assumption that if samples `x` and `y` come from the same distribution (under the null hypothesis), then the MMD of permutations of these samples should be similar to MMD(x, y) """ -function mmd_null_dist(x::AbstractArray, y::AbstractArray, k::KernelFunctions.Kernel=default_kernel; l=1000) - +function mmd_null_dist( + x::AbstractArray, y::AbstractArray, k::KernelFunctions.Kernel=default_kernel; l=1000 +) n = size(x, 2) mmd_null = zeros(l) Z = hcat(x, y) Zs = [Z[:, shuffle(1:end)] for i in 1:l] # pre-allocate - bootstrap = function(z) - mmd(z[:, 1:n], z[:, (n+1):end], k; compute_p=nothing)[1] + bootstrap = function (z) + return mmd(z[:, 1:n], z[:, (n + 1):end], k; compute_p=nothing)[1] end mmd_null = map(Zs) do z @@ -65,11 +74,10 @@ function mmd_null_dist(x::AbstractArray, y::AbstractArray, k::KernelFunctions.Ke end return mmd_null - end function mmd_significance(mmd::Number, mmd_null_dist::AbstractArray) - sum(mmd_null_dist .>= mmd) / length(mmd_null_dist) + return sum(mmd_null_dist .>= mmd) / length(mmd_null_dist) end """ @@ -79,22 +87,13 @@ Calculates pairwise squared euclidean distances of the columns of `x` and `y` or `x` and `x`. The dispatches for CuArrays are necessary until https://github.com/JuliaStats/Distances.jl/pull/142 is merged. """ -pairwisel2(x::Matrix, y::Matrix) = pairwise(SqEuclidean(), x, y, dims=2) +pairwisel2(x::Matrix, y::Matrix) = pairwise(SqEuclidean(), x, y; dims=2) pairwisel2(x::AbstractMatrix) = pairwisel2(x, x) - """ samplecolumns(x::AbstractMatrix, n::Int) Sample n columns from a matrix. Returns x if the matrix has less than n columns. """ function samplecolumns(x::AbstractMatrix, n::Int) - (size(x, 2) > n) ? x[:, sample(1:size(x, 2), n, replace=false)] : x + return (size(x, 2) > n) ? x[:, sample(1:size(x, 2), n; replace=false)] : x end - - - - - - - - diff --git a/src/evaluation/model_shifts.jl b/src/evaluation/model_shifts.jl index fbff28d..dc08d50 100644 --- a/src/evaluation/model_shifts.jl +++ b/src/evaluation/model_shifts.jl @@ -10,7 +10,6 @@ struct ModelMetric <: AbstractMetric name::Symbol end - """ DataFrame(metric::ModelMetric) @@ -34,21 +33,31 @@ function perturbation(experiment::Experiment, recourse_system::RecourseSystem) metric = ModelMetric(value, missing, :perturbation) return metric - end - """ mmd_model(experiment::Experiment, recourse_system::RecourseSystem; n=1000, grid_search=false, kwargs...) Calculates the MMD on the probabilities of classification assigned by the model to the set of (all) instances. Allows to quantify the model shift. """ -function mmd_model(experiment::Experiment, recourse_system::RecourseSystem; n=1000, grid_search=false, n_samples=1000, kwargs...) - +function mmd_model( + experiment::Experiment, + recourse_system::RecourseSystem; + n=1000, + grid_search=false, + n_samples=1000, + kwargs..., +) X, _ = CounterfactualExplanations.DataPreprocessing.unpack_data(experiment.data) if grid_search - X = reduce(hcat, [map(x -> rand(range(x..., length=100)), extrema(X, dims=2)) for i in 1:n_samples]) + X = reduce( + hcat, + [ + map(x -> rand(range(x...; length=100)), extrema(X; dims=2)) for + i in 1:n_samples + ], + ) end # Initial: @@ -65,7 +74,6 @@ function mmd_model(experiment::Experiment, recourse_system::RecourseSystem; n=10 metric = ModelMetric(value, p_value, metric_name) return metric - end """ @@ -74,7 +82,6 @@ end Calculates the pseudo-distance of points to the decision boundary measured as the average probability of classification centered around 0.5. High value corresponds to a large margin of classification. """ function decisiveness(experiment::Experiment, recourse_system::RecourseSystem) - X, _ = CounterfactualExplanations.DataPreprocessing.unpack_data(experiment.data) # Initial: @@ -98,7 +105,6 @@ end Calculates the Disagreement pseudo-distance defined in https://doi.org/10.1145/1273496.1273541 as Pr(h(x) != h'(x)), that is the probability that labels assigned by one classifier do not agree with the labels assigned by another classifier. Simply put, it measures the overlap between models. As this is an empirical measure, we can vary the number of records in `data`. """ function disagreement(experiment::Experiment, recourse_system::RecourseSystem) - X, _ = CounterfactualExplanations.DataPreprocessing.unpack_data(experiment.data) # Initial: @@ -108,7 +114,7 @@ function disagreement(experiment::Experiment, recourse_system::RecourseSystem) new_M = recourse_system.model new_proba = reduce(hcat, map(x -> length(x) == 1 ? [x, 1 - x] : x, probs(new_M, X))) - value = sum(argmax(proba, dims=1) .!= argmax(new_proba, dims=1)) / size(X, 2) + value = sum(argmax(proba; dims=1) .!= argmax(new_proba; dims=1)) / size(X, 2) metric = ModelMetric(value, missing, :disagreement) return metric @@ -129,6 +135,4 @@ function model_performance(experiment::Experiment, recourse_system::RecourseSyst value = new_score_ - score_ metric = ModelMetric(value, missing, :model_performance) return metric - end - diff --git a/src/experiments/Experiments.jl b/src/experiments/Experiments.jl index 6e7c9a5..49b6c79 100755 --- a/src/experiments/Experiments.jl +++ b/src/experiments/Experiments.jl @@ -6,4 +6,4 @@ using CounterfactualExplanations include("functions.jl") include("utils.jl") -end \ No newline at end of file +end diff --git a/src/experiments/functions.jl b/src/experiments/functions.jl index 74891a3..325799e 100755 --- a/src/experiments/functions.jl +++ b/src/experiments/functions.jl @@ -1,4 +1,5 @@ -using CounterfactualExplanations: counterfactual, counterfactual_label, generate_counterfactual +using CounterfactualExplanations: + counterfactual, counterfactual_label, generate_counterfactual using CounterfactualExplanations.DataPreprocessing using DataFrames using Flux @@ -32,14 +33,18 @@ mutable struct Experiment initial_model_scores::Vector end - """ Experiment(data::CounterfactualExplanations.CounterfactualData, target::Number, models::NamedTuple, generators::NamedTuple) """ function Experiment( - train_data::CounterfactualExplanations.CounterfactualData, test_data::CounterfactualExplanations.CounterfactualData, target::Number, models::Union{NamedTuple,Dict}, generators::Union{NamedTuple,Dict}, num_counterfactuals::Int=1 + train_data::CounterfactualExplanations.CounterfactualData, + test_data::CounterfactualExplanations.CounterfactualData, + target::Number, + models::Union{NamedTuple,Dict}, + generators::Union{NamedTuple,Dict}, + num_counterfactuals::Int=1, ) # Add system identifiers: @@ -49,7 +54,9 @@ function Experiment( data = hcat(train_data, test_data) # Initial scores: - initial_model_scores = [(name, Models.model_evaluation(model, test_data)) for (name, model) in pairs(models)] + initial_model_scores = [ + (name, Models.model_evaluation(model, test_data)) for (name, model) in pairs(models) + ] experiment = Experiment( data, # initial data is owned by the experiment, shared across recourse systems, @@ -62,14 +69,13 @@ function Experiment( models, generators, num_counterfactuals, - initial_model_scores + initial_model_scores, ) return experiment end function set_up_system_grid!(experiment::Experiment, K::Int=1) - data = experiment.train_data grid = Base.Iterators.product(values(experiment.models), values(experiment.generators)) @@ -81,11 +87,13 @@ function set_up_system_grid!(experiment::Experiment, K::Int=1) score = Models.model_evaluation(model, experiment.test_data) newmodel = deepcopy(model) generator = vars[2] - recourse_system = RecourseSystem(newdata, newmodel, generator, model, score, nothing, DataFrame()) + recourse_system = RecourseSystem( + newdata, newmodel, generator, model, score, nothing, DataFrame() + ) return recourse_system end end - experiment.recourse_systems = recourse_systems + return experiment.recourse_systems = recourse_systems end """ @@ -107,7 +115,9 @@ end choose_individuals(system::RecourseSystem, target::Number) """ -function choose_individuals(experiment::Experiment, recourse_systems::AbstractArray; intersect_::Bool=true) +function choose_individuals( + experiment::Experiment, recourse_systems::AbstractArray; intersect_::Bool=true +) args = experiment.fixed_parameters target, μ = experiment.target, args.μ @@ -126,25 +136,30 @@ function choose_individuals(experiment::Experiment, recourse_systems::AbstractAr if intersect_ candidates_intersect = intersect(candidates...) n_individuals = Int(round(μ * length(candidates_intersect))) - chosen_individuals = StatsBase.sample(candidates_intersect, n_individuals, replace=false) + chosen_individuals = StatsBase.sample( + candidates_intersect, n_individuals; replace=false + ) chosen_individuals = map(candidates) do x sort(chosen_individuals) end else chosen_individuals = map(candidates) do x n_individuals = Int(round(μ * length(x))) - sort(StatsBase.sample(x, n_individuals, replace=false)) + sort(StatsBase.sample(x, n_individuals; replace=false)) end end return chosen_individuals end - """ """ -function update_experiment!(experiment::Experiment, recourse_system::RecourseSystem, chosen_individuals::AbstractVector) +function update_experiment!( + experiment::Experiment, + recourse_system::RecourseSystem, + chosen_individuals::AbstractVector, +) # Recourse System: counterfactual_data = recourse_system.data @@ -163,23 +178,29 @@ function update_experiment!(experiment::Experiment, recourse_system::RecourseSys factuals = select_factual(counterfactual_data, chosen_individuals) results = generate_counterfactual( - factuals, target, counterfactual_data, M, generator; - T=T, num_counterfactuals=experiment.num_counterfactuals, generative_model_params=args.generative_model_params, - latent_space=args.latent_space + factuals, + target, + counterfactual_data, + M, + generator; + T=T, + num_counterfactuals=experiment.num_counterfactuals, + generative_model_params=args.generative_model_params, + latent_space=args.latent_space, ) # Unwrap new data: - indices_ = rand(1:experiment.num_counterfactuals, length(results)) # randomly draw from generated counterfactuals + indices_ = rand(1:(experiment.num_counterfactuals), length(results)) # randomly draw from generated counterfactuals X′ = reduce(hcat, @.(selectdim(counterfactual(results), 3, indices_))) y′ = reduce(hcat, @.(selectdim(counterfactual_label(results), 3, indices_))) - # If for any counterfactuals the returned label is NaN, this is considered as invalid and the current label is not updated: - valid_ces = vec(.!(isnan.(y′))) - chosen_individuals = chosen_individuals[valid_ces] + # If for any counterfactuals the returned label is NaN, this is considered as invalid and the current label is not updated: + valid_ces = vec(.!(isnan.(y′))) + chosen_individuals = chosen_individuals[valid_ces] - # Update data: - X[:, chosen_individuals] = X′[:, valid_ces] - y[:, chosen_individuals] = y′[:, valid_ces] + # Update data: + X[:, chosen_individuals] = X′[:, valid_ces] + y[:, chosen_individuals] = y′[:, valid_ces] # Generative model: gen_mod = deepcopy(counterfactual_data.generative_model) @@ -191,14 +212,12 @@ function update_experiment!(experiment::Experiment, recourse_system::RecourseSys recourse_system.data.X = X recourse_system.data.y = y recourse_system.data.generative_model = gen_mod - recourse_system.model = CounterfactualExplanations.Models.train(M, counterfactual_data) - recourse_system.benchmark = vcat(recourse_system.benchmark, CounterfactualExplanations.Benchmark.benchmark(results)) - + recourse_system.model = CounterfactualExplanations.Models.train( + M, counterfactual_data + ) + recourse_system.benchmark = vcat( + recourse_system.benchmark, + CounterfactualExplanations.Benchmark.benchmark(results), + ) end - end - - - - - diff --git a/src/experiments/utils.jl b/src/experiments/utils.jl index 8a0b589..d166cde 100755 --- a/src/experiments/utils.jl +++ b/src/experiments/utils.jl @@ -1,11 +1,8 @@ # Compute cartesian product over two vectors: -function expandgrid(x,y) +function expandgrid(x, y) N = length(x) * length(y) - grid = Iterators.product(x,y) |> - Iterators.flatten |> - collect |> - z -> reshape(z, (2,N)) |> - transpose |> - Matrix + grid = (z -> Matrix(transpose(reshape(z, (2, N)))))( + collect(Iterators.flatten(Iterators.product(x, y))) + ) return grid -end \ No newline at end of file +end diff --git a/src/models/Models.jl b/src/models/Models.jl index 29d941a..8b9e2e6 100755 --- a/src/models/Models.jl +++ b/src/models/Models.jl @@ -7,4 +7,4 @@ using CounterfactualExplanations include("mlp.jl") # including logistic regression include("deep_ensemble.jl") -end \ No newline at end of file +end diff --git a/src/models/deep_ensemble.jl b/src/models/deep_ensemble.jl index 81b632b..7656160 100755 --- a/src/models/deep_ensemble.jl +++ b/src/models/deep_ensemble.jl @@ -4,9 +4,14 @@ using LinearAlgebra using Parameters using Statistics -function perturbation(model::CounterfactualExplanations.Models.FluxEnsemble, new_model::CounterfactualExplanations.Models.FluxEnsemble) +function perturbation( + model::CounterfactualExplanations.Models.FluxEnsemble, + new_model::CounterfactualExplanations.Models.FluxEnsemble, +) ensemble = model.model new_ensemble = new_model.model - Δ = mean(map(x -> norm(x)/length(x),Flux.params(new_ensemble).-Flux.params(ensemble))) + Δ = mean( + map(x -> norm(x) / length(x), Flux.params(new_ensemble) .- Flux.params(ensemble)) + ) return Δ -end \ No newline at end of file +end diff --git a/src/models/mlp.jl b/src/models/mlp.jl index d0f9a50..c9f4c5f 100644 --- a/src/models/mlp.jl +++ b/src/models/mlp.jl @@ -4,12 +4,12 @@ using LinearAlgebra using Parameters using Statistics -function perturbation(model::CounterfactualExplanations.Models.FluxModel, new_model::CounterfactualExplanations.Models.FluxModel) +function perturbation( + model::CounterfactualExplanations.Models.FluxModel, + new_model::CounterfactualExplanations.Models.FluxModel, +) mlp = model.model new_mlp = new_model.model - Δ = mean(map(x -> norm(x)/length(x),Flux.params(new_mlp).-Flux.params(mlp))) + Δ = mean(map(x -> norm(x) / length(x), Flux.params(new_mlp) .- Flux.params(mlp))) return Δ end - - - diff --git a/src/post_processing.jl b/src/post_processing.jl index a813d19..6f65cea 100644 --- a/src/post_processing.jl +++ b/src/post_processing.jl @@ -8,18 +8,41 @@ using RCall using Statistics function run_bootstrap( - results::Dict{Symbol, ExperimentResults}, n_bootstrap::Int=1000; - filename::String="bootstrapped_results.csv", show_progress=!is_logging(stderr) + results::Dict{Symbol,ExperimentResults}, + n_bootstrap::Int=1000; + filename::String="bootstrapped_results.csv", + show_progress=!is_logging(stderr), ) df = DataFrame() n_total = length(results) - p_total = Progress(n_total; desc="Total Progress:", showspeed=true, enabled=show_progress, output = stderr, color=:yellow) + p_total = Progress( + n_total; + desc="Total Progress:", + showspeed=true, + enabled=show_progress, + output=stderr, + color=:yellow, + ) for (key, val) in results n_folds = length(val.experiment.recourse_systems) - p_fold = Progress(n_folds; desc="Progress on fold:", showspeed=true, enabled=show_progress, output = stderr, color=:green) + p_fold = Progress( + n_folds; + desc="Progress on fold:", + showspeed=true, + enabled=show_progress, + output=stderr, + color=:green, + ) for fold in 1:n_folds N = length(val.experiment.system_identifiers) - p_sys = Progress(N; desc="Progress on system:", showspeed=true, enabled=show_progress, output = stderr, color=:blue) + p_sys = Progress( + N; + desc="Progress on system:", + showspeed=true, + enabled=show_progress, + output=stderr, + color=:blue, + ) Threads.@threads for i in 1:N rec_sys = val.experiment.recourse_systems[fold][i] model_name, gen_name = collect(val.experiment.system_identifiers)[i] @@ -29,9 +52,14 @@ function run_bootstrap( df_.generator .= gen_name df_.fold .= fold df = vcat(df, df_) - next!(p_sys, showvalues = [(:Model, model_name), (:Generator, gen_name), (:System, "$i/$N")]) + next!( + p_sys; + showvalues=[ + (:Model, model_name), (:Generator, gen_name), (:System, "$i/$N") + ], + ) end - next!(p_fold, showvalues = [(:Fold, "$fold/$n_folds")]) + next!(p_fold; showvalues=[(:Fold, "$fold/$n_folds")]) end next!(p_total) end @@ -40,15 +68,27 @@ function run_bootstrap( return df end -function Plots.plot(results::ExperimentResults, variable::Symbol=:mmd, scope::Symbol=:model; size=3, title=nothing, kwargs...) - +function Plots.plot( + results::ExperimentResults, + variable::Symbol=:mmd, + scope::Symbol=:model; + size=3, + title=nothing, + kwargs..., +) df = results.output @assert variable in unique(df.name) "Not a valid variable." gdf = groupby(df, [:generator, :model, :n, :name, :scope]) - df_plot = combine(gdf, :value => (x -> [(mean(x), mean(x) + std(x), mean(x) - std(x))]) => [:mean, :ymax, :ymin]) + df_plot = combine( + gdf, + :value => + (x -> [(mean(x), mean(x) + std(x), mean(x) - std(x))]) => [:mean, :ymax, :ymin], + ) df_plot = mapcols(x -> typeof(x) == Vector{Symbol} ? string.(x) : x, df_plot) - df_plot.name .= [r[:name] == "mmd" ? "$(r[:name])_$(r[:scope])" : r[:name] for r in eachrow(df_plot)] + df_plot.name .= [ + r[:name] == "mmd" ? "$(r[:name])_$(r[:scope])" : r[:name] for r in eachrow(df_plot) + ] select!(df_plot, Not(:scope)) ncol = length(unique(df_plot.model)) @@ -70,20 +110,32 @@ function Plots.plot(results::ExperimentResults, variable::Symbol=:mmd, scope::Sy img = Images.load(rcopy(R"temp_path")) return img - end -function Plots.plot(results::ExperimentResults, n::Int, variable::Symbol=:mmd, scope::Symbol=:model; size=3, title=nothing, kwargs...) - +function Plots.plot( + results::ExperimentResults, + n::Int, + variable::Symbol=:mmd, + scope::Symbol=:model; + size=3, + title=nothing, + kwargs..., +) df = results.output @assert variable in unique(df.name) "Not a valid variable." @assert n in unique(df.n) "No results for round `n`." - df = df[df.n.==n, :] + df = df[df.n .== n, :] gdf = groupby(df, [:generator, :model, :n, :name, :scope]) - df_plot = combine(gdf, :value => (x -> [(mean(x), mean(x) + std(x), mean(x) - std(x))]) => [:mean, :ymax, :ymin]) + df_plot = combine( + gdf, + :value => + (x -> [(mean(x), mean(x) + std(x), mean(x) - std(x))]) => [:mean, :ymax, :ymin], + ) df_plot = mapcols(x -> typeof(x) == Vector{Symbol} ? string.(x) : x, df_plot) - df_plot.name .= [r[:name] == "mmd" ? "$(r[:name])_$(r[:scope])" : r[:name] for r in eachrow(df_plot)] + df_plot.name .= [ + r[:name] == "mmd" ? "$(r[:name])_$(r[:scope])" : r[:name] for r in eachrow(df_plot) + ] select!(df_plot, Not(:scope)) ncol = length(unique(df_plot.model)) @@ -110,7 +162,6 @@ function Plots.plot(results::ExperimentResults, n::Int, variable::Symbol=:mmd, s img = Images.load(rcopy(R"temp_path")) return img - end function kable(result::ExperimentResults, n::Vector{Int}; format="latex") @@ -140,7 +191,7 @@ function kable( results::Dict{Symbol,ExperimentResults}, n::Vector{Int}; format="latex", - exclude_metric::Vector{Symbol}=[:mmd_grid] + exclude_metric::Vector{Symbol}=[:mmd_grid], ) df = DataFrame() for (key, val) in results @@ -166,4 +217,4 @@ function kable( collapse_rows(columns = 1:4, latex_hline = "major", valign = "middle") """ return println(rcopy(R"ktab")) -end \ No newline at end of file +end diff --git a/test/runtests.jl b/test/runtests.jl index e363d1e..1ee4218 100644 --- a/test/runtests.jl +++ b/test/runtests.jl @@ -14,12 +14,13 @@ using Random using Test @testset "AlgorithmicRecourseDynamics.jl" begin - N = 1000 counterfactual_data = CounterfactualExplanations.load_linearly_separable(N) generator = GenericGenerator() - data_train, data_test = CounterfactualExplanations.DataPreprocessing.train_test_split(counterfactual_data) + data_train, data_test = CounterfactualExplanations.DataPreprocessing.train_test_split( + counterfactual_data + ) mod = CounterfactualExplanations.fit_model(data_train, :MLP) models = Dict(:mymodel => mod) @@ -27,5 +28,4 @@ using Test experiment = set_up_experiment(data_train, data_test, models, generators) run!(experiment) - end From 0e562c9a69a0e8befda9c3515ae85ff627169aca Mon Sep 17 00:00:00 2001 From: Pietro Monticone <38562595+pitmonticone@users.noreply.github.com> Date: Tue, 21 Mar 2023 19:31:41 +0100 Subject: [PATCH 2/2] Allow pipe syntax --- .JuliaFormatter.toml | 1 + dev/logo/logo.jl | 2 -- 2 files changed, 1 insertion(+), 2 deletions(-) diff --git a/.JuliaFormatter.toml b/.JuliaFormatter.toml index 323237b..8f9f54f 100644 --- a/.JuliaFormatter.toml +++ b/.JuliaFormatter.toml @@ -1 +1,2 @@ style = "blue" +pipe_to_function_call = false \ No newline at end of file diff --git a/dev/logo/logo.jl b/dev/logo/logo.jl index aff0fc0..0045874 100644 --- a/dev/logo/logo.jl +++ b/dev/logo/logo.jl @@ -60,9 +60,7 @@ function logo_picture(; Model = @load LinearRegressor pkg = MLJLinearModels degree_polynomial = 5 polynomial_features(x, degree::Int) = reduce(hcat, map(i -> x .^ i, 1:degree)) - #! format: off pipe = (x -> MLJBase.table(polynomial_features(x, degree_polynomial))) |> Model() - #! format: on conf_model = conformal_model(pipe; coverage=0.95) mach = machine(conf_model, x, y) fit!(mach; rows=train)