diff --git a/Project.toml b/Project.toml index 0bbe02fa..e07a11b0 100644 --- a/Project.toml +++ b/Project.toml @@ -1,7 +1,7 @@ name = "SimpleSDMLayers" uuid = "2c645270-77db-11e9-22c3-0f302a89c64c" authors = ["Timothée Poisot ", "Gabriel Dansereau "] -version = "0.8.3" +version = "0.9.0" [deps] ArchGDAL = "c9ce4bd3-c3d5-55b8-8973-c0e20141b8c3" diff --git a/docs/Project.toml b/docs/Project.toml index d6862623..88dd2568 100644 --- a/docs/Project.toml +++ b/docs/Project.toml @@ -8,6 +8,7 @@ GLM = "38e38edf-8417-5370-95a0-9cbb8c7f171a" GeometryBasics = "5c1252a2-5f33-56bf-86c9-59e7332b4326" JSON = "682c06a0-de6a-54ab-a142-c8b1cf79cde6" Literate = "98b081ad-f1c9-55d3-8b20-4c87d4299306" +MultivariateStats = "6f286f6a-111f-5878-ab1e-185364afe411" Plots = "91a5bcdd-55d7-5caf-9e0b-520d859cae80" Statistics = "10745b16-79ce-11e8-11f9-7d13ad32a3b2" StatsBase = "2913bbd2-ae8a-5f71-8c99-4fb6c76f3a91" diff --git a/docs/make.jl b/docs/make.jl index c32f7084..9a3c1614 100644 --- a/docs/make.jl +++ b/docs/make.jl @@ -40,7 +40,8 @@ makedocs( "Geometry for clipping" => "examples/geometry.md", "Sliding window analysis" => "examples/slidingwindow.md", "Landcover data" => "examples/landcover.md", - "Multivariate mapping" => "examples/multivariate.md" + "Multivariate mapping" => "examples/multivariate.md", + "Multivariate statistics" => "examples/pca.md" ], "SDM case studies" => [ "GBIF integration" => "sdm/gbif.md", diff --git a/docs/src/examples/pca.jl b/docs/src/examples/pca.jl new file mode 100644 index 00000000..6a3c21f3 --- /dev/null +++ b/docs/src/examples/pca.jl @@ -0,0 +1,54 @@ +# # MultivariateStats.jl integration + +# In this example we explore how you can use +# `MultivariateStats.jl` with `SimpleSDMLayers` +# to perform [principal-component-analysis (PCA)](wikiling todo) to +# reduce the dimensionality of a set of `SDMLayers`, or to remove the +# covariance between layers via data [whitening](linktodo). + +# ## Principal-component analysis of many `SDMLayers` + +using SimpleSDMLayers +using MultivariateStats +using Plots + +# The `SimpleSDMLayers` enables integration with `MultivariateStats.jl` +# In this example, we will show how this can work. + +boundaries =(left = -164.022167, right = -55.250858, bottom = 23.547132, top = 72.105833) +layers = convert.( + Float32, + SimpleSDMPredictor(WorldClim, BioClim, 1:19; boundaries...) +) + +# fit pca and project to a new set of layers + +pca = fit(PCA, layers) +newlayers = transform(pca, layers) + +# plot them + +pcaplots = plot.(newlayers) +plot(pcaplots...) + + +# ## Remove correlation between layers (`Whitening`) + +# Have to start with two layers that are reasonably correlated. + +wlayers = convert.( + Float32, + SimpleSDMPredictor(WorldClim, BioClim, 1:2; boundaries...) +) +plot(plot.(wlayers)...) + + +# Now we call methods just as in `MultivariateStats` + +w = fit(Whitening, wlayers) +newlayers = transform(w, wlayers) + + +# and plot the layers without covar + +plot(plot.(newlayers)...) \ No newline at end of file diff --git a/src/SimpleSDMLayers.jl b/src/SimpleSDMLayers.jl index b880faff..7a98bf49 100644 --- a/src/SimpleSDMLayers.jl +++ b/src/SimpleSDMLayers.jl @@ -54,7 +54,7 @@ for s in instances(CMIP6) end for s in instances(RepresentativeConcentrationPathway) @eval export $(Symbol(s)) -end +end for s in instances(SharedSocioeconomicPathway) @eval export $(Symbol(s)) end @@ -99,7 +99,7 @@ isdir(_layers_assets_path) || mkpath(_layers_assets_path) export clip function __init__() - @require GBIF="ee291a33-5a6c-5552-a3c8-0f29a1181037" begin + @require GBIF = "ee291a33-5a6c-5552-a3c8-0f29a1181037" begin @info "Loading GBIF support for SimpleSDMLayers.jl" include("integrations/GBIF.jl") end @@ -108,6 +108,11 @@ function __init__() include("integrations/DataFrames.jl") end + @require MultivariateStats = "6f286f6a-111f-5878-ab1e-185364afe411" begin + @info "Loading MultivariateStats support for SimpleSDMLayers.jl" + include("integrations/MultivariateStats.jl") + end + end end # module diff --git a/src/integrations/MultivariateStats.jl b/src/integrations/MultivariateStats.jl new file mode 100644 index 00000000..4d01c72c --- /dev/null +++ b/src/integrations/MultivariateStats.jl @@ -0,0 +1,44 @@ +# WARNING this file is only loaded if MultivariateStats.jl is also active +# This all happens thanks to the Requires.jl package + +_allowed_transforms = (MultivariateStats.PCA, MultivariateStats.PPCA, MultivariateStats.KernelPCA, MultivariateStats.Whitening) +AllowedMultivariateTransforms = Union{_allowed_transforms...} + +""" + MultivariateStats.fit(a, layers::Vector{T}, kwargs...) where T <: SimpleSDMLayer + +Overloads the `fit` function from `MultivariateStats.jl`. +""" +function MultivariateStats.fit(proj::Type{K}, layers::Vector{T}; kwargs...) where {T<:SimpleSDMLayer, K <: AllowedMultivariateTransforms} + _layers_are_compatible(layers) || return ArgumentError("layers are not compatible") + common_keys = reduce(∩, keys.(layers)) + input = hcat([vcat([layer[key] for layer in layers]...) for key in common_keys]...) + proj = MultivariateStats.fit(proj, input; kwargs...) + return proj +end + +""" + transform(proj, layers::Vector{V}, + kwargs...) where {PT<:Union{MultivariateStats.PCA, MultivariateStats.PPCA},V<:SimpleSDMLayer} + +Overload of the `transform` function from `MultivariateStats.jl`. Here `proj` is an output object from `MultivariateStats.fit` (see above). +""" +function MultivariateStats.transform( + proj, layers::AbstractVecOrMat{U}; kwargs... +) where {U<:SimpleSDMLayer} + _layers_are_compatible(layers) || return ArgumentError("layers are not compatible") + + newlayers = [similar(first(layers)) for i in 1:MultivariateStats.outdim(proj)] + common_keys = reduce(∩, keys.(layers)) + + input = hcat([vcat([layer[key] for layer in layers]...) for key in common_keys]...) + + for (ct, key) in enumerate(common_keys) + pcaproj = MultivariateStats.transform(proj, input[:, ct]) + for i in 1:length(newlayers) + newlayers[i][key] = pcaproj[i] + end + end + + return newlayers +end diff --git a/test/Project.toml b/test/Project.toml index 26508182..aebd904b 100644 --- a/test/Project.toml +++ b/test/Project.toml @@ -1,5 +1,7 @@ [deps] GBIF = "ee291a33-5a6c-5552-a3c8-0f29a1181037" +MultivariateStats = "6f286f6a-111f-5878-ab1e-185364afe411" +NeutralLandscapes = "71847384-8354-4223-ac08-659a5128069f" Plots = "91a5bcdd-55d7-5caf-9e0b-520d859cae80" Statistics = "10745b16-79ce-11e8-11f9-7d13ad32a3b2" StatsPlots = "f3b207a7-027a-5e70-b257-86293d7955fd" diff --git a/test/extensions/multivariatestats.jl b/test/extensions/multivariatestats.jl new file mode 100644 index 00000000..a7c61d6b --- /dev/null +++ b/test/extensions/multivariatestats.jl @@ -0,0 +1,22 @@ +module SSLTestMVStats +using SimpleSDMLayers +using MultivariateStats +using NeutralLandscapes +using Test + +TEST_DIMS = (150, 150) +TEST_AUTOCORRELATION = 0.9 +TEST_NUM_LAYERS = 10 + +layers = [ + SimpleSDMResponse(rand(MidpointDisplacement(TEST_AUTOCORRELATION), TEST_DIMS...)) for + i in 1:TEST_NUM_LAYERS +] + +@test typeof(transform(fit(PCA, layers), layers)) <: Vector{T} where {T<:SimpleSDMLayer} +#@test typeof(transform(fit(PPCA, layers),layers)) <: Vector{T} where T<:SimpleSDMLayer + +#@assert typeof(transform(fit(KernelPCA, layers),layers)) <: Vector{T} where T<:SimpleSDMLayer +#@assert typeof(transform(fit(Whitening, layers),layers)) <: Vector{T} where T<:SimpleSDMLayer + +end diff --git a/test/runtests.jl b/test/runtests.jl index 7a263d92..5b05ce2d 100644 --- a/test/runtests.jl +++ b/test/runtests.jl @@ -4,6 +4,7 @@ using Test global anyerrors = false tests = [ + "MultivariateStats" => "extensions/multivariatestats.jl", "construction" => "core/construction.jl", "lat./lon." => "core/latlon.jl", "lat./lon. conversion" => "core/coordconvert.jl",