From 7ea86a47e203a6f571ca1fd1bf22b1a37c6c3255 Mon Sep 17 00:00:00 2001 From: michael catchen Date: Mon, 25 Oct 2021 16:42:10 -0400 Subject: [PATCH 01/22] adding new file that isn't connected to anything --- src/operations/embed.jl | 55 +++++++++++++++++++++++++++++++++++++++++ 1 file changed, 55 insertions(+) create mode 100644 src/operations/embed.jl diff --git a/src/operations/embed.jl b/src/operations/embed.jl new file mode 100644 index 00000000..6daaf888 --- /dev/null +++ b/src/operations/embed.jl @@ -0,0 +1,55 @@ +""" + This file contains interfaces to MVStats for + taking many SDMLayers and embedding their values + in a lower dimensional space to create a smaller + number of SDMLayers + +""" + +function embed(layers::Vector{T}, PCA) where T <: SimpleSDMLayer + +end + +function make_pca_input(layers) + numdims = length(layers) + numsites = prod(size(layers[begin])) + pcainput = zeros(numdims, numsites) + + for i in eachindex(layers[1]) + for layernum in 1:length(layers) + if !isnothing(layers[layernum][i]) + pcainput[layernum,i] = layers[layernum][i] + else + end + end + end + pcainput +end + +function make_pca_layers(layers) + pcainput = make_pca_input(layers) + numdims, numsites = length(layers), prod(size(layers[begin])) + + pca = fit(PCA, pcainput, pratio=0.995) + A = projection(pca) + + transformedvals = zeros(outdim(pca), numsites) + + for loc in 1:numsites + transformedvals[:,loc] = A' * vec(pcainput[:,loc]) + end + + + ### TODO this has to be a similar SDMlayer to retain coordinates + newlayers = [zeros(Float32, size(layers[begin])) for l in 1:outdim(pca)] + + for loc in 1:numsites + for layer in 1:outdim(pca) + newlayers[layer][loc] = transformedvals[layer,loc] + end + end + + + newlayers = SimpleSDMPredictor.(newlayers, boundingbox(layers[begin])...) + newlayers = map(f -> rescale(f, (0,1)), newlayers) +end \ No newline at end of file From cf031f24d182ad8127daa59856b22818c2e50a5a Mon Sep 17 00:00:00 2001 From: michael catchen Date: Tue, 26 Oct 2021 11:48:50 -0400 Subject: [PATCH 02/22] :features: adding multivariatestats --- Project.toml | 1 + src/SimpleSDMLayers.jl | 6 +++++- src/operations/{embed.jl => transform.jl} | 7 +++++-- 3 files changed, 11 insertions(+), 3 deletions(-) rename src/operations/{embed.jl => transform.jl} (85%) diff --git a/Project.toml b/Project.toml index 0bbe02fa..68e59cfc 100644 --- a/Project.toml +++ b/Project.toml @@ -10,6 +10,7 @@ Colors = "5ae59095-9a9b-59fe-a467-6f913c188581" Distances = "b4f34e82-e78d-54a5-968a-f98e89d6e8f7" Downloads = "f43a241f-c20a-4ad4-852c-f6b1247861c6" GeometryBasics = "5c1252a2-5f33-56bf-86c9-59e7332b4326" +MultivariateStats = "6f286f6a-111f-5878-ab1e-185364afe411" PolygonOps = "647866c9-e3ac-4575-94e7-e3d426903924" RecipesBase = "3cdcf5f2-1ef4-517c-9805-6587b60abb01" Requires = "ae029012-a4dd-5104-9daa-d747884805df" diff --git a/src/SimpleSDMLayers.jl b/src/SimpleSDMLayers.jl index b880faff..9832f0c0 100644 --- a/src/SimpleSDMLayers.jl +++ b/src/SimpleSDMLayers.jl @@ -13,6 +13,9 @@ export Point, Polygon using PolygonOps using StatsBase +using MultivariateStats +using MultivariateStats: PCA, PPCA, KernelPCA, Whitening + # Basic types for the package include(joinpath("lib", "types.jl")) export SimpleSDMLayer, SimpleSDMResponse, SimpleSDMPredictor @@ -85,7 +88,8 @@ include(joinpath("operations", "sliding.jl")) include(joinpath("operations", "mask.jl")) include(joinpath("operations", "rescale.jl")) include(joinpath("operations", "mosaic.jl")) -export coarsen, slidingwindow, mask, rescale!, rescale, mosaic +include(joinpath("operations", "transform.jl")) +export coarsen, slidingwindow, mask, rescale!, rescale, mosaic, fit, transform, transform! include(joinpath("recipes", "recipes.jl")) export bivariate diff --git a/src/operations/embed.jl b/src/operations/transform.jl similarity index 85% rename from src/operations/embed.jl rename to src/operations/transform.jl index 6daaf888..ee0d53b3 100644 --- a/src/operations/embed.jl +++ b/src/operations/transform.jl @@ -6,10 +6,13 @@ """ -function embed(layers::Vector{T}, PCA) where T <: SimpleSDMLayer +function fit(PCA, layers::Vector{T}) where T <: SimpleSDMLayer end +function transform(W, layers::Vector{T}) where T<: SimpleSDMLayer end +function transform!(W, layers::Vector{T}) where T<: SimpleSDMLayer end + function make_pca_input(layers) numdims = length(layers) numsites = prod(size(layers[begin])) @@ -52,4 +55,4 @@ function make_pca_layers(layers) newlayers = SimpleSDMPredictor.(newlayers, boundingbox(layers[begin])...) newlayers = map(f -> rescale(f, (0,1)), newlayers) -end \ No newline at end of file +end From b0bc30172b95abecfd50c5b08dfe8409d2dd5bdd Mon Sep 17 00:00:00 2001 From: michael catchen Date: Tue, 26 Oct 2021 14:17:36 -0400 Subject: [PATCH 03/22] :pencil: adding MIT license for GeoData.jl to header of resample file --- src/operations/resample.jl | 68 ++++++++++++++++++++++++++++++++++++++ 1 file changed, 68 insertions(+) create mode 100644 src/operations/resample.jl diff --git a/src/operations/resample.jl b/src/operations/resample.jl new file mode 100644 index 00000000..1fcc3e3d --- /dev/null +++ b/src/operations/resample.jl @@ -0,0 +1,68 @@ +#= + +The code in this file is adaoted from GeoData.jl +(https://github.com/rafaqz/GeoData.jl), see this issue +(https://github.com/EcoJulia/SimpleSDMLayers.jl/issues/131) + +See the following GeoData.jl license (MIT) + +---------------------------------------------------------------------- +Copyright (c) 2019 Rafael Schouten and Michael Krabbe Borregaard + +Permission is hereby granted, free of charge, to any person obtaining +a copy of this software and associated documentation files (the +"Software"), to deal in the Software without restriction, including +without limitation the rights to use, copy, modify, merge, publish, +distribute, sublicense, and/or sell copies of the Software, and to +permit persons to whom the Software is furnished to do so, subject to +the following conditions: + +The above copyright notice and this permission notice shall be +included in all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF +MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. +IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY +CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, +TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE +SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. +---------------------------------------------------------------------- +=# + +function resample(A::T) end + +""" +Code from GeoData.jl + +`method`: A `Symbol` or `String` specifying the method to use for +resampling. Defaults to `:near` (nearest neighbor resampling). See +[resampling +method](https://gdal.org/programs/gdalwarp.html#cmdoption-gdalwarp-r) +in the gdalwarp docs for a complete list of possible values. + +function warp(A::AbstractGeoArray, flags::Dict) + odims = otherdims(A, (X, Y, Band)) + if length(odims) > 0 + # Handle dimensions other than X, Y, Band + slices = slice(A, odims) + warped = map(A -> _warp(A, flags), slices) + return combine(warped, odims) + else + return _warp(A, flags) + end +end +warp(st::AbstractGeoStack, flags::Dict) = map(A -> warp(A, flags), st) + +function _warp(A::AbstractGeoArray, flags::Dict) + flagvect = reduce([flags...]; init=[]) do acc, (key, val) + append!(acc, String[_asflag(key), _stringvect(val)...]) + end + AG.Dataset(A) do dataset + AG.gdalwarp([dataset], flagvect) do warped + _maybe_permute_from_gdal(read(GeoArray(warped)), dims(A)) + end + end +end + +""" From baf395edee3787d5f1fc65d472db4c66007a17de Mon Sep 17 00:00:00 2001 From: michael catchen Date: Tue, 26 Oct 2021 14:54:29 -0400 Subject: [PATCH 04/22] :construction: using @require and basic functionality --- Project.toml | 1 - src/SimpleSDMLayers.jl | 13 ++-- .../MultivariateStats.jl} | 48 ++++++------- src/operations/resample.jl | 68 ------------------- test/Project.toml | 2 + test/extensions/multivariatestats.jl | 11 +++ test/runtests.jl | 1 + 7 files changed, 43 insertions(+), 101 deletions(-) rename src/{operations/transform.jl => integrations/MultivariateStats.jl} (59%) delete mode 100644 src/operations/resample.jl create mode 100644 test/extensions/multivariatestats.jl diff --git a/Project.toml b/Project.toml index 68e59cfc..0bbe02fa 100644 --- a/Project.toml +++ b/Project.toml @@ -10,7 +10,6 @@ Colors = "5ae59095-9a9b-59fe-a467-6f913c188581" Distances = "b4f34e82-e78d-54a5-968a-f98e89d6e8f7" Downloads = "f43a241f-c20a-4ad4-852c-f6b1247861c6" GeometryBasics = "5c1252a2-5f33-56bf-86c9-59e7332b4326" -MultivariateStats = "6f286f6a-111f-5878-ab1e-185364afe411" PolygonOps = "647866c9-e3ac-4575-94e7-e3d426903924" RecipesBase = "3cdcf5f2-1ef4-517c-9805-6587b60abb01" Requires = "ae029012-a4dd-5104-9daa-d747884805df" diff --git a/src/SimpleSDMLayers.jl b/src/SimpleSDMLayers.jl index 9832f0c0..690d606d 100644 --- a/src/SimpleSDMLayers.jl +++ b/src/SimpleSDMLayers.jl @@ -13,9 +13,6 @@ export Point, Polygon using PolygonOps using StatsBase -using MultivariateStats -using MultivariateStats: PCA, PPCA, KernelPCA, Whitening - # Basic types for the package include(joinpath("lib", "types.jl")) export SimpleSDMLayer, SimpleSDMResponse, SimpleSDMPredictor @@ -57,7 +54,7 @@ for s in instances(CMIP6) end for s in instances(RepresentativeConcentrationPathway) @eval export $(Symbol(s)) -end +end for s in instances(SharedSocioeconomicPathway) @eval export $(Symbol(s)) end @@ -88,8 +85,7 @@ include(joinpath("operations", "sliding.jl")) include(joinpath("operations", "mask.jl")) include(joinpath("operations", "rescale.jl")) include(joinpath("operations", "mosaic.jl")) -include(joinpath("operations", "transform.jl")) -export coarsen, slidingwindow, mask, rescale!, rescale, mosaic, fit, transform, transform! +export coarsen, slidingwindow, mask, rescale!, rescale, mosaic include(joinpath("recipes", "recipes.jl")) export bivariate @@ -112,6 +108,11 @@ function __init__() include("integrations/DataFrames.jl") end + @require MultivariateStats = "6f286f6a-111f-5878-ab1e-185364afe411" begin + @info "Loading MultivariateStats support for SimpleSDMLayers.jl" + include("integrations/MultivariateStats.jl") + end + end end # module diff --git a/src/operations/transform.jl b/src/integrations/MultivariateStats.jl similarity index 59% rename from src/operations/transform.jl rename to src/integrations/MultivariateStats.jl index ee0d53b3..d783d881 100644 --- a/src/operations/transform.jl +++ b/src/integrations/MultivariateStats.jl @@ -1,23 +1,18 @@ -""" - This file contains interfaces to MVStats for - taking many SDMLayers and embedding their values - in a lower dimensional space to create a smaller - number of SDMLayers - -""" - -function fit(PCA, layers::Vector{T}) where T <: SimpleSDMLayer +# WARNING this file is only loaded if MultivariateStats.jl is also active +# This all happens thanks to the Requires.jl package +function MultivariateStats.fit(PCA, layers::Vector{T}) where T <: SimpleSDMLayer + _make_pca_layers(layers) end function transform(W, layers::Vector{T}) where T<: SimpleSDMLayer end -function transform!(W, layers::Vector{T}) where T<: SimpleSDMLayer end +function transform!(W, layers::Vector{T}) where T<: SimpleSDMLayer end -function make_pca_input(layers) +function _make_pca_input(layers) numdims = length(layers) numsites = prod(size(layers[begin])) pcainput = zeros(numdims, numsites) - + for i in eachindex(layers[1]) for layernum in 1:length(layers) if !isnothing(layers[layernum][i]) @@ -29,30 +24,31 @@ function make_pca_input(layers) pcainput end -function make_pca_layers(layers) - pcainput = make_pca_input(layers) +function _make_pca_layers(layers) + pcainput = _make_pca_input(layers) numdims, numsites = length(layers), prod(size(layers[begin])) - pca = fit(PCA, pcainput, pratio=0.995) - A = projection(pca) - - transformedvals = zeros(outdim(pca), numsites) + pca = MultivariateStats.fit(MultivariateStats.PCA, pcainput, pratio=0.995) + A = MultivariateStats.projection(pca) + + outdim = MultivariateStats.outdim(pca) + + transformedvals = zeros(outdim, numsites) for loc in 1:numsites transformedvals[:,loc] = A' * vec(pcainput[:,loc]) end - - - ### TODO this has to be a similar SDMlayer to retain coordinates - newlayers = [zeros(Float32, size(layers[begin])) for l in 1:outdim(pca)] - + + + newlayers = [zeros(Float32, size(layers[begin])) for l in 1:outdim] + for loc in 1:numsites - for layer in 1:outdim(pca) + for layer in 1:outdim newlayers[layer][loc] = transformedvals[layer,loc] end end - - + + newlayers = SimpleSDMPredictor.(newlayers, boundingbox(layers[begin])...) newlayers = map(f -> rescale(f, (0,1)), newlayers) end diff --git a/src/operations/resample.jl b/src/operations/resample.jl deleted file mode 100644 index 1fcc3e3d..00000000 --- a/src/operations/resample.jl +++ /dev/null @@ -1,68 +0,0 @@ -#= - -The code in this file is adaoted from GeoData.jl -(https://github.com/rafaqz/GeoData.jl), see this issue -(https://github.com/EcoJulia/SimpleSDMLayers.jl/issues/131) - -See the following GeoData.jl license (MIT) - ----------------------------------------------------------------------- -Copyright (c) 2019 Rafael Schouten and Michael Krabbe Borregaard - -Permission is hereby granted, free of charge, to any person obtaining -a copy of this software and associated documentation files (the -"Software"), to deal in the Software without restriction, including -without limitation the rights to use, copy, modify, merge, publish, -distribute, sublicense, and/or sell copies of the Software, and to -permit persons to whom the Software is furnished to do so, subject to -the following conditions: - -The above copyright notice and this permission notice shall be -included in all copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, -EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF -MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. -IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY -CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, -TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE -SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. ----------------------------------------------------------------------- -=# - -function resample(A::T) end - -""" -Code from GeoData.jl - -`method`: A `Symbol` or `String` specifying the method to use for -resampling. Defaults to `:near` (nearest neighbor resampling). See -[resampling -method](https://gdal.org/programs/gdalwarp.html#cmdoption-gdalwarp-r) -in the gdalwarp docs for a complete list of possible values. - -function warp(A::AbstractGeoArray, flags::Dict) - odims = otherdims(A, (X, Y, Band)) - if length(odims) > 0 - # Handle dimensions other than X, Y, Band - slices = slice(A, odims) - warped = map(A -> _warp(A, flags), slices) - return combine(warped, odims) - else - return _warp(A, flags) - end -end -warp(st::AbstractGeoStack, flags::Dict) = map(A -> warp(A, flags), st) - -function _warp(A::AbstractGeoArray, flags::Dict) - flagvect = reduce([flags...]; init=[]) do acc, (key, val) - append!(acc, String[_asflag(key), _stringvect(val)...]) - end - AG.Dataset(A) do dataset - AG.gdalwarp([dataset], flagvect) do warped - _maybe_permute_from_gdal(read(GeoArray(warped)), dims(A)) - end - end -end - -""" diff --git a/test/Project.toml b/test/Project.toml index 26508182..aebd904b 100644 --- a/test/Project.toml +++ b/test/Project.toml @@ -1,5 +1,7 @@ [deps] GBIF = "ee291a33-5a6c-5552-a3c8-0f29a1181037" +MultivariateStats = "6f286f6a-111f-5878-ab1e-185364afe411" +NeutralLandscapes = "71847384-8354-4223-ac08-659a5128069f" Plots = "91a5bcdd-55d7-5caf-9e0b-520d859cae80" Statistics = "10745b16-79ce-11e8-11f9-7d13ad32a3b2" StatsPlots = "f3b207a7-027a-5e70-b257-86293d7955fd" diff --git a/test/extensions/multivariatestats.jl b/test/extensions/multivariatestats.jl new file mode 100644 index 00000000..bc970757 --- /dev/null +++ b/test/extensions/multivariatestats.jl @@ -0,0 +1,11 @@ +module SSLTestMVStats + using SimpleSDMLayers + using MultivariateStats + using NeutralLandscapes + using Test + + layers = [SimpleSDMPredictor(rand(MidpointDisplacement(0.5), (50,50))) for i in 1:30] + + @assert typeof(fit(PCA, layers)) <: Vector{T} where T<:SimpleSDMLayer + +end diff --git a/test/runtests.jl b/test/runtests.jl index 7a263d92..5b98c15e 100644 --- a/test/runtests.jl +++ b/test/runtests.jl @@ -24,6 +24,7 @@ tests = [ "chelsa" => "data/chelsa.jl", "plotting" => "extensions/plots.jl", "GBIF" => "extensions/gbif.jl", + "MultivariateStats" => "extensions/multivariatestats.jl" ] for test in tests From e7af41f11d9e15cc4efd8cde2903ec1357748559 Mon Sep 17 00:00:00 2001 From: michael catchen Date: Tue, 26 Oct 2021 16:10:11 -0400 Subject: [PATCH 05/22] :construction_worker: mostly there --- src/integrations/MultivariateStats.jl | 58 ++++++++++++--------------- test/extensions/multivariatestats.jl | 9 ++++- test/runtests.jl | 2 +- 3 files changed, 33 insertions(+), 36 deletions(-) diff --git a/src/integrations/MultivariateStats.jl b/src/integrations/MultivariateStats.jl index d783d881..662601f3 100644 --- a/src/integrations/MultivariateStats.jl +++ b/src/integrations/MultivariateStats.jl @@ -1,54 +1,46 @@ # WARNING this file is only loaded if MultivariateStats.jl is also active # This all happens thanks to the Requires.jl package -function MultivariateStats.fit(PCA, layers::Vector{T}) where T <: SimpleSDMLayer - _make_pca_layers(layers) +""" + MultivariateStats.fit(a, layers::Vector{T}, kwargs...) where T <: SimpleSDMLayer +""" +function MultivariateStats.fit(a, layers::Vector{T}, kwargs...) where T <: SimpleSDMLayer + input = _make_input(layers) + pca = MultivariateStats.fit(a, input, kwargs...) + return pca end -function transform(W, layers::Vector{T}) where T<: SimpleSDMLayer end -function transform!(W, layers::Vector{T}) where T<: SimpleSDMLayer end - -function _make_pca_input(layers) - numdims = length(layers) - numsites = prod(size(layers[begin])) - pcainput = zeros(numdims, numsites) - - for i in eachindex(layers[1]) - for layernum in 1:length(layers) - if !isnothing(layers[layernum][i]) - pcainput[layernum,i] = layers[layernum][i] - else - end - end - end - pcainput -end - -function _make_pca_layers(layers) - pcainput = _make_pca_input(layers) - numdims, numsites = length(layers), prod(size(layers[begin])) - - pca = MultivariateStats.fit(MultivariateStats.PCA, pcainput, pratio=0.995) - A = MultivariateStats.projection(pca) - - outdim = MultivariateStats.outdim(pca) +""" + # +""" +function MultivariateStats.transform(proj::P, layers::Vector{T}) where {P,T<:SimpleSDMLayer} + input = _make_input(layers) + numsites = length(layers) + A = MultivariateStats.projection(proj) + outdim = MultivariateStats.outdim(proj) transformedvals = zeros(outdim, numsites) for loc in 1:numsites - transformedvals[:,loc] = A' * vec(pcainput[:,loc]) + transformedvals[:,loc] = A' * vec(input[:,loc]) end - newlayers = [zeros(Float32, size(layers[begin])) for l in 1:outdim] - for loc in 1:numsites for layer in 1:outdim newlayers[layer][loc] = transformedvals[layer,loc] end end - newlayers = SimpleSDMPredictor.(newlayers, boundingbox(layers[begin])...) newlayers = map(f -> rescale(f, (0,1)), newlayers) + + newlayers end + +function _make_input(layers) + common_keys = reduce(∩, keys.(layers)) + x = hcat([layer[common_keys] for layer in layers]...); + x +end + diff --git a/test/extensions/multivariatestats.jl b/test/extensions/multivariatestats.jl index bc970757..2fc05ba7 100644 --- a/test/extensions/multivariatestats.jl +++ b/test/extensions/multivariatestats.jl @@ -4,8 +4,13 @@ module SSLTestMVStats using NeutralLandscapes using Test - layers = [SimpleSDMPredictor(rand(MidpointDisplacement(0.5), (50,50))) for i in 1:30] + layers = [SimpleSDMPredictor(rand(MidpointDisplacement(0.9), (50,50))) for i in 1:30] - @assert typeof(fit(PCA, layers)) <: Vector{T} where T<:SimpleSDMLayer + + @assert typeof(transform(fit(PCA, layers),layers)) <: Vector{T} where T<:SimpleSDMLayer + @assert typeof(transform(fit(PPCA, layers),layers)) <: Vector{T} where T<:SimpleSDMLayer + # @assert typeof(transform(fit(KernelPCA, layers),layers)) <: Vector{T} where T<:SimpleSDMLayer + + # @assert typeof(transform(fit(Whitening, layers),layers)) <: Vector{T} where T<:SimpleSDMLayer end diff --git a/test/runtests.jl b/test/runtests.jl index 5b98c15e..5b05ce2d 100644 --- a/test/runtests.jl +++ b/test/runtests.jl @@ -4,6 +4,7 @@ using Test global anyerrors = false tests = [ + "MultivariateStats" => "extensions/multivariatestats.jl", "construction" => "core/construction.jl", "lat./lon." => "core/latlon.jl", "lat./lon. conversion" => "core/coordconvert.jl", @@ -24,7 +25,6 @@ tests = [ "chelsa" => "data/chelsa.jl", "plotting" => "extensions/plots.jl", "GBIF" => "extensions/gbif.jl", - "MultivariateStats" => "extensions/multivariatestats.jl" ] for test in tests From 5930730edb55b9ec15a78de9ac271dea4623bc5a Mon Sep 17 00:00:00 2001 From: michael catchen Date: Thu, 28 Oct 2021 16:38:42 -0400 Subject: [PATCH 06/22] :pencil: docs and blue style --- src/integrations/MultivariateStats.jl | 77 ++++++++++++++++----------- test/extensions/multivariatestats.jl | 12 +++-- 2 files changed, 54 insertions(+), 35 deletions(-) diff --git a/src/integrations/MultivariateStats.jl b/src/integrations/MultivariateStats.jl index 662601f3..0c897f54 100644 --- a/src/integrations/MultivariateStats.jl +++ b/src/integrations/MultivariateStats.jl @@ -3,44 +3,59 @@ """ MultivariateStats.fit(a, layers::Vector{T}, kwargs...) where T <: SimpleSDMLayer + + Overloads the `fit` function from `MultivariateStats.jl`. """ -function MultivariateStats.fit(a, layers::Vector{T}, kwargs...) where T <: SimpleSDMLayer - input = _make_input(layers) - pca = MultivariateStats.fit(a, input, kwargs...) - return pca +function MultivariateStats.fit( + a::PT, + layers::Vector{T}, + kwargs..., +) where { + T<:SimpleSDMLayer, + PT<:Union{Type{MultivariateStats.PCA},Type{MultivariateStats.PPCA}}, +} + input = _make_input(layers) + pca = MultivariateStats.fit(a, input, kwargs...) + return pca end """ - # + MultivariateStats.transform(proj::P, layers::Vector{T}) where {P,T<:SimpleSDMLayer} + + Overload of the `transform` function from `MultivariateStats.jl`. """ function MultivariateStats.transform(proj::P, layers::Vector{T}) where {P,T<:SimpleSDMLayer} - input = _make_input(layers) - numsites = length(layers) - - A = MultivariateStats.projection(proj) - outdim = MultivariateStats.outdim(proj) - transformedvals = zeros(outdim, numsites) - - for loc in 1:numsites - transformedvals[:,loc] = A' * vec(input[:,loc]) - end - - newlayers = [zeros(Float32, size(layers[begin])) for l in 1:outdim] - for loc in 1:numsites - for layer in 1:outdim - newlayers[layer][loc] = transformedvals[layer,loc] - end - end - - newlayers = SimpleSDMPredictor.(newlayers, boundingbox(layers[begin])...) - newlayers = map(f -> rescale(f, (0,1)), newlayers) - - newlayers + input = _make_input(layers) + numsites = length(layers) + + A = MultivariateStats.projection(proj) + outdim = MultivariateStats.outdim(proj) + transformedvals = zeros(outdim, numsites) + + for loc = 1:numsites + transformedvals[:, loc] = A' * vec(input[:, loc]) + end + + newlayers = [zeros(Float32, size(layers[begin])) for l = 1:outdim] + for loc = 1:numsites + for layer = 1:outdim + newlayers[layer][loc] = transformedvals[layer, loc] + end + end + + newlayers = SimpleSDMPredictor.(newlayers, boundingbox(layers[begin])...) + return map(f -> rescale(f, (0, 1)), newlayers) end + +""" + _make_input(layers) + + Creates a matrix for input to PCA. + The resulting matrix has columns for each shared location in + the vector of layers, where the value of each column is the +""" function _make_input(layers) - common_keys = reduce(∩, keys.(layers)) - x = hcat([layer[common_keys] for layer in layers]...); - x + common_keys = reduce(∩, keys.(layers)) + return hcat([layer[common_keys] for layer in layers]...) end - diff --git a/test/extensions/multivariatestats.jl b/test/extensions/multivariatestats.jl index 2fc05ba7..063453f0 100644 --- a/test/extensions/multivariatestats.jl +++ b/test/extensions/multivariatestats.jl @@ -4,13 +4,17 @@ module SSLTestMVStats using NeutralLandscapes using Test - layers = [SimpleSDMPredictor(rand(MidpointDisplacement(0.9), (50,50))) for i in 1:30] + TEST_DIMS = (150,150) + TEST_AUTOCORRELATION = 0.9 + TEST_NUM_LAYERS = 10 + + layers = [SimpleSDMPredictor(rand(MidpointDisplacement(TEST_AUTOCORRELATION), TEST_DIMS...)) for i in 1:TEST_NUM_LAYERS] @assert typeof(transform(fit(PCA, layers),layers)) <: Vector{T} where T<:SimpleSDMLayer @assert typeof(transform(fit(PPCA, layers),layers)) <: Vector{T} where T<:SimpleSDMLayer - # @assert typeof(transform(fit(KernelPCA, layers),layers)) <: Vector{T} where T<:SimpleSDMLayer - - # @assert typeof(transform(fit(Whitening, layers),layers)) <: Vector{T} where T<:SimpleSDMLayer + +# @assert typeof(transform(fit(KernelPCA, layers),layers)) <: Vector{T} where T<:SimpleSDMLayer +# @assert typeof(transform(fit(Whitening, layers),layers)) <: Vector{T} where T<:SimpleSDMLayer end From 77b3510bd4e55545afb3462cdabdda29d3c0341c Mon Sep 17 00:00:00 2001 From: michael catchen Date: Thu, 28 Oct 2021 16:44:03 -0400 Subject: [PATCH 07/22] :adhesive_bandage: including _layers_are_compatible call --- src/integrations/MultivariateStats.jl | 1 + 1 file changed, 1 insertion(+) diff --git a/src/integrations/MultivariateStats.jl b/src/integrations/MultivariateStats.jl index 0c897f54..f319d540 100644 --- a/src/integrations/MultivariateStats.jl +++ b/src/integrations/MultivariateStats.jl @@ -14,6 +14,7 @@ function MultivariateStats.fit( T<:SimpleSDMLayer, PT<:Union{Type{MultivariateStats.PCA},Type{MultivariateStats.PPCA}}, } + _layers_are_compatible(layers) || return ArgumentError("layers not compatible") input = _make_input(layers) pca = MultivariateStats.fit(a, input, kwargs...) return pca From f09e364fd937519e35b89a1e6ae07fcbc715da0d Mon Sep 17 00:00:00 2001 From: michael catchen Date: Thu, 4 Nov 2021 17:16:41 -0400 Subject: [PATCH 08/22] removing type assert --- src/integrations/MultivariateStats.jl | 19 +++++++------------ test/extensions/multivariatestats.jl | 9 ++++++--- 2 files changed, 13 insertions(+), 15 deletions(-) diff --git a/src/integrations/MultivariateStats.jl b/src/integrations/MultivariateStats.jl index f319d540..23c7b032 100644 --- a/src/integrations/MultivariateStats.jl +++ b/src/integrations/MultivariateStats.jl @@ -6,24 +6,18 @@ Overloads the `fit` function from `MultivariateStats.jl`. """ -function MultivariateStats.fit( - a::PT, - layers::Vector{T}, - kwargs..., -) where { - T<:SimpleSDMLayer, - PT<:Union{Type{MultivariateStats.PCA},Type{MultivariateStats.PPCA}}, -} - _layers_are_compatible(layers) || return ArgumentError("layers not compatible") +function MultivariateStats.fit(a, layers::Vector{T}, kwargs...) where {T<:SimpleSDMLayer} + _layers_are_compatible(layers) || return ArgumentError("layers are not compatible") input = _make_input(layers) - pca = MultivariateStats.fit(a, input, kwargs...) - return pca + proj = MultivariateStats.fit(a, input, kwargs...) + return proj end """ MultivariateStats.transform(proj::P, layers::Vector{T}) where {P,T<:SimpleSDMLayer} - Overload of the `transform` function from `MultivariateStats.jl`. + Overload of the `transform` function from `MultivariateStats.jl`. Here `proj` is a + and output object from `MultivariateStats.fit` (see above). """ function MultivariateStats.transform(proj::P, layers::Vector{T}) where {P,T<:SimpleSDMLayer} input = _make_input(layers) @@ -55,6 +49,7 @@ end Creates a matrix for input to PCA. The resulting matrix has columns for each shared location in the vector of layers, where the value of each column is the + values of each of the `layers` input. """ function _make_input(layers) common_keys = reduce(∩, keys.(layers)) diff --git a/test/extensions/multivariatestats.jl b/test/extensions/multivariatestats.jl index 063453f0..0bfaaaac 100644 --- a/test/extensions/multivariatestats.jl +++ b/test/extensions/multivariatestats.jl @@ -11,10 +11,13 @@ module SSLTestMVStats layers = [SimpleSDMPredictor(rand(MidpointDisplacement(TEST_AUTOCORRELATION), TEST_DIMS...)) for i in 1:TEST_NUM_LAYERS] - @assert typeof(transform(fit(PCA, layers),layers)) <: Vector{T} where T<:SimpleSDMLayer - @assert typeof(transform(fit(PPCA, layers),layers)) <: Vector{T} where T<:SimpleSDMLayer - + @test typeof(transform(fit(PCA, layers),layers)) <: Vector{T} where T<:SimpleSDMLayer + @test typeof(transform(fit(PPCA, layers),layers)) <: Vector{T} where T<:SimpleSDMLayer + + + # @assert typeof(transform(fit(KernelPCA, layers),layers)) <: Vector{T} where T<:SimpleSDMLayer # @assert typeof(transform(fit(Whitening, layers),layers)) <: Vector{T} where T<:SimpleSDMLayer end + From d6d7b6b0030781fa075b53f9d44003b1e89ace7d Mon Sep 17 00:00:00 2001 From: michael catchen Date: Mon, 8 Nov 2021 14:40:29 -0500 Subject: [PATCH 09/22] :pencil: --- docs/make.jl | 3 ++- docs/src/examples/pca.jl | 16 ++++++++++++++++ 2 files changed, 18 insertions(+), 1 deletion(-) create mode 100644 docs/src/examples/pca.jl diff --git a/docs/make.jl b/docs/make.jl index c32f7084..9a3c1614 100644 --- a/docs/make.jl +++ b/docs/make.jl @@ -40,7 +40,8 @@ makedocs( "Geometry for clipping" => "examples/geometry.md", "Sliding window analysis" => "examples/slidingwindow.md", "Landcover data" => "examples/landcover.md", - "Multivariate mapping" => "examples/multivariate.md" + "Multivariate mapping" => "examples/multivariate.md", + "Multivariate statistics" => "examples/pca.md" ], "SDM case studies" => [ "GBIF integration" => "sdm/gbif.md", diff --git a/docs/src/examples/pca.jl b/docs/src/examples/pca.jl new file mode 100644 index 00000000..b35b845c --- /dev/null +++ b/docs/src/examples/pca.jl @@ -0,0 +1,16 @@ +# # Principal-component analysis of many `SDMLayers` + +# The `SimpleSDMLayers` enables integration with `MultivariateStats.jl` +# In this example, we will show how this can work. + +boundaries = (left=-12.0, right=30.0, bottom=36.0, top=72.0) + +layers = convert( + Float16, + SimpleSDMPredictor(WorldClim, HabitatHeterogeneity, 1:19; resolution=5, boundaries...), +) + +using MultivariateStats + +pca = fit(PCA, layers) +newlayers = transform(pca, layers) \ No newline at end of file From 53badf55c1ab003d38aa1513a91307a98397b36e Mon Sep 17 00:00:00 2001 From: michael catchen Date: Mon, 8 Nov 2021 15:15:30 -0500 Subject: [PATCH 10/22] :arrow_up: add mvstats to docs folder req --- docs/Project.toml | 1 + 1 file changed, 1 insertion(+) diff --git a/docs/Project.toml b/docs/Project.toml index d6862623..88dd2568 100644 --- a/docs/Project.toml +++ b/docs/Project.toml @@ -8,6 +8,7 @@ GLM = "38e38edf-8417-5370-95a0-9cbb8c7f171a" GeometryBasics = "5c1252a2-5f33-56bf-86c9-59e7332b4326" JSON = "682c06a0-de6a-54ab-a142-c8b1cf79cde6" Literate = "98b081ad-f1c9-55d3-8b20-4c87d4299306" +MultivariateStats = "6f286f6a-111f-5878-ab1e-185364afe411" Plots = "91a5bcdd-55d7-5caf-9e0b-520d859cae80" Statistics = "10745b16-79ce-11e8-11f9-7d13ad32a3b2" StatsBase = "2913bbd2-ae8a-5f71-8c99-4fb6c76f3a91" From 708ea8dbb66251e79d40cb2e16a46faecbf0c068 Mon Sep 17 00:00:00 2001 From: michael catchen Date: Mon, 8 Nov 2021 15:24:25 -0500 Subject: [PATCH 11/22] :pencil: doc deps update --- docs/src/examples/pca.jl | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/docs/src/examples/pca.jl b/docs/src/examples/pca.jl index b35b845c..8fe7ab0b 100644 --- a/docs/src/examples/pca.jl +++ b/docs/src/examples/pca.jl @@ -1,5 +1,9 @@ # # Principal-component analysis of many `SDMLayers` +using MultivariateStats +using Plots + + # The `SimpleSDMLayers` enables integration with `MultivariateStats.jl` # In this example, we will show how this can work. @@ -10,7 +14,8 @@ layers = convert( SimpleSDMPredictor(WorldClim, HabitatHeterogeneity, 1:19; resolution=5, boundaries...), ) -using MultivariateStats pca = fit(PCA, layers) -newlayers = transform(pca, layers) \ No newline at end of file +newlayers = transform(pca, layers) + +plot.(newlayers) \ No newline at end of file From 97bdfb73b735f52301b1f1ad022fba8775f7402b Mon Sep 17 00:00:00 2001 From: michael catchen Date: Mon, 8 Nov 2021 16:50:07 -0500 Subject: [PATCH 12/22] :bug: bug fix and hopefully doc fix? --- docs/src/examples/pca.jl | 12 ++++++++---- src/integrations/MultivariateStats.jl | 26 +++++++++----------------- 2 files changed, 17 insertions(+), 21 deletions(-) diff --git a/docs/src/examples/pca.jl b/docs/src/examples/pca.jl index 8fe7ab0b..c4f2c777 100644 --- a/docs/src/examples/pca.jl +++ b/docs/src/examples/pca.jl @@ -1,5 +1,6 @@ # # Principal-component analysis of many `SDMLayers` +using SimpleSDMLayers using MultivariateStats using Plots @@ -9,13 +10,16 @@ using Plots boundaries = (left=-12.0, right=30.0, bottom=36.0, top=72.0) -layers = convert( - Float16, - SimpleSDMPredictor(WorldClim, HabitatHeterogeneity, 1:19; resolution=5, boundaries...), +layers = convert.( + Float32, + SimpleSDMPredictor(WorldClim, BioClim, 1:19; boundaries...), ) +plot(layers[10]) pca = fit(PCA, layers) newlayers = transform(pca, layers) -plot.(newlayers) \ No newline at end of file + + +plot(plot.(newlayers, size=(300,300))...) diff --git a/src/integrations/MultivariateStats.jl b/src/integrations/MultivariateStats.jl index 23c7b032..fbd05878 100644 --- a/src/integrations/MultivariateStats.jl +++ b/src/integrations/MultivariateStats.jl @@ -9,7 +9,7 @@ function MultivariateStats.fit(a, layers::Vector{T}, kwargs...) where {T<:SimpleSDMLayer} _layers_are_compatible(layers) || return ArgumentError("layers are not compatible") input = _make_input(layers) - proj = MultivariateStats.fit(a, input, kwargs...) + proj = MultivariateStats.fit(a, input', kwargs...) return proj end @@ -20,26 +20,18 @@ end and output object from `MultivariateStats.fit` (see above). """ function MultivariateStats.transform(proj::P, layers::Vector{T}) where {P,T<:SimpleSDMLayer} - input = _make_input(layers) - numsites = length(layers) - A = MultivariateStats.projection(proj) - outdim = MultivariateStats.outdim(proj) - transformedvals = zeros(outdim, numsites) - - for loc = 1:numsites - transformedvals[:, loc] = A' * vec(input[:, loc]) - end + outdim = MultivariateStats.outdim(proj) + newlayers = [similar(layers[begin]) for i in 1:outdim] + common_keys = reduce(∩, keys.(layers)) - newlayers = [zeros(Float32, size(layers[begin])) for l = 1:outdim] - for loc = 1:numsites - for layer = 1:outdim - newlayers[layer][loc] = transformedvals[layer, loc] + for key in common_keys + pcaproj = A' * vcat([layer[key] for layer in layers]...) + for i in 1:outdim + newlayers[i][key] = pcaproj[i] end end - - newlayers = SimpleSDMPredictor.(newlayers, boundingbox(layers[begin])...) - return map(f -> rescale(f, (0, 1)), newlayers) + return newlayers end From 1eeb82250817bdf80dfb208a6c250635e277590c Mon Sep 17 00:00:00 2001 From: michael catchen Date: Mon, 8 Nov 2021 17:14:43 -0500 Subject: [PATCH 13/22] :pencil: docs should make a plot? --- docs/src/examples/pca.jl | 9 ++++----- src/integrations/MultivariateStats.jl | 16 +--------------- 2 files changed, 5 insertions(+), 20 deletions(-) diff --git a/docs/src/examples/pca.jl b/docs/src/examples/pca.jl index c4f2c777..95141713 100644 --- a/docs/src/examples/pca.jl +++ b/docs/src/examples/pca.jl @@ -3,23 +3,22 @@ using SimpleSDMLayers using MultivariateStats using Plots - +using Unitful # The `SimpleSDMLayers` enables integration with `MultivariateStats.jl` # In this example, we will show how this can work. -boundaries = (left=-12.0, right=30.0, bottom=36.0, top=72.0) +boundaries =(left = -164.022167, right = -55.250858, bottom = 23.547132, top = 72.105833) layers = convert.( Float32, SimpleSDMPredictor(WorldClim, BioClim, 1:19; boundaries...), ) -plot(layers[10]) pca = fit(PCA, layers) newlayers = transform(pca, layers) - -plot(plot.(newlayers, size=(300,300))...) +pcaplots = plot.(newlayers) +plot(pcaplots...) diff --git a/src/integrations/MultivariateStats.jl b/src/integrations/MultivariateStats.jl index fbd05878..6ed4b9c4 100644 --- a/src/integrations/MultivariateStats.jl +++ b/src/integrations/MultivariateStats.jl @@ -31,19 +31,5 @@ function MultivariateStats.transform(proj::P, layers::Vector{T}) where {P,T<:Sim newlayers[i][key] = pcaproj[i] end end - return newlayers -end - - -""" - _make_input(layers) - - Creates a matrix for input to PCA. - The resulting matrix has columns for each shared location in - the vector of layers, where the value of each column is the - values of each of the `layers` input. -""" -function _make_input(layers) - common_keys = reduce(∩, keys.(layers)) - return hcat([layer[common_keys] for layer in layers]...) + return map(f->rescale(f, (0,1)),newlayers) end From 45309e6dfa535e5cea61104e054781d51722e925 Mon Sep 17 00:00:00 2001 From: michael catchen Date: Mon, 8 Nov 2021 17:24:37 -0500 Subject: [PATCH 14/22] :pencil: remove extra unitful include --- docs/src/examples/pca.jl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/src/examples/pca.jl b/docs/src/examples/pca.jl index 95141713..8b0bcae3 100644 --- a/docs/src/examples/pca.jl +++ b/docs/src/examples/pca.jl @@ -3,7 +3,6 @@ using SimpleSDMLayers using MultivariateStats using Plots -using Unitful # The `SimpleSDMLayers` enables integration with `MultivariateStats.jl` # In this example, we will show how this can work. @@ -19,6 +18,7 @@ layers = convert.( pca = fit(PCA, layers) newlayers = transform(pca, layers) +# idk why this doesn't work pcaplots = plot.(newlayers) plot(pcaplots...) From a0e548df1cd21d427cd28b1a1421a92930773b3f Mon Sep 17 00:00:00 2001 From: michael catchen Date: Tue, 9 Nov 2021 09:20:41 -0500 Subject: [PATCH 15/22] :bug: missing common_keys call in fit --- docs/src/examples/pca.jl | 3 ++- src/integrations/MultivariateStats.jl | 14 ++++++++++---- 2 files changed, 12 insertions(+), 5 deletions(-) diff --git a/docs/src/examples/pca.jl b/docs/src/examples/pca.jl index 8b0bcae3..3dfdc824 100644 --- a/docs/src/examples/pca.jl +++ b/docs/src/examples/pca.jl @@ -14,11 +14,12 @@ layers = convert.( SimpleSDMPredictor(WorldClim, BioClim, 1:19; boundaries...), ) +# fit pca and project to a new set of layers pca = fit(PCA, layers) newlayers = transform(pca, layers) -# idk why this doesn't work +# plot them pcaplots = plot.(newlayers) plot(pcaplots...) diff --git a/src/integrations/MultivariateStats.jl b/src/integrations/MultivariateStats.jl index 6ed4b9c4..7f4cb96b 100644 --- a/src/integrations/MultivariateStats.jl +++ b/src/integrations/MultivariateStats.jl @@ -8,8 +8,11 @@ """ function MultivariateStats.fit(a, layers::Vector{T}, kwargs...) where {T<:SimpleSDMLayer} _layers_are_compatible(layers) || return ArgumentError("layers are not compatible") - input = _make_input(layers) - proj = MultivariateStats.fit(a, input', kwargs...) + common_keys = reduce(∩, keys.(layers)) + input = hcat([vcat([layer[key] for layer in layers]...) for key in common_keys]...) + + @show size(input) + proj = MultivariateStats.fit(a, input, kwargs...) return proj end @@ -25,11 +28,14 @@ function MultivariateStats.transform(proj::P, layers::Vector{T}) where {P,T<:Sim newlayers = [similar(layers[begin]) for i in 1:outdim] common_keys = reduce(∩, keys.(layers)) - for key in common_keys - pcaproj = A' * vcat([layer[key] for layer in layers]...) + input = hcat([vcat([layer[key] for layer in layers]...) for key in common_keys]...) + + for (ct,key) in enumerate(common_keys) + pcaproj = A' * input[:,ct] for i in 1:outdim newlayers[i][key] = pcaproj[i] end end return map(f->rescale(f, (0,1)),newlayers) end + From e1deed13b28e4760c4229852f21f44b22a694946 Mon Sep 17 00:00:00 2001 From: michael catchen Date: Tue, 9 Nov 2021 09:22:33 -0500 Subject: [PATCH 16/22] :pencil: extra @show call --- src/integrations/MultivariateStats.jl | 2 -- 1 file changed, 2 deletions(-) diff --git a/src/integrations/MultivariateStats.jl b/src/integrations/MultivariateStats.jl index 7f4cb96b..4c41dff5 100644 --- a/src/integrations/MultivariateStats.jl +++ b/src/integrations/MultivariateStats.jl @@ -10,8 +10,6 @@ function MultivariateStats.fit(a, layers::Vector{T}, kwargs...) where {T<:Simple _layers_are_compatible(layers) || return ArgumentError("layers are not compatible") common_keys = reduce(∩, keys.(layers)) input = hcat([vcat([layer[key] for layer in layers]...) for key in common_keys]...) - - @show size(input) proj = MultivariateStats.fit(a, input, kwargs...) return proj end From 932ef25c529bdf1de197e2551a0b450027845b9e Mon Sep 17 00:00:00 2001 From: michael catchen Date: Tue, 9 Nov 2021 15:32:58 -0500 Subject: [PATCH 17/22] :zap: whitening is back --- docs/src/examples/pca.jl | 36 ++++++++++++++++++++++--- src/integrations/MultivariateStats.jl | 38 ++++++++++++++++++++++++--- 2 files changed, 68 insertions(+), 6 deletions(-) diff --git a/docs/src/examples/pca.jl b/docs/src/examples/pca.jl index 3dfdc824..0ca5c3bc 100644 --- a/docs/src/examples/pca.jl +++ b/docs/src/examples/pca.jl @@ -1,4 +1,12 @@ -# # Principal-component analysis of many `SDMLayers` +# # MultivariateStats.jl integration + +# In this example we explore how you can use +# `MultivariateStats.jl` with `SimpleSDMLayers` +# to perform [principal-component-analysis (PCA)](wikiling todo) to +# reduce the dimensionality of a set of `SDMLayers`, or to remove the +# covariance between layers via data [whitening](linktodo). + +# ## Principal-component analysis of many `SDMLayers` using SimpleSDMLayers using MultivariateStats @@ -8,10 +16,9 @@ using Plots # In this example, we will show how this can work. boundaries =(left = -164.022167, right = -55.250858, bottom = 23.547132, top = 72.105833) - layers = convert.( Float32, - SimpleSDMPredictor(WorldClim, BioClim, 1:19; boundaries...), + SimpleSDMPredictor(WorldClim, BioClim, 1:19; boundaries...) ) # fit pca and project to a new set of layers @@ -23,3 +30,26 @@ newlayers = transform(pca, layers) pcaplots = plot.(newlayers) plot(pcaplots...) + + + +# ## Remove correlation between layers (`Whitening`) + +# Have to start with two layers that are reasonably correlated. + +wlayers = convert.( + Float32, + SimpleSDMPredictor(WorldClim, BioClim, 1:2; boundaries...) +) +plot(plot.(wlayers)...) + + +# Now we call methods just as in `MultivariateStats` + +w = fit(Whitening, wlayers) +newlayers = transform(w, wlayers) + + +# and plot the layers without covar + +plot(plot.(newlayers)...) \ No newline at end of file diff --git a/src/integrations/MultivariateStats.jl b/src/integrations/MultivariateStats.jl index 4c41dff5..04a89518 100644 --- a/src/integrations/MultivariateStats.jl +++ b/src/integrations/MultivariateStats.jl @@ -15,12 +15,18 @@ function MultivariateStats.fit(a, layers::Vector{T}, kwargs...) where {T<:Simple end """ - MultivariateStats.transform(proj::P, layers::Vector{T}) where {P,T<:SimpleSDMLayer} + MultivariateStats.transform( + proj::PT, + layers::Vector{V}, + kwargs...) where {PT<:Union{MultivariateStats.PCA, MultivariateStats.PPCA},V<:SimpleSDMLayer} Overload of the `transform` function from `MultivariateStats.jl`. Here `proj` is a and output object from `MultivariateStats.fit` (see above). """ -function MultivariateStats.transform(proj::P, layers::Vector{T}) where {P,T<:SimpleSDMLayer} +function MultivariateStats.transform( + proj::PT, + layers::Vector{V}, + kwargs...) where {PT<:Union{MultivariateStats.PCA, MultivariateStats.PPCA},V<:SimpleSDMLayer} A = MultivariateStats.projection(proj) outdim = MultivariateStats.outdim(proj) newlayers = [similar(layers[begin]) for i in 1:outdim] @@ -34,6 +40,32 @@ function MultivariateStats.transform(proj::P, layers::Vector{T}) where {P,T<:Sim newlayers[i][key] = pcaproj[i] end end - return map(f->rescale(f, (0,1)),newlayers) + return newlayers end + +""" + MultivariateStats.transform( + proj::Whitening{T}, + layers::Vector{V}, + kwargs...) where {T<:Real,V<:SimpleSDMLayer} + + Overload of the `transform` function from `MultivariateStats.jl`. Here `proj` is a + and output object from `MultivariateStats.fit` (see above). +""" +function MultivariateStats.transform( + proj::MultivariateStats.Whitening{T}, layers::Vector{V}, kwargs...) where {T<:Real,V<:SimpleSDMLayer} + outdim = MultivariateStats.outdim(proj) + newlayers = [similar(layers[begin]) for i in 1:outdim] + common_keys = reduce(∩, keys.(layers)) + + input = hcat([vcat([layer[key] for layer in layers]...) for key in common_keys]...) + + for (ct,key) in enumerate(common_keys) + pcaproj = MultivariateStats.transform(proj,input[:,ct]) + for i in 1:outdim + newlayers[i][key] = pcaproj[i] + end + end + return newlayers +end From 0ce730eefb349f6c701d119ed1c92fe8b354d501 Mon Sep 17 00:00:00 2001 From: michael catchen Date: Tue, 9 Nov 2021 17:32:44 -0500 Subject: [PATCH 18/22] :bulb: mv fix --- docs/src/examples/pca.jl | 5 ++- src/integrations/MultivariateStats.jl | 51 ++++++--------------------- 2 files changed, 13 insertions(+), 43 deletions(-) diff --git a/docs/src/examples/pca.jl b/docs/src/examples/pca.jl index 0ca5c3bc..b7dc86de 100644 --- a/docs/src/examples/pca.jl +++ b/docs/src/examples/pca.jl @@ -24,7 +24,7 @@ layers = convert.( # fit pca and project to a new set of layers pca = fit(PCA, layers) -newlayers = transform(pca, layers) +newlayers = SimpleSDMLayers.transform(pca, layers) # plot them @@ -32,7 +32,6 @@ pcaplots = plot.(newlayers) plot(pcaplots...) - # ## Remove correlation between layers (`Whitening`) # Have to start with two layers that are reasonably correlated. @@ -47,7 +46,7 @@ plot(plot.(wlayers)...) # Now we call methods just as in `MultivariateStats` w = fit(Whitening, wlayers) -newlayers = transform(w, wlayers) +newlayers = SimpleSDMLayers.transform(w, wlayers) # and plot the layers without covar diff --git a/src/integrations/MultivariateStats.jl b/src/integrations/MultivariateStats.jl index 04a89518..1ed602ec 100644 --- a/src/integrations/MultivariateStats.jl +++ b/src/integrations/MultivariateStats.jl @@ -15,55 +15,26 @@ function MultivariateStats.fit(a, layers::Vector{T}, kwargs...) where {T<:Simple end """ - MultivariateStats.transform( - proj::PT, - layers::Vector{V}, + transform(proj, layers::Vector{V}, kwargs...) where {PT<:Union{MultivariateStats.PCA, MultivariateStats.PPCA},V<:SimpleSDMLayer} Overload of the `transform` function from `MultivariateStats.jl`. Here `proj` is a and output object from `MultivariateStats.fit` (see above). """ -function MultivariateStats.transform( - proj::PT, - layers::Vector{V}, - kwargs...) where {PT<:Union{MultivariateStats.PCA, MultivariateStats.PPCA},V<:SimpleSDMLayer} - A = MultivariateStats.projection(proj) - outdim = MultivariateStats.outdim(proj) - newlayers = [similar(layers[begin]) for i in 1:outdim] - common_keys = reduce(∩, keys.(layers)) - - input = hcat([vcat([layer[key] for layer in layers]...) for key in common_keys]...) - - for (ct,key) in enumerate(common_keys) - pcaproj = A' * input[:,ct] - for i in 1:outdim - newlayers[i][key] = pcaproj[i] - end - end - return newlayers -end - - -""" - MultivariateStats.transform( - proj::Whitening{T}, - layers::Vector{V}, - kwargs...) where {T<:Real,V<:SimpleSDMLayer} - - Overload of the `transform` function from `MultivariateStats.jl`. Here `proj` is a - and output object from `MultivariateStats.fit` (see above). -""" -function MultivariateStats.transform( - proj::MultivariateStats.Whitening{T}, layers::Vector{V}, kwargs...) where {T<:Real,V<:SimpleSDMLayer} - outdim = MultivariateStats.outdim(proj) - newlayers = [similar(layers[begin]) for i in 1:outdim] +function transform( + proj, + layers::AbstractVecOrMat{U}, + kwargs..., +) where {T<:Union{MultivariateStats.Whitening,MultivariateStats.PCA},U<:SimpleSDMLayer} + outdim = MultivariateStats.outdim(proj) + newlayers = [similar(layers[begin]) for i = 1:outdim] common_keys = reduce(∩, keys.(layers)) input = hcat([vcat([layer[key] for layer in layers]...) for key in common_keys]...) - for (ct,key) in enumerate(common_keys) - pcaproj = MultivariateStats.transform(proj,input[:,ct]) - for i in 1:outdim + for (ct, key) in enumerate(common_keys) + pcaproj = MultivariateStats.transform(proj, input[:, ct]) + for i = 1:outdim newlayers[i][key] = pcaproj[i] end end From 138786a971191d5308b5682b45b6499ab6d0fd2f Mon Sep 17 00:00:00 2001 From: michael catchen Date: Tue, 9 Nov 2021 17:49:52 -0500 Subject: [PATCH 19/22] :bug: namespace issues, same as it ever was --- src/SimpleSDMLayers.jl | 1 + src/integrations/MultivariateStats.jl | 6 ++++-- test/extensions/multivariatestats.jl | 6 +++--- 3 files changed, 8 insertions(+), 5 deletions(-) diff --git a/src/SimpleSDMLayers.jl b/src/SimpleSDMLayers.jl index 690d606d..c4a0fc3c 100644 --- a/src/SimpleSDMLayers.jl +++ b/src/SimpleSDMLayers.jl @@ -111,6 +111,7 @@ function __init__() @require MultivariateStats = "6f286f6a-111f-5878-ab1e-185364afe411" begin @info "Loading MultivariateStats support for SimpleSDMLayers.jl" include("integrations/MultivariateStats.jl") + export transform end end diff --git a/src/integrations/MultivariateStats.jl b/src/integrations/MultivariateStats.jl index 1ed602ec..48076a0e 100644 --- a/src/integrations/MultivariateStats.jl +++ b/src/integrations/MultivariateStats.jl @@ -23,9 +23,9 @@ end """ function transform( proj, - layers::AbstractVecOrMat{U}, + layers::AbstractVecOrMat{U}; kwargs..., -) where {T<:Union{MultivariateStats.Whitening,MultivariateStats.PCA},U<:SimpleSDMLayer} +) where {U<:SimpleSDMLayer} outdim = MultivariateStats.outdim(proj) newlayers = [similar(layers[begin]) for i = 1:outdim] common_keys = reduce(∩, keys.(layers)) @@ -40,3 +40,5 @@ function transform( end return newlayers end + + diff --git a/test/extensions/multivariatestats.jl b/test/extensions/multivariatestats.jl index 0bfaaaac..2d8f5d4f 100644 --- a/test/extensions/multivariatestats.jl +++ b/test/extensions/multivariatestats.jl @@ -8,11 +8,11 @@ module SSLTestMVStats TEST_AUTOCORRELATION = 0.9 TEST_NUM_LAYERS = 10 - layers = [SimpleSDMPredictor(rand(MidpointDisplacement(TEST_AUTOCORRELATION), TEST_DIMS...)) for i in 1:TEST_NUM_LAYERS] + layers = [SimpleSDMResponse(rand(MidpointDisplacement(TEST_AUTOCORRELATION), TEST_DIMS...)) for i in 1:TEST_NUM_LAYERS] - @test typeof(transform(fit(PCA, layers),layers)) <: Vector{T} where T<:SimpleSDMLayer - @test typeof(transform(fit(PPCA, layers),layers)) <: Vector{T} where T<:SimpleSDMLayer + @test typeof(SimpleSDMLayers.transform(fit(PCA, layers),layers)) <: Vector{T} where T<:SimpleSDMLayer +# @test typeof(transform(fit(PPCA, layers),layers)) <: Vector{T} where T<:SimpleSDMLayer From 949f2059cfaf1d17e099bdd892d5619e39fbe9af Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Timoth=C3=A9e=20Poisot?= Date: Tue, 9 Nov 2021 20:25:24 -0500 Subject: [PATCH 20/22] =?UTF-8?q?=F0=9F=A5=B3=20v0.9.0=20in=20preparation?= =?UTF-8?q?=20for=20multivariate?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- Project.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Project.toml b/Project.toml index 0bbe02fa..e07a11b0 100644 --- a/Project.toml +++ b/Project.toml @@ -1,7 +1,7 @@ name = "SimpleSDMLayers" uuid = "2c645270-77db-11e9-22c3-0f302a89c64c" authors = ["Timothée Poisot ", "Gabriel Dansereau "] -version = "0.8.3" +version = "0.9.0" [deps] ArchGDAL = "c9ce4bd3-c3d5-55b8-8973-c0e20141b8c3" From 2afab997ffdcbdbf7ca14676c887e32a232518ef Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Timoth=C3=A9e=20Poisot?= Date: Tue, 9 Nov 2021 20:28:52 -0500 Subject: [PATCH 21/22] =?UTF-8?q?=F0=9F=A7=B9=20=20no=20need=20to=20call?= =?UTF-8?q?=20SSDML.transform?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- src/SimpleSDMLayers.jl | 3 +-- src/integrations/MultivariateStats.jl | 28 ++++++++++++------------ test/extensions/multivariatestats.jl | 31 +++++++++++++-------------- 3 files changed, 30 insertions(+), 32 deletions(-) diff --git a/src/SimpleSDMLayers.jl b/src/SimpleSDMLayers.jl index c4a0fc3c..7a98bf49 100644 --- a/src/SimpleSDMLayers.jl +++ b/src/SimpleSDMLayers.jl @@ -99,7 +99,7 @@ isdir(_layers_assets_path) || mkpath(_layers_assets_path) export clip function __init__() - @require GBIF="ee291a33-5a6c-5552-a3c8-0f29a1181037" begin + @require GBIF = "ee291a33-5a6c-5552-a3c8-0f29a1181037" begin @info "Loading GBIF support for SimpleSDMLayers.jl" include("integrations/GBIF.jl") end @@ -111,7 +111,6 @@ function __init__() @require MultivariateStats = "6f286f6a-111f-5878-ab1e-185364afe411" begin @info "Loading MultivariateStats support for SimpleSDMLayers.jl" include("integrations/MultivariateStats.jl") - export transform end end diff --git a/src/integrations/MultivariateStats.jl b/src/integrations/MultivariateStats.jl index 48076a0e..4d01c72c 100644 --- a/src/integrations/MultivariateStats.jl +++ b/src/integrations/MultivariateStats.jl @@ -1,16 +1,19 @@ # WARNING this file is only loaded if MultivariateStats.jl is also active # This all happens thanks to the Requires.jl package +_allowed_transforms = (MultivariateStats.PCA, MultivariateStats.PPCA, MultivariateStats.KernelPCA, MultivariateStats.Whitening) +AllowedMultivariateTransforms = Union{_allowed_transforms...} + """ MultivariateStats.fit(a, layers::Vector{T}, kwargs...) where T <: SimpleSDMLayer - Overloads the `fit` function from `MultivariateStats.jl`. +Overloads the `fit` function from `MultivariateStats.jl`. """ -function MultivariateStats.fit(a, layers::Vector{T}, kwargs...) where {T<:SimpleSDMLayer} +function MultivariateStats.fit(proj::Type{K}, layers::Vector{T}; kwargs...) where {T<:SimpleSDMLayer, K <: AllowedMultivariateTransforms} _layers_are_compatible(layers) || return ArgumentError("layers are not compatible") common_keys = reduce(∩, keys.(layers)) input = hcat([vcat([layer[key] for layer in layers]...) for key in common_keys]...) - proj = MultivariateStats.fit(a, input, kwargs...) + proj = MultivariateStats.fit(proj, input; kwargs...) return proj end @@ -18,27 +21,24 @@ end transform(proj, layers::Vector{V}, kwargs...) where {PT<:Union{MultivariateStats.PCA, MultivariateStats.PPCA},V<:SimpleSDMLayer} - Overload of the `transform` function from `MultivariateStats.jl`. Here `proj` is a - and output object from `MultivariateStats.fit` (see above). +Overload of the `transform` function from `MultivariateStats.jl`. Here `proj` is an output object from `MultivariateStats.fit` (see above). """ -function transform( - proj, - layers::AbstractVecOrMat{U}; - kwargs..., +function MultivariateStats.transform( + proj, layers::AbstractVecOrMat{U}; kwargs... ) where {U<:SimpleSDMLayer} - outdim = MultivariateStats.outdim(proj) - newlayers = [similar(layers[begin]) for i = 1:outdim] + _layers_are_compatible(layers) || return ArgumentError("layers are not compatible") + + newlayers = [similar(first(layers)) for i in 1:MultivariateStats.outdim(proj)] common_keys = reduce(∩, keys.(layers)) input = hcat([vcat([layer[key] for layer in layers]...) for key in common_keys]...) for (ct, key) in enumerate(common_keys) pcaproj = MultivariateStats.transform(proj, input[:, ct]) - for i = 1:outdim + for i in 1:length(newlayers) newlayers[i][key] = pcaproj[i] end end + return newlayers end - - diff --git a/test/extensions/multivariatestats.jl b/test/extensions/multivariatestats.jl index 2d8f5d4f..a7c61d6b 100644 --- a/test/extensions/multivariatestats.jl +++ b/test/extensions/multivariatestats.jl @@ -1,23 +1,22 @@ module SSLTestMVStats - using SimpleSDMLayers - using MultivariateStats - using NeutralLandscapes - using Test +using SimpleSDMLayers +using MultivariateStats +using NeutralLandscapes +using Test - TEST_DIMS = (150,150) - TEST_AUTOCORRELATION = 0.9 - TEST_NUM_LAYERS = 10 +TEST_DIMS = (150, 150) +TEST_AUTOCORRELATION = 0.9 +TEST_NUM_LAYERS = 10 - layers = [SimpleSDMResponse(rand(MidpointDisplacement(TEST_AUTOCORRELATION), TEST_DIMS...)) for i in 1:TEST_NUM_LAYERS] +layers = [ + SimpleSDMResponse(rand(MidpointDisplacement(TEST_AUTOCORRELATION), TEST_DIMS...)) for + i in 1:TEST_NUM_LAYERS +] +@test typeof(transform(fit(PCA, layers), layers)) <: Vector{T} where {T<:SimpleSDMLayer} +#@test typeof(transform(fit(PPCA, layers),layers)) <: Vector{T} where T<:SimpleSDMLayer - @test typeof(SimpleSDMLayers.transform(fit(PCA, layers),layers)) <: Vector{T} where T<:SimpleSDMLayer -# @test typeof(transform(fit(PPCA, layers),layers)) <: Vector{T} where T<:SimpleSDMLayer - - - -# @assert typeof(transform(fit(KernelPCA, layers),layers)) <: Vector{T} where T<:SimpleSDMLayer -# @assert typeof(transform(fit(Whitening, layers),layers)) <: Vector{T} where T<:SimpleSDMLayer +#@assert typeof(transform(fit(KernelPCA, layers),layers)) <: Vector{T} where T<:SimpleSDMLayer +#@assert typeof(transform(fit(Whitening, layers),layers)) <: Vector{T} where T<:SimpleSDMLayer end - From f2b8627bfc3112868a873d597525f92a3a286729 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Timoth=C3=A9e=20Poisot?= Date: Tue, 9 Nov 2021 21:40:02 -0500 Subject: [PATCH 22/22] =?UTF-8?q?=F0=9F=A7=91=E2=80=8D=F0=9F=8F=AB=20fix?= =?UTF-8?q?=20transform?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- docs/src/examples/pca.jl | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/docs/src/examples/pca.jl b/docs/src/examples/pca.jl index b7dc86de..6a3c21f3 100644 --- a/docs/src/examples/pca.jl +++ b/docs/src/examples/pca.jl @@ -24,7 +24,7 @@ layers = convert.( # fit pca and project to a new set of layers pca = fit(PCA, layers) -newlayers = SimpleSDMLayers.transform(pca, layers) +newlayers = transform(pca, layers) # plot them @@ -46,7 +46,7 @@ plot(plot.(wlayers)...) # Now we call methods just as in `MultivariateStats` w = fit(Whitening, wlayers) -newlayers = SimpleSDMLayers.transform(w, wlayers) +newlayers = transform(w, wlayers) # and plot the layers without covar