From 13f5ba9622e7f4e393784b8afcebdd2c5da8f050 Mon Sep 17 00:00:00 2001 From: Lionel Zoubritzky Date: Wed, 5 Jul 2023 22:46:15 +0200 Subject: [PATCH 1/8] Add InterpenetratedTopologyResult --- Project.toml | 6 +++--- src/query.jl | 25 ++++++++++++------------- src/types.jl | 43 ++++++++++++++++++++++++++----------------- test/cif/results/data | 1 + 4 files changed, 42 insertions(+), 33 deletions(-) create mode 100644 test/cif/results/data diff --git a/Project.toml b/Project.toml index 192aec8..db8b84b 100644 --- a/Project.toml +++ b/Project.toml @@ -1,7 +1,7 @@ name = "CrystalNets" uuid = "7952bbbe-a946-4118-bea0-081a0932faa9" authors = ["Lionel Zoubritzky lionel.zoubritzky@gmail.com"] -version = "0.3.6" +version = "0.4.0" [deps] ArgParse = "c7e460c6-2fb9-53a9-8c5b-16f535851c63" @@ -27,8 +27,8 @@ ArgParse = "1.1" Chemfiles = "0.10" Graphs = "1.3" PeriodicGraphEmbeddings = "0.2.2" -PeriodicGraphEquilibriumPlacement = "0.1, 0.2" -PeriodicGraphs = "0.8.1, 0.9" +PeriodicGraphEquilibriumPlacement = "0.2" +PeriodicGraphs = "0.10" Pkg = "1.5" ProgressMeter = "1.7" PrecompileTools = "1" diff --git a/src/query.jl b/src/query.jl index 566914c..ffcf37a 100644 --- a/src/query.jl +++ b/src/query.jl @@ -109,7 +109,7 @@ macro loop_group(ex) group = :(group.$D) newex = quote if !isempty(group) - currallowed = first(group)[2][1].options.dimensions + currallowed = first(group)[1][1].options.dimensions if isempty(currallowed) || $i in currallowed $(deepcopy(ex)) end @@ -130,8 +130,8 @@ Compute the topological genome of each subnet stored in `group`. Options must be passed directly within the subnets. 
""" function topological_genome(group::UnderlyingNets) - ret = Tuple{Vector{Int},TopologyResult}[] - @loop_group for (id, net) in group + ret = Tuple{TopologyResult,Int,Vector{Int}}[] + @loop_group for (net, nfold, id) in group encountered = Dict{PeriodicGraph,_Clustering}() subret = Vector{Tuple{_Clustering,Union{_Clustering,TopologicalGenome}}}(undef, length(net)) for (j, subnet) in enumerate(net) @@ -139,7 +139,7 @@ function topological_genome(group::UnderlyingNets) refclust = get!(encountered, subnet.pge.g, clust) subret[j] = (clust, refclust == clust ? topological_genome(subnet) : refclust) end - push!(ret, (id, TopologyResult(subret))) + push!(ret, (TopologyResult(subret), nfold, id)) end return ret end @@ -174,13 +174,12 @@ In the case where the structure is not made of interpenetrating nets, return the of the only net. """ function determine_topology(path, options::Options) - genomes::Vector{Tuple{Vector{Int},TopologyResult}} = + genomes::Vector{Tuple{TopologyResult, Int, Vector{Int}}} = topological_genome(UnderlyingNets(parse_chemfile(path, options))) - if length(genomes) == 1 - return genomes[1][2] + if length(genomes) == 0 + push!(genomes, (TopologyResult(""), 1, Int[])) end - length(genomes) == 0 && return TopologyResult("") - return genomes + InterpenetratedTopologyResult(genomes) end determine_topology(path; kwargs...) 
= determine_topology(path, Options(; kwargs...)) @@ -389,16 +388,16 @@ function determine_topology_dataset(path, save, autoclean, showprogress, options f = joinpath(path, file) # threadid() == 1 && @show f # to find infinite loops: the last one printed is probably running - genomes::Vector{Tuple{Vector{Int},TopologyResult}} = try + genomes::Vector{Tuple{TopologyResult,Int,Vector{Int}}} = try topological_genome(UnderlyingNets(parse_chemfile(f, options))) catch e (options.throw_error || isinterrupt(e)) && rethrow() - [(Int[], TopologyResult(string(e)))] + [(TopologyResult(string(e)), 1, Int[])] end if isempty(genomes) - push!(genomes, (Int[], TopologyResult(""))) + push!(genomes, (TopologyResult(""), 1, Int[])) end - for (j, (_, genome)) in enumerate(genomes) + for (j, (genome, _)) in enumerate(genomes) newname = length(genomes) == 1 ? file * '/' : file * '/' * string(j) open(joinpath(resultdir, string(threadid())), "a") do results io = IOContext(results, :compact => true) diff --git a/src/types.jl b/src/types.jl index 11bc8a7..ede4bd2 100644 --- a/src/types.jl +++ b/src/types.jl @@ -722,11 +722,11 @@ function separate_components(c::Crystal{T}) where T @ifwarn if haskey(dimensions, 0) @warn "Detected structure of dimension 0, possibly solvent residues. It will be ignored for topology computation." end - ret = (Tuple{Vector{Int},Crystal{T}}[], Tuple{Vector{Int},Crystal{T}}[], Tuple{Vector{Int},Crystal{T}}[]) + ret = (Tuple{Crystal{T},Int,Vector{Int}}[], Tuple{Crystal{T},Int,Vector{Int}}[], Tuple{Crystal{T},Int,Vector{Int}}[]) for i in 1:3 reti = ret[i] - for vmap in get(dimensions, i, Vector{Int}[]) - push!(reti, (vmap, c[vmap])) + for (vmap, nfold) in get(dimensions, i, Vector{Int}[]) + push!(reti, (c[vmap], nfold, vmap)) end end return ret @@ -807,13 +807,13 @@ end Grouping of the connected components of a structure according to their dimensionality. 
""" struct UnderlyingNets - D1::Vector{Tuple{Vector{Int},Vector{CrystalNet1D}}} - D2::Vector{Tuple{Vector{Int},Vector{CrystalNet2D}}} - D3::Vector{Tuple{Vector{Int},Vector{CrystalNet3D}}} + D1::Vector{Tuple{Vector{CrystalNet1D},Int,Vector{Int}}} + D2::Vector{Tuple{Vector{CrystalNet2D},Int,Vector{Int}}} + D3::Vector{Tuple{Vector{CrystalNet3D},Int,Vector{Int}}} end -UnderlyingNets() = UnderlyingNets(Tuple{Vector{Int},Vector{CrystalNet1D}}[], - Tuple{Vector{Int},Vector{CrystalNet2D}}[], - Tuple{Vector{Int},Vector{CrystalNet1D}}[], +UnderlyingNets() = UnderlyingNets(Tuple{Vector{CrystalNet1D},Int,Vector{Int}}[], + Tuple{Vector{CrystalNet2D},Int,Vector{Int}}[], + Tuple{Vector{CrystalNet1D},Int,Vector{Int}}[], ) function _repeatgroups!(ex, i) @@ -856,16 +856,16 @@ function UnderlyingNets(c::Crystal) if all(isempty, components) vmap = collect(1:length(c.types)) nets = [CrystalNet3D(c.pge.cell, Options(c.options; clusterings=[clust])) for clust in c.options.clusterings] - push!(groups.D3, (vmap, nets)) + push!(groups.D3, (nets, 1, vmap)) return groups end @repeatgroups begin - for (i, (vmap, comp)) in enumerate(components[D]) + for (i, (comp, nfold, vmap)) in enumerate(components[D]) component = Crystal(comp.pge, comp.types, comp.clusters, Options(comp.options; name=string(comp.options.name,'_',i))) crystals = collapse_clusters(component) nets = collect_nets(crystals, Val(D)) - push!(groups, (vmap, nets)) + push!(groups, (nets, nfold, vmap)) end end return groups @@ -879,19 +879,19 @@ function CrystalNet(c::Crystal) if D == 3 length(group.D3) > 1 && __throw_interpenetrating(D) (isempty(group.D1) && isempty(group.D2)) || __warn_nonunique(D) - _D3 = last(first(group.D3)) + _D3 = first(first(group.D3)) length(_D3) > 1 && __throw_multiplenets(D) return first(_D3) elseif D == 2 length(group.D2) > 1 && __throw_interpenetrating(D) isempty(group.D2) || __warn_nonunique(D) - _D2 = last(first(group.D2)) + _D2 = first(first(group.D2)) length(_D2) > 1 && __throw_multiplenets(D) return 
first(_D2) end @toggleassert D == 1 length(group.D1) > 1 && __throw_interpenetrating(D) - _D1 = last(first(group.D1)) + _D1 = first(first(group.D1)) length(_D1) > 1 && __throw_multiplenets(D) return first(_D1) end @@ -945,12 +945,12 @@ function UnderlyingNets(g::SmallPseudoGraph, options::Options) end cell = Cell() @repeatgroups begin - for vmap in get(dimensions, D, Vector{Int}[]) + for (vmap, nfold) in get(dimensions, D, Vector{Int}[]) nets = PeriodicGraph{D}(graph[vmap]) n = nv(nets) types = fill(Symbol(""), n) opts = rev_permute_mapping(options, vmap, n) - push!(groups, (vmap, CrystalNet{D}[CrystalNet{D}(cell, types, nets, opts)])) + push!(groups, (CrystalNet{D}[CrystalNet{D}(cell, types, nets, opts)], nfold, vmap)) end end return groups @@ -1068,6 +1068,8 @@ TopologyResult julia> parse(TopologyResult, repr(topologies)) == topologies true ``` + +See also [`TopologicalGenome`](@ref) and [`InterpenetratedTopologyResult`](@ref). """ struct TopologyResult results::SizedVector{8,TopologicalGenome,Vector{TopologicalGenome}} @@ -1274,3 +1276,10 @@ function Base.parse(::Type{TopologyResult}, s::AbstractString) end return TopologyResult(ret) end + + +struct InterpenetratedTopologyResult <: AbstractVector{Tuple{TopologyResult,Int}} + data::Vector{Tuple{TopologyResult,Int,Vector{Int}}} +end +Base.size(x::InterpenetratedTopologyResult) = (length(x.data),) +Base.getindex(x::InterpenetratedTopologyResult, i) = (y = x.data[i]; (y[1], y[2])) \ No newline at end of file diff --git a/test/cif/results/data b/test/cif/results/data new file mode 100644 index 0000000..7f39eee --- /dev/null +++ b/test/cif/results/data @@ -0,0 +1 @@ +/LionelSSDext4/liozou/.julia/dev/CrystalNets/test/cif/MOFs From e9fb77813ca545a50794907649ed7489865e9f15 Mon Sep 17 00:00:00 2001 From: Lionel Zoubritzky Date: Mon, 10 Jul 2023 10:02:55 +0200 Subject: [PATCH 2/8] Integrate InterpenetratedTopologyResult --- docs/src/faq.md | 29 ++++++--- docs/src/index.md | 22 +++---- docs/src/lib/internals.md | 8 +++ 
docs/src/lib/public.md | 6 +- docs/src/python.md | 10 ++- docs/src/visualization.md | 2 +- src/CrystalNets.jl | 7 +- src/archive.jl | 4 +- src/executable.jl | 30 +++++---- src/options.jl | 2 +- src/precompile.jl | 1 - src/query.jl | 47 +++++++------- src/types.jl | 131 +++++++++++++++++++++++++++++++++++++- test/runtests.jl | 62 ++++++++++-------- 14 files changed, 256 insertions(+), 105 deletions(-) diff --git a/docs/src/faq.md b/docs/src/faq.md index c74bc79..e3b22ac 100644 --- a/docs/src/faq.md +++ b/docs/src/faq.md @@ -101,13 +101,9 @@ These functions expect the path of a directory containing CIF files within (poss ## How can I directly access the genome of my structure instead of its name? -The result of [`determine_topology`](@ref) is either a [`TopologicalGenome`](@ref) or a -`Vector{Tuple{Vector{Int},TopologyResult}}`, depending on whether the input -contains multiple interpenetrating subnets or not. In the second case, extract the relevant -[`TopologyResult`](@ref). +The result `x` of [`determine_topology`](@ref) is an [`InterpenetratedTopologyResult`](@ref). Its `length` gives the number of interpenetrated substructures. Each of its values, for instance `x[1]`, is a tuple `(topo, n)` meaning that the substructure is an `n`-fold catenated net of topology `topo`. `topo` itself is a [`TopologyResult`](@ref), which stores the result of a topology computation for possibly several clusterings. The [`TopologicalGenome`](@ref) associated to a given clustering can be extracted by indexing the [`TopologyResult`](@ref), for instance `t = topo[Clustering.SingleNodes]` (or simply `t = topo[:SingleNodes]`). -A [`TopologyResult`](@ref) can store the result for different clustering options, so the -topological genome should be chosen by extracting the relevant result. 
For example: +For example: ```jldoctest im19faq julia> path_to_im19 = joinpath(dirname(dirname(pathof(CrystalNets))), "test", "cif", "IM-19.cif"); @@ -117,9 +113,24 @@ AllNodes: rna SingleNodes: bpq julia> typeof(result) +InterpenetratedTopologyResult + +julia> length(result) +1 + +julia> topo, n = only(result); + +julia> n # catenation multiplicity +1 + +julia> topo +AllNodes: rna +SingleNodes: bpq + +julia> typeof(topo) TopologyResult -julia> genome_allnodes = result[Clustering.AllNodes] +julia> genome_allnodes = topo[Clustering.AllNodes] rna julia> typeof(genome_allnodes) @@ -127,7 +138,7 @@ TopologicalGenome ``` In case where all clusterings lead to the same genome, it can simply be accessed -by calling `first(result)`. +by calling `first(topo)`. Having obtained a [`TopologicalGenome`](@ref), the topological genome itself can accessed by converting it to a `PeriodicGraph`: @@ -137,6 +148,8 @@ julia> genome = PeriodicGraph(genome_allnodes) PeriodicGraph3D(6, PeriodicEdge3D[(1, 2, (0,0,0)), (1, 3, (0,0,0)), (1, 4, (0,0,0)), (1, 4, (0,0,1)), (1, 5, (0,0,0)), (1, 6, (0,0,0)), (2, 4, (0,0,1)), (2, 6, (-1,0,0)), (3, 4, (0,0,1)), (3, 5, (0,-1,0)), (4, 5, (0,0,0)), (4, 6, (0,0,0))]) ``` +In case of error during topology identification, the returned `genome` is a `PeriodicGraph{0}`. + The string representation of the genome is simply `string(genome)`: ``` im19faq diff --git a/docs/src/index.md b/docs/src/index.md index bfe7204..4fdf70c 100644 --- a/docs/src/index.md +++ b/docs/src/index.md @@ -66,25 +66,19 @@ julia> determine_topology("/path/to/unstable/net.cif") unstable 1 1 1 1 1 2 0 2 2 1 ``` -In both known and unknown cases, the result is a [`TopologyResult`](@ref). +In both known and unknown cases, the result is an [`InterpenetratedTopologyResult`](@ref). 
#### Interpenetrating substructures -If the file contains multiple interpenetrating substructures, the result is a -`Vector{Tuple{Vector{Int}, TopologyResult}}`, where each entry is a tuple -`(vmap, result)` with: +If the file contains multiple interpenetrating substructures, each substructure and its catenation multiplicity can be extracted from the [`InterpenetratedTopologyResult`](@ref). -- `vmap`: the list of vertices of the initial graph that were kept for this substructure. - The initial graph is the one exported in .vtf as `input`. See also - [`parse_chemfile`](@ref) and [`CrystalNets.Crystal`](@ref) for manipulations on the initial graph. -- `result`: the [`TopologyResult`](@ref) for this substructure. For example: ```julia -julia> determine_topology("/path/to/intertwinned/structures.cif") -2-element Vector{Tuple{Vector{Int64}, TopologyResult}}: - ([2, 3, 4, 6], pcu) - ([1, 5, 7, 8], srs) +julia> x = determine_topology("/path/to/intertwinned/structures.cif") +2 interpenetrated subnets: +⋅ Subnet 1 → pcu +⋅ Subnet 2 → srs ``` #### Using options @@ -134,5 +128,7 @@ dia Run `CrystalNets --help` for the list of options available to the executable. !!! tip - In terms of performance, the compiled executable is the best option if you only want to identify a few structures from time to time. For intensive workloads with many structures to identify, it is best to use `CrystalNets.jl` as a Julia module through the + In terms of performance, the compiled executable is the best option if you only want to identify a few structures from time to time. Using [the website](https://progs.coudert.name/topology) is recommended as well for this use-case, unless the nets you study are too big. + + For intensive workloads with many structures to identify, it is best to use `CrystalNets.jl` as a Julia module through the [`determine_topology_dataset`](@ref) and [`guess_topology_dataset`](@ref) functions. 
The module is also the best option to perform more advanced analyses on the net in Julia, or to use the [`Options`](@ref) unavailable to the executable. diff --git a/docs/src/lib/internals.md b/docs/src/lib/internals.md index f455659..18a65c7 100644 --- a/docs/src/lib/internals.md +++ b/docs/src/lib/internals.md @@ -94,3 +94,11 @@ CrystalNets.collision_nodes CrystalNets.@toggleassert CrystalNets.check_dimensionality ``` + +## Other + +```@docs +CrystalNets.guess_topology +CrystalNets.guess_topology_dataset +CrystalNets.recognize_topology +``` diff --git a/docs/src/lib/public.md b/docs/src/lib/public.md index 91ee606..f870d83 100644 --- a/docs/src/lib/public.md +++ b/docs/src/lib/public.md @@ -7,6 +7,7 @@ CrystalNet UnderlyingNets TopologicalGenome TopologyResult +InterpenetratedTopologyResult ``` ## Main functions @@ -14,11 +15,8 @@ TopologyResult ```@docs determine_topology determine_topology_dataset -guess_topology -guess_topology_dataset parse_chemfile topological_genome -recognize_topology ``` ## Options @@ -28,7 +26,7 @@ CrystalNets.Options StructureType Bonding Clustering -ClusterKinds +CrystalNets.ClusterKinds ``` ## Other utilities diff --git a/docs/src/python.md b/docs/src/python.md index 1375eba..d38a7e9 100644 --- a/docs/src/python.md +++ b/docs/src/python.md @@ -77,12 +77,10 @@ def identify_topology(cif): options = jl.CrystalNets.Options(structure=jl.StructureType.MOF) # Since the structure is specified as a MOF, the default clusterings are AllNodes and SingleNodes result = jl.determine_topology(cif, options) # Main call - if jl.isa(result, jl.Vector): # indicates interpenetrating substructures - # for each x in result: - # * x[0] is the list of nodes belonging to the substructure - # * x[1] is the topology of the substructure - return [check_unique_topology(x[1]) for x in result] - return [check_unique_topology(result)] + # for each x in result: + # * x[0] is the topology of the substructure. + # * x[1] is the catenation multiplicity of this subnet. 
+ return [check_unique_topology(x[0]) for x in result] def check_unique_topology(result): singlenodes = result[jl.Clustering.SingleNodes] # topology for SingleNodes diff --git a/docs/src/visualization.md b/docs/src/visualization.md index d5490cd..e073ae1 100644 --- a/docs/src/visualization.md +++ b/docs/src/visualization.md @@ -183,4 +183,4 @@ Other available export options are disabled by default: - `export_clusters` for the clusters. The only difference with `export_subnets` is that the graph induced by the clusters is not trimmed yet. - `export_net` for the net before separation into connected components. This is equivalent - to catenating the result of `export_subnets` into a single file. + to concatenating the result of `export_subnets` into a single file. diff --git a/src/CrystalNets.jl b/src/CrystalNets.jl index 60323f3..9e041a2 100644 --- a/src/CrystalNets.jl +++ b/src/CrystalNets.jl @@ -25,17 +25,14 @@ export CrystalNet, UnderlyingNets, TopologicalGenome, TopologyResult, + InterpenetratedTopologyResult, determine_topology, determine_topology_dataset, - guess_topology, - guess_topology_dataset, parse_chemfile, topological_genome, - recognize_topology, StructureType, Bonding, - Clustering, - ClusterKinds + Clustering using LinearAlgebra: det, dot, norm, rank, cross import LinearAlgebra diff --git a/src/archive.jl b/src/archive.jl index 997471f..ac8c270 100644 --- a/src/archive.jl +++ b/src/archive.jl @@ -47,10 +47,10 @@ dia It is also possible to directly access the topological genome as a `PeriodicGraph` by parsing the name as a [`TopologicalGenome`](@ref): ```jldoctest - julia> parse(TopologicalGenome, "pcu").genome + julia> PeriodicGraph(parse(TopologicalGenome, "pcu")) PeriodicGraph3D(1, PeriodicEdge3D[(1, 1, (0,0,1)), (1, 1, (0,1,0)), (1, 1, (1,0,0))]) - julia> string(parse(TopologicalGenome, "nbo").genome) == REVERSE_CRYSTALNETS_ARCHIVE["nbo"] + julia> string(PeriodicGraph(parse(TopologicalGenome, "nbo"))) == REVERSE_CRYSTALNETS_ARCHIVE["nbo"] true ``` 
""" diff --git a/src/executable.jl b/src/executable.jl index 4b33390..c15dfa6 100644 --- a/src/executable.jl +++ b/src/executable.jl @@ -236,6 +236,19 @@ function split_clusterings(s) end end +""" + main(ARGS) + +Function called when using the module as an executable. + +Return code can be: +* 0: no error +* 1: the chemical bond system has no periodicity +* 2: invalid input +* 3: parsing error +* 4: internal CrystalNets.jl error +* 5: unhandled CrystalNets.jl error, please report +""" function main(args) try _parsed_args = parse_commandline(args) @@ -424,7 +437,7 @@ function main(args) end end - unets = try + unets::UnderlyingNets = try if iskey g = try PeriodicGraph(input_file) @@ -452,7 +465,7 @@ function main(args) return invalid_input_error("""The input cannot be analyzed because of the following error:""", e, catch_backtrace()) end - genomes::Vector{Tuple{Vector{Int},TopologyResult}} = try + genomes::InterpenetratedTopologyResult = try topological_genome(unets) catch e return internal_error("""Internal error encountered while computing the topological genome:""", @@ -488,20 +501,13 @@ function main(args) end =# - if length(genomes) == 1 - id = genomes[1][2] - println(id) - all(x -> isnothing(x.name), values(id)) && return 1 - return 0 - end - if length(genomes) == 0 - println(TopologyResult("")) + println(genomes) return 1 end - + println(genomes) - return 1 + return 0 catch e return unhandled_error("CrystalNets encountered an unhandled exception:", e, catch_backtrace()) diff --git a/src/options.jl b/src/options.jl index 3dff219..aafbb19 100644 --- a/src/options.jl +++ b/src/options.jl @@ -116,7 +116,7 @@ function Base.parse(::Type{_Clustering}, s::AbstractString) elseif s == "PEM" return Clustering.PEM end - throw(ArgumentError(lazy"No clustering from string $x")) + throw(ArgumentError(lazy"No clustering from string \"$s\"")) end """ diff --git a/src/precompile.jl b/src/precompile.jl index 8e570ee..e4a0e17 100644 --- a/src/precompile.jl +++ 
b/src/precompile.jl @@ -38,7 +38,6 @@ using PrecompileTools export_input=tmpdir, export_trimmed=tmpdir, export_subnets=tmpdir, export_attributions=tmpdir, export_clusters=tmpdir) net_rro = parse_chemfile(path_to_rro) topological_genome(CrystalNet(net_rro)) - print(net_rro) # Rational{Int128} # mil100 = determine_topology(path_to_mil100; structure=StructureType.Guess, # clusterings=[Clustering.EachVertex,Clustering.PEM,Clustering.PE,Clustering.Standard,Clustering.Auto], diff --git a/src/query.jl b/src/query.jl index ffcf37a..8816a51 100644 --- a/src/query.jl +++ b/src/query.jl @@ -9,6 +9,8 @@ the string representation of a D-periodic graph such that `PeriodicGraph{D}(topological_genome(net))` is isomorphic to `net.pge.g` (except possibly if the `ignore_types` option is unset). +Return a [`TopologicalGenome`](@ref). + !!! info Options must be passed directly within `net`. """ @@ -48,7 +50,7 @@ function topological_genome(net::CrystalNet{D,T})::TopologicalGenome where {D,T} return topological_genome(shrunk_net, collisions) end -topological_genome(net::CrystalNet{0,T}) where {T} = TopologicalGenome(net.options.error) +topological_genome(net::CrystalNet{0}) = TopologicalGenome(net.options.error) function topological_genome(net::CrystalNet{D,T}, collisions::CollisionList)::TopologicalGenome where {D,T} try @@ -73,6 +75,8 @@ end Compute the topological genome of a periodic graph. If given a topological key (as a string), it is converted to a `PeriodicGraph` first. + +Return a [`TopologicalGenome`](@ref). """ function topological_genome(g::PeriodicGraph, options::Options) nets = UnderlyingNets(g, options) @@ -122,10 +126,12 @@ macro loop_group(ex) end """ - topological_genome(group::UnderlyingNets)::Vector{Tuple{Vector{Int},String}} + topological_genome(group::UnderlyingNets) Compute the topological genome of each subnet stored in `group`. +Return a [`InterpenetratedTopologyResult`](@ref) + !!! info Options must be passed directly within the subnets. 
""" @@ -141,7 +147,7 @@ function topological_genome(group::UnderlyingNets) end push!(ret, (TopologyResult(subret), nfold, id)) end - return ret + return InterpenetratedTopologyResult(ret) end """ @@ -167,19 +173,13 @@ end determine_topology(path; kwargs...) Compute the topology of the structure described in the file located at `path`. -This is essentially equivalent to calling +This is exactly equivalent to calling `topological_genome(UnderlyingNets(parse_chemfile(path, options)))`. -In the case where the structure is not made of interpenetrating nets, return the topology -of the only net. +Return an [`InterpenetratedTopologyResult`](@ref). """ function determine_topology(path, options::Options) - genomes::Vector{Tuple{TopologyResult, Int, Vector{Int}}} = - topological_genome(UnderlyingNets(parse_chemfile(path, options))) - if length(genomes) == 0 - push!(genomes, (TopologyResult(""), 1, Int[])) - end - InterpenetratedTopologyResult(genomes) + topological_genome(UnderlyingNets(parse_chemfile(path, options))) end determine_topology(path; kwargs...) = determine_topology(path, Options(; kwargs...)) @@ -324,8 +324,9 @@ guess_topology(path; kwargs...) = guess_topology(path, Options(structure=Structu Given a path to a directory containing structure input files, compute the topology of each structure within the directory. Return a dictionary linking each file name to the result. -The result is a [`TopologyResult`](@ref), containing the topological genome, the name if -known and the stability of the net. In case of error, the exception is reported. +The result is a [`InterpenetratedTopologyResult`](@ref), containing the topological genome, +the name if known and the stability of the net. +In case of error, the exception is reported. 
Warnings will be toggled off (unless `force_warn` is set) and it is stongly recommended not to export any file since those actions may critically reduce performance, @@ -388,17 +389,17 @@ function determine_topology_dataset(path, save, autoclean, showprogress, options f = joinpath(path, file) # threadid() == 1 && @show f # to find infinite loops: the last one printed is probably running - genomes::Vector{Tuple{TopologyResult,Int,Vector{Int}}} = try + genomes::InterpenetratedTopologyResult = try topological_genome(UnderlyingNets(parse_chemfile(f, options))) catch e (options.throw_error || isinterrupt(e)) && rethrow() - [(TopologyResult(string(e)), 1, Int[])] + InterpenetratedTopologyResult(e) end if isempty(genomes) - push!(genomes, (TopologyResult(""), 1, Int[])) + push!(genomes.data, (TopologyResult(""), 1, Int[])) end - for (j, (genome, _)) in enumerate(genomes) - newname = length(genomes) == 1 ? file * '/' : file * '/' * string(j) + for (j, (genome, nfold)) in enumerate(genomes) + newname = string(file, '/', j, '/', nfold) open(joinpath(resultdir, string(threadid())), "a") do results io = IOContext(results, :compact => true) println(io, newname, '/', genome) @@ -407,18 +408,18 @@ function determine_topology_dataset(path, save, autoclean, showprogress, options showprogress && next!(progress) end - ret = Pair{String,TopologyResult}[] + result = Dict{String,InterpenetratedTopologyResult}() for _f in readdir(resultdir; join=true) basename(_f) == "data" && continue for l in eachline(_f) isempty(l) && continue splits = split(l, '/') + data = get!(result, splits[1], InterpenetratedTopologyResult()).data _genome = pop!(splits) - isempty(splits[end]) && pop!(splits) - push!(ret, Pair(join(splits, '/'), parse(TopologyResult, _genome))) + _nfold = pop!(splits) + push!(data, (parse(TopologyResult, _genome), parse(Int, _nfold), Int[])) end end - result::Dict{String,TopologyResult} = Dict(ret) if save i = 0 tmpresultdir = resultdir*".OLD"*string(i) diff --git a/src/types.jl 
b/src/types.jl index ede4bd2..94b139d 100644 --- a/src/types.jl +++ b/src/types.jl @@ -1033,7 +1033,7 @@ function Base.parse(::Type{TopologicalGenome}, s::AbstractString) return TopologicalGenome(PeriodicGraph(s[10:end]), nothing, true) end if startswith(s, "FAILED") - return TopologicalGenome(s[13:end]) + return TopologicalGenome(s[14:end]) end return TopologicalGenome(parse(PeriodicGraph, REVERSE_CRYSTALNETS_ARCHIVE[s]), s, false) end @@ -1057,7 +1057,7 @@ of a `TopologyResult` can be parsed back to a `TopologyResult`: ```jldoctest julia> mof5 = joinpath(dirname(dirname(pathof(CrystalNets))), "test", "cif", "MOF-5.cif"); -julia> topologies = determine_topology(mof5, structure=StructureType.MOF, clusterings=[Clustering.Auto, Clustering.Standard, Clustering.PE]) +julia> topologies = only(determine_topology(mof5, structure=StructureType.MOF, clusterings=[Clustering.Auto, Clustering.Standard, Clustering.PE]))[1] AllNodes, SingleNodes: pcu Standard: xbh PE: cab @@ -1278,8 +1278,133 @@ function Base.parse(::Type{TopologyResult}, s::AbstractString) end +""" + InterpenetratedTopologyResult <: AbstractVector{Tuple{TopologyResult,Int}} + +The result of a topology computation on a structure containing possibly several +interpenetrated substructures. + +An `InterpenetratedTopologyResult` can be seen as a list of `(topology, n)` pair where +* `topology` is the [`TopologyResult`](@ref) corresponding to the substructures. +* `n` is an integer such that the substructure is composed of an `n`-fold catenated net. + +The entire structure can thus be decomposed in a series of substructures, each of them +possibly decomposed into several catenated nets. + +!!! info "Vocabulary" + In this context, *interpenetration* and *catenation* have slightly different meanings: + - two (or more) subnets are *interpenetrated* if both are present in the unit cell, and + are composed of vertices that have disjoint numbers. 
They may or may not all have the + same topology since they are disjoint and independent subgraphs. For example: + ```jldoctest + julia> topological_genome(PeriodicGraph("2 1 1 0 1 2 2 0 1 2 2 1 0")) + 2 interpenetrated subnets: + ⋅ Subnet 1 → UNKNOWN 1 1 1 1 + ⋅ Subnet 2 → sql + ``` + - a net is `n`-fold *catenated* if the unit cell of a single connected component of the + net is `n` times larger than the unit cell of the overall net. In that case, the net + is actually made of `n` interpenetrating connected components, which all have the + same topology. For example: + ```jldoctest + julia> topological_genome(PeriodicGraph("3 1 1 2 0 0 1 1 0 1 0 1 1 0 0 1")) + (2-fold) pcu + ``` + Both may occur inside a single structure, for example: + ```jldoctest + julia> topological_genome(PeriodicGraph("2 1 1 0 2 2 2 0 1 2 2 1 0")) + 2 interpenetrated subnets: + ⋅ Subnet 1 → (2-fold) UNKNOWN 1 1 1 1 + ⋅ Subnet 2 → sql + ``` + +# Example +```jldoctest +julia> mof14 = joinpath(dirname(dirname(pathof(CrystalNets))), "test", "cif", "MOFs", "MOF-14.cif"); + +julia> topologies = determine_topology(mof14, structure=StructureType.MOF, clusterings=[Clustering.Auto, Clustering.Standard, Clustering.PE]) +2 interpenetrated subnets: +⋅ Subnet 1 → AllNodes,SingleNodes,Standard: pto | PE: sqc11259 +⋅ Subnet 2 → AllNodes,SingleNodes,Standard: pto | PE: sqc11259 + +julia> typeof(topologies) +InterpenetratedTopologyResult + +julia> parse(InterpenetratedTopologyResult, repr(topologies)) == topologies +true + +julia> topologies[2] +(AllNodes, SingleNodes, Standard: pto +PE: sqc11259, 1) + +julia> topology, n = topologies[2]; # second subnet + +julia> n # catenation multiplicity +1 + +julia> topology +AllNodes, SingleNodes, Standard: pto +PE: sqc11259 + +julia> typeof(topology) +TopologyResult +``` +""" struct InterpenetratedTopologyResult <: AbstractVector{Tuple{TopologyResult,Int}} data::Vector{Tuple{TopologyResult,Int,Vector{Int}}} end +InterpenetratedTopologyResult() = 
InterpenetratedTopologyResult(Tuple{TopologyResult,Int,Vector{Int}}[]) +InterpenetratedTopologyResult(e::AbstractString) = InterpenetratedTopologyResult([(TopologyResult(string(e)), 1, Int[])]) Base.size(x::InterpenetratedTopologyResult) = (length(x.data),) -Base.getindex(x::InterpenetratedTopologyResult, i) = (y = x.data[i]; (y[1], y[2])) \ No newline at end of file +Base.getindex(x::InterpenetratedTopologyResult, i) = (y = x.data[i]; (y[1], y[2])) + +function Base.show(io::IO, ::MIME"text/plain", x::InterpenetratedTopologyResult) + compact = length(x) > 1 + if compact + print(io, length(x), " interpenetrated subnets:") + elseif length(x) == 0 + print(io, "non-periodic") + end + for (i, (topology, nfold)) in enumerate(x) + if compact + print(io, "\n⋅ Subnet ", i, " → ") + end + hasnfold = nfold > 1 + if hasnfold + printstyled(io, '(', nfold, "-fold) ", italic=true) + end + print(IOContext(io, :compact=>(compact|hasnfold)), topology) + end +end +Base.show(io::IO, x::InterpenetratedTopologyResult) = show(io, MIME("text/plain"), x) + +function parse_nfold_topologyresult(x::AbstractString) + nfold = 1 + num_digits = -8 + if x[1] == '(' + nfold = parse(Int, first(split(@view(x[2:end]), !isnumeric; limit=2))) + num_digits = ndigits(nfold) + @assert @view(x[(2+num_digits):(8+num_digits)]) == "-fold) " + end + parse(TopologyResult, @view(x[(9+num_digits):end])), nfold +end + +function Base.parse(::Type{InterpenetratedTopologyResult}, x::AbstractString) + s = split(x; limit=4) + length(s) == 1 && x == "non-periodic" && return InterpenetratedTopologyResult() + if length(s) > 3 && s[2] == "interpenetrated" && s[3] == "subnets:" + lines = split(s[4], '\n') + data = Vector{Tuple{TopologyResult,Int,Vector{Int}}}(undef, length(lines)) + for l in lines + @assert @view(l[1:11]) == "⋅ Subnet " + splits = split(@view(l[11:end]); limit=3) + i = parse(Int, splits[1]) + @assert splits[2] == "→" + topo, nfold = parse_nfold_topologyresult(splits[3]) + data[i] = (topo, nfold, Int[]) + end 
+ return InterpenetratedTopologyResult(data) + end + topo1, nfold1 = parse_nfold_topologyresult(x) + return InterpenetratedTopologyResult([(topo1, nfold1, Int[])]) +end diff --git a/test/runtests.jl b/test/runtests.jl index 4ae9c2e..33ecf97 100644 --- a/test/runtests.jl +++ b/test/runtests.jl @@ -33,6 +33,12 @@ function __reset_archive!(safeARCHIVE, safeREVERSE) nothing end +function extract1(x) + topo, nfold = only(x) + @test nfold == 1 + topo +end + import CrystalNets.Clustering: SingleNodes, AllNodes, Standard, PE, PEM @testset "MOF examples" begin @@ -42,56 +48,59 @@ import CrystalNets.Clustering: SingleNodes, AllNodes, Standard, PE, PEM mofdataset = determine_topology_dataset(joinpath(cifs, "MOFs"), false; kwargs...) @testset "Dataset analysis" begin - @test length(mofdataset) == 14 + @test length(mofdataset) == 13 - hkust1 = mofdataset["HKUST-1.cif"] + hkust1 = extract1(mofdataset["HKUST-1.cif"]) @test hkust1[SingleNodes] == hkust1[AllNodes] == hkust1[Standard] @test hkust1[SingleNodes].name == "tbo" - jxust1 = mofdataset["JXUST-1.cif"] + jxust1, nfoldjxust1 = only(mofdataset["JXUST-1.cif"]) + @test nfoldjxust1 == 2 @test jxust1[SingleNodes] == jxust1[AllNodes] @test PeriodicGraph(jxust1[SingleNodes]) == PeriodicGraph(REVERSE_CRYSTALNETS_ARCHIVE["pcu"]) - mil53 = mofdataset["MIL-53.cif"] + mil53 = extract1(mofdataset["MIL-53.cif"]) @test string(mil53[SingleNodes]) == mil53[Standard].name == "bpq" @test mil53[AllNodes].name == "rna" @test mil53[PE].name == "sra, ABW" - mil100 = mofdataset["MIL-100.cif"] + mil100 = extract1(mofdataset["MIL-100.cif"]) @test mil100[SingleNodes].name == mil100[AllNodes].name == "moo" - mil101 = mofdataset["MIL-101.cif"] + mil101 = extract1(mofdataset["MIL-101.cif"]) @test mil101[SingleNodes].name == mil101[AllNodes].name == "mtn-e" @test mil101[PE].name == "mtn-e-a" - mof5 = mofdataset["MOF-5.cif"] + mof5 = extract1(mofdataset["MOF-5.cif"]) @test first(mof5) == ([AllNodes, SingleNodes, Standard, PEM] => parse(TopologicalGenome, 
"tbo")) @test last(collect(mof5)) == ([PE] => last(mof5)) @test mof5[SingleNodes].name == mof5[AllNodes].name == mof5[Standard].name == "tbo" - mof14 = mofdataset["MOF-14.cif/1"] - @test mof14 == mofdataset["MOF-14.cif/2"] + topologies_mof14 = mofdataset["MOF-14.cif"] + @test topologies_mof14[1] == topologies_mof14[2] + mof14, nfoldmof14 = topologies_mof14[1] + @test nfoldmof14 == 1 @test mof14[SingleNodes].name == mof14[AllNodes].name == mof14[Standard].name == "pto" @test mof14[PE] == parse(TopologicalGenome, "sqc11259") - mof801 = mofdataset["MOF-801.cif"] + mof801 = extract1(mofdataset["MOF-801.cif"]) @test mof801[SingleNodes].name == mof801[AllNodes].name == "fcu" @test mof801[Standard].name == "xbi" @test mof801[PE].name == "ubt" - pcn700 = mofdataset["PCN-700.cif"] + pcn700 = extract1(mofdataset["PCN-700.cif"]) @test pcn700[SingleNodes].name == pcn700[AllNodes].name == "bcu" @test pcn700[PE].name == "pcb, ACO" - uio66 = mofdataset["UiO-66.cif"] + uio66 = extract1(mofdataset["UiO-66.cif"]) @test uio66[SingleNodes].name == uio66[AllNodes].name == "fcu" @test uio66[Standard].name == "xbi" @test uio66[PE].name == "ubt" - zif8 = mofdataset["ZIF-8.cif"] - @test mofdataset["ZIF-8.cif"] == parse(TopologyResult, "AllNodes, SingleNodes, Standard, PEM: sod, SOD | PE: sod-e") + zif8 = extract1(mofdataset["ZIF-8.cif"]) + @test mofdataset["ZIF-8.cif"] == parse(InterpenetratedTopologyResult, "AllNodes, SingleNodes, Standard, PEM: sod, SOD | PE: sod-e") - zif67 = mofdataset["ZIF-67.cif"] + zif67 = extract1(mofdataset["ZIF-67.cif"]) @test string(zif67) == "AllNodes, SingleNodes, Standard, PEM: sod, SOD\nPE: sod-e" end @@ -102,36 +111,36 @@ import CrystalNets.Clustering: SingleNodes, AllNodes, Standard, PE, PEM @test mofdataset["UiO-66.cif"] == determine_topology(joinpath(cifs, "UiO-66.cif"); kwargs...) 
- juc101 = determine_topology(joinpath(cifs, "JUC-101.cif"); kwargs..., bonding=Bonding.Guess) - @test juc101 == determine_topology(joinpath(cifs, "JUC-101.cif"); kwargs..., bonding=Bonding.Input) + juc101 = extract1(determine_topology(joinpath(cifs, "JUC-101.cif"); kwargs..., bonding=Bonding.Guess)) + @test juc101 == extract1(determine_topology(joinpath(cifs, "JUC-101.cif"); kwargs..., bonding=Bonding.Input)) @test juc101[SingleNodes].name == "nia" @test juc101[AllNodes].name == "jjt" @test string(juc101[Standard].genome) == "3 1 2 0 0 0 1 2 0 1 1 1 3 0 0 0 1 3 0 1 0 1 4 0 0 0 1 4 0 0 1 1 5 0 0 0 1 5 0 1 1 1 6 0 0 0 1 6 0 1 0 1 7 0 0 0 1 7 0 0 1 2 3 0 0 -1 2 4 0 0 0 2 8 0 0 0 2 8 0 1 1 3 4 0 0 1 3 8 0 0 1 3 8 0 1 1 4 8 0 1 0 4 8 0 1 1 5 6 0 0 0 5 7 0 -1 0 5 8 1 0 0 5 8 1 1 1 6 7 0 -1 0 6 8 1 0 1 6 8 1 1 1 7 8 1 1 0 7 8 1 1 1" @test string(juc101[PE]) == "UNKNOWN 3 1 2 0 0 0 1 3 0 0 0 1 4 0 0 0 1 5 0 0 0 2 3 0 0 0 2 6 0 0 0 2 7 0 0 0 3 8 0 0 0 3 9 0 0 0 4 7 0 0 0 4 8 0 0 0 4 10 0 0 0 5 11 0 0 0 5 12 0 0 0 6 11 1 0 0 6 13 0 0 0 7 8 0 0 0 7 14 0 0 0 8 15 0 0 0 9 11 0 1 0 9 16 0 0 0 10 17 0 0 0 10 18 0 0 0 12 13 -1 1 0 12 16 -1 0 0 12 18 0 0 -1 13 16 0 -1 0 13 19 0 0 0 14 17 1 0 0 14 19 0 0 1 15 17 0 1 0 15 20 0 0 0 16 20 0 0 -1 18 19 -1 1 1 18 20 -1 0 0 19 20 0 -1 -1" @test juc101[PEM].genome == PeriodicGraph("3 1 2 0 0 0 1 3 0 0 0 1 4 0 0 0 1 5 0 0 0 1 6 0 0 0 1 7 0 0 0 2 4 0 0 0 2 8 0 0 0 3 4 0 0 0 3 9 0 0 0 4 6 0 0 0 4 10 0 0 0 4 11 0 0 0 5 6 0 0 0 5 12 0 0 0 6 7 0 0 0 6 10 0 0 0 6 11 0 0 0 7 13 0 0 0 8 14 0 0 0 8 15 0 0 0 9 16 0 0 0 9 17 0 0 0 10 18 0 0 0 11 19 0 0 0 12 15 -1 0 0 12 20 0 0 0 13 16 -1 0 0 13 21 0 0 0 14 22 0 0 0 14 23 0 0 0 15 18 1 0 -1 16 19 1 0 -1 17 22 -1 1 0 17 23 -1 1 0 18 24 0 0 0 19 25 0 0 0 20 22 -1 0 1 20 26 0 0 0 21 22 -2 1 1 21 26 -1 1 0 22 23 0 0 0 22 26 1 0 -1 23 24 0 0 -1 23 25 1 -1 -1 23 26 1 0 -1 24 26 1 0 0 25 26 0 1 0") - ewetuw = determine_topology(joinpath(cifs, "EWETUW_clean.cif"); kwargs...) 
+ ewetuw = extract1(determine_topology(joinpath(cifs, "EWETUW_clean.cif"); kwargs...)) @test allunique(ewetuw) @test ewetuw[SingleNodes].name == "fit" @test unique!(sort!(degree(ewetuw[PE].genome))) == [3, 5, 6] @assert allunique(unique!(sort!(degree(last(x).genome))) for x in ewetuw) - wemfif = determine_topology(joinpath(cifs, "WEMFIF_clean.cif"); kwargs...) + wemfif = extract1(determine_topology(joinpath(cifs, "WEMFIF_clean.cif"); kwargs...)) @test wemfif[AllNodes] == wemfif[SingleNodes] == wemfif[Standard] == wemfif[PEM] @test wemfif[AllNodes].name == "dia" @test wemfif[PE].name == "crs" CrystalNets.toggle_warning(true) # test cell minimization with collision nodes - nott112 = determine_topology(joinpath(cifs, "NOTT-112.cif"); kwargs..., bonding=Bonding.Input) + nott112 = extract1(determine_topology(joinpath(cifs, "NOTT-112.cif"); kwargs..., bonding=Bonding.Input)) @test startswith(string(nott112), "AllNodes, PEM: ntt\nSingleNodes, Standard: nts\nPE: ") # test input bonding when different symmetric images of the same atoms have different bonds - fowwar = determine_topology(joinpath(cifs, "FOWWAR.cif"); kwargs..., bonding=Bonding.Input, clusterings=[Clustering.Standard]) + fowwar = extract1(determine_topology(joinpath(cifs, "FOWWAR.cif"); kwargs..., bonding=Bonding.Input, clusterings=[Clustering.Standard])) @test string(fowwar) == "Standard: UNKNOWN 3 1 1 0 0 1 1 2 0 0 0 1 3 0 0 0 1 4 0 0 0 2 5 0 0 0 2 6 0 0 0 3 6 0 0 0 3 7 0 0 0 4 5 1 0 0 4 7 0 -1 0 5 5 0 1 1 5 8 0 0 0 6 6 0 0 1 6 8 0 1 0 7 7 0 1 1 7 8 1 1 0" # Test non-periodic input - calfig = determine_topology(joinpath(cifs, "CALFIG.cif"); kwargs..., clusterings=[Clustering.Auto]) + calfig = extract1(determine_topology(joinpath(cifs, "CALFIG.cif"); kwargs..., clusterings=[Clustering.Auto])) @test string(calfig) == "non-periodic" end @@ -259,7 +268,7 @@ end path = joinpath(cifs, "RRO.cif") push!(ARGS, "-a", joinpath(CrystalNets.arc_location, "rcsr.arc"), path) result, written = capture_out(out) - @test 
result == 1 + @test result == 0 @test startswith(only(written), "UNKNOWN") __reset_archive!(safeARCHIVE, safeREVERSE) @@ -309,13 +318,14 @@ end path = joinpath(cifs, "ALPO-3.1.1.37.001.cif") push!(ARGS, "-s", "guess", "-b", "input", path) result, written = capture_out(out) - @test result == 1 # Unknown topology with the input bonds + @test result == 0 + @test startswith(only(written), "AllNodes, SingleNodes: UNKNOWN 3") # Unknown topology with the input bonds empty!(ARGS) path = joinpath(cifs, "ALPO-3.1.1.37.001.cif") push!(ARGS, "-b", "auto", path) result, written = capture_out(out) - @test_broken result == 0 + @test result == 0 @test_broken written == ["afi, AFI"] # Test automatic removal of solvent residues and sites with multiple atoms @@ -389,4 +399,4 @@ end end Test.get_testset().n_passed += length(unstabletry) - failures @test failures == 0 -end \ No newline at end of file +end From 6a07f2c16c5faa594f534c20d1645d3bc898f622 Mon Sep 17 00:00:00 2001 From: Lionel Zoubritzky Date: Mon, 10 Jul 2023 10:41:01 +0200 Subject: [PATCH 3/8] Change "subnets" into "substructures" --- docs/src/index.md | 2 +- src/types.jl | 12 ++++++------ 2 files changed, 7 insertions(+), 7 deletions(-) diff --git a/docs/src/index.md b/docs/src/index.md index 4fdf70c..f56abf0 100644 --- a/docs/src/index.md +++ b/docs/src/index.md @@ -76,7 +76,7 @@ For example: ```julia julia> x = determine_topology("/path/to/intertwinned/structures.cif") -2 interpenetrated subnets: +2 interpenetrated substructures: ⋅ Subnet 1 → pcu ⋅ Subnet 2 → srs ``` diff --git a/src/types.jl b/src/types.jl index 94b139d..8b672f7 100644 --- a/src/types.jl +++ b/src/types.jl @@ -1293,12 +1293,12 @@ possibly decomposed into several catenated nets. !!! 
info "Vocabulary" In this context, *interpenetration* and *catenation* have slightly different meanings: - - two (or more) subnets are *interpenetrated* if both are present in the unit cell, and + - two (or more) substructures are *interpenetrated* if both are present in the unit cell, and are composed of vertices that have disjoint numbers. They may or may not all have the same topology since they are disjoint and independent subgraphs. For example: ```jldoctest julia> topological_genome(PeriodicGraph("2 1 1 0 1 2 2 0 1 2 2 1 0")) - 2 interpenetrated subnets: + 2 interpenetrated substructures: ⋅ Subnet 1 → UNKNOWN 1 1 1 1 ⋅ Subnet 2 → sql ``` @@ -1313,7 +1313,7 @@ possibly decomposed into several catenated nets. Both may occur inside a single structure, for example: ```jldoctest julia> topological_genome(PeriodicGraph("2 1 1 0 2 2 2 0 1 2 2 1 0")) - 2 interpenetrated subnets: + 2 interpenetrated substructures: ⋅ Subnet 1 → (2-fold) UNKNOWN 1 1 1 1 ⋅ Subnet 2 → sql ``` @@ -1323,7 +1323,7 @@ possibly decomposed into several catenated nets. 
julia> mof14 = joinpath(dirname(dirname(pathof(CrystalNets))), "test", "cif", "MOFs", "MOF-14.cif"); julia> topologies = determine_topology(mof14, structure=StructureType.MOF, clusterings=[Clustering.Auto, Clustering.Standard, Clustering.PE]) -2 interpenetrated subnets: +2 interpenetrated substructures: ⋅ Subnet 1 → AllNodes,SingleNodes,Standard: pto | PE: sqc11259 ⋅ Subnet 2 → AllNodes,SingleNodes,Standard: pto | PE: sqc11259 @@ -1361,7 +1361,7 @@ Base.getindex(x::InterpenetratedTopologyResult, i) = (y = x.data[i]; (y[1], y[2] function Base.show(io::IO, ::MIME"text/plain", x::InterpenetratedTopologyResult) compact = length(x) > 1 if compact - print(io, length(x), " interpenetrated subnets:") + print(io, length(x), " interpenetrated substructures:") elseif length(x) == 0 print(io, "non-periodic") end @@ -1392,7 +1392,7 @@ end function Base.parse(::Type{InterpenetratedTopologyResult}, x::AbstractString) s = split(x; limit=4) length(s) == 1 && x == "non-periodic" && return InterpenetratedTopologyResult() - if length(s) > 3 && s[2] == "interpenetrated" && s[3] == "subnets:" + if length(s) > 3 && s[2] == "interpenetrated" && s[3] == "substructures:" lines = split(s[4], '\n') data = Vector{Tuple{TopologyResult,Int,Vector{Int}}}(undef, length(lines)) for l in lines From 87075dc6203eef4595ef25e73228e19378417699 Mon Sep 17 00:00:00 2001 From: Lionel Zoubritzky Date: Mon, 10 Jul 2023 10:58:35 +0200 Subject: [PATCH 4/8] Unexport make_archive --- docs/src/faq.md | 4 ++-- docs/src/index.md | 2 +- docs/src/lib/internals.md | 6 ++++++ docs/src/lib/public.md | 1 - docs/src/python.md | 2 +- src/archive.jl | 22 +++++++++++++--------- 6 files changed, 23 insertions(+), 14 deletions(-) diff --git a/docs/src/faq.md b/docs/src/faq.md index e3b22ac..dbe3928 100644 --- a/docs/src/faq.md +++ b/docs/src/faq.md @@ -96,8 +96,8 @@ Most often, the difference will come from either: ## How can I do a database topology analysis with CrystalNets.jl? 
-The built-in way to do this consists in using the [`determine_topology_dataset`](@ref) function, or [`guess_topology_dataset`](@ref) in some cases. -These functions expect the path of a directory containing CIF files within (possibly in subdirectories). +The built-in way to do this consists in using the [`determine_topology_dataset`](@ref) function. +This function expects the path of a directory containing CIF files within (possibly in subdirectories). ## How can I directly access the genome of my structure instead of its name? diff --git a/docs/src/index.md b/docs/src/index.md index f56abf0..494a272 100644 --- a/docs/src/index.md +++ b/docs/src/index.md @@ -131,4 +131,4 @@ Run `CrystalNets --help` for the list of options available to the executable. In terms of performance, the compiled executable is the best option if you only want to identify a few structures from time to time. Using [the website](https://progs.coudert.name/topology) is recommended as well for this use-case, unless the nets you study are too big. For intensive workloads with many structures to identify, it is best to use `CrystalNets.jl` as a Julia module through the - [`determine_topology_dataset`](@ref) and [`guess_topology_dataset`](@ref) functions. The module is also the best option to perform more advanced analyses on the net in Julia, or to use the [`Options`](@ref) unavailable to the executable. + [`determine_topology_dataset`](@ref) function. The module is also the best option to perform more advanced analyses on the net in Julia, or to use the [`Options`](@ref) unavailable to the executable. 
diff --git a/docs/src/lib/internals.md b/docs/src/lib/internals.md index 18a65c7..9dd0d17 100644 --- a/docs/src/lib/internals.md +++ b/docs/src/lib/internals.md @@ -88,6 +88,12 @@ CrystalNets.expand_collisions CrystalNets.collision_nodes ``` +## Archives + +```@docs +CrystalNets.make_archive +``` + ## Utils ```@docs diff --git a/docs/src/lib/public.md b/docs/src/lib/public.md index f870d83..c17d82b 100644 --- a/docs/src/lib/public.md +++ b/docs/src/lib/public.md @@ -49,6 +49,5 @@ empty_default_archive! change_current_archive! refresh_current_archive! add_to_current_archive! -make_archive CrystalNets.export_arc ``` diff --git a/docs/src/python.md b/docs/src/python.md index d38a7e9..942bd3c 100644 --- a/docs/src/python.md +++ b/docs/src/python.md @@ -66,7 +66,7 @@ The same warnings are printed at the beginning, followed by the same exports. Th ## Usage -Let's now consider a programmatic use-case where the goal is to identify the topology of a complex MOF structure according the [`SingleNodes`](@ref Clustering) and [`AllNodes`](@ref Clustering) clusterings. The main structure may contain interpenetrating substructures and for each substructure. +Let's now consider a programmatic use-case where the goal is to identify the topology of a complex MOF structure according to the [`SingleNodes`](@ref Clustering) and [`AllNodes`](@ref Clustering) clusterings. The main structure may contain interpenetrating substructures. The function is expected to error if the topologies are different between the two clusterings. Otherwise, it returns a list of pairs whose first element is the dimensionality of the subnet and the second element is the name of the corresponding topology. If there is no known name, the topological genome is used instead. diff --git a/src/archive.jl b/src/archive.jl index ac8c270..37ca0fd 100644 --- a/src/archive.jl +++ b/src/archive.jl @@ -56,14 +56,16 @@ dia """ const REVERSE_CRYSTALNETS_ARCHIVE = Dict{String,String}(id => (startswith(key, "unstable") ? 
key[10:end] : key) for (key, id) in CRYSTALNETS_ARCHIVE) +export REVERSE_CRYSTALNETS_ARCHIVE + export clean_default_archive!, set_default_archive!, empty_default_archive!, change_current_archive!, refresh_current_archive!, - add_to_current_archive!, - make_archive, - REVERSE_CRYSTALNETS_ARCHIVE + add_to_current_archive! + +# export make_archive function _reset_archive!() global CRYSTALNETS_ARCHIVE @@ -317,7 +319,7 @@ function add_to_current_archive!(id::AbstractString, genome::AbstractString) end """ - make_archive(path, destination=nothing) + make_archive(path, destination=nothing, verbose=false) Make an archive from the files located in the directory given by `path` and export it to `destination`, if specified. Each file of the directory should correspond @@ -334,22 +336,24 @@ function make_archive(path, destination, verbose=false) verbose && print("Handling "*name*"... ") flag = false flagerror = Ref{Any}(Tuple{Vector{Int},String}[]) - genomes::Vector{Tuple{Vector{Int},String}} = try + results::InterpenetratedTopologyResult = try x = topological_genome(UnderlyingNets(parse_chemfile(path*f))) verbose && println(name*" done.") x catch e flag = true flagerror[] = e - Tuple{Vector{Int},String}[] + InterpenetratedTopologyResult() end - for (i, (vmap, genome)) in enumerate(genomes) + for (i, (topology, nfold)) in enumerate(results) + genome = string(topology) if startswith(genome, "unstable") || genome == "non-periodic" flag = true - push!(flagerror[]::Vector{Vector{Int}}, (vmap, genome)) + push!(flagerror[]::Vector{Tuple{Vector{Int},String}}, (vmap, genome)) continue end - arc[genome] = length(genomes) == 1 ? name : (name * '_' * string(i)) + verbose && nfold != 1 && println(nfold, "-fold catenated net found for ", name) + arc[genome] = length(results) == 1 ? 
name : (name * '_' * string(i)) end if flag e = flagerror[] From 1a5187919d9c23216e7906f3400646a4b2bca6a4 Mon Sep 17 00:00:00 2001 From: Lionel Zoubritzky Date: Mon, 10 Jul 2023 11:00:05 +0200 Subject: [PATCH 5/8] Add 1.9 on CI --- .github/workflows/CI.yml | 1 + 1 file changed, 1 insertion(+) diff --git a/.github/workflows/CI.yml b/.github/workflows/CI.yml index b95c44a..32f57bd 100644 --- a/.github/workflows/CI.yml +++ b/.github/workflows/CI.yml @@ -18,6 +18,7 @@ jobs: matrix: version: - '1.6' + - '1.9' - 'nightly' os: - ubuntu-latest From 3f7678cfb976c3ba90b38fe9e78a6d8b148d7b2f Mon Sep 17 00:00:00 2001 From: Lionel Zoubritzky Date: Mon, 10 Jul 2023 11:07:29 +0200 Subject: [PATCH 6/8] Back-compatibility with v<1.9 --- src/types.jl | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/src/types.jl b/src/types.jl index 8b672f7..f75c58a 100644 --- a/src/types.jl +++ b/src/types.jl @@ -1371,7 +1371,11 @@ function Base.show(io::IO, ::MIME"text/plain", x::InterpenetratedTopologyResult) end hasnfold = nfold > 1 if hasnfold - printstyled(io, '(', nfold, "-fold) ", italic=true) + @static if VERSION < v"1.9-" + printstyled(io, '(', nfold, "-fold) ", color=:yellow) + else + printstyled(io, '(', nfold, "-fold) ", italic=true) + end end print(IOContext(io, :compact=>(compact|hasnfold)), topology) end From 7550c8bb8d76c92eff0796793f63bacbda92deb0 Mon Sep 17 00:00:00 2001 From: Lionel Zoubritzky Date: Mon, 10 Jul 2023 11:09:22 +0200 Subject: [PATCH 7/8] Update documentation builder version --- .github/workflows/documentation.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/documentation.yml b/.github/workflows/documentation.yml index 4a8bdeb..489f040 100644 --- a/.github/workflows/documentation.yml +++ b/.github/workflows/documentation.yml @@ -14,7 +14,7 @@ jobs: - uses: actions/checkout@v2 - uses: julia-actions/setup-julia@latest with: - version: '1.6' + version: '1.9' - name: Install dependencies run: julia 
--project=docs/ -e 'using Pkg; Pkg.develop(PackageSpec(path=pwd())); Pkg.instantiate()' - name: Build and deploy From 0f67e5c4d02a9568231b92cc6dc82dad0cd2554e Mon Sep 17 00:00:00 2001 From: Lionel Zoubritzky Date: Mon, 10 Jul 2023 11:29:25 +0200 Subject: [PATCH 8/8] Fix back-compatibility again --- src/types.jl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/types.jl b/src/types.jl index f75c58a..4aa8f81 100644 --- a/src/types.jl +++ b/src/types.jl @@ -1371,7 +1371,7 @@ function Base.show(io::IO, ::MIME"text/plain", x::InterpenetratedTopologyResult) end hasnfold = nfold > 1 if hasnfold - @static if VERSION < v"1.9-" + @static if VERSION < v"1.10-" printstyled(io, '(', nfold, "-fold) ", color=:yellow) else printstyled(io, '(', nfold, "-fold) ", italic=true)