From cf71a8faa0c51811bba5b507f85ffdf87711e08c Mon Sep 17 00:00:00 2001 From: Brady Johnston Date: Wed, 7 May 2025 17:52:51 +0800 Subject: [PATCH 01/23] initial working BCIF decoding --- Project.toml | 2 + src/BioStructures.jl | 1 + src/bcif.jl | 582 +++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 585 insertions(+) create mode 100644 src/bcif.jl diff --git a/Project.toml b/Project.toml index 9588ac6c..081a90c3 100644 --- a/Project.toml +++ b/Project.toml @@ -10,6 +10,7 @@ CodecZlib = "944b1d66-785c-5afd-91f1-9de20f533193" Downloads = "f43a241f-c20a-4ad4-852c-f6b1247861c6" Format = "1fa38f19-a742-5d3f-a2b9-30dd87b9d5f8" LinearAlgebra = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e" +MsgPack = "99f44e22-a591-53d1-9472-aa23ef4bd671" PrecompileTools = "aea7be01-6a6a-4083-8856-8a6e6704d82a" RecipesBase = "3cdcf5f2-1ef4-517c-9805-6587b60abb01" Statistics = "10745b16-79ce-11e8-11f9-7d13ad32a3b2" @@ -47,6 +48,7 @@ Graphs = "1" LinearAlgebra = "1.9" MMTF = "1" MetaGraphs = "0.7, 0.8" +MsgPack = "1.2.1" PrecompileTools = "1" RecipesBase = "1" STRIDE_jll = "1" diff --git a/src/BioStructures.jl b/src/BioStructures.jl index 16c20c05..0f84ca0d 100644 --- a/src/BioStructures.jl +++ b/src/BioStructures.jl @@ -24,6 +24,7 @@ include("model.jl") include("select.jl") include("pdb.jl") include("mmcif.jl") +include("bcif.jl") include("download.jl") include("spatial.jl") include("bonding.jl") diff --git a/src/bcif.jl b/src/bcif.jl new file mode 100644 index 00000000..1f49c0ba --- /dev/null +++ b/src/bcif.jl @@ -0,0 +1,582 @@ +export read_binary_cif_attributes, decode_column +using LinearAlgebra +using MsgPack + + +""" +# read_binary_cif_attributes(filename::String) + +# A simple function to read a binary CIF file from MolStar and extract +# the list of attributes and their compressed bytes. + +# Returns a Dict with attribute names as keys and their byte arrays as values. +# """ +function read_binary_cif_attributes(filename::String) + dictionary = MsgPack.unpack(read(filename)) + blocks = dictionary["dataBlocks"] + categories = blocks[1]["categories"] + atom_site = categories[findall(getindex.(categories, "name") .== "_atom_site")] + columns = atom_site[1]["columns"] + new_dict = Dict{String, Any}() + # attributes = decode_column.(columns) + for column in columns + new_dict[column["name"]] = decode_column(column) + end + # populatedict!(mmcif_dict, attributes) + return new_dict +end + +# Enum for type codes +@enum TypeCode begin + INT8 = 1 + INT16 = 2 + INT32 = 3 + UINT8 = 4 + UINT16 = 5 + UINT32 = 6 + FLOAT32 = 32 + FLOAT64 = 33 +end + +# Mapping from TypeCode to Julia types +const TYPE_CODE_TO_TYPE = Dict( + INT8 => Int8, + INT16 => Int16, + INT32 => Int32, + UINT8 => UInt8, + UINT16 => UInt16, + UINT32 => UInt32, + FLOAT32 => Float32, + FLOAT64 => Float64 +) + +# Mapping from Julia types to TypeCode +const TYPE_TO_TYPE_CODE = Dict(value => key for (key, value) in TYPE_CODE_TO_TYPE) + +const INT_TO_TYPE = Dict( + 1 => Int8, + 2 => Int16, + 3 => Int32, + 4 => UInt8, + 5 => UInt16, + 6 => UInt32, + 32 => Float32, + 33 => Float64 +) + + +# Helper functions for camel/snake case conversion +function camel_to_snake_case(name) + return lowercase(replace(name, r"(? "_")) +end + +function snake_to_camel_case(name) + parts = split(name, "_") + return lowercase(parts[1]) * join(uppercase.(first.(parts[2:end])) .* parts[2:end][2:end], "") +end + +# Safe casting function +function safe_cast(array, dtype) + if eltype(array) == dtype + return array + end + + if dtype <: Integer && !(eltype(array) <: Integer) + throw(ArgumentError("Cannot cast floating point to integer")) + end + + if dtype <: Integer + type_min, type_max = typemin(dtype), typemax(dtype) + if any(x -> x < type_min || x > type_max, array) + throw(ArgumentError("Integer values do not fit into the given dtype")) + end + end + + return convert(Array{dtype}, array) +end + +# Abstract encoding type +abstract type Encoding end + +# ByteArrayEncoding +mutable struct ByteArrayEncoding <: Encoding + type::Union{TypeCode, Nothing} + + function ByteArrayEncoding(type=nothing) + if type !== nothing + type = type isa TypeCode ? type : TYPE_TO_TYPE_CODE[type] + end + new(type) + end +end + +function encode(enc::ByteArrayEncoding, data) + if enc.type === nothing + enc.type = TYPE_TO_TYPE_CODE[eltype(data)] + end + return reinterpret(UInt8, safe_cast(data, TYPE_CODE_TO_TYPE[enc.type])) +end + +function decode(enc::ByteArrayEncoding, data) + return reinterpret(TYPE_CODE_TO_TYPE[enc.type], data) +end + +# FixedPointEncoding +mutable struct FixedPointEncoding <: Encoding + factor::Float64 + srcType::TypeCode + + function FixedPointEncoding(factor; srcType=FLOAT32) + srcType = srcType isa TypeCode ? srcType : TYPE_TO_TYPE_CODE[srcType] + if !(srcType in (FLOAT32, FLOAT64)) + throw(ArgumentError("Only floating point types are supported")) + end + new(factor, srcType) + end +end + +function encode(enc::FixedPointEncoding, data) + return round.(Int32, data .* enc.factor) +end + +function decode(enc::FixedPointEncoding, data) + return convert(Array{TYPE_CODE_TO_TYPE[enc.srcType]}, data ./ enc.factor) +end + +# IntervalQuantizationEncoding +mutable struct IntervalQuantizationEncoding <: Encoding + min::Float64 + max::Float64 + numSteps::Int + srcType::TypeCode + + function IntervalQuantizationEncoding(min, max, numSteps; srcType=FLOAT32) + srcType = srcType isa TypeCode ? srcType : TYPE_TO_TYPE_CODE[srcType] + new(min, max, numSteps, srcType) + end +end + +function encode(enc::IntervalQuantizationEncoding, data) + # Convert to normalized values between 0 and numSteps-1 + normalized = (data .- enc.min) ./ (enc.max - enc.min) .* (enc.numSteps - 1) + # Clamp to valid range and convert to integers + indices = clamp.(round.(Int32, normalized), 0, enc.numSteps - 1) + return indices +end + +function decode(enc::IntervalQuantizationEncoding, data) + # Convert indices back to values in the original range + normalized = data ./ Float64(enc.numSteps - 1) + output = normalized .* (enc.max - enc.min) .+ enc.min + return convert(Array{TYPE_CODE_TO_TYPE[enc.srcType]}, output) +end + +# RunLengthEncoding +mutable struct RunLengthEncoding <: Encoding + srcSize::Union{Int, Nothing} + srcType::Union{TypeCode, Nothing} + + function RunLengthEncoding(; srcSize=nothing, srcType=nothing) + if srcType !== nothing + srcType = srcType isa TypeCode ? srcType : TYPE_TO_TYPE_CODE[srcType] + end + new(srcSize, srcType) + end +end + +function encode(enc::RunLengthEncoding, data) + if enc.srcType === nothing + enc.srcType = TYPE_TO_TYPE_CODE[eltype(data)] + end + if enc.srcSize === nothing + enc.srcSize = length(data) + elseif enc.srcSize != length(data) + throw(ArgumentError("Given source size does not match actual data size")) + end + + # Pessimistic allocation - worst case is run length of 1 for every element + output = zeros(Int32, length(data) * 2) + j = 1 + val = data[1] + run_length = 0 + + for i in 1:length(data) + curr_val = data[i] + if curr_val == val + run_length += 1 + else + # New element -> Write element with run-length + output[j] = val + output[j+1] = run_length + j += 2 + val = curr_val + run_length = 1 + end + end + + # Write last element + output[j] = val + output[j+1] = run_length + j += 2 + + # Trim to correct size + return output[1:j-1] +end + +function decode(enc::RunLengthEncoding, data) + if length(data) % 2 != 0 + throw(ArgumentError("Invalid run-length encoded data")) + end + + length_output = 0 + if enc.srcSize === nothing + # Determine length of output array by summing run lengths + for i in 2:2:length(data) + length_output += data[i] + end + else + length_output = enc.srcSize + end + + output = zeros(TYPE_CODE_TO_TYPE[enc.srcType], length_output) + j = 1 + + for i in 1:2:length(data) + value = data[i] + repeat_count = data[i+1] + output[j:j+repeat_count-1] .= value + j += repeat_count + end + + return output +end + +# DeltaEncoding +mutable struct DeltaEncoding <: Encoding + srcType::Union{TypeCode, Nothing} + origin::Int + + function DeltaEncoding(; srcType=nothing, origin=0) + if srcType !== nothing + srcType = srcType isa TypeCode ? srcType : TYPE_TO_TYPE_CODE[srcType] + end + new(srcType, origin) + end +end + +function encode(enc::DeltaEncoding, data) + if enc.srcType === nothing + enc.srcType = TYPE_TO_TYPE_CODE[eltype(data)] + end + + data = data .- enc.origin + diffs = vcat([0], diff(data)) + return convert(Array{Int32}, diffs) +end + +function decode(enc::DeltaEncoding, data) + output = cumsum(data) + output = convert(Array{TYPE_CODE_TO_TYPE[enc.srcType]}, output) + output .+= enc.origin + return output +end + +# IntegerPackingEncoding +mutable struct IntegerPackingEncoding <: Encoding + byteCount::Int + srcSize::Union{Int, Nothing} + isUnsigned::Bool + + function IntegerPackingEncoding(byteCount; srcSize=nothing, isUnsigned=false) + new(byteCount, srcSize, isUnsigned) + end +end + +function determine_packed_dtype(enc::IntegerPackingEncoding) + if enc.byteCount == 1 + return enc.isUnsigned ? UInt8 : Int8 + elseif enc.byteCount == 2 + return enc.isUnsigned ? UInt16 : Int16 + else + throw(ArgumentError("Unsupported byte count")) + end +end + +function encode(enc::IntegerPackingEncoding, data) + if enc.srcSize === nothing + enc.srcSize = length(data) + elseif enc.srcSize != length(data) + throw(ArgumentError("Given source size does not match actual data size")) + end + + data = convert(Array{Int32}, data) + packed_type = determine_packed_dtype(enc) + min_val = typemin(packed_type) + max_val = typemax(packed_type) + + # Get length of output array by summing up required length of each element + length_output = 0 + for num in data + if num < 0 + if min_val == 0 + throw(ArgumentError("Cannot pack negative numbers into unsigned type")) + end + # Required packed length is number of times min_val needs to be repeated + 1 + length_output += div(num, min_val) + 1 + elseif num > 0 + length_output += div(num, max_val) + 1 + else + # num = 0 + length_output += 1 + end + end + + # Fill output + output = zeros(packed_type, length_output) + j = 1 + + for i in 1:length(data) + remainder = data[i] + if remainder < 0 + if min_val == 0 + throw(ArgumentError("Cannot pack negative numbers into unsigned type")) + end + while remainder <= min_val + remainder -= min_val + output[j] = min_val + j += 1 + end + elseif remainder > 0 + while remainder >= max_val + remainder -= max_val + output[j] = max_val + j += 1 + end + end + output[j] = remainder + j += 1 + end + + return output +end + +function decode(enc::IntegerPackingEncoding, data) + packed_type = determine_packed_dtype(enc) + min_val = typemin(packed_type) + max_val = typemax(packed_type) + + # For unsigned integers, do not check lower bound (is always 0) + # -> Set lower bound to value that is never reached + if min_val == 0 + min_val = -1 + end + + output = zeros(Int32, enc.srcSize) + j = 1 + unpacked_val = 0 + + for i in 1:length(data) + packed_val = data[i] + if packed_val == max_val || packed_val == min_val + unpacked_val += packed_val + else + unpacked_val += packed_val + output[j] = unpacked_val + unpacked_val = 0 + j += 1 + end + end + + return output +end + +# StringArrayEncoding +mutable struct StringArrayEncoding <: Encoding + stringData::String + dataEncoding::Vector{Encoding} + offsetEncoding::Vector{Encoding} + offsets::Vector{UInt8} + + function StringArrayEncoding(; stringData=nothing, dataEncoding=nothing, offsetEncoding=nothing, offsets=nothing) + if dataEncoding === nothing + dataEncoding = [ByteArrayEncoding(INT32)] + end + if offsetEncoding === nothing + offsetEncoding = [ByteArrayEncoding(INT32)] + end + new(stringData, dataEncoding, offsetEncoding, offsets) + end +end + +function encode(enc::StringArrayEncoding, data) + if !(eltype(data) <: AbstractString) + throw(ArgumentError("Data must be of string type")) + end + + if enc.stringData === nothing + # Get unique stringData + enc.stringData = unique(data) + check_present = false + else + check_present = true + end + + # Sort stringData for binary search + sorted_indices = sortperm(enc.stringData) + sorted_strings = enc.stringData[sorted_indices] + + # Find indices of each string in data + indices = zeros(Int32, length(data)) + for i in 1:length(data) + idx = searchsortedfirst(sorted_strings, data[i]) + if idx <= length(sorted_strings) && sorted_strings[idx] == data[i] + indices[i] = sorted_indices[idx] + else + if check_present + throw(ArgumentError("Data contains stringData not present in 'stringData'")) + end + end + end + + # Apply encodings + encoded_data = indices + for encoding in enc.dataEncoding + encoded_data = encode(encoding, encoded_data) + end + + return encoded_data +end + +function decode(enc::StringArrayEncoding, data) + # Apply decodings in reverse order + indices = decode_stepwise(data, enc.dataEncoding) .+ 1 + offsets = decode_stepwise(enc.offsets, enc.offsetEncoding) + + substrings = Vector{String}() + + # break up the string into the substrings that are individual occurrences + for (i, offset) in enumerate(offsets[1:end-1]) + start_i = offsets[i] + 1 + end_i = offsets[i+1] + push!(substrings, String(enc.stringData[start_i:end_i])) + end + + return substrings[indices] +end + +# Utility functions for encoding/decoding +function encode_stepwise(data, encodings) + for encoding in encodings + data = encode(encoding, data) + end + return data +end + +function decode_stepwise(data, encodings) + for encoding in reverse(encodings) + data = decode(encoding, data) + end + return data +end + +function create_uncompressed_encoding(array) + if eltype(array) <: AbstractString + return [StringArrayEncoding()] + else + return [ByteArrayEncoding()] + end +end + + +function deserialize_encoding(content::Any) + if isa(content, Vector) + return [deserialize_encoding(item) for item in content] + end + + if isa(content, Encoding) + return content + end + kind = content["kind"] + + # if byte convert to integer + for (key, value) in content + content[key] = value isa UInt8 ? Int32(value) : value + end + params = content + + # Handle nested encodings + if haskey(params, "data_encoding") + params["data_encoding"] = deserialize_encoding(params["data_encoding"]) + end + + if haskey(params, "offsetEncoding") + params["offsetEncoding"] = deserialize_encoding(params["offsetEncoding"]) + end + + encoding_constructors = Dict( + "ByteArray" => () -> ByteArrayEncoding(INT_TO_TYPE[get(params, "type", nothing)]), + "FixedPoint" => () -> FixedPointEncoding(params["factor"]; srcType=INT_TO_TYPE[get(params, "srcType", FLOAT32)]), + "StringArray" => () -> StringArrayEncoding( + stringData=get(params, "stringData", nothing), + dataEncoding=get(params, "dataEncoding", nothing), + offsetEncoding=get(params, "offsetEncoding", nothing), + offsets=get(params, "offsets", nothing) + ), + "IntervalQuantization" => () -> IntervalQuantizationEncoding(params["min"], params["max"], params["numSteps"]; + srcType=INT_TO_TYPE[get(params, "srcType", 32)]), + "RunLength" => () -> RunLengthEncoding(srcSize=get(params, "srcSize", nothing), + srcType=INT_TO_TYPE[get(params, "srcType", nothing)]), + "Delta" => () -> DeltaEncoding(srcType=INT_TO_TYPE[get(params, "srcType", nothing)], + origin=get(params, "origin", 0)), + "IntegerPacking" => () -> IntegerPackingEncoding(params["byteCount"], + srcSize=get(params, "srcSize", nothing), + isUnsigned=get(params, "isUnsigned", false)) + ) + + if haskey(encoding_constructors, kind) + return encoding_constructors[kind]() + else + error("Unknown encoding kind: $kind") + end +end + + +function decode_column(column::Dict) + data = column["data"] + encodings = [] + + # Handle the encoding array properly + for enc in data["encoding"] + if haskey(enc, "dataEncoding") + if haskey(enc, "offsetEncoding") + push!(encodings, StringArrayEncoding( + stringData=enc["stringData"], + dataEncoding=deserialize_encoding(enc["dataEncoding"]), + offsetEncoding=deserialize_encoding(enc["offsetEncoding"]), + offsets=enc["offsets"] + )) + else + push!(encodings, deserialize_encoding(enc["dataEncoding"])) + end + else + push!(encodings, deserialize_encoding(enc)) + end + end + + # Flatten the encodings if needed + flat_encodings = [] + for enc in encodings + if enc isa Vector + append!(flat_encodings, enc) + else + push!(flat_encodings, enc) + end + end + + + # return flat_encodings + + decoded = decode_stepwise(data["data"], flat_encodings) + + +end From 8bdb53f0a91e29b7218b0d51ec55862a034ffb6a Mon Sep 17 00:00:00 2001 From: Brady Johnston Date: Wed, 7 May 2025 18:55:09 +0800 Subject: [PATCH 02/23] turn into `read()` function --- src/bcif.jl | 28 +++++++++++++++++++--------- src/model.jl | 4 ++++ 2 files changed, 23 insertions(+), 9 deletions(-) diff --git a/src/bcif.jl b/src/bcif.jl index 1f49c0ba..65f9e887 100644 --- a/src/bcif.jl +++ b/src/bcif.jl @@ -11,21 +11,31 @@ using MsgPack # Returns a Dict with attribute names as keys and their byte arrays as values. # """ -function read_binary_cif_attributes(filename::String) - dictionary = MsgPack.unpack(read(filename)) - blocks = dictionary["dataBlocks"] - categories = blocks[1]["categories"] +function Base.read(input::IO, + ::Type{BCIFFormat}, + structure_name::AbstractString="", + remove_disorder::Bool=false, + read_std_atoms::Bool=true, + read_het_atoms::Bool=true, + run_dssp::Bool=false, + run_stride::Bool=false) + + file = MsgPack.unpack(read(input)) + + # currently just looking for the first data block + categories = file["dataBlocks"][1]["categories"] atom_site = categories[findall(getindex.(categories, "name") .== "_atom_site")] columns = atom_site[1]["columns"] - new_dict = Dict{String, Any}() - # attributes = decode_column.(columns) + + attributes = Dict{String, Any}() for column in columns - new_dict[column["name"]] = decode_column(column) + attributes[column["name"]] = decode_column(column) end - # populatedict!(mmcif_dict, attributes) - return new_dict + return attributes end + + # Enum for type codes @enum TypeCode begin INT8 = 1 diff --git a/src/model.jl b/src/model.jl index b03c4ff1..4e01ce00 100644 --- a/src/model.jl +++ b/src/model.jl @@ -69,6 +69,7 @@ export threeletter_to_aa, PDBFormat, PDBXMLFormat, + BCIFFormat, MMCIFFormat, MMTFFormat, pdbextension, @@ -1715,6 +1716,9 @@ struct PDBXMLFormat end "Protein Data Bank (PDB) mmCIF file format." struct MMCIFFormat end +"Protein Data Bank (PDB) Binary mmCIF file format." +struct BCIFFormat end + "Protein Data Bank (PDB) MMTF file format." struct MMTFFormat end From a7defb2af605e2d2ff61653a4c588168a65e0233 Mon Sep 17 00:00:00 2001 From: Brady Johnston Date: Wed, 7 May 2025 19:09:56 +0800 Subject: [PATCH 03/23] cleanup --- src/bcif.jl | 32 ++++---------------------------- 1 file changed, 4 insertions(+), 28 deletions(-) diff --git a/src/bcif.jl b/src/bcif.jl index 65f9e887..5e64525d 100644 --- a/src/bcif.jl +++ b/src/bcif.jl @@ -1,16 +1,13 @@ export read_binary_cif_attributes, decode_column using LinearAlgebra -using MsgPack +import MsgPack """ -# read_binary_cif_attributes(filename::String) + read(input::IO, ::Type{BCIFFormat}, structure_name::AbstractString="", remove_disorder::Bool=false, read_std_atoms::Bool=true, read_het_atoms::Bool=true, run_dssp::Bool=false, run_stride::Bool=false) -# A simple function to read a binary CIF file from MolStar and extract -# the list of attributes and their compressed bytes. - -# Returns a Dict with attribute names as keys and their byte arrays as values. -# """ +A function to read a binary CIF file from MolStar and extract the list of attributes and their compressed bytes. +""" function Base.read(input::IO, ::Type{BCIFFormat}, structure_name::AbstractString="", @@ -34,8 +31,6 @@ function Base.read(input::IO, return attributes end - - # Enum for type codes @enum TypeCode begin INT8 = 1 @@ -75,16 +70,6 @@ const INT_TO_TYPE = Dict( ) -# Helper functions for camel/snake case conversion -function camel_to_snake_case(name) - return lowercase(replace(name, r"(? "_")) -end - -function snake_to_camel_case(name) - parts = split(name, "_") - return lowercase(parts[1]) * join(uppercase.(first.(parts[2:end])) .* parts[2:end][2:end], "") -end - # Safe casting function function safe_cast(array, dtype) if eltype(array) == dtype @@ -489,15 +474,6 @@ function decode_stepwise(data, encodings) return data end -function create_uncompressed_encoding(array) - if eltype(array) <: AbstractString - return [StringArrayEncoding()] - else - return [ByteArrayEncoding()] - end -end - - function deserialize_encoding(content::Any) if isa(content, Vector) return [deserialize_encoding(item) for item in content] From 7367a760b41cb5d42af62e2d87e0611aa3a406d7 Mon Sep 17 00:00:00 2001 From: Brady Johnston Date: Wed, 7 May 2025 19:18:49 +0800 Subject: [PATCH 04/23] cleanup --- src/bcif.jl | 13 +++++++++++-- 1 file changed, 11 insertions(+), 2 deletions(-) diff --git a/src/bcif.jl b/src/bcif.jl index 5e64525d..9fd8b542 100644 --- a/src/bcif.jl +++ b/src/bcif.jl @@ -21,16 +21,25 @@ function Base.read(input::IO, # currently just looking for the first data block categories = file["dataBlocks"][1]["categories"] - atom_site = categories[findall(getindex.(categories, "name") .== "_atom_site")] + atom_site = get_category(categories, "_atom_site") columns = atom_site[1]["columns"] - attributes = Dict{String, Any}() + attributes = Dict{String, Vector{Any}}() for column in columns attributes[column["name"]] = decode_column(column) end return attributes end +function get_category(cats::Vector{Any}, name::String) + idx = findall(getindex.(cats, "name") .== name) + + if isnothing(idx) throw(ArgumentError("Category $name not found")) end + if length(idx) > 1 throw(ArgumentError("Multiple categories with name $name found")) end + + return cats[idx] +end + # Enum for type codes @enum TypeCode begin INT8 = 1 From c0911d7f284934ad40631841514164cd83d9081c Mon Sep 17 00:00:00 2001 From: Brady Johnston Date: Wed, 7 May 2025 19:22:08 +0800 Subject: [PATCH 05/23] run JuliaFormatter --- src/bcif.jl | 160 +++++++++++++++++++++++++++------------------------- 1 file changed, 82 insertions(+), 78 deletions(-) diff --git a/src/bcif.jl b/src/bcif.jl index 9fd8b542..3124f680 100644 --- a/src/bcif.jl +++ b/src/bcif.jl @@ -9,22 +9,22 @@ import MsgPack A function to read a binary CIF file from MolStar and extract the list of attributes and their compressed bytes. """ function Base.read(input::IO, - ::Type{BCIFFormat}, - structure_name::AbstractString="", - remove_disorder::Bool=false, - read_std_atoms::Bool=true, - read_het_atoms::Bool=true, - run_dssp::Bool=false, - run_stride::Bool=false) - + ::Type{BCIFFormat}, + structure_name::AbstractString="", + remove_disorder::Bool=false, + read_std_atoms::Bool=true, + read_het_atoms::Bool=true, + run_dssp::Bool=false, + run_stride::Bool=false) + file = MsgPack.unpack(read(input)) - + # currently just looking for the first data block categories = file["dataBlocks"][1]["categories"] atom_site = get_category(categories, "_atom_site") columns = atom_site[1]["columns"] - - attributes = Dict{String, Vector{Any}}() + + attributes = Dict{String,Vector{Any}}() for column in columns attributes[column["name"]] = decode_column(column) end @@ -33,11 +33,15 @@ end function get_category(cats::Vector{Any}, name::String) idx = findall(getindex.(cats, "name") .== name) - - if isnothing(idx) throw(ArgumentError("Category $name not found")) end - if length(idx) > 1 throw(ArgumentError("Multiple categories with name $name found")) end - return cats[idx] + if isnothing(idx) + throw(ArgumentError("Category $name not found")) + end + if length(idx) > 1 + throw(ArgumentError("Multiple categories with name $name found")) + end + + return cats[idx] end # Enum for type codes @@ -75,7 +79,7 @@ const INT_TO_TYPE = Dict( 5 => UInt16, 6 => UInt32, 32 => Float32, - 33 => Float64 + 33 => Float64 ) @@ -84,18 +88,18 @@ function safe_cast(array, dtype) if eltype(array) == dtype return array end - + if dtype <: Integer && !(eltype(array) <: Integer) throw(ArgumentError("Cannot cast floating point to integer")) end - + if dtype <: Integer type_min, type_max = typemin(dtype), typemax(dtype) if any(x -> x < type_min || x > type_max, array) throw(ArgumentError("Integer values do not fit into the given dtype")) end end - + return convert(Array{dtype}, array) end @@ -104,8 +108,8 @@ abstract type Encoding end # ByteArrayEncoding mutable struct ByteArrayEncoding <: Encoding - type::Union{TypeCode, Nothing} - + type::Union{TypeCode,Nothing} + function ByteArrayEncoding(type=nothing) if type !== nothing type = type isa TypeCode ? type : TYPE_TO_TYPE_CODE[type] @@ -129,7 +133,7 @@ end mutable struct FixedPointEncoding <: Encoding factor::Float64 srcType::TypeCode - + function FixedPointEncoding(factor; srcType=FLOAT32) srcType = srcType isa TypeCode ? srcType : TYPE_TO_TYPE_CODE[srcType] if !(srcType in (FLOAT32, FLOAT64)) @@ -153,7 +157,7 @@ mutable struct IntervalQuantizationEncoding <: Encoding max::Float64 numSteps::Int srcType::TypeCode - + function IntervalQuantizationEncoding(min, max, numSteps; srcType=FLOAT32) srcType = srcType isa TypeCode ? srcType : TYPE_TO_TYPE_CODE[srcType] new(min, max, numSteps, srcType) @@ -177,9 +181,9 @@ end # RunLengthEncoding mutable struct RunLengthEncoding <: Encoding - srcSize::Union{Int, Nothing} - srcType::Union{TypeCode, Nothing} - + srcSize::Union{Int,Nothing} + srcType::Union{TypeCode,Nothing} + function RunLengthEncoding(; srcSize=nothing, srcType=nothing) if srcType !== nothing srcType = srcType isa TypeCode ? srcType : TYPE_TO_TYPE_CODE[srcType] @@ -197,13 +201,13 @@ function encode(enc::RunLengthEncoding, data) elseif enc.srcSize != length(data) throw(ArgumentError("Given source size does not match actual data size")) end - + # Pessimistic allocation - worst case is run length of 1 for every element output = zeros(Int32, length(data) * 2) j = 1 val = data[1] run_length = 0 - + for i in 1:length(data) curr_val = data[i] if curr_val == val @@ -217,12 +221,12 @@ function encode(enc::RunLengthEncoding, data) run_length = 1 end end - + # Write last element output[j] = val output[j+1] = run_length j += 2 - + # Trim to correct size return output[1:j-1] end @@ -231,7 +235,7 @@ function decode(enc::RunLengthEncoding, data) if length(data) % 2 != 0 throw(ArgumentError("Invalid run-length encoded data")) end - + length_output = 0 if enc.srcSize === nothing # Determine length of output array by summing run lengths @@ -241,25 +245,25 @@ function decode(enc::RunLengthEncoding, data) else length_output = enc.srcSize end - + output = zeros(TYPE_CODE_TO_TYPE[enc.srcType], length_output) j = 1 - + for i in 1:2:length(data) value = data[i] repeat_count = data[i+1] output[j:j+repeat_count-1] .= value j += repeat_count end - + return output end # DeltaEncoding mutable struct DeltaEncoding <: Encoding - srcType::Union{TypeCode, Nothing} + srcType::Union{TypeCode,Nothing} origin::Int - + function DeltaEncoding(; srcType=nothing, origin=0) if srcType !== nothing srcType = srcType isa TypeCode ? srcType : TYPE_TO_TYPE_CODE[srcType] @@ -272,7 +276,7 @@ function encode(enc::DeltaEncoding, data) if enc.srcType === nothing enc.srcType = TYPE_TO_TYPE_CODE[eltype(data)] end - + data = data .- enc.origin diffs = vcat([0], diff(data)) return convert(Array{Int32}, diffs) @@ -288,9 +292,9 @@ end # IntegerPackingEncoding mutable struct IntegerPackingEncoding <: Encoding byteCount::Int - srcSize::Union{Int, Nothing} + srcSize::Union{Int,Nothing} isUnsigned::Bool - + function IntegerPackingEncoding(byteCount; srcSize=nothing, isUnsigned=false) new(byteCount, srcSize, isUnsigned) end @@ -312,12 +316,12 @@ function encode(enc::IntegerPackingEncoding, data) elseif enc.srcSize != length(data) throw(ArgumentError("Given source size does not match actual data size")) end - + data = convert(Array{Int32}, data) packed_type = determine_packed_dtype(enc) min_val = typemin(packed_type) max_val = typemax(packed_type) - + # Get length of output array by summing up required length of each element length_output = 0 for num in data @@ -334,11 +338,11 @@ function encode(enc::IntegerPackingEncoding, data) length_output += 1 end end - + # Fill output output = zeros(packed_type, length_output) j = 1 - + for i in 1:length(data) remainder = data[i] if remainder < 0 @@ -360,7 +364,7 @@ function encode(enc::IntegerPackingEncoding, data) output[j] = remainder j += 1 end - + return output end @@ -368,17 +372,17 @@ function decode(enc::IntegerPackingEncoding, data) packed_type = determine_packed_dtype(enc) min_val = typemin(packed_type) max_val = typemax(packed_type) - + # For unsigned integers, do not check lower bound (is always 0) # -> Set lower bound to value that is never reached if min_val == 0 min_val = -1 end - + output = zeros(Int32, enc.srcSize) j = 1 unpacked_val = 0 - + for i in 1:length(data) packed_val = data[i] if packed_val == max_val || packed_val == min_val @@ -390,7 +394,7 @@ function decode(enc::IntegerPackingEncoding, data) j += 1 end end - + return output end @@ -400,7 +404,7 @@ mutable struct StringArrayEncoding <: Encoding dataEncoding::Vector{Encoding} offsetEncoding::Vector{Encoding} offsets::Vector{UInt8} - + function StringArrayEncoding(; stringData=nothing, dataEncoding=nothing, offsetEncoding=nothing, offsets=nothing) if dataEncoding === nothing dataEncoding = [ByteArrayEncoding(INT32)] @@ -416,7 +420,7 @@ function encode(enc::StringArrayEncoding, data) if !(eltype(data) <: AbstractString) throw(ArgumentError("Data must be of string type")) end - + if enc.stringData === nothing # Get unique stringData enc.stringData = unique(data) @@ -424,11 +428,11 @@ function encode(enc::StringArrayEncoding, data) else check_present = true end - + # Sort stringData for binary search sorted_indices = sortperm(enc.stringData) sorted_strings = enc.stringData[sorted_indices] - + # Find indices of each string in data indices = zeros(Int32, length(data)) for i in 1:length(data) @@ -441,13 +445,13 @@ function encode(enc::StringArrayEncoding, data) end end end - + # Apply encodings encoded_data = indices for encoding in enc.dataEncoding encoded_data = encode(encoding, encoded_data) end - + return encoded_data end @@ -455,7 +459,7 @@ function decode(enc::StringArrayEncoding, data) # Apply decodings in reverse order indices = decode_stepwise(data, enc.dataEncoding) .+ 1 offsets = decode_stepwise(enc.offsets, enc.offsetEncoding) - + substrings = Vector{String}() # break up the string into the substrings that are individual occurrences @@ -487,47 +491,47 @@ function deserialize_encoding(content::Any) if isa(content, Vector) return [deserialize_encoding(item) for item in content] end - + if isa(content, Encoding) return content end kind = content["kind"] - + # if byte convert to integer for (key, value) in content content[key] = value isa UInt8 ? Int32(value) : value end params = content - + # Handle nested encodings if haskey(params, "data_encoding") params["data_encoding"] = deserialize_encoding(params["data_encoding"]) end - + if haskey(params, "offsetEncoding") params["offsetEncoding"] = deserialize_encoding(params["offsetEncoding"]) end - + encoding_constructors = Dict( "ByteArray" => () -> ByteArrayEncoding(INT_TO_TYPE[get(params, "type", nothing)]), "FixedPoint" => () -> FixedPointEncoding(params["factor"]; srcType=INT_TO_TYPE[get(params, "srcType", FLOAT32)]), "StringArray" => () -> StringArrayEncoding( - stringData=get(params, "stringData", nothing), + stringData=get(params, "stringData", nothing), dataEncoding=get(params, "dataEncoding", nothing), offsetEncoding=get(params, "offsetEncoding", nothing), offsets=get(params, "offsets", nothing) - ), - "IntervalQuantization" => () -> IntervalQuantizationEncoding(params["min"], params["max"], params["numSteps"]; - srcType=INT_TO_TYPE[get(params, "srcType", 32)]), - "RunLength" => () -> RunLengthEncoding(srcSize=get(params, "srcSize", nothing), - srcType=INT_TO_TYPE[get(params, "srcType", nothing)]), - "Delta" => () -> DeltaEncoding(srcType=INT_TO_TYPE[get(params, "srcType", nothing)], - origin=get(params, "origin", 0)), - "IntegerPacking" => () -> IntegerPackingEncoding(params["byteCount"], - srcSize=get(params, "srcSize", nothing), - isUnsigned=get(params, "isUnsigned", false)) + ), + "IntervalQuantization" => () -> IntervalQuantizationEncoding(params["min"], params["max"], params["numSteps"]; + srcType=INT_TO_TYPE[get(params, "srcType", 32)]), + "RunLength" => () -> RunLengthEncoding(srcSize=get(params, "srcSize", nothing), + srcType=INT_TO_TYPE[get(params, "srcType", nothing)]), + "Delta" => () -> DeltaEncoding(srcType=INT_TO_TYPE[get(params, "srcType", nothing)], + origin=get(params, "origin", 0)), + "IntegerPacking" => () -> IntegerPackingEncoding(params["byteCount"], + srcSize=get(params, "srcSize", nothing), + isUnsigned=get(params, "isUnsigned", false)) ) - + if haskey(encoding_constructors, kind) return encoding_constructors[kind]() else @@ -539,10 +543,10 @@ end function decode_column(column::Dict) data = column["data"] encodings = [] - + # Handle the encoding array properly for enc in data["encoding"] - if haskey(enc, "dataEncoding") + if haskey(enc, "dataEncoding") if haskey(enc, "offsetEncoding") push!(encodings, StringArrayEncoding( stringData=enc["stringData"], @@ -557,7 +561,7 @@ function decode_column(column::Dict) push!(encodings, deserialize_encoding(enc)) end end - + # Flatten the encodings if needed flat_encodings = [] for enc in encodings @@ -567,11 +571,11 @@ function decode_column(column::Dict) push!(flat_encodings, enc) end end - + # return flat_encodings - decoded = decode_stepwise(data["data"], flat_encodings) + decoded = decode_stepwise(data["data"], flat_encodings) + - end From 0886b58b2b667e9c2320d06d6a413b11c661473a Mon Sep 17 00:00:00 2001 From: Brady Johnston Date: Wed, 7 May 2025 23:07:26 +0800 Subject: [PATCH 06/23] cleanup and multithread column decoding --- src/bcif.jl | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) diff --git a/src/bcif.jl b/src/bcif.jl index 3124f680..4d552b2d 100644 --- a/src/bcif.jl +++ b/src/bcif.jl @@ -1,4 +1,4 @@ -export read_binary_cif_attributes, decode_column +export decode_column using LinearAlgebra import MsgPack @@ -23,12 +23,10 @@ function Base.read(input::IO, categories = file["dataBlocks"][1]["categories"] atom_site = get_category(categories, "_atom_site") columns = atom_site[1]["columns"] - - attributes = Dict{String,Vector{Any}}() - for column in columns - attributes[column["name"]] = decode_column(column) + tasks = map(columns) do column + Threads.@spawn decode_column(column) end - return attributes + return Dict(columns[i]["name"] => result for (i, result) in enumerate(fetch.(tasks))) end function get_category(cats::Vector{Any}, name::String) From c427d66893a787fe6482d4faf515d63f969d2a36 Mon Sep 17 00:00:00 2001 From: Brady Johnston Date: Thu, 8 May 2025 09:34:31 +0800 Subject: [PATCH 07/23] Return as MolecularStructure --- src/bcif.jl | 51 ++++++++++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 50 insertions(+), 1 deletion(-) diff --git a/src/bcif.jl b/src/bcif.jl index 4d552b2d..1bf2c30e 100644 --- a/src/bcif.jl +++ b/src/bcif.jl @@ -26,9 +26,58 @@ function Base.read(input::IO, tasks = map(columns) do column Threads.@spawn decode_column(column) end - return Dict(columns[i]["name"] => result for (i, result) in enumerate(fetch.(tasks))) + + bcif_dict = BCIFDict(Dict(columns[i]["name"] => result for (i, result) in enumerate(fetch.(tasks)))) + # return bcif_dict + struc = MolecularStructure(structure_name) + struc[1] = Model(1, struc) + for i in 1:length(bcif_dict["id"]) + unsafe_addatomtomodel!(struc[1], AtomRecord(bcif_dict, i)) + end + fixlists!(struc) + return struc +end + +BCIFArrayTypes = Union{Vector{String},Vector{Int32},Vector{Float64}} + +struct BCIFDict <: AbstractDict{String,BCIFArrayTypes} + dict::Dict{String,BCIFArrayTypes} end +function BCIFDict(dict::Dict{String,BCIFArrayTypes}) + new(dict) +end + +Base.keys(mmcif_dict::BCIFDict) = keys(mmcif_dict.dict) +Base.values(mmcif_dict::BCIFDict) = values(mmcif_dict.dict) +Base.haskey(mmcif_dict::BCIFDict, key) = haskey(mmcif_dict.dict, key) +Base.get(mmcif_dict::BCIFDict, key, default) = get(mmcif_dict.dict, key, default) +Base.length(mmcif_dict::BCIFDict) = length(mmcif_dict.dict) +Base.iterate(mmcif_dict::BCIFDict) = iterate(mmcif_dict.dict) +Base.iterate(mmcif_dict::BCIFDict, i) = iterate(mmcif_dict.dict, i) + +AtomRecord = AtomRecord(d::BCIFDict, i::Int) = AtomRecord( + d["group_PDB"][i] == "HETATM", + d["id"][i], + d["auth_atom_id"][i], + 'A',# d["label_alt_id"][i], + d["auth_comp_id"][i], + d["auth_asym_id"][i], + d["auth_seq_id"][i], + 'A', # d["pdbx_PDB_ins_code"][i], + [ + d["Cartn_x"][i], + d["Cartn_y"][i], + d["Cartn_z"][i] + ], + d["occupancy"][i], + d["B_iso_or_equiv"][i], + d["type_symbol"][i], + d["pdbx_formal_charge"][i], +) + + + function get_category(cats::Vector{Any}, name::String) idx = findall(getindex.(cats, "name") .== name) From 070fd365b8c829d4492c0cb41ca72a958e1ea00c Mon Sep 17 00:00:00 2001 From: Brady Johnston Date: Thu, 8 May 2025 14:48:40 +0800 Subject: [PATCH 08/23] entire datablock to dict --- src/bcif.jl | 14 +++++++++++++- 1 file changed, 13 insertions(+), 1 deletion(-) diff --git a/src/bcif.jl b/src/bcif.jl index 1bf2c30e..70493e0b 100644 --- a/src/bcif.jl +++ b/src/bcif.jl @@ -1,4 +1,4 @@ -export decode_column +export decode_column, columns_to_dict, datablock_to_dict using LinearAlgebra import MsgPack @@ -38,6 +38,18 @@ function Base.read(input::IO, return struc end +function datablock_to_dict(datablock::Dict) + categories = datablock["categories"] + return reduce(merge, [Dict(category["name"] => columns_to_dict(category["columns"])) for category in categories]) +end + +function columns_to_dict(columns::Vector{Any}) + tasks = map(columns) do column + Threads.@spawn Dict(column["name"] => decode_column(column)) + end + return reduce(merge, fetch.(tasks)) +end + BCIFArrayTypes = Union{Vector{String},Vector{Int32},Vector{Float64}} struct BCIFDict <: AbstractDict{String,BCIFArrayTypes} From c1ee98430b156e5bf5b3969c6d125c2d5ea77de8 Mon Sep 17 00:00:00 2001 From: Brady Johnston Date: Fri, 9 May 2025 11:53:16 +0800 Subject: [PATCH 09/23] cleanup --- src/bcif.jl | 53 ++++++++++++++++++++++++++--------------------------- 1 file changed, 26 insertions(+), 27 deletions(-) diff --git a/src/bcif.jl b/src/bcif.jl index 70493e0b..83f0ca8d 100644 --- a/src/bcif.jl +++ b/src/bcif.jl @@ -8,6 +8,9 @@ import MsgPack A function to read a binary CIF file from MolStar and extract the list of attributes and their compressed bytes. """ + +# currently isn't implementing the dssp / stride. If using BCIF it seems strange to write +# out a .pdb, run dssp / string, then read it back in again. function Base.read(input::IO, ::Type{BCIFFormat}, structure_name::AbstractString="", @@ -18,22 +21,19 @@ function Base.read(input::IO, run_stride::Bool=false) file = MsgPack.unpack(read(input)) - - # currently just looking for the first data block categories = file["dataBlocks"][1]["categories"] atom_site = get_category(categories, "_atom_site") columns = atom_site[1]["columns"] - tasks = map(columns) do column - Threads.@spawn decode_column(column) - end - - bcif_dict = BCIFDict(Dict(columns[i]["name"] => result for (i, result) in enumerate(fetch.(tasks)))) - # return bcif_dict struc = MolecularStructure(structure_name) - struc[1] = Model(1, struc) - for i in 1:length(bcif_dict["id"]) - unsafe_addatomtomodel!(struc[1], AtomRecord(bcif_dict, i)) + + for (i, datablock) in enumerate(file["dataBlocks"]) + bcif_dict = BCIFDict(datablock_to_dict(datablock)["_atom_site"]) + struc[i] = Model(i, struc) + for i in 1:length(bcif_dict["id"]) + unsafe_addatomtomodel!(struc[1], AtomRecord(bcif_dict, i)) + end end + fixlists!(struc) return struc end @@ -68,15 +68,16 @@ Base.length(mmcif_dict::BCIFDict) = length(mmcif_dict.dict) Base.iterate(mmcif_dict::BCIFDict) = iterate(mmcif_dict.dict) Base.iterate(mmcif_dict::BCIFDict, i) = iterate(mmcif_dict.dict, i) + AtomRecord = AtomRecord(d::BCIFDict, i::Int) = AtomRecord( d["group_PDB"][i] == "HETATM", d["id"][i], d["auth_atom_id"][i], - 'A',# d["label_alt_id"][i], + d["label_atom_id"][i] == "" ? ' ' : d["label_atom_id"][i][1], d["auth_comp_id"][i], d["auth_asym_id"][i], d["auth_seq_id"][i], - 'A', # d["pdbx_PDB_ins_code"][i], + d["label_alt_id"][i] == "" ? ' ' : d["label_alt_id"][i][1], [ d["Cartn_x"][i], d["Cartn_y"][i], @@ -89,7 +90,6 @@ AtomRecord = AtomRecord(d::BCIFDict, i::Int) = AtomRecord( ) - function get_category(cats::Vector{Any}, name::String) idx = findall(getindex.(cats, "name") .== name) @@ -103,7 +103,7 @@ function get_category(cats::Vector{Any}, name::String) return cats[idx] end -# Enum for type codes +# Data types defined for the BCIF encoding by are indicated by integer values @enum TypeCode begin INT8 = 1 INT16 = 2 @@ -115,6 +115,17 @@ end FLOAT64 = 33 end +const INT_TO_TYPE = Dict( + 1 => Int8, + 2 => Int16, + 3 => Int32, + 4 => UInt8, + 5 => UInt16, + 6 => UInt32, + 32 => Float32, + 33 => Float64 +) + # Mapping from TypeCode to Julia types const TYPE_CODE_TO_TYPE = Dict( INT8 => Int8, @@ -130,18 +141,6 @@ const TYPE_CODE_TO_TYPE = Dict( # Mapping from Julia types to TypeCode const TYPE_TO_TYPE_CODE = Dict(value => key for (key, value) in TYPE_CODE_TO_TYPE) -const INT_TO_TYPE = Dict( - 1 => Int8, - 2 => Int16, - 3 => Int32, - 4 => UInt8, - 5 => UInt16, - 6 => UInt32, - 32 => Float32, - 33 => Float64 -) - - # Safe casting function function safe_cast(array, dtype) if eltype(array) == dtype From 9fb8b7b57d536464b251356a58fbfb1cb4fb0c6b Mon Sep 17 00:00:00 2001 From: Brady Johnston Date: Fri, 9 May 2025 12:26:09 +0800 Subject: [PATCH 10/23] cleanup --- src/bcif.jl | 201 ++++++++++++++++++++++++---------------------------- 1 file changed, 92 insertions(+), 109 deletions(-) diff --git a/src/bcif.jl b/src/bcif.jl index 83f0ca8d..cdb573f8 100644 --- a/src/bcif.jl +++ b/src/bcif.jl @@ -27,7 +27,17 @@ function Base.read(input::IO, struc = MolecularStructure(structure_name) for (i, datablock) in enumerate(file["dataBlocks"]) - bcif_dict = BCIFDict(datablock_to_dict(datablock)["_atom_site"]) + # could decode the whole file at once, or just decode the _atom_site category + # for efficiency which it is currently doing. Can be changed to get access to the + # rest of the file + + decode_all = true + if decode_all + bcif_dict = BCIFDict(datablock_to_dict(datablock)["_atom_site"]) + else + bcif_dict = BCIFDict(columns_to_dict(get_category(categories, "_atom_site"))) + end + struc[i] = Model(i, struc) for i in 1:length(bcif_dict["id"]) unsafe_addatomtomodel!(struc[1], AtomRecord(bcif_dict, i)) @@ -103,7 +113,88 @@ function get_category(cats::Vector{Any}, name::String) return cats[idx] end + +# Utility functions for encoding/decoding +function encode_stepwise(data, encodings) + for encoding in encodings + data = encode(encoding, data) + end + return data +end + +function decode_stepwise(data, encodings) + for encoding in reverse(encodings) + data = decode(encoding, data) + end + return data +end + +function deserialize_numeric_encoding(content::Any) + if isa(content, Vector) + return [deserialize_numeric_encoding(item) for item in content] + end + + if isa(content, Encoding) + return content + end + kind = content["kind"] + + # if byte convert to integer + for (key, value) in content + content[key] = value isa UInt8 ? Int32(value) : value + end + params = content + + encoding_constructors = Dict( + "ByteArray" => () -> ByteArrayEncoding(INT_TO_TYPE[get(params, "type", nothing)]), + "FixedPoint" => () -> FixedPointEncoding(params["factor"]; srcType=INT_TO_TYPE[get(params, "srcType", FLOAT32)]), + "IntervalQuantization" => () -> IntervalQuantizationEncoding(params["min"], params["max"], params["numSteps"]; + srcType=INT_TO_TYPE[get(params, "srcType", 32)]), + "RunLength" => () -> RunLengthEncoding(srcSize=get(params, "srcSize", nothing), + srcType=INT_TO_TYPE[get(params, "srcType", nothing)]), + "Delta" => () -> DeltaEncoding(srcType=INT_TO_TYPE[get(params, "srcType", nothing)], + origin=get(params, "origin", 0)), + "IntegerPacking" => () -> IntegerPackingEncoding(params["byteCount"], + srcSize=get(params, "srcSize", nothing), + isUnsigned=get(params, "isUnsigned", false)) + ) + + if haskey(encoding_constructors, kind) + return encoding_constructors[kind]() + else + error("Unknown encoding kind: $kind") + end +end + + +function decode_column(column::Dict) + column_data = column["data"] + encodings = [] + + # collect the encodings. If it's a string encoding then it should be a single encoding + # that contains it's own dataEncoding and offsetEncoding which also need to be handled + for enc in column_data["encoding"] + if enc["kind"] == "StringArray" + push!(encodings, StringArrayEncoding( + stringData=enc["stringData"], + dataEncoding=deserialize_numeric_encoding(enc["dataEncoding"]), + offsetEncoding=deserialize_numeric_encoding(enc["offsetEncoding"]), + offsets=enc["offsets"] + )) + else + push!(encodings, deserialize_numeric_encoding(enc)) + end + end + + return decode_stepwise(column_data["data"], encodings) +end + + +# Below are the encoding and decoding types for BCIF format + # Data types defined for the BCIF encoding by are indicated by integer values +# there are not well discussed in the official spec, had to ask about it excplicitly +# https://github.com/molstar/BinaryCIF/issues/4 @enum TypeCode begin INT8 = 1 INT16 = 2 @@ -529,111 +620,3 @@ function decode(enc::StringArrayEncoding, data) return substrings[indices] end - -# Utility functions for encoding/decoding -function encode_stepwise(data, encodings) - for encoding in encodings - data = encode(encoding, data) - end - return data -end - -function decode_stepwise(data, encodings) - for encoding in reverse(encodings) - data = decode(encoding, data) - end - return data -end - -function deserialize_encoding(content::Any) - if isa(content, Vector) - return [deserialize_encoding(item) for item in content] - end - - if isa(content, Encoding) - return content - end - kind = content["kind"] - - # if byte convert to integer - for (key, value) in content - content[key] = value isa UInt8 ? Int32(value) : value - end - params = content - - # Handle nested encodings - if haskey(params, "data_encoding") - params["data_encoding"] = deserialize_encoding(params["data_encoding"]) - end - - if haskey(params, "offsetEncoding") - params["offsetEncoding"] = deserialize_encoding(params["offsetEncoding"]) - end - - encoding_constructors = Dict( - "ByteArray" => () -> ByteArrayEncoding(INT_TO_TYPE[get(params, "type", nothing)]), - "FixedPoint" => () -> FixedPointEncoding(params["factor"]; srcType=INT_TO_TYPE[get(params, "srcType", FLOAT32)]), - "StringArray" => () -> StringArrayEncoding( - stringData=get(params, "stringData", nothing), - dataEncoding=get(params, "dataEncoding", nothing), - offsetEncoding=get(params, "offsetEncoding", nothing), - offsets=get(params, "offsets", nothing) - ), - "IntervalQuantization" => () -> IntervalQuantizationEncoding(params["min"], params["max"], params["numSteps"]; - srcType=INT_TO_TYPE[get(params, "srcType", 32)]), - "RunLength" => () -> RunLengthEncoding(srcSize=get(params, "srcSize", nothing), - srcType=INT_TO_TYPE[get(params, "srcType", nothing)]), - "Delta" => () -> DeltaEncoding(srcType=INT_TO_TYPE[get(params, "srcType", nothing)], - origin=get(params, "origin", 0)), - "IntegerPacking" => () -> IntegerPackingEncoding(params["byteCount"], - srcSize=get(params, "srcSize", nothing), - isUnsigned=get(params, "isUnsigned", false)) - ) - - if haskey(encoding_constructors, kind) - return encoding_constructors[kind]() - else - error("Unknown encoding kind: $kind") - end -end - - -function decode_column(column::Dict) - data = column["data"] - encodings = [] - - # Handle the encoding array properly - for enc in data["encoding"] - if haskey(enc, "dataEncoding") - if haskey(enc, "offsetEncoding") - push!(encodings, StringArrayEncoding( - stringData=enc["stringData"], - dataEncoding=deserialize_encoding(enc["dataEncoding"]), - offsetEncoding=deserialize_encoding(enc["offsetEncoding"]), - offsets=enc["offsets"] - )) - else - push!(encodings, deserialize_encoding(enc["dataEncoding"])) - end - else - push!(encodings, deserialize_encoding(enc)) - end - end - - # Flatten the encodings if needed - flat_encodings = [] - for enc in encodings - if enc isa Vector - append!(flat_encodings, enc) - else - push!(flat_encodings, enc) - end - end - - - # return flat_encodings - - decoded = decode_stepwise(data["data"], flat_encodings) - - -end From 88d6940ed9e1be0ac53284333219079e3dfeba2e Mon Sep 17 00:00:00 2001 From: Brady Johnston Date: Fri, 9 May 2025 13:12:40 +0800 Subject: [PATCH 11/23] add download and cleanup --- src/download.jl | 69 +++++++++------- src/model.jl | 207 ++++++++++++++++++++++++------------------------ 2 files changed, 145 insertions(+), 131 deletions(-) diff --git a/src/download.jl b/src/download.jl index af162fb7..7f2f5a45 100644 --- a/src/download.jl +++ b/src/download.jl @@ -162,11 +162,11 @@ Requires an internet connection. assembly; by default downloads the PDB file. """ function downloadpdb(pdbid::AbstractString; - dir::AbstractString=pwd(), - format::Type{<:Union{PDBFormat, PDBXMLFormat, MMCIFFormat}}=PDBFormat, - obsolete::Bool=false, - overwrite::Bool=false, - ba_number::Integer=0) + dir::AbstractString=pwd(), + format::Type{<:Union{PDBFormat,PDBXMLFormat,MMCIFFormat,BCIFFormat}}=PDBFormat, + obsolete::Bool=false, + overwrite::Bool=false, + ba_number::Integer=0) pdbid = uppercase(pdbid) # Check PDB ID is 4 characters long and only consits of alphanumeric characters if !occursin(r"^[a-zA-Z0-9]{4}$", pdbid) @@ -198,10 +198,18 @@ function downloadpdb(pdbid::AbstractString; # Download the compressed PDB file to the temporary location @info "Downloading file from PDB: $pdbid" if ba_number == 0 - Downloads.download( - "http://files.rcsb.org/download/$pdbid.$(pdbextension[format]).gz", - archivefilepath, - ) + if format == BCIFFormat + Downloads.download( + "https://models.rcsb.org/$pdbid.bcif", + pdbpath, + ) + return pdbpath + else + Downloads.download( + "http://files.rcsb.org/download/$pdbid.$(pdbextension[format]).gz", + archivefilepath, + ) + end else if format == PDBFormat Downloads.download( @@ -213,6 +221,11 @@ function downloadpdb(pdbid::AbstractString; "http://files.rcsb.org/download/$pdbid-assembly$ba_number.$(pdbextension[format]).gz", archivefilepath, ) + elseif format == BCIFFormat + Downloads.download( + "https://models.rcsb.org/$pdbid.bcif", + archivefilepath, + ) else throw(ArgumentError("Biological assemblies are available in the " * "PDB and mmCIF formats only")) @@ -230,8 +243,8 @@ function downloadpdb(pdbid::AbstractString; if !isfile(pdbpath) || filesize(pdbpath) == 0 if format == PDBFormat throw(ErrorException("Error downloading file: $pdbid; some PDB entries are " * - "not available as PDB format files, consider downloading " * - "the mmCIF file instead")) + "not available as PDB format files, consider downloading " * + "the mmCIF file instead")) else throw(ErrorException("Error downloading file: $pdbid")) end @@ -245,7 +258,7 @@ function downloadpdb(pdbid::AbstractString; return pdbpath end -function downloadpdb(pdbidlist::AbstractArray{<:AbstractString, 1}; kwargs...) +function downloadpdb(pdbidlist::AbstractArray{<:AbstractString,1}; kwargs...) pdbpaths = String[] failedlist = String[] for pdbid in pdbidlist @@ -291,8 +304,8 @@ Requires an internet connection. in `dir`; by default skips downloading the PDB file if it exists. """ function downloadentirepdb(; dir::AbstractString=pwd(), - format::Type{<:Union{PDBFormat, PDBXMLFormat, MMCIFFormat}}=PDBFormat, - overwrite::Bool=false) + format::Type{<:Union{PDBFormat,PDBXMLFormat,MMCIFFormat}}=PDBFormat, + overwrite::Bool=false) pdblist = pdbentrylist() @info "About to download $(length(pdblist)) PDB files, make sure you have enough disk space and time" @info "The function can be stopped any time and called again to resume downloading" @@ -310,12 +323,12 @@ automatically updates the PDB files of the given `format` inside the local Requires an internet connection. """ function updatelocalpdb(; dir::AbstractString=pwd(), - format::Type{<:Union{PDBFormat, PDBXMLFormat, MMCIFFormat}}=PDBFormat) + format::Type{<:Union{PDBFormat,PDBXMLFormat,MMCIFFormat}}=PDBFormat) addedlist, modifiedlist, obsoletelist = pdbrecentchanges() # Download the newly added and modified pdb files downloadpdb(vcat(addedlist, modifiedlist), dir=dir, overwrite=true, format=format) # Set the obsolete directory to be inside dir - obsolete_dir=joinpath(dir, "obsolete") + obsolete_dir = joinpath(dir, "obsolete") for pdbid in obsoletelist oldfile = joinpath(dir, "$pdbid.$(pdbextension[format])") newfile = joinpath(obsolete_dir, "$pdbid.$(pdbextension[format])") @@ -325,10 +338,10 @@ function updatelocalpdb(; dir::AbstractString=pwd(), mkpath(obsolete_dir) end mv(oldfile, newfile) - # If obsolete pdb is already in the obsolete directory, inform the user and skip + # If obsolete pdb is already in the obsolete directory, inform the user and skip elseif isfile(newfile) @info "PDB $pdbid is already moved to the obsolete directory" - # If obsolete pdb not available in both dir and obsolete, inform the user and skip + # If obsolete pdb not available in both dir and obsolete, inform the user and skip else @info "Obsolete PDB $pdbid is missing" end @@ -352,8 +365,8 @@ Requires an internet connection. in `dir`; by default skips downloading the PDB file if it exists. """ function downloadallobsoletepdb(; obsolete_dir::AbstractString=pwd(), - format::Type{<:Union{PDBFormat, PDBXMLFormat, MMCIFFormat}}=PDBFormat, - overwrite::Bool=false) + format::Type{<:Union{PDBFormat,PDBXMLFormat,MMCIFFormat}}=PDBFormat, + overwrite::Bool=false) obsoletelist = pdbobsoletelist() downloadpdb(obsoletelist, dir=obsolete_dir, format=format, overwrite=overwrite) end @@ -390,15 +403,15 @@ Requires an internet connection. Requires the STRIDE_jll.jl package to be imported if set to `true`. """ function retrievepdb(pdbid::AbstractString; - dir::AbstractString=pwd(), - format::Type{<:Union{PDBFormat, PDBXMLFormat, MMCIFFormat}}=MMCIFFormat, - obsolete::Bool=false, - overwrite::Bool=false, - ba_number::Integer=0, - structure_name::AbstractString="$(uppercase(pdbid)).pdb", - kwargs...) + dir::AbstractString=pwd(), + format::Type{<:Union{PDBFormat,PDBXMLFormat,MMCIFFormat}}=MMCIFFormat, + obsolete::Bool=false, + overwrite::Bool=false, + ba_number::Integer=0, + structure_name::AbstractString="$(uppercase(pdbid)).pdb", + kwargs...) downloadpdb(pdbid, dir=dir, format=format, obsolete=obsolete, - overwrite=overwrite, ba_number=ba_number) + overwrite=overwrite, ba_number=ba_number) if obsolete # If obsolete is set true, the PDB file is present in the obsolete directory inside dir dir = joinpath(dir, "obsolete") diff --git a/src/model.jl b/src/model.jl index 4e01ce00..1381816d 100644 --- a/src/model.jl +++ b/src/model.jl @@ -117,12 +117,12 @@ end function Atom(a::Atom, r::StructuralElement) return Atom(a.serial, a.name, a.alt_loc_id, copy(a.coords), a.occupancy, - a.temp_factor, a.element, a.charge, r) + a.temp_factor, a.element, a.charge, r) end "A container to hold different locations of the same atom." struct DisorderedAtom <: AbstractAtom - alt_loc_ids::Dict{Char, Atom} + alt_loc_ids::Dict{Char,Atom} default::Char end @@ -143,15 +143,15 @@ mutable struct Residue <: AbstractResidue ins_code::Char het_res::Bool # Does the residue consist of hetatoms? atom_list::Vector{String} - atoms::Dict{String, AbstractAtom} + atoms::Dict{String,AbstractAtom} chain::StructuralElement ss_code::Char end function Residue(r::Residue, ch::StructuralElement) - atom_dict = Dict{String, AbstractAtom}() + atom_dict = Dict{String,AbstractAtom}() rnew = Residue(r.name, r.number, r.ins_code, r.het_res, [name for name in r.atom_list], - atom_dict, ch, r.ss_code) + atom_dict, ch, r.ss_code) for (name, atom) in r.atoms atom_dict[name] = isa(atom, Atom) ? Atom(atom, rnew) : DisorderedAtom(atom, rnew) end @@ -163,7 +163,7 @@ A container to hold different versions of the same residue (point mutations). """ struct DisorderedResidue <: AbstractResidue - names::Dict{String, Residue} + names::Dict{String,Residue} default::String end @@ -175,12 +175,12 @@ end mutable struct Chain <: StructuralElement id::String # mmCIF files can have multi-character chain IDs res_list::Vector{String} - residues::Dict{String, AbstractResidue} + residues::Dict{String,AbstractResidue} model::StructuralElement end function Chain(c::Chain, mo::StructuralElement) - res_dict = Dict{String, AbstractResidue}() + res_dict = Dict{String,AbstractResidue}() cnew = Chain(c.id, [id for id in c.res_list], res_dict, mo) for (id, res) in c.residues res_dict[id] = isa(res, Residue) ? Residue(res, cnew) : DisorderedResidue(res, cnew) @@ -191,12 +191,12 @@ end "A conformation of a macromolecular structure." struct Model <: StructuralElement number::Int - chains::Dict{String, Chain} + chains::Dict{String,Chain} structure::StructuralElement end function Model(m::Model, struc::StructuralElement) - chain_dict = Dict{String, Chain}() + chain_dict = Dict{String,Chain}() mnew = Model(m.number, chain_dict, struc) for (id, ch) in m.chains chain_dict[id] = Chain(ch, mnew) @@ -210,11 +210,11 @@ entry. """ struct MolecularStructure <: StructuralElement name::String - models::Dict{Int, Model} + models::Dict{Int,Model} end function MolecularStructure(s::MolecularStructure) - model_dict = Dict{Int, Model}() + model_dict = Dict{Int,Model}() snew = MolecularStructure(s.name, model_dict) for (number, mo) in s.models model_dict[number] = Model(mo, snew) @@ -257,14 +257,14 @@ Model() = Model(1) Chain(id::AbstractString, mo::Model) = Chain(id, [], Dict(), mo) Chain(id::Char, mo::Model) = Chain(string(id), [], Dict(), mo) -Chain(id::Union{AbstractString, Char}) = Chain(id, Model()) +Chain(id::Union{AbstractString,Char}) = Chain(id, Model()) function Residue(name::AbstractString, - number::Integer, - ins_code::Char, - het_res::Bool, - ch::Chain, - ss_code=ss_code_unassigned) + number::Integer, + ins_code::Char, + het_res::Bool, + ch::Chain, + ss_code=ss_code_unassigned) return Residue(name, number, ins_code, het_res, [], Dict(), ch, ss_code_unassigned) end @@ -404,7 +404,7 @@ function findatombyname(res::Residue, atom_name::AbstractString; strict::Bool=tr # Look for atom name directly if haskey(res.atoms, atom_name) return res.atoms[atom_name] - # Pad out name to 4 characters to read PDB atom names with whitespace + # Pad out name to 4 characters to read PDB atom names with whitespace elseif length(atom_name) == 3 if haskey(res.atoms, " $atom_name") return res.atoms[" $atom_name"] @@ -861,9 +861,9 @@ alphabetically. """ function resnames(dis_res::DisorderedResidue) return sort(collect(keys(dis_res.names)), - lt= (res_name_one, res_name_two) -> + lt=(res_name_one, res_name_two) -> (isless(res_name_one, res_name_two) && res_name_two != defaultresname(dis_res)) || - res_name_one == defaultresname(dis_res) + res_name_one == defaultresname(dis_res) ) end @@ -928,7 +928,7 @@ chain(ch::Chain) = ch Get the chain ID of an `AbstractAtom`, `AbstractResidue` or `Chain` as a `String`. """ -chainid(el::Union{AbstractResidue, AbstractAtom}) = chainid(chain(el)) +chainid(el::Union{AbstractResidue,AbstractAtom}) = chainid(chain(el)) chainid(ch::Chain) = ch.id """ @@ -1014,7 +1014,7 @@ Get the model number of a `Model`, `Chain`, `AbstractResidue` or `AbstractAtom` as an `Int`. """ modelnumber(mo::Model) = mo.number -modelnumber(el::Union{Chain, AbstractResidue, AbstractAtom}) = modelnumber(model(el)) +modelnumber(el::Union{Chain,AbstractResidue,AbstractAtom}) = modelnumber(model(el)) """ chainids(model) @@ -1046,7 +1046,7 @@ function chains(struc::MolecularStructure) if countmodels(struc) > 0 return chains(defaultmodel(struc)) else - return Dict{String, Chain}() + return Dict{String,Chain}() end end @@ -1070,7 +1070,7 @@ structure(struc::MolecularStructure) = struc Get the name of the `MolecularStructure` that a `StructuralElement` belongs to as a `String`. """ -structurename(el::Union{Model, Chain, AbstractResidue, AbstractAtom}) = structurename(structure(el)) +structurename(el::Union{Model,Chain,AbstractResidue,AbstractAtom}) = structurename(structure(el)) structurename(struc::MolecularStructure) = struc.name """ @@ -1131,7 +1131,7 @@ end function Base.isless(ch_one::Chain, ch_two::Chain) # Deal with usual case of single letter comparison quickly if length(chainid(ch_one)) == 1 && length(chainid(ch_two)) == 1 && - chainid(ch_one) != " " && chainid(ch_two) != " " + chainid(ch_one) != " " && chainid(ch_two) != " " return Int(chainid(ch_one)[1]) < Int(chainid(ch_two)[1]) end chid_one = strip(chainid(ch_one)) @@ -1170,11 +1170,11 @@ insertion code of the second greater than the first). """ function sequentialresidues(res_first::AbstractResidue, res_second::AbstractResidue) if chainid(res_second) == chainid(res_first) && - ishetero(res_second) == ishetero(res_first) + ishetero(res_second) == ishetero(res_first) if resnumber(res_second) == resnumber(res_first) + 1 return true elseif resnumber(res_second) == resnumber(res_first) && - inscode(res_second) > inscode(res_first) + inscode(res_second) > inscode(res_first) return true end end @@ -1272,11 +1272,11 @@ collectmodels(struc::MolecularStructure) = collect(struc) collectmodels(mo::Model) = [mo] -collectmodels(el::Union{Chain, AbstractResidue, AbstractAtom}) = [model(el)] +collectmodels(el::Union{Chain,AbstractResidue,AbstractAtom}) = [model(el)] collectmodels(mos::AbstractVector{Model}) = mos -function collectmodels(els::AbstractVector{<:Union{Chain, AbstractResidue, AbstractAtom}}) +function collectmodels(els::AbstractVector{<:Union{Chain,AbstractResidue,AbstractAtom}}) mo_list = Model[] for el in els if !(model(el) in mo_list) @@ -1288,8 +1288,8 @@ end # One selector explicitly defined to prevent this being called without selectors function collectmodels(el::StructuralElementOrList, - model_selector::Function, - model_selectors::Function...) + model_selector::Function, + model_selectors::Function...) return applyselectors(collectmodels(el), model_selector, model_selectors...) end @@ -1327,7 +1327,7 @@ collectchains(mo::Model) = collect(mo) collectchains(ch::Chain) = [ch] -collectchains(el::Union{AbstractResidue, AbstractAtom}) = [chain(el)] +collectchains(el::Union{AbstractResidue,AbstractAtom}) = [chain(el)] function collectchains(mos::AbstractVector{Model}) ch_list = Chain[] @@ -1339,7 +1339,7 @@ end collectchains(chs::AbstractVector{Chain}) = chs -function collectchains(els::AbstractVector{<:Union{AbstractResidue, AbstractAtom}}) +function collectchains(els::AbstractVector{<:Union{AbstractResidue,AbstractAtom}}) ch_list = Chain[] for el in els if !(chain(el) in ch_list) @@ -1350,8 +1350,8 @@ function collectchains(els::AbstractVector{<:Union{AbstractResidue, AbstractAtom end function collectchains(el::StructuralElementOrList, - chain_selector::Function, - chain_selectors::Function...) + chain_selector::Function, + chain_selectors::Function...) return applyselectors(collectchains(el), chain_selector, chain_selectors...) end @@ -1387,8 +1387,8 @@ function collectresidues(struc::MolecularStructure; expand_disordered::Bool=fals end end -function collectresidues(el::Union{Model, Vector{Model}, Vector{Chain}}; - expand_disordered::Bool=false) +function collectresidues(el::Union{Model,Vector{Model},Vector{Chain}}; + expand_disordered::Bool=false) res_list = AbstractResidue[] for sub_el in el append!(res_list, collectresidues(sub_el; expand_disordered=expand_disordered)) @@ -1399,8 +1399,8 @@ end # Note output is always Vector{AbstractResidue} unless input was Vector{Residue} # or Vector{DisorderedResidue}, in which case output is same type as input # type -function collectresidues(el::Union{Chain, Vector{<:AbstractResidue}}; - expand_disordered::Bool=false) +function collectresidues(el::Union{Chain,Vector{<:AbstractResidue}}; + expand_disordered::Bool=false) if expand_disordered res_list = AbstractResidue[] for res in el @@ -1439,11 +1439,11 @@ function collectresidues(at_list::AbstractVector{<:AbstractAtom}; expand_disorde end function collectresidues(el::StructuralElementOrList, - residue_selector::Function, - residue_selectors::Function...; - expand_disordered::Bool=false) + residue_selector::Function, + residue_selectors::Function...; + expand_disordered::Bool=false) return collectresidues(applyselectors(collectresidues(el), residue_selector, - residue_selectors...); expand_disordered=expand_disordered) + residue_selectors...); expand_disordered=expand_disordered) end """ @@ -1457,10 +1457,10 @@ The keyword argument `expand_disordered` (default `false`) determines whether to return all copies of disordered residues separately. """ function countresidues(el::StructuralElementOrList, - residue_selectors::Function...; - expand_disordered::Bool=false) + residue_selectors::Function...; + expand_disordered::Bool=false) return length(collectresidues(el, residue_selectors...; - expand_disordered=expand_disordered)) + expand_disordered=expand_disordered)) end """ @@ -1481,9 +1481,9 @@ function collectatoms(struc::MolecularStructure; expand_disordered::Bool=false) end end -function collectatoms(el::Union{Model, Chain, Vector{Model}, Vector{Chain}, - Vector{<:AbstractResidue}}; - expand_disordered::Bool=false) +function collectatoms(el::Union{Model,Chain,Vector{Model},Vector{Chain}, + Vector{<:AbstractResidue}}; + expand_disordered::Bool=false) at_list = AbstractAtom[] for sub_el in el append!(at_list, collectatoms(sub_el; expand_disordered=expand_disordered)) @@ -1493,8 +1493,8 @@ end # Note output is always Vector{AbstractAtom} unless input was Vector{Atom} or # Vector{DisorderedAtom}, in which case output is same type as input type -function collectatoms(el::Union{Residue, Vector{<:AbstractAtom}}; - expand_disordered::Bool=false) +function collectatoms(el::Union{Residue,Vector{<:AbstractAtom}}; + expand_disordered::Bool=false) if expand_disordered at_list = AbstractAtom[] for at in el @@ -1513,7 +1513,7 @@ end function collectatoms(dis_res::DisorderedResidue; expand_disordered::Bool=false) if expand_disordered return collectatoms(collectresidues(dis_res; expand_disordered=true); - expand_disordered=true) + expand_disordered=true) else return collectatoms(defaultresidue(dis_res)) end @@ -1530,11 +1530,11 @@ function collectatoms(dis_at::DisorderedAtom; expand_disordered::Bool=false) end function collectatoms(el::StructuralElementOrList, - atom_selector::Function, - atom_selectors::Function...; - expand_disordered::Bool=false) + atom_selector::Function, + atom_selectors::Function...; + expand_disordered::Bool=false) return collectatoms(applyselectors(collectatoms(el), atom_selector, atom_selectors...); - expand_disordered=expand_disordered) + expand_disordered=expand_disordered) end """ @@ -1548,18 +1548,18 @@ The keyword argument `expand_disordered` (default `false`) determines whether to return all copies of disordered atoms separately. """ function countatoms(el::StructuralElementOrList, - atom_selectors::Function...; - expand_disordered::Bool=false) + atom_selectors::Function...; + expand_disordered::Bool=false) return length(collectatoms(el, atom_selectors...; - expand_disordered=expand_disordered)) + expand_disordered=expand_disordered)) end # Add an atom represented in an AtomRecord to a Model # Unsafe as sub-element lists are not updated (for speed) # fixlists! should be run after all additions to update the sub-element lists function unsafe_addatomtomodel!(mo::Model, - atom_rec::AtomRecord; - remove_disorder::Bool=false) + atom_rec::AtomRecord; + remove_disorder::Bool=false) # Add chain to model if necessary if !haskey(chains(mo), atom_rec.chain_id) mo[atom_rec.chain_id] = Chain(atom_rec.chain_id, mo) @@ -1569,29 +1569,29 @@ function unsafe_addatomtomodel!(mo::Model, # If residue does not exist in the chain, create a Residue if !haskey(residues(ch), res_id) ch[res_id] = Residue( - atom_rec.res_name, - atom_rec.res_number, - atom_rec.ins_code, - atom_rec.het_atom, - ch) + atom_rec.res_name, + atom_rec.res_number, + atom_rec.ins_code, + atom_rec.het_atom, + ch) res = ch[res_id] elseif isa(ch[res_id], Residue) # Residue exists in the chain and the residue names match # Add to that Residue if fullresname(ch[res_id]) == atom_rec.res_name res = ch[res_id] - # Residue exists in the chain but the residue names do not match - # Create a DisorderedResidue + # Residue exists in the chain but the residue names do not match + # Create a DisorderedResidue else ch[res_id] = DisorderedResidue(Dict( - fullresname(ch[res_id]) => ch[res_id], - atom_rec.res_name => Residue( - atom_rec.res_name, - atom_rec.res_number, - atom_rec.ins_code, - atom_rec.het_atom, - ch) - ), fullresname(ch[res_id])) + fullresname(ch[res_id]) => ch[res_id], + atom_rec.res_name => Residue( + atom_rec.res_name, + atom_rec.res_number, + atom_rec.ins_code, + atom_rec.het_atom, + ch) + ), fullresname(ch[res_id])) res = disorderedres(ch[res_id], atom_rec.res_name) end else @@ -1599,15 +1599,15 @@ function unsafe_addatomtomodel!(mo::Model, # Add to that DisorderedResidue if atom_rec.res_name in resnames(ch[res_id]) res = disorderedres(ch[res_id], atom_rec.res_name) - # DisorderedResidue exists in the chain and the residue names do not match - # Create a new Residue in the DisorderedResidue + # DisorderedResidue exists in the chain and the residue names do not match + # Create a new Residue in the DisorderedResidue else ch[res_id].names[atom_rec.res_name] = Residue( - atom_rec.res_name, - atom_rec.res_number, - atom_rec.ins_code, - atom_rec.het_atom, - ch) + atom_rec.res_name, + atom_rec.res_number, + atom_rec.ins_code, + atom_rec.het_atom, + ch) res = disorderedres(ch[res_id], atom_rec.res_name) end end @@ -1624,30 +1624,30 @@ function unsafe_addatomtomodel!(mo::Model, # If atom does not exist in the residue, create an Atom if !haskey(atoms(res), atom_rec.atom_name) res[atom_rec.atom_name] = at - # Atom exists in the residue, atom names match and alt loc IDs are different + # Atom exists in the residue, atom names match and alt loc IDs are different elseif isa(res[atom_rec.atom_name], Atom) && - atom_rec.alt_loc_id != altlocid(res[atom_rec.atom_name]) + atom_rec.alt_loc_id != altlocid(res[atom_rec.atom_name]) # If we are removing disorder and the new atom is preferred to the old one, replace the old one if remove_disorder && - choosedefaultaltlocid(at, res[atom_rec.atom_name]) == atom_rec.alt_loc_id + choosedefaultaltlocid(at, res[atom_rec.atom_name]) == atom_rec.alt_loc_id res[atom_rec.atom_name] = at - # If we are not removing disorder, create a new disordered atom container and add both atoms + # If we are not removing disorder, create a new disordered atom container and add both atoms elseif !remove_disorder res[atom_rec.atom_name] = DisorderedAtom(Dict( - atom_rec.alt_loc_id => at, - altlocid(res[atom_rec.atom_name]) => res[atom_rec.atom_name] - ), choosedefaultaltlocid(at, res[atom_rec.atom_name])) + atom_rec.alt_loc_id => at, + altlocid(res[atom_rec.atom_name]) => res[atom_rec.atom_name] + ), choosedefaultaltlocid(at, res[atom_rec.atom_name])) end - # A disordered atom container already exists and the alt loc ID is not taken + # A disordered atom container already exists and the alt loc ID is not taken elseif isa(res[atom_rec.atom_name], DisorderedAtom) && - !(atom_rec.alt_loc_id in altlocids(res[atom_rec.atom_name])) + !(atom_rec.alt_loc_id in altlocids(res[atom_rec.atom_name])) # Add the new atom to the disordered atom container res[atom_rec.atom_name][atom_rec.alt_loc_id] = at # If the default alt loc requires changing, change it if choosedefaultaltlocid(defaultatom(res[atom_rec.atom_name]), at) != defaultaltlocid(res[atom_rec.atom_name]) res[atom_rec.atom_name] = DisorderedAtom( - res[atom_rec.atom_name], - atom_rec.alt_loc_id) + res[atom_rec.atom_name], + atom_rec.alt_loc_id) end else error("Two copies of the same atom have the same alternative location ID. Existing atom:\n" * @@ -1694,8 +1694,8 @@ chosen. """ function choosedefaultaltlocid(at_one::Atom, at_two::Atom) if occupancy(at_one) > occupancy(at_two) || - (occupancy(at_one) == occupancy(at_two) && - Int(altlocid(at_one)) < Int(altlocid(at_two))) + (occupancy(at_one) == occupancy(at_two) && + Int(altlocid(at_one)) < Int(altlocid(at_two))) return altlocid(at_one) else return altlocid(at_two) @@ -1723,11 +1723,12 @@ struct BCIFFormat end struct MMTFFormat end "Mapping of Protein Data Bank (PDB) formats to their file extensions." -const pdbextension = Dict{Type, String}( - PDBFormat => "pdb", +const pdbextension = Dict{Type,String}( + PDBFormat => "pdb", PDBXMLFormat => "xml", - MMCIFFormat => "cif", - MMTFFormat => "mmtf", + MMCIFFormat => "cif", + MMTFFormat => "mmtf", + BCIFFormat => "bcif", ) """ @@ -1768,8 +1769,8 @@ Call `MMTFDict` with a filepath or stream to read the dictionary from that source. The keyword argument `gzip` (default `false`) determines if the file is gzipped. """ -struct MMTFDict <: AbstractDict{String, Any} - dict::Dict{String, Any} +struct MMTFDict <: AbstractDict{String,Any} + dict::Dict{String,Any} end """ From 4f191cc296b3faacc4462064a7095b2ef85ca95d Mon Sep 17 00:00:00 2001 From: Brady Johnston Date: Tue, 13 May 2025 16:41:59 +0800 Subject: [PATCH 12/23] Revert "add download and cleanup" This reverts commit 88d6940ed9e1be0ac53284333219079e3dfeba2e. --- src/download.jl | 69 +++++++--------- src/model.jl | 207 ++++++++++++++++++++++++------------------------ 2 files changed, 131 insertions(+), 145 deletions(-) diff --git a/src/download.jl b/src/download.jl index 7f2f5a45..af162fb7 100644 --- a/src/download.jl +++ b/src/download.jl @@ -162,11 +162,11 @@ Requires an internet connection. assembly; by default downloads the PDB file. """ function downloadpdb(pdbid::AbstractString; - dir::AbstractString=pwd(), - format::Type{<:Union{PDBFormat,PDBXMLFormat,MMCIFFormat,BCIFFormat}}=PDBFormat, - obsolete::Bool=false, - overwrite::Bool=false, - ba_number::Integer=0) + dir::AbstractString=pwd(), + format::Type{<:Union{PDBFormat, PDBXMLFormat, MMCIFFormat}}=PDBFormat, + obsolete::Bool=false, + overwrite::Bool=false, + ba_number::Integer=0) pdbid = uppercase(pdbid) # Check PDB ID is 4 characters long and only consits of alphanumeric characters if !occursin(r"^[a-zA-Z0-9]{4}$", pdbid) @@ -198,18 +198,10 @@ function downloadpdb(pdbid::AbstractString; # Download the compressed PDB file to the temporary location @info "Downloading file from PDB: $pdbid" if ba_number == 0 - if format == BCIFFormat - Downloads.download( - "https://models.rcsb.org/$pdbid.bcif", - pdbpath, - ) - return pdbpath - else - Downloads.download( - "http://files.rcsb.org/download/$pdbid.$(pdbextension[format]).gz", - archivefilepath, - ) - end + Downloads.download( + "http://files.rcsb.org/download/$pdbid.$(pdbextension[format]).gz", + archivefilepath, + ) else if format == PDBFormat Downloads.download( @@ -221,11 +213,6 @@ function downloadpdb(pdbid::AbstractString; "http://files.rcsb.org/download/$pdbid-assembly$ba_number.$(pdbextension[format]).gz", archivefilepath, ) - elseif format == BCIFFormat - Downloads.download( - "https://models.rcsb.org/$pdbid.bcif", - archivefilepath, - ) else throw(ArgumentError("Biological assemblies are available in the " * "PDB and mmCIF formats only")) @@ -243,8 +230,8 @@ function downloadpdb(pdbid::AbstractString; if !isfile(pdbpath) || filesize(pdbpath) == 0 if format == PDBFormat throw(ErrorException("Error downloading file: $pdbid; some PDB entries are " * - "not available as PDB format files, consider downloading " * - "the mmCIF file instead")) + "not available as PDB format files, consider downloading " * + "the mmCIF file instead")) else throw(ErrorException("Error downloading file: $pdbid")) end @@ -258,7 +245,7 @@ function downloadpdb(pdbid::AbstractString; return pdbpath end -function downloadpdb(pdbidlist::AbstractArray{<:AbstractString,1}; kwargs...) +function downloadpdb(pdbidlist::AbstractArray{<:AbstractString, 1}; kwargs...) pdbpaths = String[] failedlist = String[] for pdbid in pdbidlist @@ -304,8 +291,8 @@ Requires an internet connection. in `dir`; by default skips downloading the PDB file if it exists. """ function downloadentirepdb(; dir::AbstractString=pwd(), - format::Type{<:Union{PDBFormat,PDBXMLFormat,MMCIFFormat}}=PDBFormat, - overwrite::Bool=false) + format::Type{<:Union{PDBFormat, PDBXMLFormat, MMCIFFormat}}=PDBFormat, + overwrite::Bool=false) pdblist = pdbentrylist() @info "About to download $(length(pdblist)) PDB files, make sure you have enough disk space and time" @info "The function can be stopped any time and called again to resume downloading" @@ -323,12 +310,12 @@ automatically updates the PDB files of the given `format` inside the local Requires an internet connection. """ function updatelocalpdb(; dir::AbstractString=pwd(), - format::Type{<:Union{PDBFormat,PDBXMLFormat,MMCIFFormat}}=PDBFormat) + format::Type{<:Union{PDBFormat, PDBXMLFormat, MMCIFFormat}}=PDBFormat) addedlist, modifiedlist, obsoletelist = pdbrecentchanges() # Download the newly added and modified pdb files downloadpdb(vcat(addedlist, modifiedlist), dir=dir, overwrite=true, format=format) # Set the obsolete directory to be inside dir - obsolete_dir = joinpath(dir, "obsolete") + obsolete_dir=joinpath(dir, "obsolete") for pdbid in obsoletelist oldfile = joinpath(dir, "$pdbid.$(pdbextension[format])") newfile = joinpath(obsolete_dir, "$pdbid.$(pdbextension[format])") @@ -338,10 +325,10 @@ function updatelocalpdb(; dir::AbstractString=pwd(), mkpath(obsolete_dir) end mv(oldfile, newfile) - # If obsolete pdb is already in the obsolete directory, inform the user and skip + # If obsolete pdb is already in the obsolete directory, inform the user and skip elseif isfile(newfile) @info "PDB $pdbid is already moved to the obsolete directory" - # If obsolete pdb not available in both dir and obsolete, inform the user and skip + # If obsolete pdb not available in both dir and obsolete, inform the user and skip else @info "Obsolete PDB $pdbid is missing" end @@ -365,8 +352,8 @@ Requires an internet connection. in `dir`; by default skips downloading the PDB file if it exists. """ function downloadallobsoletepdb(; obsolete_dir::AbstractString=pwd(), - format::Type{<:Union{PDBFormat,PDBXMLFormat,MMCIFFormat}}=PDBFormat, - overwrite::Bool=false) + format::Type{<:Union{PDBFormat, PDBXMLFormat, MMCIFFormat}}=PDBFormat, + overwrite::Bool=false) obsoletelist = pdbobsoletelist() downloadpdb(obsoletelist, dir=obsolete_dir, format=format, overwrite=overwrite) end @@ -403,15 +390,15 @@ Requires an internet connection. Requires the STRIDE_jll.jl package to be imported if set to `true`. """ function retrievepdb(pdbid::AbstractString; - dir::AbstractString=pwd(), - format::Type{<:Union{PDBFormat,PDBXMLFormat,MMCIFFormat}}=MMCIFFormat, - obsolete::Bool=false, - overwrite::Bool=false, - ba_number::Integer=0, - structure_name::AbstractString="$(uppercase(pdbid)).pdb", - kwargs...) + dir::AbstractString=pwd(), + format::Type{<:Union{PDBFormat, PDBXMLFormat, MMCIFFormat}}=MMCIFFormat, + obsolete::Bool=false, + overwrite::Bool=false, + ba_number::Integer=0, + structure_name::AbstractString="$(uppercase(pdbid)).pdb", + kwargs...) downloadpdb(pdbid, dir=dir, format=format, obsolete=obsolete, - overwrite=overwrite, ba_number=ba_number) + overwrite=overwrite, ba_number=ba_number) if obsolete # If obsolete is set true, the PDB file is present in the obsolete directory inside dir dir = joinpath(dir, "obsolete") diff --git a/src/model.jl b/src/model.jl index 9024fad2..173dbb6b 100644 --- a/src/model.jl +++ b/src/model.jl @@ -117,12 +117,12 @@ end function Atom(a::Atom, r::StructuralElement) return Atom(a.serial, a.name, a.alt_loc_id, copy(a.coords), a.occupancy, - a.temp_factor, a.element, a.charge, r) + a.temp_factor, a.element, a.charge, r) end "A container to hold different locations of the same atom." struct DisorderedAtom <: AbstractAtom - alt_loc_ids::Dict{Char,Atom} + alt_loc_ids::Dict{Char, Atom} default::Char end @@ -143,15 +143,15 @@ mutable struct Residue <: AbstractResidue ins_code::Char het_res::Bool # Does the residue consist of hetatoms? atom_list::Vector{String} - atoms::Dict{String,AbstractAtom} + atoms::Dict{String, AbstractAtom} chain::StructuralElement ss_code::Char end function Residue(r::Residue, ch::StructuralElement) - atom_dict = Dict{String,AbstractAtom}() + atom_dict = Dict{String, AbstractAtom}() rnew = Residue(r.name, r.number, r.ins_code, r.het_res, [name for name in r.atom_list], - atom_dict, ch, r.ss_code) + atom_dict, ch, r.ss_code) for (name, atom) in r.atoms atom_dict[name] = (isa(atom, Atom) ? Atom(atom, rnew) : DisorderedAtom(atom, rnew)) end @@ -163,7 +163,7 @@ A container to hold different versions of the same residue (point mutations). """ struct DisorderedResidue <: AbstractResidue - names::Dict{String,Residue} + names::Dict{String, Residue} default::String end @@ -175,12 +175,12 @@ end mutable struct Chain <: StructuralElement id::String # mmCIF files can have multi-character chain IDs res_list::Vector{String} - residues::Dict{String,AbstractResidue} + residues::Dict{String, AbstractResidue} model::StructuralElement end function Chain(c::Chain, mo::StructuralElement) - res_dict = Dict{String,AbstractResidue}() + res_dict = Dict{String, AbstractResidue}() cnew = Chain(c.id, [id for id in c.res_list], res_dict, mo) for (id, res) in c.residues res_dict[id] = (isa(res, Residue) ? Residue(res, cnew) : DisorderedResidue(res, cnew)) @@ -191,12 +191,12 @@ end "A conformation of a macromolecular structure." struct Model <: StructuralElement number::Int - chains::Dict{String,Chain} + chains::Dict{String, Chain} structure::StructuralElement end function Model(m::Model, struc::StructuralElement) - chain_dict = Dict{String,Chain}() + chain_dict = Dict{String, Chain}() mnew = Model(m.number, chain_dict, struc) for (id, ch) in m.chains chain_dict[id] = Chain(ch, mnew) @@ -210,11 +210,11 @@ entry. """ struct MolecularStructure <: StructuralElement name::String - models::Dict{Int,Model} + models::Dict{Int, Model} end function MolecularStructure(s::MolecularStructure) - model_dict = Dict{Int,Model}() + model_dict = Dict{Int, Model}() snew = MolecularStructure(s.name, model_dict) for (number, mo) in s.models model_dict[number] = Model(mo, snew) @@ -257,14 +257,14 @@ Model() = Model(1) Chain(id::AbstractString, mo::Model) = Chain(id, [], Dict(), mo) Chain(id::Char, mo::Model) = Chain(string(id), [], Dict(), mo) -Chain(id::Union{AbstractString,Char}) = Chain(id, Model()) +Chain(id::Union{AbstractString, Char}) = Chain(id, Model()) function Residue(name::AbstractString, - number::Integer, - ins_code::Char, - het_res::Bool, - ch::Chain, - ss_code=ss_code_unassigned) + number::Integer, + ins_code::Char, + het_res::Bool, + ch::Chain, + ss_code=ss_code_unassigned) return Residue(name, number, ins_code, het_res, [], Dict(), ch, ss_code_unassigned) end @@ -404,7 +404,7 @@ function findatombyname(res::Residue, atom_name::AbstractString; strict::Bool=tr # Look for atom name directly if haskey(res.atoms, atom_name) return res.atoms[atom_name] - # Pad out name to 4 characters to read PDB atom names with whitespace + # Pad out name to 4 characters to read PDB atom names with whitespace elseif length(atom_name) == 3 if haskey(res.atoms, " $atom_name") return res.atoms[" $atom_name"] @@ -861,9 +861,9 @@ alphabetically. """ function resnames(dis_res::DisorderedResidue) return sort(collect(keys(dis_res.names)), - lt=(res_name_one, res_name_two) -> + lt= (res_name_one, res_name_two) -> (isless(res_name_one, res_name_two) && res_name_two != defaultresname(dis_res)) || - res_name_one == defaultresname(dis_res) + res_name_one == defaultresname(dis_res) ) end @@ -928,7 +928,7 @@ chain(ch::Chain) = ch Get the chain ID of an `AbstractAtom`, `AbstractResidue` or `Chain` as a `String`. """ -chainid(el::Union{AbstractResidue,AbstractAtom}) = chainid(chain(el)) +chainid(el::Union{AbstractResidue, AbstractAtom}) = chainid(chain(el)) chainid(ch::Chain) = ch.id """ @@ -1014,7 +1014,7 @@ Get the model number of a `Model`, `Chain`, `AbstractResidue` or `AbstractAtom` as an `Int`. """ modelnumber(mo::Model) = mo.number -modelnumber(el::Union{Chain,AbstractResidue,AbstractAtom}) = modelnumber(model(el)) +modelnumber(el::Union{Chain, AbstractResidue, AbstractAtom}) = modelnumber(model(el)) """ chainids(model) @@ -1046,7 +1046,7 @@ function chains(struc::MolecularStructure) if countmodels(struc) > 0 return chains(defaultmodel(struc)) else - return Dict{String,Chain}() + return Dict{String, Chain}() end end @@ -1070,7 +1070,7 @@ structure(struc::MolecularStructure) = struc Get the name of the `MolecularStructure` that a `StructuralElement` belongs to as a `String`. """ -structurename(el::Union{Model,Chain,AbstractResidue,AbstractAtom}) = structurename(structure(el)) +structurename(el::Union{Model, Chain, AbstractResidue, AbstractAtom}) = structurename(structure(el)) structurename(struc::MolecularStructure) = struc.name """ @@ -1131,7 +1131,7 @@ end function Base.isless(ch_one::Chain, ch_two::Chain) # Deal with usual case of single letter comparison quickly if length(chainid(ch_one)) == 1 && length(chainid(ch_two)) == 1 && - chainid(ch_one) != " " && chainid(ch_two) != " " + chainid(ch_one) != " " && chainid(ch_two) != " " return Int(chainid(ch_one)[1]) < Int(chainid(ch_two)[1]) end chid_one = strip(chainid(ch_one)) @@ -1170,11 +1170,11 @@ insertion code of the second greater than the first). """ function sequentialresidues(res_first::AbstractResidue, res_second::AbstractResidue) if chainid(res_second) == chainid(res_first) && - ishetero(res_second) == ishetero(res_first) + ishetero(res_second) == ishetero(res_first) if resnumber(res_second) == resnumber(res_first) + 1 return true elseif resnumber(res_second) == resnumber(res_first) && - inscode(res_second) > inscode(res_first) + inscode(res_second) > inscode(res_first) return true end end @@ -1272,11 +1272,11 @@ collectmodels(struc::MolecularStructure) = collect(struc) collectmodels(mo::Model) = [mo] -collectmodels(el::Union{Chain,AbstractResidue,AbstractAtom}) = [model(el)] +collectmodels(el::Union{Chain, AbstractResidue, AbstractAtom}) = [model(el)] collectmodels(mos::AbstractVector{Model}) = mos -function collectmodels(els::AbstractVector{<:Union{Chain,AbstractResidue,AbstractAtom}}) +function collectmodels(els::AbstractVector{<:Union{Chain, AbstractResidue, AbstractAtom}}) mo_list = Model[] for el in els if !(model(el) in mo_list) @@ -1288,8 +1288,8 @@ end # One selector explicitly defined to prevent this being called without selectors function collectmodels(el::StructuralElementOrList, - model_selector::Function, - model_selectors::Function...) + model_selector::Function, + model_selectors::Function...) return applyselectors(collectmodels(el), model_selector, model_selectors...) end @@ -1327,7 +1327,7 @@ collectchains(mo::Model) = collect(mo) collectchains(ch::Chain) = [ch] -collectchains(el::Union{AbstractResidue,AbstractAtom}) = [chain(el)] +collectchains(el::Union{AbstractResidue, AbstractAtom}) = [chain(el)] function collectchains(mos::AbstractVector{Model}) ch_list = Chain[] @@ -1339,7 +1339,7 @@ end collectchains(chs::AbstractVector{Chain}) = chs -function collectchains(els::AbstractVector{<:Union{AbstractResidue,AbstractAtom}}) +function collectchains(els::AbstractVector{<:Union{AbstractResidue, AbstractAtom}}) ch_list = Chain[] for el in els if !(chain(el) in ch_list) @@ -1350,8 +1350,8 @@ function collectchains(els::AbstractVector{<:Union{AbstractResidue,AbstractAtom} end function collectchains(el::StructuralElementOrList, - chain_selector::Function, - chain_selectors::Function...) + chain_selector::Function, + chain_selectors::Function...) return applyselectors(collectchains(el), chain_selector, chain_selectors...) end @@ -1387,8 +1387,8 @@ function collectresidues(struc::MolecularStructure; expand_disordered::Bool=fals end end -function collectresidues(el::Union{Model,Vector{Model},Vector{Chain}}; - expand_disordered::Bool=false) +function collectresidues(el::Union{Model, Vector{Model}, Vector{Chain}}; + expand_disordered::Bool=false) res_list = AbstractResidue[] for sub_el in el append!(res_list, collectresidues(sub_el; expand_disordered=expand_disordered)) @@ -1399,8 +1399,8 @@ end # Note output is always Vector{AbstractResidue} unless input was Vector{Residue} # or Vector{DisorderedResidue}, in which case output is same type as input # type -function collectresidues(el::Union{Chain,Vector{<:AbstractResidue}}; - expand_disordered::Bool=false) +function collectresidues(el::Union{Chain, Vector{<:AbstractResidue}}; + expand_disordered::Bool=false) if expand_disordered res_list = AbstractResidue[] for res in el @@ -1439,11 +1439,11 @@ function collectresidues(at_list::AbstractVector{<:AbstractAtom}; expand_disorde end function collectresidues(el::StructuralElementOrList, - residue_selector::Function, - residue_selectors::Function...; - expand_disordered::Bool=false) + residue_selector::Function, + residue_selectors::Function...; + expand_disordered::Bool=false) return collectresidues(applyselectors(collectresidues(el), residue_selector, - residue_selectors...); expand_disordered=expand_disordered) + residue_selectors...); expand_disordered=expand_disordered) end """ @@ -1457,10 +1457,10 @@ The keyword argument `expand_disordered` (default `false`) determines whether to return all copies of disordered residues separately. """ function countresidues(el::StructuralElementOrList, - residue_selectors::Function...; - expand_disordered::Bool=false) + residue_selectors::Function...; + expand_disordered::Bool=false) return length(collectresidues(el, residue_selectors...; - expand_disordered=expand_disordered)) + expand_disordered=expand_disordered)) end """ @@ -1481,9 +1481,9 @@ function collectatoms(struc::MolecularStructure; expand_disordered::Bool=false) end end -function collectatoms(el::Union{Model,Chain,Vector{Model},Vector{Chain}, - Vector{<:AbstractResidue}}; - expand_disordered::Bool=false) +function collectatoms(el::Union{Model, Chain, Vector{Model}, Vector{Chain}, + Vector{<:AbstractResidue}}; + expand_disordered::Bool=false) at_list = AbstractAtom[] for sub_el in el append!(at_list, collectatoms(sub_el; expand_disordered=expand_disordered)) @@ -1493,8 +1493,8 @@ end # Note output is always Vector{AbstractAtom} unless input was Vector{Atom} or # Vector{DisorderedAtom}, in which case output is same type as input type -function collectatoms(el::Union{Residue,Vector{<:AbstractAtom}}; - expand_disordered::Bool=false) +function collectatoms(el::Union{Residue, Vector{<:AbstractAtom}}; + expand_disordered::Bool=false) if expand_disordered at_list = AbstractAtom[] for at in el @@ -1513,7 +1513,7 @@ end function collectatoms(dis_res::DisorderedResidue; expand_disordered::Bool=false) if expand_disordered return collectatoms(collectresidues(dis_res; expand_disordered=true); - expand_disordered=true) + expand_disordered=true) else return collectatoms(defaultresidue(dis_res)) end @@ -1530,11 +1530,11 @@ function collectatoms(dis_at::DisorderedAtom; expand_disordered::Bool=false) end function collectatoms(el::StructuralElementOrList, - atom_selector::Function, - atom_selectors::Function...; - expand_disordered::Bool=false) + atom_selector::Function, + atom_selectors::Function...; + expand_disordered::Bool=false) return collectatoms(applyselectors(collectatoms(el), atom_selector, atom_selectors...); - expand_disordered=expand_disordered) + expand_disordered=expand_disordered) end """ @@ -1548,18 +1548,18 @@ The keyword argument `expand_disordered` (default `false`) determines whether to return all copies of disordered atoms separately. """ function countatoms(el::StructuralElementOrList, - atom_selectors::Function...; - expand_disordered::Bool=false) + atom_selectors::Function...; + expand_disordered::Bool=false) return length(collectatoms(el, atom_selectors...; - expand_disordered=expand_disordered)) + expand_disordered=expand_disordered)) end # Add an atom represented in an AtomRecord to a Model # Unsafe as sub-element lists are not updated (for speed) # fixlists! should be run after all additions to update the sub-element lists function unsafe_addatomtomodel!(mo::Model, - atom_rec::AtomRecord; - remove_disorder::Bool=false) + atom_rec::AtomRecord; + remove_disorder::Bool=false) # Add chain to model if necessary if !haskey(chains(mo), atom_rec.chain_id) mo[atom_rec.chain_id] = Chain(atom_rec.chain_id, mo) @@ -1569,29 +1569,29 @@ function unsafe_addatomtomodel!(mo::Model, # If residue does not exist in the chain, create a Residue if !haskey(residues(ch), res_id) ch[res_id] = Residue( - atom_rec.res_name, - atom_rec.res_number, - atom_rec.ins_code, - atom_rec.het_atom, - ch) + atom_rec.res_name, + atom_rec.res_number, + atom_rec.ins_code, + atom_rec.het_atom, + ch) res = ch[res_id] elseif isa(ch[res_id], Residue) # Residue exists in the chain and the residue names match # Add to that Residue if fullresname(ch[res_id]) == atom_rec.res_name res = ch[res_id] - # Residue exists in the chain but the residue names do not match - # Create a DisorderedResidue + # Residue exists in the chain but the residue names do not match + # Create a DisorderedResidue else ch[res_id] = DisorderedResidue(Dict( - fullresname(ch[res_id]) => ch[res_id], - atom_rec.res_name => Residue( - atom_rec.res_name, - atom_rec.res_number, - atom_rec.ins_code, - atom_rec.het_atom, - ch) - ), fullresname(ch[res_id])) + fullresname(ch[res_id]) => ch[res_id], + atom_rec.res_name => Residue( + atom_rec.res_name, + atom_rec.res_number, + atom_rec.ins_code, + atom_rec.het_atom, + ch) + ), fullresname(ch[res_id])) res = disorderedres(ch[res_id], atom_rec.res_name) end else @@ -1599,15 +1599,15 @@ function unsafe_addatomtomodel!(mo::Model, # Add to that DisorderedResidue if atom_rec.res_name in resnames(ch[res_id]) res = disorderedres(ch[res_id], atom_rec.res_name) - # DisorderedResidue exists in the chain and the residue names do not match - # Create a new Residue in the DisorderedResidue + # DisorderedResidue exists in the chain and the residue names do not match + # Create a new Residue in the DisorderedResidue else ch[res_id].names[atom_rec.res_name] = Residue( - atom_rec.res_name, - atom_rec.res_number, - atom_rec.ins_code, - atom_rec.het_atom, - ch) + atom_rec.res_name, + atom_rec.res_number, + atom_rec.ins_code, + atom_rec.het_atom, + ch) res = disorderedres(ch[res_id], atom_rec.res_name) end end @@ -1624,30 +1624,30 @@ function unsafe_addatomtomodel!(mo::Model, # If atom does not exist in the residue, create an Atom if !haskey(atoms(res), atom_rec.atom_name) res[atom_rec.atom_name] = at - # Atom exists in the residue, atom names match and alt loc IDs are different + # Atom exists in the residue, atom names match and alt loc IDs are different elseif isa(res[atom_rec.atom_name], Atom) && - atom_rec.alt_loc_id != altlocid(res[atom_rec.atom_name]) + atom_rec.alt_loc_id != altlocid(res[atom_rec.atom_name]) # If we are removing disorder and the new atom is preferred to the old one, replace the old one if remove_disorder && - choosedefaultaltlocid(at, res[atom_rec.atom_name]) == atom_rec.alt_loc_id + choosedefaultaltlocid(at, res[atom_rec.atom_name]) == atom_rec.alt_loc_id res[atom_rec.atom_name] = at - # If we are not removing disorder, create a new disordered atom container and add both atoms + # If we are not removing disorder, create a new disordered atom container and add both atoms elseif !remove_disorder res[atom_rec.atom_name] = DisorderedAtom(Dict( - atom_rec.alt_loc_id => at, - altlocid(res[atom_rec.atom_name]) => res[atom_rec.atom_name] - ), choosedefaultaltlocid(at, res[atom_rec.atom_name])) + atom_rec.alt_loc_id => at, + altlocid(res[atom_rec.atom_name]) => res[atom_rec.atom_name] + ), choosedefaultaltlocid(at, res[atom_rec.atom_name])) end - # A disordered atom container already exists and the alt loc ID is not taken + # A disordered atom container already exists and the alt loc ID is not taken elseif isa(res[atom_rec.atom_name], DisorderedAtom) && - !(atom_rec.alt_loc_id in altlocids(res[atom_rec.atom_name])) + !(atom_rec.alt_loc_id in altlocids(res[atom_rec.atom_name])) # Add the new atom to the disordered atom container res[atom_rec.atom_name][atom_rec.alt_loc_id] = at # If the default alt loc requires changing, change it if choosedefaultaltlocid(defaultatom(res[atom_rec.atom_name]), at) != defaultaltlocid(res[atom_rec.atom_name]) res[atom_rec.atom_name] = DisorderedAtom( - res[atom_rec.atom_name], - atom_rec.alt_loc_id) + res[atom_rec.atom_name], + atom_rec.alt_loc_id) end else error("Two copies of the same atom have the same alternative location ID. Existing atom:\n" * @@ -1694,8 +1694,8 @@ chosen. """ function choosedefaultaltlocid(at_one::Atom, at_two::Atom) if occupancy(at_one) > occupancy(at_two) || - (occupancy(at_one) == occupancy(at_two) && - Int(altlocid(at_one)) < Int(altlocid(at_two))) + (occupancy(at_one) == occupancy(at_two) && + Int(altlocid(at_one)) < Int(altlocid(at_two))) return altlocid(at_one) else return altlocid(at_two) @@ -1723,12 +1723,11 @@ struct BCIFFormat end struct MMTFFormat end "Mapping of Protein Data Bank (PDB) formats to their file extensions." -const pdbextension = Dict{Type,String}( - PDBFormat => "pdb", +const pdbextension = Dict{Type, String}( + PDBFormat => "pdb", PDBXMLFormat => "xml", - MMCIFFormat => "cif", - MMTFFormat => "mmtf", - BCIFFormat => "bcif", + MMCIFFormat => "cif", + MMTFFormat => "mmtf", ) """ @@ -1769,8 +1768,8 @@ Call `MMTFDict` with a filepath or stream to read the dictionary from that source. The keyword argument `gzip` (default `false`) determines if the file is gzipped. """ -struct MMTFDict <: AbstractDict{String,Any} - dict::Dict{String,Any} +struct MMTFDict <: AbstractDict{String, Any} + dict::Dict{String, Any} end """ From 137186b3f1b658d5a7b061d8e4cdc5c0921c2e5d Mon Sep 17 00:00:00 2001 From: Brady Johnston Date: Tue, 13 May 2025 17:48:43 +0800 Subject: [PATCH 13/23] BCIF support for downloading --- src/download.jl | 25 ++++++++++++++++--------- src/model.jl | 1 + 2 files changed, 17 insertions(+), 9 deletions(-) diff --git a/src/download.jl b/src/download.jl index af162fb7..b948f11d 100644 --- a/src/download.jl +++ b/src/download.jl @@ -163,7 +163,7 @@ Requires an internet connection. """ function downloadpdb(pdbid::AbstractString; dir::AbstractString=pwd(), - format::Type{<:Union{PDBFormat, PDBXMLFormat, MMCIFFormat}}=PDBFormat, + format::Type{<:Union{PDBFormat,PDBXMLFormat,MMCIFFormat,BCIFFormat}}=PDBFormat, obsolete::Bool=false, overwrite::Bool=false, ba_number::Integer=0) @@ -197,9 +197,14 @@ function downloadpdb(pdbid::AbstractString; try # Download the compressed PDB file to the temporary location @info "Downloading file from PDB: $pdbid" - if ba_number == 0 + if format == BCIFFormat Downloads.download( - "http://files.rcsb.org/download/$pdbid.$(pdbextension[format]).gz", + "https://models.rcsb.org/$pdbid.bcif", + pdbpath, + ) + elseif ba_number == 0 + Downloads.download( + "http://files.rcsb.org/download/$pdbid.$(pdbextension[format])$ba_number.gz", archivefilepath, ) else @@ -219,7 +224,7 @@ function downloadpdb(pdbid::AbstractString; end end # Verify if the compressed file is downloaded properly and extract it - if isfile(archivefilepath) && filesize(archivefilepath) > 0 + if isfile(archivefilepath) && filesize(archivefilepath) > 0 && format != BCIFFormat stream = GzipDecompressorStream(open(archivefilepath)) open(pdbpath, "w") do output write(output, stream) @@ -238,7 +243,9 @@ function downloadpdb(pdbid::AbstractString; end finally # Remove the temporary compressd PDB file downloaded to clear up space - rm(archivefilepath, force=true) + if format != BCIFFormat + rm(archivefilepath, force=true) + end end end @@ -291,7 +298,7 @@ Requires an internet connection. in `dir`; by default skips downloading the PDB file if it exists. """ function downloadentirepdb(; dir::AbstractString=pwd(), - format::Type{<:Union{PDBFormat, PDBXMLFormat, MMCIFFormat}}=PDBFormat, + format::Type{<:Union{PDBFormat, PDBXMLFormat, MMCIFFormat, BCIFFormat}}=PDBFormat, overwrite::Bool=false) pdblist = pdbentrylist() @info "About to download $(length(pdblist)) PDB files, make sure you have enough disk space and time" @@ -310,7 +317,7 @@ automatically updates the PDB files of the given `format` inside the local Requires an internet connection. """ function updatelocalpdb(; dir::AbstractString=pwd(), - format::Type{<:Union{PDBFormat, PDBXMLFormat, MMCIFFormat}}=PDBFormat) + format::Type{<:Union{PDBFormat,PDBXMLFormat,MMCIFFormat,BCIFFormat}}=PDBFormat) addedlist, modifiedlist, obsoletelist = pdbrecentchanges() # Download the newly added and modified pdb files downloadpdb(vcat(addedlist, modifiedlist), dir=dir, overwrite=true, format=format) @@ -352,7 +359,7 @@ Requires an internet connection. in `dir`; by default skips downloading the PDB file if it exists. """ function downloadallobsoletepdb(; obsolete_dir::AbstractString=pwd(), - format::Type{<:Union{PDBFormat, PDBXMLFormat, MMCIFFormat}}=PDBFormat, + format::Type{<:Union{PDBFormat,PDBXMLFormat,MMCIFFormat,BCIFFormat}}=PDBFormat, overwrite::Bool=false) obsoletelist = pdbobsoletelist() downloadpdb(obsoletelist, dir=obsolete_dir, format=format, overwrite=overwrite) @@ -391,7 +398,7 @@ Requires an internet connection. """ function retrievepdb(pdbid::AbstractString; dir::AbstractString=pwd(), - format::Type{<:Union{PDBFormat, PDBXMLFormat, MMCIFFormat}}=MMCIFFormat, + format::Type{<:Union{PDBFormat,PDBXMLFormat,MMCIFFormat,BCIFFormat}}=MMCIFFormat, obsolete::Bool=false, overwrite::Bool=false, ba_number::Integer=0, diff --git a/src/model.jl b/src/model.jl index 173dbb6b..e137c5df 100644 --- a/src/model.jl +++ b/src/model.jl @@ -1728,6 +1728,7 @@ const pdbextension = Dict{Type, String}( PDBXMLFormat => "xml", MMCIFFormat => "cif", MMTFFormat => "mmtf", + BCIFFormat => "bcif", ) """ From 1bdf0fcdf82e0e4d0722c14696720742e784acd8 Mon Sep 17 00:00:00 2001 From: Brady Johnston Date: Tue, 13 May 2025 18:05:05 +0800 Subject: [PATCH 14/23] format --- src/bcif.jl | 17 +++++++---------- src/download.jl | 8 ++++---- 2 files changed, 11 insertions(+), 14 deletions(-) diff --git a/src/bcif.jl b/src/bcif.jl index cdb573f8..9ef3d154 100644 --- a/src/bcif.jl +++ b/src/bcif.jl @@ -8,17 +8,14 @@ import MsgPack A function to read a binary CIF file from MolStar and extract the list of attributes and their compressed bytes. """ - -# currently isn't implementing the dssp / stride. If using BCIF it seems strange to write -# out a .pdb, run dssp / string, then read it back in again. function Base.read(input::IO, - ::Type{BCIFFormat}, - structure_name::AbstractString="", - remove_disorder::Bool=false, - read_std_atoms::Bool=true, - read_het_atoms::Bool=true, - run_dssp::Bool=false, - run_stride::Bool=false) + ::Type{BCIFFormat}; + structure_name::AbstractString="", + remove_disorder::Bool=false, + read_std_atoms::Bool=true, + read_het_atoms::Bool=true, + run_dssp::Bool=false, + run_stride::Bool=false) file = MsgPack.unpack(read(input)) categories = file["dataBlocks"][1]["categories"] diff --git a/src/download.jl b/src/download.jl index b948f11d..33cb7c3c 100644 --- a/src/download.jl +++ b/src/download.jl @@ -163,7 +163,7 @@ Requires an internet connection. """ function downloadpdb(pdbid::AbstractString; dir::AbstractString=pwd(), - format::Type{<:Union{PDBFormat,PDBXMLFormat,MMCIFFormat,BCIFFormat}}=PDBFormat, + format::Type{<:Union{PDBFormat, PDBXMLFormat, MMCIFFormat, BCIFFormat}}=PDBFormat, obsolete::Bool=false, overwrite::Bool=false, ba_number::Integer=0) @@ -317,7 +317,7 @@ automatically updates the PDB files of the given `format` inside the local Requires an internet connection. """ function updatelocalpdb(; dir::AbstractString=pwd(), - format::Type{<:Union{PDBFormat,PDBXMLFormat,MMCIFFormat,BCIFFormat}}=PDBFormat) + format::Type{<:Union{PDBFormat, PDBXMLFormat, MMCIFFormat, BCIFFormat}}=PDBFormat) addedlist, modifiedlist, obsoletelist = pdbrecentchanges() # Download the newly added and modified pdb files downloadpdb(vcat(addedlist, modifiedlist), dir=dir, overwrite=true, format=format) @@ -359,7 +359,7 @@ Requires an internet connection. in `dir`; by default skips downloading the PDB file if it exists. """ function downloadallobsoletepdb(; obsolete_dir::AbstractString=pwd(), - format::Type{<:Union{PDBFormat,PDBXMLFormat,MMCIFFormat,BCIFFormat}}=PDBFormat, + format::Type{<:Union{PDBFormat, PDBXMLFormat, MMCIFFormat, BCIFFormat}}=PDBFormat, overwrite::Bool=false) obsoletelist = pdbobsoletelist() downloadpdb(obsoletelist, dir=obsolete_dir, format=format, overwrite=overwrite) @@ -398,7 +398,7 @@ Requires an internet connection. """ function retrievepdb(pdbid::AbstractString; dir::AbstractString=pwd(), - format::Type{<:Union{PDBFormat,PDBXMLFormat,MMCIFFormat,BCIFFormat}}=MMCIFFormat, + format::Type{<:Union{PDBFormat, PDBXMLFormat, MMCIFFormat, BCIFFormat}}=MMCIFFormat, obsolete::Bool=false, overwrite::Bool=false, ba_number::Integer=0, From b168125bb10ab511bc05348692c3b3175e145771 Mon Sep 17 00:00:00 2001 From: Brady Johnston Date: Tue, 13 May 2025 18:10:09 +0800 Subject: [PATCH 15/23] fix downloading --- 1BNA.pdb | 944 ++++++++++++++++++++++++++++++++++++++++++++++++ src/download.jl | 31 +- 2 files changed, 961 insertions(+), 14 deletions(-) create mode 100644 1BNA.pdb diff --git a/1BNA.pdb b/1BNA.pdb new file mode 100644 index 00000000..630a15d2 --- /dev/null +++ b/1BNA.pdb @@ -0,0 +1,944 @@ +HEADER DNA 26-JAN-81 1BNA +TITLE STRUCTURE OF A B-DNA DODECAMER. CONFORMATION AND DYNAMICS +COMPND MOL_ID: 1; +COMPND 2 MOLECULE: DNA (5'-D(*CP*GP*CP*GP*AP*AP*TP*TP*CP*GP*CP*G)-3'); +COMPND 3 CHAIN: A, B; +COMPND 4 ENGINEERED: YES +SOURCE MOL_ID: 1; +SOURCE 2 SYNTHETIC: YES +KEYWDS B-DNA, DOUBLE HELIX, DNA +EXPDTA X-RAY DIFFRACTION +AUTHOR H.R.DREW,R.M.WING,T.TAKANO,C.BROKA,S.TANAKA,K.ITAKURA,R.E.DICKERSON +REVDAT 4 07-FEB-24 1BNA 1 REMARK +REVDAT 3 24-FEB-09 1BNA 1 VERSN +REVDAT 2 01-APR-03 1BNA 1 JRNL +REVDAT 1 21-MAY-81 1BNA 0 +JRNL AUTH H.R.DREW,R.M.WING,T.TAKANO,C.BROKA,S.TANAKA,K.ITAKURA, +JRNL AUTH 2 R.E.DICKERSON +JRNL TITL STRUCTURE OF A B-DNA DODECAMER: CONFORMATION AND DYNAMICS. +JRNL REF PROC.NATL.ACAD.SCI.USA V. 78 2179 1981 +JRNL REFN ISSN 0027-8424 +JRNL PMID 6941276 +JRNL DOI 10.1073/PNAS.78.4.2179 +REMARK 1 +REMARK 1 REFERENCE 1 +REMARK 1 AUTH R.E.DICKERSON,H.R.DREW +REMARK 1 TITL KINEMATIC MODEL FOR B-DNA +REMARK 1 REF PROC.NATL.ACAD.SCI.USA V. 78 7318 1981 +REMARK 1 REFN ISSN 0027-8424 +REMARK 1 REFERENCE 2 +REMARK 1 AUTH R.E.DICKERSON,H.R.DREW +REMARK 1 TITL STRUCTURE OF A B-DNA DODECAMER. II. INFLUENCE OF BASE +REMARK 1 TITL 2 SEQUENCE ON HELIX STRUCTURE +REMARK 1 REF J.MOL.BIOL. V. 149 761 1981 +REMARK 1 REFN ISSN 0022-2836 +REMARK 1 REFERENCE 3 +REMARK 1 AUTH H.R.DREW,R.E.DICKERSON +REMARK 1 TITL STRUCTURE OF A B-DNA DODECAMER. III. GEOMETRY OF HYDRATION +REMARK 1 REF J.MOL.BIOL. V. 151 535 1981 +REMARK 1 REFN ISSN 0022-2836 +REMARK 1 REFERENCE 4 +REMARK 1 AUTH R.WING,H.R.DREW,T.TAKANO,C.BROKA,S.TANAKA,K.ITAKURA, +REMARK 1 AUTH 2 R.E.DICKERSON +REMARK 1 TITL CRYSTAL STRUCTURE ANALYSIS OF A COMPLETE TURN OF B-DNA +REMARK 1 REF NATURE V. 287 755 1980 +REMARK 1 REFN ISSN 0028-0836 +REMARK 2 +REMARK 2 RESOLUTION. 1.90 ANGSTROMS. +REMARK 3 +REMARK 3 REFINEMENT. +REMARK 3 PROGRAM : JACK-LEVITT +REMARK 3 AUTHORS : JACK,LEVITT +REMARK 3 +REMARK 3 DATA USED IN REFINEMENT. +REMARK 3 RESOLUTION RANGE HIGH (ANGSTROMS) : 1.90 +REMARK 3 RESOLUTION RANGE LOW (ANGSTROMS) : 8.00 +REMARK 3 DATA CUTOFF (SIGMA(F)) : NULL +REMARK 3 DATA CUTOFF HIGH (ABS(F)) : NULL +REMARK 3 DATA CUTOFF LOW (ABS(F)) : NULL +REMARK 3 COMPLETENESS (WORKING+TEST) (%) : NULL +REMARK 3 NUMBER OF REFLECTIONS : 2725 +REMARK 3 +REMARK 3 FIT TO DATA USED IN REFINEMENT. +REMARK 3 CROSS-VALIDATION METHOD : NULL +REMARK 3 FREE R VALUE TEST SET SELECTION : NULL +REMARK 3 R VALUE (WORKING SET) : 0.178 +REMARK 3 FREE R VALUE : NULL +REMARK 3 FREE R VALUE TEST SET SIZE (%) : NULL +REMARK 3 FREE R VALUE TEST SET COUNT : NULL +REMARK 3 ESTIMATED ERROR OF FREE R VALUE : NULL +REMARK 3 +REMARK 3 FIT IN THE HIGHEST RESOLUTION BIN. +REMARK 3 TOTAL NUMBER OF BINS USED : NULL +REMARK 3 BIN RESOLUTION RANGE HIGH (A) : NULL +REMARK 3 BIN RESOLUTION RANGE LOW (A) : NULL +REMARK 3 BIN COMPLETENESS (WORKING+TEST) (%) : NULL +REMARK 3 REFLECTIONS IN BIN (WORKING SET) : NULL +REMARK 3 BIN R VALUE (WORKING SET) : NULL +REMARK 3 BIN FREE R VALUE : NULL +REMARK 3 BIN FREE R VALUE TEST SET SIZE (%) : NULL +REMARK 3 BIN FREE R VALUE TEST SET COUNT : NULL +REMARK 3 ESTIMATED ERROR OF BIN FREE R VALUE : NULL +REMARK 3 +REMARK 3 NUMBER OF NON-HYDROGEN ATOMS USED IN REFINEMENT. +REMARK 3 PROTEIN ATOMS : 0 +REMARK 3 NUCLEIC ACID ATOMS : 486 +REMARK 3 HETEROGEN ATOMS : 0 +REMARK 3 SOLVENT ATOMS : 80 +REMARK 3 +REMARK 3 B VALUES. +REMARK 3 FROM WILSON PLOT (A**2) : NULL +REMARK 3 MEAN B VALUE (OVERALL, A**2) : NULL +REMARK 3 OVERALL ANISOTROPIC B VALUE. +REMARK 3 B11 (A**2) : NULL +REMARK 3 B22 (A**2) : NULL +REMARK 3 B33 (A**2) : NULL +REMARK 3 B12 (A**2) : NULL +REMARK 3 B13 (A**2) : NULL +REMARK 3 B23 (A**2) : NULL +REMARK 3 +REMARK 3 ESTIMATED COORDINATE ERROR. +REMARK 3 ESD FROM LUZZATI PLOT (A) : NULL +REMARK 3 ESD FROM SIGMAA (A) : NULL +REMARK 3 LOW RESOLUTION CUTOFF (A) : NULL +REMARK 3 +REMARK 3 CROSS-VALIDATED ESTIMATED COORDINATE ERROR. +REMARK 3 ESD FROM C-V LUZZATI PLOT (A) : NULL +REMARK 3 ESD FROM C-V SIGMAA (A) : NULL +REMARK 3 +REMARK 3 RMS DEVIATIONS FROM IDEAL VALUES. +REMARK 3 BOND LENGTHS (A) : NULL +REMARK 3 BOND ANGLES (DEGREES) : NULL +REMARK 3 DIHEDRAL ANGLES (DEGREES) : NULL +REMARK 3 IMPROPER ANGLES (DEGREES) : NULL +REMARK 3 +REMARK 3 ISOTROPIC THERMAL MODEL : NULL +REMARK 3 +REMARK 3 ISOTROPIC THERMAL FACTOR RESTRAINTS. RMS SIGMA +REMARK 3 MAIN-CHAIN BOND (A**2) : NULL ; NULL +REMARK 3 MAIN-CHAIN ANGLE (A**2) : NULL ; NULL +REMARK 3 SIDE-CHAIN BOND (A**2) : NULL ; NULL +REMARK 3 SIDE-CHAIN ANGLE (A**2) : NULL ; NULL +REMARK 3 +REMARK 3 NCS MODEL : NULL +REMARK 3 +REMARK 3 NCS RESTRAINTS. RMS SIGMA/WEIGHT +REMARK 3 GROUP 1 POSITIONAL (A) : NULL ; NULL +REMARK 3 GROUP 1 B-FACTOR (A**2) : NULL ; NULL +REMARK 3 +REMARK 3 PARAMETER FILE 1 : NULL +REMARK 3 TOPOLOGY FILE 1 : NULL +REMARK 3 +REMARK 3 OTHER REFINEMENT REMARKS: NULL +REMARK 4 +REMARK 4 1BNA COMPLIES WITH FORMAT V. 3.30, 13-JUL-11 +REMARK 100 +REMARK 100 THIS ENTRY HAS BEEN PROCESSED BY BNL. +REMARK 100 THE DEPOSITION ID IS D_1000171933. +REMARK 200 +REMARK 200 EXPERIMENTAL DETAILS +REMARK 200 EXPERIMENT TYPE : X-RAY DIFFRACTION +REMARK 200 DATE OF DATA COLLECTION : NULL +REMARK 200 TEMPERATURE (KELVIN) : NULL +REMARK 200 PH : NULL +REMARK 200 NUMBER OF CRYSTALS USED : NULL +REMARK 200 +REMARK 200 SYNCHROTRON (Y/N) : N +REMARK 200 RADIATION SOURCE : NULL +REMARK 200 BEAMLINE : NULL +REMARK 200 X-RAY GENERATOR MODEL : NULL +REMARK 200 MONOCHROMATIC OR LAUE (M/L) : NULL +REMARK 200 WAVELENGTH OR RANGE (A) : NULL +REMARK 200 MONOCHROMATOR : NULL +REMARK 200 OPTICS : NULL +REMARK 200 +REMARK 200 DETECTOR TYPE : DIFFRACTOMETER +REMARK 200 DETECTOR MANUFACTURER : NULL +REMARK 200 INTENSITY-INTEGRATION SOFTWARE : NULL +REMARK 200 DATA SCALING SOFTWARE : NULL +REMARK 200 +REMARK 200 NUMBER OF UNIQUE REFLECTIONS : 5534 +REMARK 200 RESOLUTION RANGE HIGH (A) : 1.900 +REMARK 200 RESOLUTION RANGE LOW (A) : 8.000 +REMARK 200 REJECTION CRITERIA (SIGMA(I)) : NULL +REMARK 200 +REMARK 200 OVERALL. +REMARK 200 COMPLETENESS FOR RANGE (%) : NULL +REMARK 200 DATA REDUNDANCY : NULL +REMARK 200 R MERGE (I) : NULL +REMARK 200 R SYM (I) : NULL +REMARK 200 FOR THE DATA SET : NULL +REMARK 200 +REMARK 200 IN THE HIGHEST RESOLUTION SHELL. +REMARK 200 HIGHEST RESOLUTION SHELL, RANGE HIGH (A) : NULL +REMARK 200 HIGHEST RESOLUTION SHELL, RANGE LOW (A) : NULL +REMARK 200 COMPLETENESS FOR SHELL (%) : NULL +REMARK 200 DATA REDUNDANCY IN SHELL : NULL +REMARK 200 R MERGE FOR SHELL (I) : NULL +REMARK 200 R SYM FOR SHELL (I) : NULL +REMARK 200 FOR SHELL : NULL +REMARK 200 +REMARK 200 DIFFRACTION PROTOCOL: NULL +REMARK 200 METHOD USED TO DETERMINE THE STRUCTURE: NULL +REMARK 200 SOFTWARE USED: NULL +REMARK 200 STARTING MODEL: NULL +REMARK 200 +REMARK 200 REMARK: NULL +REMARK 280 +REMARK 280 CRYSTAL +REMARK 280 SOLVENT CONTENT, VS (%): 45.79 +REMARK 280 MATTHEWS COEFFICIENT, VM (ANGSTROMS**3/DA): 2.27 +REMARK 280 +REMARK 280 CRYSTALLIZATION CONDITIONS: VAPOR DIFFUSION, TEMPERATURE 290.00K +REMARK 290 +REMARK 290 CRYSTALLOGRAPHIC SYMMETRY +REMARK 290 SYMMETRY OPERATORS FOR SPACE GROUP: P 21 21 21 +REMARK 290 +REMARK 290 SYMOP SYMMETRY +REMARK 290 NNNMMM OPERATOR +REMARK 290 1555 X,Y,Z +REMARK 290 2555 -X+1/2,-Y,Z+1/2 +REMARK 290 3555 -X,Y+1/2,-Z+1/2 +REMARK 290 4555 X+1/2,-Y+1/2,-Z +REMARK 290 +REMARK 290 WHERE NNN -> OPERATOR NUMBER +REMARK 290 MMM -> TRANSLATION VECTOR +REMARK 290 +REMARK 290 CRYSTALLOGRAPHIC SYMMETRY TRANSFORMATIONS +REMARK 290 THE FOLLOWING TRANSFORMATIONS OPERATE ON THE ATOM/HETATM +REMARK 290 RECORDS IN THIS ENTRY TO PRODUCE CRYSTALLOGRAPHICALLY +REMARK 290 RELATED MOLECULES. +REMARK 290 SMTRY1 1 1.000000 0.000000 0.000000 0.00000 +REMARK 290 SMTRY2 1 0.000000 1.000000 0.000000 0.00000 +REMARK 290 SMTRY3 1 0.000000 0.000000 1.000000 0.00000 +REMARK 290 SMTRY1 2 -1.000000 0.000000 0.000000 12.43500 +REMARK 290 SMTRY2 2 0.000000 -1.000000 0.000000 0.00000 +REMARK 290 SMTRY3 2 0.000000 0.000000 1.000000 33.10000 +REMARK 290 SMTRY1 3 -1.000000 0.000000 0.000000 0.00000 +REMARK 290 SMTRY2 3 0.000000 1.000000 0.000000 20.19500 +REMARK 290 SMTRY3 3 0.000000 0.000000 -1.000000 33.10000 +REMARK 290 SMTRY1 4 1.000000 0.000000 0.000000 12.43500 +REMARK 290 SMTRY2 4 0.000000 -1.000000 0.000000 20.19500 +REMARK 290 SMTRY3 4 0.000000 0.000000 -1.000000 0.00000 +REMARK 290 +REMARK 290 REMARK: NULL +REMARK 300 +REMARK 300 BIOMOLECULE: 1 +REMARK 300 SEE REMARK 350 FOR THE AUTHOR PROVIDED AND/OR PROGRAM +REMARK 300 GENERATED ASSEMBLY INFORMATION FOR THE STRUCTURE IN +REMARK 300 THIS ENTRY. THE REMARK MAY ALSO PROVIDE INFORMATION ON +REMARK 300 BURIED SURFACE AREA. +REMARK 350 +REMARK 350 COORDINATES FOR A COMPLETE MULTIMER REPRESENTING THE KNOWN +REMARK 350 BIOLOGICALLY SIGNIFICANT OLIGOMERIZATION STATE OF THE +REMARK 350 MOLECULE CAN BE GENERATED BY APPLYING BIOMT TRANSFORMATIONS +REMARK 350 GIVEN BELOW. BOTH NON-CRYSTALLOGRAPHIC AND +REMARK 350 CRYSTALLOGRAPHIC OPERATIONS ARE GIVEN. +REMARK 350 +REMARK 350 BIOMOLECULE: 1 +REMARK 350 AUTHOR DETERMINED BIOLOGICAL UNIT: DIMERIC +REMARK 350 APPLY THE FOLLOWING TO CHAINS: A, B +REMARK 350 BIOMT1 1 1.000000 0.000000 0.000000 0.00000 +REMARK 350 BIOMT2 1 0.000000 1.000000 0.000000 0.00000 +REMARK 350 BIOMT3 1 0.000000 0.000000 1.000000 0.00000 +REMARK 500 +REMARK 500 GEOMETRY AND STEREOCHEMISTRY +REMARK 500 SUBTOPIC: CLOSE CONTACTS IN SAME ASYMMETRIC UNIT +REMARK 500 +REMARK 500 THE FOLLOWING ATOMS ARE IN CLOSE CONTACT. +REMARK 500 +REMARK 500 ATM1 RES C SSEQI ATM2 RES C SSEQI DISTANCE +REMARK 500 O HOH A 62 O HOH A 77 1.61 +REMARK 500 OP2 DA A 6 O HOH A 65 1.89 +REMARK 500 OP2 DG A 10 O HOH A 70 2.02 +REMARK 500 O HOH A 54 O HOH A 86 2.07 +REMARK 500 O HOH A 77 O HOH B 63 2.09 +REMARK 500 O HOH A 31 O HOH A 99 2.12 +REMARK 500 +REMARK 500 REMARK: NULL +REMARK 500 +REMARK 500 GEOMETRY AND STEREOCHEMISTRY +REMARK 500 SUBTOPIC: COVALENT BOND LENGTHS +REMARK 500 +REMARK 500 THE STEREOCHEMICAL PARAMETERS OF THE FOLLOWING RESIDUES +REMARK 500 HAVE VALUES WHICH DEVIATE FROM EXPECTED VALUES BY MORE +REMARK 500 THAN 6*RMSD (M=MODEL NUMBER; RES=RESIDUE NAME; C=CHAIN +REMARK 500 IDENTIFIER; SSEQ=SEQUENCE NUMBER; I=INSERTION CODE). +REMARK 500 +REMARK 500 STANDARD TABLE: +REMARK 500 FORMAT: (10X,I3,1X,2(A3,1X,A1,I4,A1,1X,A4,3X),1X,F6.3) +REMARK 500 +REMARK 500 EXPECTED VALUES PROTEIN: ENGH AND HUBER, 1999 +REMARK 500 EXPECTED VALUES NUCLEIC ACID: CLOWNEY ET AL 1996 +REMARK 500 +REMARK 500 M RES CSSEQI ATM1 RES CSSEQI ATM2 DEVIATION +REMARK 500 DC A 1 C5 DC A 1 C6 0.051 +REMARK 500 DG A 2 C5 DG A 2 N7 -0.040 +REMARK 500 DG A 2 N7 DG A 2 C8 0.036 +REMARK 500 DC A 3 C5 DC A 3 C6 0.052 +REMARK 500 DG A 4 C5 DG A 4 N7 -0.039 +REMARK 500 DG A 4 N7 DG A 4 C8 0.036 +REMARK 500 DA A 5 C5 DA A 5 N7 -0.039 +REMARK 500 DA A 6 C5 DA A 6 N7 -0.038 +REMARK 500 DT A 7 C5 DT A 7 C6 0.056 +REMARK 500 DT A 8 C5 DT A 8 C6 0.056 +REMARK 500 DC A 9 C5 DC A 9 C6 0.053 +REMARK 500 DG A 10 C5 DG A 10 N7 -0.039 +REMARK 500 DG A 10 N7 DG A 10 C8 0.036 +REMARK 500 DC A 11 C5 DC A 11 C6 0.051 +REMARK 500 DG A 12 C5 DG A 12 N7 -0.039 +REMARK 500 DC B 13 C5 DC B 13 C6 0.052 +REMARK 500 DG B 14 C5 DG B 14 N7 -0.042 +REMARK 500 DC B 15 C5 DC B 15 C6 0.049 +REMARK 500 DG B 16 C5 DG B 16 N7 -0.040 +REMARK 500 DG B 16 N7 DG B 16 C8 0.039 +REMARK 500 DA B 17 C5 DA B 17 N7 -0.039 +REMARK 500 DA B 18 C5 DA B 18 N7 -0.039 +REMARK 500 DT B 19 C5 DT B 19 C6 0.056 +REMARK 500 DT B 20 C5 DT B 20 C6 0.051 +REMARK 500 DC B 21 C5 DC B 21 C6 0.052 +REMARK 500 DG B 22 C5 DG B 22 N7 -0.036 +REMARK 500 DG B 22 N7 DG B 22 C8 0.037 +REMARK 500 DC B 23 C5 DC B 23 C6 0.051 +REMARK 500 DG B 24 C5 DG B 24 N7 -0.039 +REMARK 500 +REMARK 500 REMARK: NULL +REMARK 500 +REMARK 500 GEOMETRY AND STEREOCHEMISTRY +REMARK 500 SUBTOPIC: COVALENT BOND ANGLES +REMARK 500 +REMARK 500 THE STEREOCHEMICAL PARAMETERS OF THE FOLLOWING RESIDUES +REMARK 500 HAVE VALUES WHICH DEVIATE FROM EXPECTED VALUES BY MORE +REMARK 500 THAN 6*RMSD (M=MODEL NUMBER; RES=RESIDUE NAME; C=CHAIN +REMARK 500 IDENTIFIER; SSEQ=SEQUENCE NUMBER; I=INSERTION CODE). +REMARK 500 +REMARK 500 STANDARD TABLE: +REMARK 500 FORMAT: (10X,I3,1X,A3,1X,A1,I4,A1,3(1X,A4,2X),12X,F5.1) +REMARK 500 +REMARK 500 EXPECTED VALUES PROTEIN: ENGH AND HUBER, 1999 +REMARK 500 EXPECTED VALUES NUCLEIC ACID: CLOWNEY ET AL 1996 +REMARK 500 +REMARK 500 M RES CSSEQI ATM1 ATM2 ATM3 +REMARK 500 DC A 1 C4' - C3' - C2' ANGL. DEV. = -4.8 DEGREES +REMARK 500 DC A 1 C3' - C2' - C1' ANGL. DEV. = -6.0 DEGREES +REMARK 500 DC A 1 O4' - C1' - C2' ANGL. DEV. = -5.4 DEGREES +REMARK 500 DG A 2 O5' - C5' - C4' ANGL. DEV. = -6.8 DEGREES +REMARK 500 DG A 2 O4' - C1' - N9 ANGL. DEV. = -5.0 DEGREES +REMARK 500 DC A 3 O4' - C1' - N1 ANGL. DEV. = -5.4 DEGREES +REMARK 500 DG A 4 C3' - C2' - C1' ANGL. DEV. = -5.2 DEGREES +REMARK 500 DA A 5 O4' - C1' - C2' ANGL. DEV. = -4.9 DEGREES +REMARK 500 DA A 5 O4' - C1' - N9 ANGL. DEV. = -4.4 DEGREES +REMARK 500 DA A 6 O4' - C1' - C2' ANGL. DEV. = -6.1 DEGREES +REMARK 500 DT A 7 O5' - C5' - C4' ANGL. DEV. = -6.1 DEGREES +REMARK 500 DT A 7 N1 - C2 - N3 ANGL. DEV. = 3.7 DEGREES +REMARK 500 DT A 7 C2 - N3 - C4 ANGL. DEV. = -4.4 DEGREES +REMARK 500 DT A 7 C5 - C6 - N1 ANGL. DEV. = -3.7 DEGREES +REMARK 500 DT A 8 O4' - C1' - C2' ANGL. DEV. = -5.4 DEGREES +REMARK 500 DT A 8 N1 - C2 - N3 ANGL. DEV. = 4.1 DEGREES +REMARK 500 DT A 8 C2 - N3 - C4 ANGL. DEV. = -4.9 DEGREES +REMARK 500 DT A 8 C5 - C6 - N1 ANGL. DEV. = -4.1 DEGREES +REMARK 500 DG A 10 C3' - C2' - C1' ANGL. DEV. = -7.1 DEGREES +REMARK 500 DG A 12 O4' - C1' - C2' ANGL. DEV. = -5.9 DEGREES +REMARK 500 DC B 13 O4' - C1' - N1 ANGL. DEV. = -4.6 DEGREES +REMARK 500 DG B 14 O5' - C5' - C4' ANGL. DEV. = -5.8 DEGREES +REMARK 500 DG B 14 O4' - C1' - C2' ANGL. DEV. = -5.2 DEGREES +REMARK 500 DG B 14 O4' - C1' - N9 ANGL. DEV. = -5.3 DEGREES +REMARK 500 DG B 16 O5' - C5' - C4' ANGL. DEV. = -5.0 DEGREES +REMARK 500 DA B 18 O4' - C1' - N9 ANGL. DEV. = -4.4 DEGREES +REMARK 500 DT B 19 N1 - C2 - N3 ANGL. DEV. = 3.9 DEGREES +REMARK 500 DT B 19 C2 - N3 - C4 ANGL. DEV. = -4.9 DEGREES +REMARK 500 DT B 19 C5 - C6 - N1 ANGL. DEV. = -4.7 DEGREES +REMARK 500 DT B 20 O4' - C1' - C2' ANGL. DEV. = -6.1 DEGREES +REMARK 500 DT B 20 O4' - C1' - N1 ANGL. DEV. = -5.2 DEGREES +REMARK 500 DT B 20 N1 - C2 - N3 ANGL. DEV. = 3.6 DEGREES +REMARK 500 DT B 20 C2 - N3 - C4 ANGL. DEV. = -4.4 DEGREES +REMARK 500 DT B 20 C5 - C6 - N1 ANGL. DEV. = -4.0 DEGREES +REMARK 500 DT B 20 C6 - C5 - C7 ANGL. DEV. = -3.6 DEGREES +REMARK 500 DC B 21 O4' - C1' - N1 ANGL. DEV. = -7.1 DEGREES +REMARK 500 DG B 22 C3' - C2' - C1' ANGL. DEV. = -6.8 DEGREES +REMARK 500 DG B 22 O4' - C1' - N9 ANGL. DEV. = 1.9 DEGREES +REMARK 500 DG B 24 O5' - C5' - C4' ANGL. DEV. = -5.5 DEGREES +REMARK 500 +REMARK 500 REMARK: NULL +DBREF 1BNA A 1 12 PDB 1BNA 1BNA 1 12 +DBREF 1BNA B 13 24 PDB 1BNA 1BNA 13 24 +SEQRES 1 A 12 DC DG DC DG DA DA DT DT DC DG DC DG +SEQRES 1 B 12 DC DG DC DG DA DA DT DT DC DG DC DG +FORMUL 3 HOH *80(H2 O) +CRYST1 24.870 40.390 66.200 90.00 90.00 90.00 P 21 21 21 8 +ORIGX1 1.000000 0.000000 0.000000 0.00000 +ORIGX2 0.000000 1.000000 0.000000 0.00000 +ORIGX3 0.000000 0.000000 1.000000 0.00000 +SCALE1 0.040209 0.000000 0.000000 0.00000 +SCALE2 0.000000 0.024759 0.000000 0.00000 +SCALE3 0.000000 0.000000 0.015106 0.00000 +ATOM 1 O5' DC A 1 18.935 34.195 25.617 1.00 64.35 O +ATOM 2 C5' DC A 1 19.130 33.921 24.219 1.00 44.69 C +ATOM 3 C4' DC A 1 19.961 32.668 24.100 1.00 31.28 C +ATOM 4 O4' DC A 1 19.360 31.583 24.852 1.00 37.45 O +ATOM 5 C3' DC A 1 20.172 32.122 22.694 1.00 46.72 C +ATOM 6 O3' DC A 1 21.350 31.325 22.681 1.00 48.89 O +ATOM 7 C2' DC A 1 18.948 31.223 22.647 1.00 30.88 C +ATOM 8 C1' DC A 1 19.231 30.482 23.944 1.00 36.58 C +ATOM 9 N1 DC A 1 18.070 29.661 24.380 1.00 40.51 N +ATOM 10 C2 DC A 1 18.224 28.454 25.015 1.00 16.62 C +ATOM 11 O2 DC A 1 19.360 28.014 25.214 1.00 27.75 O +ATOM 12 N3 DC A 1 17.143 27.761 25.377 1.00 20.55 N +ATOM 13 C4 DC A 1 15.917 28.226 25.120 1.00 34.72 C +ATOM 14 N4 DC A 1 14.828 27.477 25.444 1.00 40.31 N +ATOM 15 C5 DC A 1 15.719 29.442 24.471 1.00 30.78 C +ATOM 16 C6 DC A 1 16.843 30.171 24.101 1.00 25.90 C +ATOM 17 P DG A 2 22.409 31.286 21.483 1.00 58.85 P +ATOM 18 OP1 DG A 2 23.536 32.157 21.851 1.00 57.82 O +ATOM 19 OP2 DG A 2 21.822 31.459 20.139 1.00 78.33 O +ATOM 20 O5' DG A 2 22.840 29.751 21.498 1.00 40.36 O +ATOM 21 C5' DG A 2 23.543 29.175 22.594 1.00 47.19 C +ATOM 22 C4' DG A 2 23.494 27.709 22.279 1.00 47.81 C +ATOM 23 O4' DG A 2 22.193 27.252 22.674 1.00 38.76 O +ATOM 24 C3' DG A 2 23.693 27.325 20.807 1.00 28.58 C +ATOM 25 O3' DG A 2 24.723 26.320 20.653 1.00 40.44 O +ATOM 26 C2' DG A 2 22.273 26.885 20.416 1.00 21.14 C +ATOM 27 C1' DG A 2 21.721 26.304 21.716 1.00 33.95 C +ATOM 28 N9 DG A 2 20.237 26.470 21.780 1.00 34.00 N +ATOM 29 C8 DG A 2 19.526 27.584 21.429 1.00 36.47 C +ATOM 30 N7 DG A 2 18.207 27.455 21.636 1.00 32.37 N +ATOM 31 C5 DG A 2 18.083 26.212 22.142 1.00 15.06 C +ATOM 32 C6 DG A 2 16.904 25.525 22.545 1.00 11.88 C +ATOM 33 O6 DG A 2 15.739 25.916 22.518 1.00 21.30 O +ATOM 34 N1 DG A 2 17.197 24.279 23.037 1.00 15.44 N +ATOM 35 C2 DG A 2 18.434 23.717 23.155 1.00 9.63 C +ATOM 36 N2 DG A 2 18.508 22.456 23.668 1.00 16.69 N +ATOM 37 N3 DG A 2 19.537 24.360 22.770 1.00 30.98 N +ATOM 38 C4 DG A 2 19.290 25.594 22.274 1.00 18.56 C +ATOM 39 P DC A 3 25.064 25.621 19.252 1.00 44.67 P +ATOM 40 OP1 DC A 3 26.506 25.316 19.220 1.00 53.89 O +ATOM 41 OP2 DC A 3 24.559 26.412 18.115 1.00 57.79 O +ATOM 42 O5' DC A 3 24.260 24.246 19.327 1.00 35.42 O +ATOM 43 C5' DC A 3 24.584 23.285 20.335 1.00 45.75 C +ATOM 44 C4' DC A 3 23.523 22.233 20.245 1.00 43.02 C +ATOM 45 O4' DC A 3 22.256 22.844 20.453 1.00 36.85 O +ATOM 46 C3' DC A 3 23.424 21.557 18.903 1.00 40.14 C +ATOM 47 O3' DC A 3 24.121 20.309 18.928 1.00 49.62 O +ATOM 48 C2' DC A 3 21.930 21.406 18.661 1.00 53.79 C +ATOM 49 C1' DC A 3 21.278 21.966 19.909 1.00 22.18 C +ATOM 50 N1 DC A 3 20.196 22.889 19.521 1.00 25.44 N +ATOM 51 C2 DC A 3 18.909 22.584 19.816 1.00 19.81 C +ATOM 52 O2 DC A 3 18.685 21.512 20.382 1.00 29.92 O +ATOM 53 N3 DC A 3 17.935 23.447 19.502 1.00 21.59 N +ATOM 54 C4 DC A 3 18.217 24.603 18.897 1.00 14.01 C +ATOM 55 N4 DC A 3 17.221 25.499 18.629 1.00 26.88 N +ATOM 56 C5 DC A 3 19.526 24.945 18.571 1.00 27.59 C +ATOM 57 C6 DC A 3 20.537 24.048 18.899 1.00 27.05 C +ATOM 58 P DG A 4 24.249 19.412 17.617 1.00 44.54 P +ATOM 59 OP1 DG A 4 25.420 18.535 17.765 1.00 61.90 O +ATOM 60 OP2 DG A 4 24.208 20.296 16.440 1.00 37.36 O +ATOM 61 O5' DG A 4 22.931 18.537 17.670 1.00 32.01 O +ATOM 62 C5' DG A 4 22.714 17.625 18.753 1.00 37.89 C +ATOM 63 C4' DG A 4 21.393 16.960 18.505 1.00 53.00 C +ATOM 64 O4' DG A 4 20.353 17.952 18.496 1.00 38.79 O +ATOM 65 C3' DG A 4 21.264 16.229 17.176 1.00 56.72 C +ATOM 66 O3' DG A 4 20.284 15.214 17.238 1.00 64.12 O +ATOM 67 C2' DG A 4 20.793 17.368 16.288 1.00 40.81 C +ATOM 68 C1' DG A 4 19.716 17.901 17.218 1.00 30.52 C +ATOM 69 N9 DG A 4 19.305 19.281 16.869 1.00 28.53 N +ATOM 70 C8 DG A 4 20.017 20.263 16.232 1.00 27.82 C +ATOM 71 N7 DG A 4 19.313 21.394 16.077 1.00 28.01 N +ATOM 72 C5 DG A 4 18.121 21.100 16.635 1.00 23.22 C +ATOM 73 C6 DG A 4 16.952 21.904 16.749 1.00 29.21 C +ATOM 74 O6 DG A 4 16.769 23.057 16.368 1.00 38.58 O +ATOM 75 N1 DG A 4 15.933 21.214 17.352 1.00 27.94 N +ATOM 76 C2 DG A 4 15.972 19.930 17.816 1.00 23.44 C +ATOM 77 N2 DG A 4 14.831 19.416 18.353 1.00 42.64 N +ATOM 78 N3 DG A 4 17.068 19.179 17.717 1.00 21.56 N +ATOM 79 C4 DG A 4 18.084 19.825 17.121 1.00 23.44 C +ATOM 80 P DA A 5 20.356 13.969 16.245 1.00 57.01 P +ATOM 81 OP1 DA A 5 21.116 12.891 16.892 1.00 58.59 O +ATOM 82 OP2 DA A 5 20.837 14.423 14.910 1.00 51.96 O +ATOM 83 O5' DA A 5 18.810 13.581 16.161 1.00 47.12 O +ATOM 84 C5' DA A 5 18.015 13.569 17.362 1.00 47.67 C +ATOM 85 C4' DA A 5 16.672 14.088 16.957 1.00 64.79 C +ATOM 86 O4' DA A 5 16.842 15.447 16.561 1.00 47.60 O +ATOM 87 C3' DA A 5 16.019 13.393 15.764 1.00 51.50 C +ATOM 88 O3' DA A 5 14.762 12.796 16.120 1.00 52.18 O +ATOM 89 C2' DA A 5 15.952 14.498 14.696 1.00 45.00 C +ATOM 90 C1' DA A 5 15.851 15.732 15.569 1.00 26.88 C +ATOM 91 N9 DA A 5 16.391 16.916 14.867 1.00 16.69 N +ATOM 92 C8 DA A 5 17.658 17.103 14.382 1.00 28.14 C +ATOM 93 N7 DA A 5 17.863 18.346 13.913 1.00 34.85 N +ATOM 94 C5 DA A 5 16.673 18.953 14.098 1.00 22.49 C +ATOM 95 C6 DA A 5 16.230 20.279 13.819 1.00 18.12 C +ATOM 96 N6 DA A 5 17.045 21.222 13.268 1.00 29.30 N +ATOM 97 N1 DA A 5 14.966 20.578 14.118 1.00 27.61 N +ATOM 98 C2 DA A 5 14.178 19.652 14.669 1.00 18.53 C +ATOM 99 N3 DA A 5 14.463 18.392 14.984 1.00 29.16 N +ATOM 100 C4 DA A 5 15.750 18.110 14.661 1.00 15.08 C +ATOM 101 P DA A 6 13.866 12.006 15.063 1.00 43.68 P +ATOM 102 OP1 DA A 6 13.028 11.039 15.800 1.00 42.55 O +ATOM 103 OP2 DA A 6 14.715 11.499 13.968 1.00 54.20 O +ATOM 104 O5' DA A 6 12.879 13.111 14.480 1.00 28.20 O +ATOM 105 C5' DA A 6 11.802 13.597 15.290 1.00 42.29 C +ATOM 106 C4' DA A 6 11.111 14.603 14.435 1.00 33.23 C +ATOM 107 O4' DA A 6 12.152 15.460 13.962 1.00 41.48 O +ATOM 108 C3' DA A 6 10.417 14.070 13.187 1.00 18.16 C +ATOM 109 O3' DA A 6 9.007 14.369 13.181 1.00 30.42 O +ATOM 110 C2' DA A 6 11.240 14.692 12.061 1.00 52.97 C +ATOM 111 C1' DA A 6 11.699 15.974 12.719 1.00 38.93 C +ATOM 112 N9 DA A 6 12.918 16.526 12.078 1.00 19.06 N +ATOM 113 C8 DA A 6 14.115 15.899 11.868 1.00 17.83 C +ATOM 114 N7 DA A 6 15.049 16.714 11.356 1.00 29.55 N +ATOM 115 C5 DA A 6 14.416 17.901 11.246 1.00 19.88 C +ATOM 116 C6 DA A 6 14.873 19.187 10.815 1.00 17.26 C +ATOM 117 N6 DA A 6 16.161 19.418 10.427 1.00 19.85 N +ATOM 118 N1 DA A 6 13.999 20.191 10.852 1.00 17.93 N +ATOM 119 C2 DA A 6 12.753 19.962 11.272 1.00 23.00 C +ATOM 120 N3 DA A 6 12.210 18.824 11.698 1.00 21.37 N +ATOM 121 C4 DA A 6 13.116 17.823 11.657 1.00 15.93 C +ATOM 122 P DT A 7 8.081 14.050 11.915 1.00 40.72 P +ATOM 123 OP1 DT A 7 6.668 13.960 12.342 1.00 46.75 O +ATOM 124 OP2 DT A 7 8.600 12.894 11.137 1.00 42.53 O +ATOM 125 O5' DT A 7 8.239 15.387 11.076 1.00 35.21 O +ATOM 126 C5' DT A 7 7.907 16.635 11.686 1.00 34.88 C +ATOM 127 C4' DT A 7 8.162 17.628 10.598 1.00 31.45 C +ATOM 128 O4' DT A 7 9.543 17.580 10.279 1.00 46.82 O +ATOM 129 C3' DT A 7 7.461 17.284 9.296 1.00 23.76 C +ATOM 130 O3' DT A 7 6.251 18.034 9.162 1.00 44.27 O +ATOM 131 C2' DT A 7 8.532 17.527 8.223 1.00 26.30 C +ATOM 132 C1' DT A 7 9.644 18.209 9.019 1.00 28.96 C +ATOM 133 N1 DT A 7 11.021 17.903 8.565 1.00 20.47 N +ATOM 134 C2 DT A 7 11.822 18.923 8.176 1.00 28.01 C +ATOM 135 O2 DT A 7 11.383 20.077 8.143 1.00 40.01 O +ATOM 136 N3 DT A 7 13.119 18.641 7.852 1.00 27.94 N +ATOM 137 C4 DT A 7 13.633 17.372 7.882 1.00 15.14 C +ATOM 138 O4 DT A 7 14.830 17.222 7.619 1.00 32.54 O +ATOM 139 C5 DT A 7 12.781 16.325 8.235 1.00 10.83 C +ATOM 140 C7 DT A 7 13.269 14.902 8.236 1.00 36.33 C +ATOM 141 C6 DT A 7 11.465 16.616 8.594 1.00 12.19 C +ATOM 142 P DT A 8 5.384 17.990 7.824 1.00 49.10 P +ATOM 143 OP1 DT A 8 4.025 18.444 8.180 1.00 41.11 O +ATOM 144 OP2 DT A 8 5.458 16.668 7.160 1.00 39.21 O +ATOM 145 O5' DT A 8 6.086 19.118 6.927 1.00 48.80 O +ATOM 146 C5' DT A 8 6.146 20.478 7.418 1.00 34.73 C +ATOM 147 C4' DT A 8 6.995 21.229 6.438 1.00 28.73 C +ATOM 148 O4' DT A 8 8.188 20.458 6.284 1.00 39.07 O +ATOM 149 C3' DT A 8 6.418 21.332 5.029 1.00 37.88 C +ATOM 150 O3' DT A 8 5.967 22.667 4.696 1.00 52.04 O +ATOM 151 C2' DT A 8 7.513 20.718 4.139 1.00 32.80 C +ATOM 152 C1' DT A 8 8.736 20.855 5.034 1.00 36.58 C +ATOM 153 N1 DT A 8 9.823 19.876 4.759 1.00 24.57 N +ATOM 154 C2 DT A 8 11.086 20.316 4.494 1.00 19.41 C +ATOM 155 O2 DT A 8 11.324 21.516 4.389 1.00 32.74 O +ATOM 156 N3 DT A 8 12.094 19.403 4.412 1.00 25.12 N +ATOM 157 C4 DT A 8 11.876 18.060 4.551 1.00 31.35 C +ATOM 158 O4 DT A 8 12.858 17.317 4.503 1.00 28.53 O +ATOM 159 C5 DT A 8 10.569 17.611 4.765 1.00 22.80 C +ATOM 160 C7 DT A 8 10.261 16.140 4.896 1.00 24.98 C +ATOM 161 C6 DT A 8 9.545 18.548 4.904 1.00 20.28 C +ATOM 162 P DC A 9 5.531 23.071 3.209 1.00 48.97 P +ATOM 163 OP1 DC A 9 4.648 24.244 3.269 1.00 62.33 O +ATOM 164 OP2 DC A 9 5.010 21.905 2.470 1.00 51.53 O +ATOM 165 O5' DC A 9 6.926 23.547 2.611 1.00 43.99 O +ATOM 166 C5' DC A 9 7.636 24.627 3.249 1.00 50.86 C +ATOM 167 C4' DC A 9 8.897 24.853 2.457 1.00 46.66 C +ATOM 168 O4' DC A 9 9.638 23.627 2.448 1.00 42.69 O +ATOM 169 C3' DC A 9 8.717 25.240 0.998 1.00 56.96 C +ATOM 170 O3' DC A 9 9.470 26.414 0.667 1.00 63.54 O +ATOM 171 C2' DC A 9 9.126 23.965 0.253 1.00 50.41 C +ATOM 172 C1' DC A 9 10.241 23.483 1.157 1.00 41.08 C +ATOM 173 N1 DC A 9 10.524 22.022 1.015 1.00 37.23 N +ATOM 174 C2 DC A 9 11.814 21.603 0.840 1.00 40.54 C +ATOM 175 O2 DC A 9 12.691 22.447 0.670 1.00 43.89 O +ATOM 176 N3 DC A 9 12.106 20.297 0.873 1.00 32.57 N +ATOM 177 C4 DC A 9 11.141 19.395 1.046 1.00 24.65 C +ATOM 178 N4 DC A 9 11.461 18.075 1.089 1.00 27.84 N +ATOM 179 C5 DC A 9 9.803 19.775 1.177 1.00 17.61 C +ATOM 180 C6 DC A 9 9.499 21.133 1.167 1.00 30.63 C +ATOM 181 P DG A 10 9.055 27.333 -0.581 1.00 65.48 P +ATOM 182 OP1 DG A 10 9.496 28.717 -0.258 1.00 59.09 O +ATOM 183 OP2 DG A 10 7.632 27.106 -0.947 1.00 45.71 O +ATOM 184 O5' DG A 10 9.954 26.765 -1.771 1.00 70.30 O +ATOM 185 C5' DG A 10 11.382 26.940 -1.720 1.00 71.73 C +ATOM 186 C4' DG A 10 11.972 26.090 -2.802 1.00 58.69 C +ATOM 187 O4' DG A 10 11.802 24.724 -2.404 1.00 41.03 O +ATOM 188 C3' DG A 10 11.327 26.178 -4.188 1.00 45.61 C +ATOM 189 O3' DG A 10 12.311 26.096 -5.214 1.00 52.70 O +ATOM 190 C2' DG A 10 10.414 24.962 -4.186 1.00 36.02 C +ATOM 191 C1' DG A 10 11.429 24.028 -3.587 1.00 50.90 C +ATOM 192 N9 DG A 10 10.890 22.713 -3.200 1.00 45.86 N +ATOM 193 C8 DG A 10 9.616 22.315 -2.910 1.00 44.49 C +ATOM 194 N7 DG A 10 9.541 21.009 -2.613 1.00 39.96 N +ATOM 195 C5 DG A 10 10.818 20.588 -2.718 1.00 38.99 C +ATOM 196 C6 DG A 10 11.376 19.292 -2.511 1.00 35.78 C +ATOM 197 O6 DG A 10 10.813 18.252 -2.179 1.00 34.90 O +ATOM 198 N1 DG A 10 12.729 19.299 -2.720 1.00 23.54 N +ATOM 199 C2 DG A 10 13.498 20.365 -3.082 1.00 8.73 C +ATOM 200 N2 DG A 10 14.834 20.169 -3.237 1.00 23.15 N +ATOM 201 N3 DG A 10 12.982 21.573 -3.267 1.00 24.68 N +ATOM 202 C4 DG A 10 11.656 21.601 -3.061 1.00 31.53 C +ATOM 203 P DC A 11 12.763 27.421 -5.980 1.00 60.62 P +ATOM 204 OP1 DC A 11 12.796 28.572 -5.049 1.00 63.74 O +ATOM 205 OP2 DC A 11 11.886 27.542 -7.164 1.00 52.44 O +ATOM 206 O5' DC A 11 14.272 27.086 -6.366 1.00 57.57 O +ATOM 207 C5' DC A 11 15.275 27.108 -5.318 1.00 54.70 C +ATOM 208 C4' DC A 11 16.222 25.946 -5.510 1.00 72.51 C +ATOM 209 O4' DC A 11 15.443 24.754 -5.397 1.00 47.18 O +ATOM 210 C3' DC A 11 16.942 25.827 -6.848 1.00 29.82 C +ATOM 211 O3' DC A 11 18.340 25.511 -6.701 1.00 43.53 O +ATOM 212 C2' DC A 11 16.118 24.767 -7.578 1.00 51.34 C +ATOM 213 C1' DC A 11 15.856 23.836 -6.414 1.00 30.07 C +ATOM 214 N1 DC A 11 14.672 22.975 -6.637 1.00 23.25 N +ATOM 215 C2 DC A 11 14.802 21.628 -6.529 1.00 20.38 C +ATOM 216 O2 DC A 11 15.924 21.178 -6.314 1.00 38.77 O +ATOM 217 N3 DC A 11 13.723 20.842 -6.627 1.00 15.92 N +ATOM 218 C4 DC A 11 12.515 21.373 -6.836 1.00 15.82 C +ATOM 219 N4 DC A 11 11.410 20.574 -6.872 1.00 28.04 N +ATOM 220 C5 DC A 11 12.348 22.744 -6.978 1.00 26.17 C +ATOM 221 C6 DC A 11 13.470 23.558 -6.869 1.00 35.50 C +ATOM 222 P DG A 12 19.331 25.774 -7.925 1.00 55.98 P +ATOM 223 OP1 DG A 12 20.704 25.976 -7.408 1.00 45.83 O +ATOM 224 OP2 DG A 12 18.763 26.851 -8.758 1.00 44.26 O +ATOM 225 O5' DG A 12 19.302 24.412 -8.763 1.00 62.63 O +ATOM 226 C5' DG A 12 20.109 23.284 -8.359 1.00 69.50 C +ATOM 227 C4' DG A 12 19.748 22.167 -9.299 1.00 39.92 C +ATOM 228 O4' DG A 12 18.350 21.969 -9.139 1.00 32.00 O +ATOM 229 C3' DG A 12 19.921 22.404 -10.815 1.00 50.39 C +ATOM 230 O3' DG A 12 20.985 21.635 -11.401 1.00 64.13 O +ATOM 231 C2' DG A 12 18.535 22.062 -11.381 1.00 36.18 C +ATOM 232 C1' DG A 12 17.965 21.200 -10.269 1.00 24.79 C +ATOM 233 N9 DG A 12 16.493 21.220 -10.265 1.00 28.44 N +ATOM 234 C8 DG A 12 15.663 22.289 -10.478 1.00 31.85 C +ATOM 235 N7 DG A 12 14.368 21.958 -10.390 1.00 38.26 N +ATOM 236 C5 DG A 12 14.388 20.640 -10.102 1.00 28.99 C +ATOM 237 C6 DG A 12 13.301 19.742 -9.856 1.00 42.63 C +ATOM 238 O6 DG A 12 12.091 19.967 -9.857 1.00 49.17 O +ATOM 239 N1 DG A 12 13.750 18.466 -9.625 1.00 40.15 N +ATOM 240 C2 DG A 12 15.042 18.043 -9.605 1.00 33.42 C +ATOM 241 N2 DG A 12 15.259 16.717 -9.406 1.00 40.53 N +ATOM 242 N3 DG A 12 16.061 18.885 -9.792 1.00 37.34 N +ATOM 243 C4 DG A 12 15.660 20.156 -10.027 1.00 31.14 C +TER 244 DG A 12 +ATOM 245 O5' DC B 13 7.458 11.884 -9.070 1.00 66.23 O +ATOM 246 C5' DC B 13 8.252 10.968 -9.854 1.00 71.49 C +ATOM 247 C4' DC B 13 9.714 11.141 -9.512 1.00 56.82 C +ATOM 248 O4' DC B 13 10.144 12.455 -9.908 1.00 57.92 O +ATOM 249 C3' DC B 13 10.103 10.989 -8.055 1.00 34.34 C +ATOM 250 O3' DC B 13 11.293 10.221 -7.904 1.00 42.11 O +ATOM 251 C2' DC B 13 10.254 12.437 -7.607 1.00 29.08 C +ATOM 252 C1' DC B 13 10.896 13.044 -8.837 1.00 38.40 C +ATOM 253 N1 DC B 13 10.575 14.487 -8.944 1.00 34.33 N +ATOM 254 C2 DC B 13 11.559 15.430 -9.006 1.00 22.98 C +ATOM 255 O2 DC B 13 12.725 15.066 -8.932 1.00 50.83 O +ATOM 256 N3 DC B 13 11.246 16.714 -9.193 1.00 37.14 N +ATOM 257 C4 DC B 13 9.980 17.088 -9.334 1.00 42.60 C +ATOM 258 N4 DC B 13 9.698 18.395 -9.589 1.00 54.91 N +ATOM 259 C5 DC B 13 8.939 16.162 -9.274 1.00 56.67 C +ATOM 260 C6 DC B 13 9.265 14.824 -9.080 1.00 49.21 C +ATOM 261 P DG B 14 11.602 9.510 -6.502 1.00 60.42 P +ATOM 262 OP1 DG B 14 11.666 8.032 -6.664 1.00 57.44 O +ATOM 263 OP2 DG B 14 10.644 10.010 -5.494 1.00 46.07 O +ATOM 264 O5' DG B 14 13.051 10.094 -6.177 1.00 50.94 O +ATOM 265 C5' DG B 14 14.100 10.021 -7.156 1.00 34.84 C +ATOM 266 C4' DG B 14 15.113 10.992 -6.657 1.00 48.06 C +ATOM 267 O4' DG B 14 14.556 12.300 -6.755 1.00 37.01 O +ATOM 268 C3' DG B 14 15.445 10.806 -5.189 1.00 50.58 C +ATOM 269 O3' DG B 14 16.836 10.560 -5.013 1.00 51.98 O +ATOM 270 C2' DG B 14 14.937 12.100 -4.529 1.00 40.32 C +ATOM 271 C1' DG B 14 15.058 13.086 -5.671 1.00 46.69 C +ATOM 272 N9 DG B 14 14.036 14.140 -5.536 1.00 29.17 N +ATOM 273 C8 DG B 14 12.710 13.957 -5.259 1.00 23.48 C +ATOM 274 N7 DG B 14 12.016 15.103 -5.269 1.00 37.54 N +ATOM 275 C5 DG B 14 12.937 16.041 -5.558 1.00 26.27 C +ATOM 276 C6 DG B 14 12.761 17.451 -5.710 1.00 40.82 C +ATOM 277 O6 DG B 14 11.723 18.111 -5.630 1.00 44.39 O +ATOM 278 N1 DG B 14 13.952 18.079 -5.973 1.00 19.52 N +ATOM 279 C2 DG B 14 15.171 17.485 -6.107 1.00 18.48 C +ATOM 280 N2 DG B 14 16.244 18.292 -6.325 1.00 36.58 N +ATOM 281 N3 DG B 14 15.329 16.161 -5.986 1.00 46.96 N +ATOM 282 C4 DG B 14 14.179 15.499 -5.721 1.00 35.70 C +ATOM 283 P DC B 15 17.478 10.380 -3.569 1.00 46.26 P +ATOM 284 OP1 DC B 15 18.665 9.516 -3.729 1.00 46.07 O +ATOM 285 OP2 DC B 15 16.427 9.940 -2.633 1.00 40.43 O +ATOM 286 O5' DC B 15 17.957 11.865 -3.208 1.00 40.97 O +ATOM 287 C5' DC B 15 18.963 12.531 -3.996 1.00 28.78 C +ATOM 288 C4' DC B 15 18.936 13.958 -3.536 1.00 32.84 C +ATOM 289 O4' DC B 15 17.592 14.409 -3.622 1.00 37.24 O +ATOM 290 C3' DC B 15 19.253 14.139 -2.066 1.00 43.98 C +ATOM 291 O3' DC B 15 20.659 14.219 -1.858 1.00 40.90 O +ATOM 292 C2' DC B 15 18.520 15.417 -1.728 1.00 36.26 C +ATOM 293 C1' DC B 15 17.545 15.602 -2.872 1.00 20.54 C +ATOM 294 N1 DC B 15 16.145 15.696 -2.428 1.00 23.10 N +ATOM 295 C2 DC B 15 15.507 16.886 -2.558 1.00 32.12 C +ATOM 296 O2 DC B 15 16.162 17.846 -2.957 1.00 30.04 O +ATOM 297 N3 DC B 15 14.209 16.983 -2.264 1.00 32.94 N +ATOM 298 C4 DC B 15 13.536 15.919 -1.825 1.00 16.43 C +ATOM 299 N4 DC B 15 12.205 16.017 -1.553 1.00 34.91 N +ATOM 300 C5 DC B 15 14.164 14.689 -1.652 1.00 22.75 C +ATOM 301 C6 DC B 15 15.509 14.584 -1.979 1.00 26.42 C +ATOM 302 P DG B 16 21.304 14.529 -0.436 1.00 42.39 P +ATOM 303 OP1 DG B 16 22.696 14.087 -0.524 1.00 60.41 O +ATOM 304 OP2 DG B 16 20.488 13.954 0.650 1.00 51.09 O +ATOM 305 O5' DG B 16 21.306 16.117 -0.363 1.00 45.08 O +ATOM 306 C5' DG B 16 22.177 16.876 -1.212 1.00 33.20 C +ATOM 307 C4' DG B 16 21.739 18.292 -1.021 1.00 24.95 C +ATOM 308 O4' DG B 16 20.305 18.225 -1.048 1.00 32.83 O +ATOM 309 C3' DG B 16 22.101 18.959 0.293 1.00 41.12 C +ATOM 310 O3' DG B 16 22.592 20.293 0.097 1.00 53.45 O +ATOM 311 C2' DG B 16 20.820 18.829 1.121 1.00 28.93 C +ATOM 312 C1' DG B 16 19.765 18.985 0.046 1.00 37.44 C +ATOM 313 N9 DG B 16 18.513 18.299 0.468 1.00 17.75 N +ATOM 314 C8 DG B 16 18.363 17.062 1.039 1.00 17.96 C +ATOM 315 N7 DG B 16 17.080 16.744 1.281 1.00 24.14 N +ATOM 316 C5 DG B 16 16.400 17.832 0.868 1.00 9.96 C +ATOM 317 C6 DG B 16 14.996 18.090 0.882 1.00 18.10 C +ATOM 318 O6 DG B 16 14.082 17.378 1.280 1.00 31.13 O +ATOM 319 N1 DG B 16 14.712 19.349 0.418 1.00 17.72 N +ATOM 320 C2 DG B 16 15.606 20.268 -0.027 1.00 16.23 C +ATOM 321 N2 DG B 16 15.134 21.493 -0.382 1.00 33.42 N +ATOM 322 N3 DG B 16 16.912 20.017 -0.072 1.00 26.37 N +ATOM 323 C4 DG B 16 17.236 18.794 0.384 1.00 31.72 C +ATOM 324 P DA B 17 22.904 21.238 1.339 1.00 46.87 P +ATOM 325 OP1 DA B 17 23.994 22.183 1.025 1.00 47.75 O +ATOM 326 OP2 DA B 17 23.104 20.390 2.538 1.00 46.81 O +ATOM 327 O5' DA B 17 21.577 22.107 1.390 1.00 39.51 O +ATOM 328 C5' DA B 17 21.216 22.833 0.200 1.00 30.37 C +ATOM 329 C4' DA B 17 20.101 23.788 0.484 1.00 35.43 C +ATOM 330 O4' DA B 17 18.913 23.054 0.816 1.00 43.05 O +ATOM 331 C3' DA B 17 20.347 24.743 1.633 1.00 44.50 C +ATOM 332 O3' DA B 17 19.732 26.010 1.411 1.00 78.59 O +ATOM 333 C2' DA B 17 19.752 23.945 2.791 1.00 44.42 C +ATOM 334 C1' DA B 17 18.497 23.393 2.145 1.00 42.55 C +ATOM 335 N9 DA B 17 18.079 22.095 2.758 1.00 34.56 N +ATOM 336 C8 DA B 17 18.847 21.020 3.133 1.00 20.07 C +ATOM 337 N7 DA B 17 18.114 19.984 3.584 1.00 27.60 N +ATOM 338 C5 DA B 17 16.842 20.424 3.488 1.00 18.80 C +ATOM 339 C6 DA B 17 15.577 19.817 3.786 1.00 32.58 C +ATOM 340 N6 DA B 17 15.448 18.537 4.242 1.00 29.54 N +ATOM 341 N1 DA B 17 14.482 20.557 3.593 1.00 35.01 N +ATOM 342 C2 DA B 17 14.597 21.801 3.118 1.00 36.47 C +ATOM 343 N3 DA B 17 15.700 22.472 2.783 1.00 38.96 N +ATOM 344 C4 DA B 17 16.791 21.706 3.002 1.00 28.24 C +ATOM 345 P DA B 18 19.803 27.141 2.526 1.00 46.11 P +ATOM 346 OP1 DA B 18 19.796 28.478 1.888 1.00 49.20 O +ATOM 347 OP2 DA B 18 20.953 26.858 3.426 1.00 43.48 O +ATOM 348 O5' DA B 18 18.396 26.939 3.241 1.00 40.83 O +ATOM 349 C5' DA B 18 17.203 27.028 2.452 1.00 40.72 C +ATOM 350 C4' DA B 18 16.035 26.958 3.388 1.00 66.52 C +ATOM 351 O4' DA B 18 15.856 25.612 3.850 1.00 44.25 O +ATOM 352 C3' DA B 18 16.101 27.861 4.615 1.00 63.34 C +ATOM 353 O3' DA B 18 14.890 28.608 4.757 1.00 55.65 O +ATOM 354 C2' DA B 18 16.368 26.844 5.724 1.00 34.49 C +ATOM 355 C1' DA B 18 15.561 25.655 5.243 1.00 29.45 C +ATOM 356 N9 DA B 18 16.104 24.373 5.755 1.00 20.03 N +ATOM 357 C8 DA B 18 17.411 23.967 5.830 1.00 16.51 C +ATOM 358 N7 DA B 18 17.539 22.706 6.276 1.00 20.58 N +ATOM 359 C5 DA B 18 16.266 22.309 6.480 1.00 21.66 C +ATOM 360 C6 DA B 18 15.715 21.073 6.933 1.00 17.93 C +ATOM 361 N6 DA B 18 16.483 19.994 7.243 1.00 20.37 N +ATOM 362 N1 DA B 18 14.389 20.994 7.036 1.00 20.81 N +ATOM 363 C2 DA B 18 13.636 22.041 6.708 1.00 26.77 C +ATOM 364 N3 DA B 18 14.019 23.234 6.265 1.00 26.83 N +ATOM 365 C4 DA B 18 15.367 23.291 6.174 1.00 27.48 C +ATOM 366 P DT B 19 14.604 29.545 6.020 1.00 48.40 P +ATOM 367 OP1 DT B 19 13.792 30.696 5.582 1.00 50.18 O +ATOM 368 OP2 DT B 19 15.852 29.836 6.749 1.00 44.42 O +ATOM 369 O5' DT B 19 13.633 28.628 6.885 1.00 53.86 O +ATOM 370 C5' DT B 19 12.398 28.171 6.303 1.00 55.04 C +ATOM 371 C4' DT B 19 11.809 27.217 7.302 1.00 44.86 C +ATOM 372 O4' DT B 19 12.767 26.184 7.534 1.00 48.52 O +ATOM 373 C3' DT B 19 11.515 27.822 8.669 1.00 41.77 C +ATOM 374 O3' DT B 19 10.103 27.952 8.891 1.00 57.02 O +ATOM 375 C2' DT B 19 12.267 26.906 9.630 1.00 39.28 C +ATOM 376 C1' DT B 19 12.426 25.645 8.799 1.00 27.68 C +ATOM 377 N1 DT B 19 13.609 24.850 9.205 1.00 21.67 N +ATOM 378 C2 DT B 19 13.442 23.575 9.656 1.00 31.71 C +ATOM 379 O2 DT B 19 12.311 23.101 9.802 1.00 36.00 O +ATOM 380 N3 DT B 19 14.551 22.825 9.913 1.00 24.66 N +ATOM 381 C4 DT B 19 15.815 23.321 9.777 1.00 40.64 C +ATOM 382 O4 DT B 19 16.755 22.570 10.029 1.00 31.47 O +ATOM 383 C5 DT B 19 15.972 24.647 9.362 1.00 31.79 C +ATOM 384 C7 DT B 19 17.345 25.239 9.234 1.00 30.05 C +ATOM 385 C6 DT B 19 14.844 25.405 9.048 1.00 14.35 C +ATOM 386 P DT B 20 9.513 28.533 10.260 1.00 48.24 P +ATOM 387 OP1 DT B 20 8.145 29.007 9.998 1.00 41.28 O +ATOM 388 OP2 DT B 20 10.455 29.513 10.841 1.00 53.39 O +ATOM 389 O5' DT B 20 9.395 27.223 11.153 1.00 36.57 O +ATOM 390 C5' DT B 20 8.576 26.148 10.664 1.00 50.41 C +ATOM 391 C4' DT B 20 8.655 25.060 11.678 1.00 32.08 C +ATOM 392 O4' DT B 20 10.003 24.615 11.764 1.00 48.38 O +ATOM 393 C3' DT B 20 8.272 25.471 13.087 1.00 29.99 C +ATOM 394 O3' DT B 20 7.199 24.657 13.553 1.00 45.14 O +ATOM 395 C2' DT B 20 9.586 25.307 13.860 1.00 32.42 C +ATOM 396 C1' DT B 20 10.190 24.148 13.089 1.00 39.56 C +ATOM 397 N1 DT B 20 11.660 24.070 13.205 1.00 20.36 N +ATOM 398 C2 DT B 20 12.257 22.880 13.486 1.00 27.55 C +ATOM 399 O2 DT B 20 11.583 21.866 13.691 1.00 38.33 O +ATOM 400 N3 DT B 20 13.620 22.829 13.497 1.00 29.60 N +ATOM 401 C4 DT B 20 14.402 23.914 13.225 1.00 30.11 C +ATOM 402 O4 DT B 20 15.625 23.764 13.252 1.00 32.92 O +ATOM 403 C5 DT B 20 13.774 25.126 12.933 1.00 24.11 C +ATOM 404 C7 DT B 20 14.563 26.358 12.612 1.00 23.96 C +ATOM 405 C6 DT B 20 12.385 25.187 12.926 1.00 19.78 C +ATOM 406 P DC B 21 6.594 24.823 15.016 1.00 54.73 P +ATOM 407 OP1 DC B 21 5.169 24.424 14.987 1.00 53.98 O +ATOM 408 OP2 DC B 21 6.870 26.189 15.511 1.00 65.53 O +ATOM 409 O5' DC B 21 7.409 23.731 15.839 1.00 50.67 O +ATOM 410 C5' DC B 21 7.331 22.352 15.433 1.00 60.86 C +ATOM 411 C4' DC B 21 8.100 21.598 16.461 1.00 40.86 C +ATOM 412 O4' DC B 21 9.478 21.902 16.263 1.00 36.88 O +ATOM 413 C3' DC B 21 7.766 22.045 17.879 1.00 53.80 C +ATOM 414 O3' DC B 21 7.036 21.041 18.611 1.00 79.04 O +ATOM 415 C2' DC B 21 9.123 22.414 18.469 1.00 48.43 C +ATOM 416 C1' DC B 21 10.107 21.743 17.523 1.00 36.51 C +ATOM 417 N1 DC B 21 11.328 22.556 17.331 1.00 24.72 N +ATOM 418 C2 DC B 21 12.534 21.939 17.329 1.00 30.96 C +ATOM 419 O2 DC B 21 12.560 20.731 17.579 1.00 34.53 O +ATOM 420 N3 DC B 21 13.639 22.639 17.035 1.00 31.69 N +ATOM 421 C4 DC B 21 13.560 23.938 16.739 1.00 21.53 C +ATOM 422 N4 DC B 21 14.685 24.628 16.404 1.00 23.72 N +ATOM 423 C5 DC B 21 12.338 24.609 16.736 1.00 30.74 C +ATOM 424 C6 DC B 21 11.193 23.878 17.035 1.00 27.58 C +ATOM 425 P DG B 22 6.509 21.324 20.099 1.00 56.50 P +ATOM 426 OP1 DG B 22 5.387 20.397 20.396 1.00 50.81 O +ATOM 427 OP2 DG B 22 6.235 22.774 20.306 1.00 53.84 O +ATOM 428 O5' DG B 22 7.767 20.924 20.993 1.00 66.30 O +ATOM 429 C5' DG B 22 8.216 19.559 21.073 1.00 73.42 C +ATOM 430 C4' DG B 22 9.422 19.557 21.977 1.00 42.96 C +ATOM 431 O4' DG B 22 10.493 20.260 21.319 1.00 52.87 O +ATOM 432 C3' DG B 22 9.267 20.267 23.325 1.00 38.51 C +ATOM 433 O3' DG B 22 10.088 19.657 24.293 1.00 60.28 O +ATOM 434 C2' DG B 22 9.751 21.670 22.990 1.00 22.00 C +ATOM 435 C1' DG B 22 10.988 21.226 22.256 1.00 24.85 C +ATOM 436 N9 DG B 22 11.599 22.357 21.543 1.00 25.91 N +ATOM 437 C8 DG B 22 11.037 23.545 21.159 1.00 23.91 C +ATOM 438 N7 DG B 22 11.921 24.362 20.566 1.00 39.18 N +ATOM 439 C5 DG B 22 13.072 23.653 20.580 1.00 25.66 C +ATOM 440 C6 DG B 22 14.370 24.003 20.102 1.00 28.34 C +ATOM 441 O6 DG B 22 14.747 25.057 19.585 1.00 31.85 O +ATOM 442 N1 DG B 22 15.268 22.983 20.308 1.00 25.22 N +ATOM 443 C2 DG B 22 15.023 21.776 20.891 1.00 11.07 C +ATOM 444 N2 DG B 22 16.066 20.914 21.038 1.00 25.92 N +ATOM 445 N3 DG B 22 13.815 21.452 21.350 1.00 19.05 N +ATOM 446 C4 DG B 22 12.902 22.429 21.151 1.00 23.69 C +ATOM 447 P DC B 23 9.477 18.627 25.340 1.00 55.93 P +ATOM 448 OP1 DC B 23 8.767 17.534 24.627 1.00 45.14 O +ATOM 449 OP2 DC B 23 8.670 19.409 26.312 1.00 41.61 O +ATOM 450 O5' DC B 23 10.807 18.067 26.034 1.00 59.70 O +ATOM 451 C5' DC B 23 11.688 17.170 25.310 1.00 63.13 C +ATOM 452 C4' DC B 23 13.115 17.573 25.593 1.00 27.86 C +ATOM 453 O4' DC B 23 13.284 18.804 24.893 1.00 50.51 O +ATOM 454 C3' DC B 23 13.441 17.879 27.059 1.00 46.45 C +ATOM 455 O3' DC B 23 14.341 16.938 27.677 1.00 57.21 O +ATOM 456 C2' DC B 23 13.928 19.322 27.025 1.00 68.01 C +ATOM 457 C1' DC B 23 14.312 19.508 25.568 1.00 32.05 C +ATOM 458 N1 DC B 23 14.144 20.932 25.170 1.00 23.28 N +ATOM 459 C2 DC B 23 15.199 21.595 24.630 1.00 20.62 C +ATOM 460 O2 DC B 23 16.257 20.984 24.504 1.00 29.62 O +ATOM 461 N3 DC B 23 15.067 22.877 24.257 1.00 39.00 N +ATOM 462 C4 DC B 23 13.898 23.510 24.404 1.00 30.44 C +ATOM 463 N4 DC B 23 13.771 24.813 24.018 1.00 34.66 N +ATOM 464 C5 DC B 23 12.795 22.866 24.967 1.00 27.74 C +ATOM 465 C6 DC B 23 12.935 21.540 25.359 1.00 24.58 C +ATOM 466 P DG B 24 14.658 17.064 29.247 1.00 53.70 P +ATOM 467 OP1 DG B 24 14.863 15.717 29.825 1.00 61.79 O +ATOM 468 OP2 DG B 24 13.633 17.912 29.920 1.00 36.06 O +ATOM 469 O5' DG B 24 16.033 17.880 29.284 1.00 34.06 O +ATOM 470 C5' DG B 24 17.243 17.320 28.742 1.00 46.57 C +ATOM 471 C4' DG B 24 18.208 18.464 28.758 1.00 50.89 C +ATOM 472 O4' DG B 24 17.716 19.428 27.829 1.00 32.02 O +ATOM 473 C3' DG B 24 18.230 19.236 30.058 1.00 30.38 C +ATOM 474 O3' DG B 24 18.978 18.583 31.084 1.00 61.06 O +ATOM 475 C2' DG B 24 18.885 20.519 29.578 1.00 53.33 C +ATOM 476 C1' DG B 24 18.276 20.693 28.188 1.00 35.03 C +ATOM 477 N9 DG B 24 17.164 21.659 28.139 1.00 30.25 N +ATOM 478 C8 DG B 24 15.874 21.536 28.580 1.00 30.86 C +ATOM 479 N7 DG B 24 15.129 22.614 28.308 1.00 44.08 N +ATOM 480 C5 DG B 24 15.990 23.436 27.673 1.00 16.87 C +ATOM 481 C6 DG B 24 15.765 24.729 27.117 1.00 19.36 C +ATOM 482 O6 DG B 24 14.719 25.373 27.067 1.00 33.30 O +ATOM 483 N1 DG B 24 16.926 25.257 26.604 1.00 15.78 N +ATOM 484 C2 DG B 24 18.157 24.666 26.579 1.00 11.92 C +ATOM 485 N2 DG B 24 19.208 25.386 26.096 1.00 29.76 N +ATOM 486 N3 DG B 24 18.350 23.438 27.053 1.00 21.95 N +ATOM 487 C4 DG B 24 17.231 22.893 27.570 1.00 13.89 C +TER 488 DG B 24 +HETATM 489 O HOH A 25 19.736 30.706 18.656 1.00 51.86 O +HETATM 490 O HOH A 31 10.879 26.039 -8.906 1.00 47.07 O +HETATM 491 O HOH A 32 18.320 24.816 14.948 1.00 47.72 O +HETATM 492 O HOH A 36 9.821 13.442 8.572 1.00 45.76 O +HETATM 493 O HOH A 38 8.915 15.602 -3.388 1.00 50.97 O +HETATM 494 O HOH A 39 17.505 26.340 -10.581 1.00 51.90 O +HETATM 495 O HOH A 40 28.496 23.515 18.349 1.00 45.37 O +HETATM 496 O HOH A 41 11.346 24.175 4.920 1.00 45.03 O +HETATM 497 O HOH A 50 9.098 16.119 1.277 1.00 51.80 O +HETATM 498 O HOH A 54 16.488 29.195 19.861 1.00 54.92 O +HETATM 499 O HOH A 55 22.078 25.894 15.396 1.00 62.20 O +HETATM 500 O HOH A 58 7.133 14.448 4.647 1.00 57.15 O +HETATM 501 O HOH A 62 14.095 28.151 21.614 1.00 53.85 O +HETATM 502 O HOH A 64 27.164 31.710 20.331 1.00 56.84 O +HETATM 503 O HOH A 65 15.295 11.873 12.209 1.00 57.34 O +HETATM 504 O HOH A 66 18.180 16.604 9.966 1.00 61.52 O +HETATM 505 O HOH A 67 6.216 17.035 1.672 1.00 62.91 O +HETATM 506 O HOH A 70 7.055 25.519 -2.053 1.00 55.96 O +HETATM 507 O HOH A 74 12.454 11.354 9.415 1.00 68.40 O +HETATM 508 O HOH A 76 11.492 29.103 20.090 1.00 67.46 O +HETATM 509 O HOH A 77 14.220 29.189 20.392 1.00 48.22 O +HETATM 510 O HOH A 78 6.138 19.149 13.844 1.00 62.26 O +HETATM 511 O HOH A 79 17.315 9.638 13.392 1.00 65.70 O +HETATM 512 O HOH A 80 18.951 25.757 12.989 1.00 66.47 O +HETATM 513 O HOH A 81 20.460 18.861 12.664 1.00 63.00 O +HETATM 514 O HOH A 82 3.529 19.338 12.599 1.00 65.32 O +HETATM 515 O HOH A 84 16.223 12.351 9.406 1.00 63.59 O +HETATM 516 O HOH A 85 12.989 29.901 -9.282 1.00 64.97 O +HETATM 517 O HOH A 86 17.510 30.569 18.702 1.00 61.79 O +HETATM 518 O HOH A 87 25.377 12.891 19.011 1.00 73.80 O +HETATM 519 O HOH A 88 13.610 15.742 18.593 1.00 69.48 O +HETATM 520 O HOH A 89 18.012 32.598 15.262 1.00 67.52 O +HETATM 521 O HOH A 92 8.723 13.216 6.359 1.00 70.66 O +HETATM 522 O HOH A 97 18.779 13.814 11.704 1.00 71.14 O +HETATM 523 O HOH A 99 12.227 25.192 -10.299 1.00 70.46 O +HETATM 524 O HOH A 100 12.292 30.291 27.102 1.00 73.04 O +HETATM 525 O HOH A 102 20.170 23.000 12.999 1.00 73.63 O +HETATM 526 O HOH B 26 14.354 27.683 16.369 1.00 40.92 O +HETATM 527 O HOH B 27 9.864 22.509 9.123 1.00 39.67 O +HETATM 528 O HOH B 28 19.526 19.144 7.481 1.00 51.15 O +HETATM 529 O HOH B 29 25.754 12.744 -1.835 1.00 51.80 O +HETATM 530 O HOH B 30 7.478 20.604 -9.000 1.00 44.82 O +HETATM 531 O HOH B 33 9.012 24.586 7.009 1.00 43.42 O +HETATM 532 O HOH B 34 10.152 19.917 13.381 1.00 48.04 O +HETATM 533 O HOH B 35 7.764 21.397 11.075 1.00 41.41 O +HETATM 534 O HOH B 37 13.239 14.428 2.049 1.00 55.54 O +HETATM 535 O HOH B 42 12.601 23.000 29.167 1.00 51.36 O +HETATM 536 O HOH B 43 10.440 25.542 24.443 1.00 56.79 O +HETATM 537 O HOH B 44 16.979 28.689 16.284 1.00 50.41 O +HETATM 538 O HOH B 45 4.794 22.966 13.368 1.00 45.95 O +HETATM 539 O HOH B 46 4.208 25.591 10.828 1.00 51.06 O +HETATM 540 O HOH B 47 6.362 24.374 9.188 1.00 51.85 O +HETATM 541 O HOH B 48 7.688 28.411 7.883 1.00 49.33 O +HETATM 542 O HOH B 49 18.379 17.074 4.809 1.00 50.72 O +HETATM 543 O HOH B 51 26.464 23.826 1.396 1.00 53.21 O +HETATM 544 O HOH B 52 11.014 11.318 -2.909 1.00 51.36 O +HETATM 545 O HOH B 53 9.476 27.782 26.498 1.00 60.04 O +HETATM 546 O HOH B 56 5.522 27.411 9.017 1.00 62.36 O +HETATM 547 O HOH B 57 18.456 28.409 8.821 1.00 59.63 O +HETATM 548 O HOH B 59 22.610 15.544 3.846 1.00 57.52 O +HETATM 549 O HOH B 60 24.407 13.162 2.229 1.00 52.30 O +HETATM 550 O HOH B 61 7.988 11.556 -2.976 1.00 59.14 O +HETATM 551 O HOH B 63 14.213 27.722 18.905 1.00 57.29 O +HETATM 552 O HOH B 68 19.101 11.433 1.080 1.00 59.79 O +HETATM 553 O HOH B 69 12.607 10.967 0.261 1.00 60.87 O +HETATM 554 O HOH B 71 15.062 26.024 -0.766 1.00 56.35 O +HETATM 555 O HOH B 72 16.380 6.413 -4.784 1.00 59.07 O +HETATM 556 O HOH B 73 14.059 5.751 -6.198 1.00 56.68 O +HETATM 557 O HOH B 75 9.613 17.039 29.793 1.00 63.48 O +HETATM 558 O HOH B 83 25.276 15.890 -1.301 1.00 64.53 O +HETATM 559 O HOH B 90 2.622 23.030 10.332 1.00 68.01 O +HETATM 560 O HOH B 91 19.701 22.518 9.511 1.00 70.25 O +HETATM 561 O HOH B 93 19.727 29.488 6.155 1.00 69.43 O +HETATM 562 O HOH B 94 17.241 11.563 4.511 1.00 72.18 O +HETATM 563 O HOH B 95 26.545 19.404 -1.091 1.00 70.14 O +HETATM 564 O HOH B 96 9.697 18.315 14.885 1.00 69.10 O +HETATM 565 O HOH B 98 14.292 25.159 2.287 1.00 68.44 O +HETATM 566 O HOH B 101 9.396 27.092 16.993 1.00 72.98 O +HETATM 567 O HOH B 103 19.987 21.691 6.802 1.00 72.66 O +HETATM 568 O HOH B 104 18.692 31.584 4.596 1.00 72.98 O +MASTER 340 0 0 0 0 0 0 6 566 2 0 2 +END diff --git a/src/download.jl b/src/download.jl index 33cb7c3c..353dd37b 100644 --- a/src/download.jl +++ b/src/download.jl @@ -197,30 +197,33 @@ function downloadpdb(pdbid::AbstractString; try # Download the compressed PDB file to the temporary location @info "Downloading file from PDB: $pdbid" + if format == BCIFFormat Downloads.download( "https://models.rcsb.org/$pdbid.bcif", pdbpath, ) - elseif ba_number == 0 - Downloads.download( - "http://files.rcsb.org/download/$pdbid.$(pdbextension[format])$ba_number.gz", - archivefilepath, - ) else - if format == PDBFormat + if ba_number == 0 Downloads.download( - "http://files.rcsb.org/download/$pdbid.$(pdbextension[format])$ba_number.gz", - archivefilepath, - ) - elseif format == MMCIFFormat - Downloads.download( - "http://files.rcsb.org/download/$pdbid-assembly$ba_number.$(pdbextension[format]).gz", + "http://files.rcsb.org/download/$pdbid.$(pdbextension[format]).gz", archivefilepath, ) else - throw(ArgumentError("Biological assemblies are available in the " * - "PDB and mmCIF formats only")) + if format == PDBFormat + Downloads.download( + "http://files.rcsb.org/download/$pdbid.$(pdbextension[format])$ba_number.gz", + archivefilepath, + ) + elseif format == MMCIFFormat + Downloads.download( + "http://files.rcsb.org/download/$pdbid-assembly$ba_number.$(pdbextension[format]).gz", + archivefilepath, + ) + else + throw(ArgumentError("Biological assemblies are available in the " * + "PDB and mmCIF formats only")) + end end end # Verify if the compressed file is downloaded properly and extract it From f91a94a9dde608e7b9f463075439ff0f61c575c7 Mon Sep 17 00:00:00 2001 From: Brady Johnston Date: Tue, 13 May 2025 19:22:37 +0800 Subject: [PATCH 16/23] parsing working again --- src/bcif.jl | 126 ++++++++++++++++++++++++++++++---------------------- 1 file changed, 74 insertions(+), 52 deletions(-) diff --git a/src/bcif.jl b/src/bcif.jl index 9ef3d154..f448581f 100644 --- a/src/bcif.jl +++ b/src/bcif.jl @@ -1,8 +1,24 @@ -export decode_column, columns_to_dict, datablock_to_dict +export decode_column, columns_to_dict, datablock_to_dict, BCIFDict, BCIFFormat using LinearAlgebra import MsgPack + +struct BCIFDict <: AbstractDict{String, Any} + dict::Dict{String,Any} + + function BCIFDict(dict::Dict{String, Dict{String}}) + new(convert(Dict{String,Any}, dict)) + end + + function BCIFDict(dict::Dict{String,T}) where T + new(convert(Dict{String,Any}, dict)) + end +end + +Base.keys(mmcif_dict::BCIFDict) = keys(mmcif_dict.dict) +Base.values(mmcif_dict::BCIFDict) = values(mmcif_dict.dict) + """ read(input::IO, ::Type{BCIFFormat}, structure_name::AbstractString="", remove_disorder::Bool=false, read_std_atoms::Bool=true, read_het_atoms::Bool=true, run_dssp::Bool=false, run_stride::Bool=false) @@ -18,30 +34,46 @@ function Base.read(input::IO, run_stride::Bool=false) file = MsgPack.unpack(read(input)) - categories = file["dataBlocks"][1]["categories"] - atom_site = get_category(categories, "_atom_site") - columns = atom_site[1]["columns"] - struc = MolecularStructure(structure_name) + bcif_dict = BCIFDict(datablock_to_dict(file["dataBlocks"][1])) + + return MolecularStructure( + bcif_dict; + structure_name=structure_name, + remove_disorder=remove_disorder, + read_std_atoms=read_std_atoms, + read_het_atoms=read_het_atoms, + run_dssp=run_dssp, + run_stride=run_stride + ) +end - for (i, datablock) in enumerate(file["dataBlocks"]) - # could decode the whole file at once, or just decode the _atom_site category - # for efficiency which it is currently doing. Can be changed to get access to the - # rest of the file +function MolecularStructure(bcif_dict::BCIFDict; + structure_name::AbstractString="", + remove_disorder::Bool=false, + read_std_atoms::Bool=true, + read_het_atoms::Bool=true, + run_dssp::Bool=false, + run_stride::Bool=false) - decode_all = true - if decode_all - bcif_dict = BCIFDict(datablock_to_dict(datablock)["_atom_site"]) - else - bcif_dict = BCIFDict(columns_to_dict(get_category(categories, "_atom_site"))) - end + struc = MolecularStructure(structure_name) + struc[1] = Model(1, struc) # Initialize first model + # println(bcif_dict) + for i in 1:length(bcif_dict["_atom_site"]["id"]) + unsafe_addatomtomodel!(struc[1], AtomRecord(bcif_dict, i)) + end + + fixlists!(struc) - struc[i] = Model(i, struc) - for i in 1:length(bcif_dict["id"]) - unsafe_addatomtomodel!(struc[1], AtomRecord(bcif_dict, i)) - end + if run_dssp && run_stride + throw(ArgumentError("run_dssp and run_stride cannot both be true")) + end + if run_dssp + rundssp!(struc) + end + if run_stride + runstride!(struc) end - fixlists!(struc) return struc end @@ -56,19 +88,6 @@ function columns_to_dict(columns::Vector{Any}) end return reduce(merge, fetch.(tasks)) end - -BCIFArrayTypes = Union{Vector{String},Vector{Int32},Vector{Float64}} - -struct BCIFDict <: AbstractDict{String,BCIFArrayTypes} - dict::Dict{String,BCIFArrayTypes} -end - -function BCIFDict(dict::Dict{String,BCIFArrayTypes}) - new(dict) -end - -Base.keys(mmcif_dict::BCIFDict) = keys(mmcif_dict.dict) -Base.values(mmcif_dict::BCIFDict) = values(mmcif_dict.dict) Base.haskey(mmcif_dict::BCIFDict, key) = haskey(mmcif_dict.dict, key) Base.get(mmcif_dict::BCIFDict, key, default) = get(mmcif_dict.dict, key, default) Base.length(mmcif_dict::BCIFDict) = length(mmcif_dict.dict) @@ -76,25 +95,28 @@ Base.iterate(mmcif_dict::BCIFDict) = iterate(mmcif_dict.dict) Base.iterate(mmcif_dict::BCIFDict, i) = iterate(mmcif_dict.dict, i) -AtomRecord = AtomRecord(d::BCIFDict, i::Int) = AtomRecord( - d["group_PDB"][i] == "HETATM", - d["id"][i], - d["auth_atom_id"][i], - d["label_atom_id"][i] == "" ? ' ' : d["label_atom_id"][i][1], - d["auth_comp_id"][i], - d["auth_asym_id"][i], - d["auth_seq_id"][i], - d["label_alt_id"][i] == "" ? ' ' : d["label_alt_id"][i][1], - [ - d["Cartn_x"][i], - d["Cartn_y"][i], - d["Cartn_z"][i] - ], - d["occupancy"][i], - d["B_iso_or_equiv"][i], - d["type_symbol"][i], - d["pdbx_formal_charge"][i], -) +function AtomRecord(d::BCIFDict, i::Int) + d = d["_atom_site"] + return AtomRecord( + d["group_PDB"][i] == "HETATM", + d["id"][i], + d["auth_atom_id"][i], + d["label_atom_id"][i] == "" ? ' ' : d["label_atom_id"][i][1], + d["auth_comp_id"][i], + d["auth_asym_id"][i], + d["auth_seq_id"][i], + d["label_alt_id"][i] == "" ? ' ' : d["label_alt_id"][i][1], + [ + d["Cartn_x"][i], + d["Cartn_y"][i], + d["Cartn_z"][i] + ], + d["occupancy"][i], + d["B_iso_or_equiv"][i], + d["type_symbol"][i], + d["pdbx_formal_charge"][i], + ) +end function get_category(cats::Vector{Any}, name::String) From cd08209cbc19c404e940e187d3c86cd99cc32df1 Mon Sep 17 00:00:00 2001 From: Brady Johnston Date: Wed, 14 May 2025 13:22:52 +0800 Subject: [PATCH 17/23] fix parsing of multi-model files --- src/bcif.jl | 20 +++++++++++--------- 1 file changed, 11 insertions(+), 9 deletions(-) diff --git a/src/bcif.jl b/src/bcif.jl index f448581f..2ac78509 100644 --- a/src/bcif.jl +++ b/src/bcif.jl @@ -24,7 +24,7 @@ Base.values(mmcif_dict::BCIFDict) = values(mmcif_dict.dict) A function to read a binary CIF file from MolStar and extract the list of attributes and their compressed bytes. """ -function Base.read(input::IO, +function Base.read(input::String, # TODO: does this make sense to be a string? ::Type{BCIFFormat}; structure_name::AbstractString="", remove_disorder::Bool=false, @@ -56,12 +56,14 @@ function MolecularStructure(bcif_dict::BCIFDict; run_stride::Bool=false) struc = MolecularStructure(structure_name) - struc[1] = Model(1, struc) # Initialize first model - # println(bcif_dict) + for i in 1:length(bcif_dict["_atom_site"]["id"]) - unsafe_addatomtomodel!(struc[1], AtomRecord(bcif_dict, i)) + model_n = bcif_dict["_atom_site"]["pdbx_PDB_model_num"][i] + if !haskey(models(struc), model_n) + struc[model_n] = Model(model_n, struc) + end + unsafe_addatomtomodel!(struc[model_n], AtomRecord(bcif_dict, i)) end - fixlists!(struc) if run_dssp && run_stride @@ -101,11 +103,11 @@ function AtomRecord(d::BCIFDict, i::Int) d["group_PDB"][i] == "HETATM", d["id"][i], d["auth_atom_id"][i], - d["label_atom_id"][i] == "" ? ' ' : d["label_atom_id"][i][1], + d["label_alt_id"][i] == "" ? ' ' : d["label_alt_id"][i][1], d["auth_comp_id"][i], - d["auth_asym_id"][i], + string(d["auth_asym_id"][i]), d["auth_seq_id"][i], - d["label_alt_id"][i] == "" ? ' ' : d["label_alt_id"][i][1], + d["pdbx_PDB_ins_code"][i] == "" ? ' ' : d["pdbx_PDB_ins_code"][i][1], [ d["Cartn_x"][i], d["Cartn_y"][i], @@ -114,7 +116,7 @@ function AtomRecord(d::BCIFDict, i::Int) d["occupancy"][i], d["B_iso_or_equiv"][i], d["type_symbol"][i], - d["pdbx_formal_charge"][i], + string(d["pdbx_formal_charge"][i]), ) end From df0a03d2dcced131a542ec75ceda817638ffacdb Mon Sep 17 00:00:00 2001 From: Brady Johnston Date: Wed, 14 May 2025 13:49:16 +0800 Subject: [PATCH 18/23] initial test comparison --- test/runtests.jl | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/test/runtests.jl b/test/runtests.jl index 44cc1c00..990fbdce 100644 --- a/test/runtests.jl +++ b/test/runtests.jl @@ -153,6 +153,8 @@ Aqua.test_all(BioStructures; ambiguities=(recursive=false)) downloadpdb("1alw", dir=temp_dir, format=MMCIFFormat) pdbpath = joinpath(temp_dir, "1ALW.$(pdbextension[MMCIFFormat])") @test isfile(pdbpath) && filesize(pdbpath) > 0 + downloadpdb("1alw", dir=temp_dir, format=BCIFFormat) + pdbpath = joinpath(temp_dir, "1ALW.bcif") @test isfile(pdbpath) && filesize(pdbpath) > 0 # Obsolete PDB downloadpdb("116l", dir=temp_dir, format=PDBFormat, obsolete=true) @@ -2533,6 +2535,12 @@ end ) writemultimmcif(temp_filename, Dict("not_1AKE" => test_multicif["1AKE"])) end +@testset "BCIF" begin + bcif = retrievepdb("1AKE", dir = temp_dir, format=BCIFFormat) + mmcif = retrievepdb("1AKE", dir = temp_dir, format=MMCIFFormat) + @test coordarray(bcif) == coordarray(mmcif) +end + @testset "MMTF" begin # Test MMTF dictionary dic = MMTFDict(Dict()) From 5421031d6f064c5b2c6b23b69d2b8d8c02b1b713 Mon Sep 17 00:00:00 2001 From: Brady Johnston Date: Wed, 14 May 2025 15:47:05 +0800 Subject: [PATCH 19/23] remove extra type lookups and conversions --- src/bcif.jl | 473 +++++++++++++++++++++++----------------------------- 1 file changed, 208 insertions(+), 265 deletions(-) diff --git a/src/bcif.jl b/src/bcif.jl index 2ac78509..0f83fe7b 100644 --- a/src/bcif.jl +++ b/src/bcif.jl @@ -1,4 +1,4 @@ -export decode_column, columns_to_dict, datablock_to_dict, BCIFDict, BCIFFormat +export decode_column, columns_to_dict, datablock_to_dict, BCIFDict, BCIFFormat, FixedPointEncoding, TypeCode using LinearAlgebra import MsgPack @@ -10,10 +10,6 @@ struct BCIFDict <: AbstractDict{String, Any} function BCIFDict(dict::Dict{String, Dict{String}}) new(convert(Dict{String,Any}, dict)) end - - function BCIFDict(dict::Dict{String,T}) where T - new(convert(Dict{String,Any}, dict)) - end end Base.keys(mmcif_dict::BCIFDict) = keys(mmcif_dict.dict) @@ -120,21 +116,6 @@ function AtomRecord(d::BCIFDict, i::Int) ) end - -function get_category(cats::Vector{Any}, name::String) - idx = findall(getindex.(cats, "name") .== name) - - if isnothing(idx) - throw(ArgumentError("Category $name not found")) - end - if length(idx) > 1 - throw(ArgumentError("Multiple categories with name $name found")) - end - - return cats[idx] -end - - # Utility functions for encoding/decoding function encode_stepwise(data, encodings) for encoding in encodings @@ -166,24 +147,19 @@ function deserialize_numeric_encoding(content::Any) end params = content - encoding_constructors = Dict( - "ByteArray" => () -> ByteArrayEncoding(INT_TO_TYPE[get(params, "type", nothing)]), - "FixedPoint" => () -> FixedPointEncoding(params["factor"]; srcType=INT_TO_TYPE[get(params, "srcType", FLOAT32)]), - "IntervalQuantization" => () -> IntervalQuantizationEncoding(params["min"], params["max"], params["numSteps"]; - srcType=INT_TO_TYPE[get(params, "srcType", 32)]), - "RunLength" => () -> RunLengthEncoding(srcSize=get(params, "srcSize", nothing), - srcType=INT_TO_TYPE[get(params, "srcType", nothing)]), - "Delta" => () -> DeltaEncoding(srcType=INT_TO_TYPE[get(params, "srcType", nothing)], - origin=get(params, "origin", 0)), - "IntegerPacking" => () -> IntegerPackingEncoding(params["byteCount"], - srcSize=get(params, "srcSize", nothing), - isUnsigned=get(params, "isUnsigned", false)) - ) - - if haskey(encoding_constructors, kind) - return encoding_constructors[kind]() - else - error("Unknown encoding kind: $kind") + if kind == "ByteArray" + return ByteArrayEncoding(INT_TO_TYPE[params["type"]]) + elseif kind == "FixedPoint" + return FixedPointEncoding(params["factor"]; srcType=INT_TO_TYPE[params["srcType"]]) + elseif kind == "IntervalQuantization" + return IntervalQuantizationEncoding(params["min"], params["max"], params["numSteps"]; srcType=INT_TO_TYPE[params["srcType"]]) + elseif kind == "RunLength" + return RunLengthEncoding(srcSize=params["srcSize"], srcType=INT_TO_TYPE[params["srcType"]]) + elseif kind == "Delta" + # Pass the actual integer type code, not the Type object + return DeltaEncoding(srcType=INT_TO_TYPE[params["srcType"]], origin=Int32(params["origin"])) + elseif kind == "IntegerPacking" + return IntegerPackingEncoding(params["byteCount"], srcSize=params["srcSize"], isUnsigned=params["isUnsigned"]) end end @@ -216,17 +192,6 @@ end # Data types defined for the BCIF encoding by are indicated by integer values # there are not well discussed in the official spec, had to ask about it excplicitly # https://github.com/molstar/BinaryCIF/issues/4 -@enum TypeCode begin - INT8 = 1 - INT16 = 2 - INT32 = 3 - UINT8 = 4 - UINT16 = 5 - UINT32 = 6 - FLOAT32 = 32 - FLOAT64 = 33 -end - const INT_TO_TYPE = Dict( 1 => Int8, 2 => Int16, @@ -238,77 +203,70 @@ const INT_TO_TYPE = Dict( 33 => Float64 ) -# Mapping from TypeCode to Julia types -const TYPE_CODE_TO_TYPE = Dict( - INT8 => Int8, - INT16 => Int16, - INT32 => Int32, - UINT8 => UInt8, - UINT16 => UInt16, - UINT32 => UInt32, - FLOAT32 => Float32, - FLOAT64 => Float64 -) +const TYPES_TO_INT = Dict(t => i for (i, t) in INT_TO_TYPE) + +const EncodingDataTypes = Union{values(INT_TO_TYPE)...} # Mapping from Julia types to TypeCode -const TYPE_TO_TYPE_CODE = Dict(value => key for (key, value) in TYPE_CODE_TO_TYPE) # Safe casting function -function safe_cast(array, dtype) - if eltype(array) == dtype - return array - end +# function safe_cast(array, dtype) +# if eltype(array) == dtype +# return array +# end - if dtype <: Integer && !(eltype(array) <: Integer) - throw(ArgumentError("Cannot cast floating point to integer")) - end +# if dtype <: Integer && !(eltype(array) <: Integer) +# throw(ArgumentError("Cannot cast floating point to integer")) +# end - if dtype <: Integer - type_min, type_max = typemin(dtype), typemax(dtype) - if any(x -> x < type_min || x > type_max, array) - throw(ArgumentError("Integer values do not fit into the given dtype")) - end - end +# if dtype <: Integer +# type_min, type_max = typemin(dtype), typemax(dtype) +# if any(x -> x < type_min || x > type_max, array) +# throw(ArgumentError("Integer values do not fit into the given dtype")) +# end +# end - return convert(Array{dtype}, array) -end +# return convert(Array{dtype}, array) +# end # Abstract encoding type abstract type Encoding end # ByteArrayEncoding mutable struct ByteArrayEncoding <: Encoding - type::Union{TypeCode,Nothing} + type - function ByteArrayEncoding(type=nothing) - if type !== nothing - type = type isa TypeCode ? type : TYPE_TO_TYPE_CODE[type] - end + function ByteArrayEncoding(type) new(type) end end -function encode(enc::ByteArrayEncoding, data) - if enc.type === nothing - enc.type = TYPE_TO_TYPE_CODE[eltype(data)] +# Add a constructor that handles the type lookup from INT_TO_TYPE +function ByteArrayEncoding(type_code::Integer) + if haskey(INT_TO_TYPE, type_code) + return ByteArrayEncoding(INT_TO_TYPE[type_code]) + else + throw(ArgumentError("Invalid type code: $type_code")) end - return reinterpret(UInt8, safe_cast(data, TYPE_CODE_TO_TYPE[enc.type])) end +# function encode(enc::ByteArrayEncoding, data) +# if enc.type === nothing +# enc.type = TYPE_TO_TYPE_CODE[eltype(data)] +# end +# return reinterpret(UInt8, safe_cast(data, TYPE_CODE_TO_TYPE[enc.type])) +# end + function decode(enc::ByteArrayEncoding, data) - return reinterpret(TYPE_CODE_TO_TYPE[enc.type], data) + return reinterpret(enc.type, data) end # FixedPointEncoding mutable struct FixedPointEncoding <: Encoding factor::Float64 - srcType::TypeCode + srcType - function FixedPointEncoding(factor; srcType=FLOAT32) - srcType = srcType isa TypeCode ? srcType : TYPE_TO_TYPE_CODE[srcType] - if !(srcType in (FLOAT32, FLOAT64)) - throw(ArgumentError("Only floating point types are supported")) - end + function FixedPointEncoding(factor; srcType) new(factor, srcType) end end @@ -318,7 +276,7 @@ function encode(enc::FixedPointEncoding, data) end function decode(enc::FixedPointEncoding, data) - return convert(Array{TYPE_CODE_TO_TYPE[enc.srcType]}, data ./ enc.factor) + return convert(Array{enc.srcType}, data ./ enc.factor) end # IntervalQuantizationEncoding @@ -326,80 +284,76 @@ mutable struct IntervalQuantizationEncoding <: Encoding min::Float64 max::Float64 numSteps::Int - srcType::TypeCode + srcType - function IntervalQuantizationEncoding(min, max, numSteps; srcType=FLOAT32) - srcType = srcType isa TypeCode ? srcType : TYPE_TO_TYPE_CODE[srcType] + function IntervalQuantizationEncoding(min, max, numSteps; srcType) new(min, max, numSteps, srcType) end end -function encode(enc::IntervalQuantizationEncoding, data) - # Convert to normalized values between 0 and numSteps-1 - normalized = (data .- enc.min) ./ (enc.max - enc.min) .* (enc.numSteps - 1) - # Clamp to valid range and convert to integers - indices = clamp.(round.(Int32, normalized), 0, enc.numSteps - 1) - return indices -end +# function encode(enc::IntervalQuantizationEncoding, data) +# # Convert to normalized values between 0 and numSteps-1 +# normalized = (data .- enc.min) ./ (enc.max - enc.min) .* (enc.numSteps - 1) +# # Clamp to valid range and convert to integers +# indices = clamp.(round.(Int32, normalized), 0, enc.numSteps - 1) +# return indices +# end function decode(enc::IntervalQuantizationEncoding, data) # Convert indices back to values in the original range normalized = data ./ Float64(enc.numSteps - 1) output = normalized .* (enc.max - enc.min) .+ enc.min - return convert(Array{TYPE_CODE_TO_TYPE[enc.srcType]}, output) + return convert(Array{enc.srcType}, output) end # RunLengthEncoding mutable struct RunLengthEncoding <: Encoding - srcSize::Union{Int,Nothing} - srcType::Union{TypeCode,Nothing} + srcSize::Int + srcType - function RunLengthEncoding(; srcSize=nothing, srcType=nothing) - if srcType !== nothing - srcType = srcType isa TypeCode ? srcType : TYPE_TO_TYPE_CODE[srcType] - end + function RunLengthEncoding(; srcSize, srcType) new(srcSize, srcType) end end -function encode(enc::RunLengthEncoding, data) - if enc.srcType === nothing - enc.srcType = TYPE_TO_TYPE_CODE[eltype(data)] - end - if enc.srcSize === nothing - enc.srcSize = length(data) - elseif enc.srcSize != length(data) - throw(ArgumentError("Given source size does not match actual data size")) - end - - # Pessimistic allocation - worst case is run length of 1 for every element - output = zeros(Int32, length(data) * 2) - j = 1 - val = data[1] - run_length = 0 - - for i in 1:length(data) - curr_val = data[i] - if curr_val == val - run_length += 1 - else - # New element -> Write element with run-length - output[j] = val - output[j+1] = run_length - j += 2 - val = curr_val - run_length = 1 - end - end - - # Write last element - output[j] = val - output[j+1] = run_length - j += 2 - - # Trim to correct size - return output[1:j-1] -end +# function encode(enc::RunLengthEncoding, data) +# if enc.srcType === nothing +# enc.srcType = TYPE_TO_TYPE_CODE[eltype(data)] +# end +# if enc.srcSize === nothing +# enc.srcSize = length(data) +# elseif enc.srcSize != length(data) +# throw(ArgumentError("Given source size does not match actual data size")) +# end + +# # Pessimistic allocation - worst case is run length of 1 for every element +# output = zeros(Int32, length(data) * 2) +# j = 1 +# val = data[1] +# run_length = 0 + +# for i in 1:length(data) +# curr_val = data[i] +# if curr_val == val +# run_length += 1 +# else +# # New element -> Write element with run-length +# output[j] = val +# output[j+1] = run_length +# j += 2 +# val = curr_val +# run_length = 1 +# end +# end + +# # Write last element +# output[j] = val +# output[j+1] = run_length +# j += 2 + +# # Trim to correct size +# return output[1:j-1] +# end function decode(enc::RunLengthEncoding, data) if length(data) % 2 != 0 @@ -416,7 +370,7 @@ function decode(enc::RunLengthEncoding, data) length_output = enc.srcSize end - output = zeros(TYPE_CODE_TO_TYPE[enc.srcType], length_output) + output = zeros(enc.srcType, length_output) j = 1 for i in 1:2:length(data) @@ -431,41 +385,30 @@ end # DeltaEncoding mutable struct DeltaEncoding <: Encoding - srcType::Union{TypeCode,Nothing} - origin::Int + srcType + origin::Int32 - function DeltaEncoding(; srcType=nothing, origin=0) - if srcType !== nothing - srcType = srcType isa TypeCode ? srcType : TYPE_TO_TYPE_CODE[srcType] - end + # Constructor for Type parameter + function DeltaEncoding(; srcType::Type, origin::Int32=0) new(srcType, origin) end end -function encode(enc::DeltaEncoding, data) - if enc.srcType === nothing - enc.srcType = TYPE_TO_TYPE_CODE[eltype(data)] - end - - data = data .- enc.origin - diffs = vcat([0], diff(data)) - return convert(Array{Int32}, diffs) -end - function decode(enc::DeltaEncoding, data) output = cumsum(data) - output = convert(Array{TYPE_CODE_TO_TYPE[enc.srcType]}, output) + output = convert(Array{enc.srcType}, output) output .+= enc.origin return output end +# end # IntegerPackingEncoding mutable struct IntegerPackingEncoding <: Encoding byteCount::Int - srcSize::Union{Int,Nothing} + srcSize::Int isUnsigned::Bool - function IntegerPackingEncoding(byteCount; srcSize=nothing, isUnsigned=false) + function IntegerPackingEncoding(byteCount; srcSize, isUnsigned=false) new(byteCount, srcSize, isUnsigned) end end @@ -480,63 +423,63 @@ function determine_packed_dtype(enc::IntegerPackingEncoding) end end -function encode(enc::IntegerPackingEncoding, data) - if enc.srcSize === nothing - enc.srcSize = length(data) - elseif enc.srcSize != length(data) - throw(ArgumentError("Given source size does not match actual data size")) - end - - data = convert(Array{Int32}, data) - packed_type = determine_packed_dtype(enc) - min_val = typemin(packed_type) - max_val = typemax(packed_type) - - # Get length of output array by summing up required length of each element - length_output = 0 - for num in data - if num < 0 - if min_val == 0 - throw(ArgumentError("Cannot pack negative numbers into unsigned type")) - end - # Required packed length is number of times min_val needs to be repeated + 1 - length_output += div(num, min_val) + 1 - elseif num > 0 - length_output += div(num, max_val) + 1 - else - # num = 0 - length_output += 1 - end - end - - # Fill output - output = zeros(packed_type, length_output) - j = 1 - - for i in 1:length(data) - remainder = data[i] - if remainder < 0 - if min_val == 0 - throw(ArgumentError("Cannot pack negative numbers into unsigned type")) - end - while remainder <= min_val - remainder -= min_val - output[j] = min_val - j += 1 - end - elseif remainder > 0 - while remainder >= max_val - remainder -= max_val - output[j] = max_val - j += 1 - end - end - output[j] = remainder - j += 1 - end - - return output -end +# function encode(enc::IntegerPackingEncoding, data) +# if enc.srcSize === nothing +# enc.srcSize = length(data) +# elseif enc.srcSize != length(data) +# throw(ArgumentError("Given source size does not match actual data size")) +# end + +# data = convert(Array{Int32}, data) +# packed_type = determine_packed_dtype(enc) +# min_val = typemin(packed_type) +# max_val = typemax(packed_type) + +# # Get length of output array by summing up required length of each element +# length_output = 0 +# for num in data +# if num < 0 +# if min_val == 0 +# throw(ArgumentError("Cannot pack negative numbers into unsigned type")) +# end +# # Required packed length is number of times min_val needs to be repeated + 1 +# length_output += div(num, min_val) + 1 +# elseif num > 0 +# length_output += div(num, max_val) + 1 +# else +# # num = 0 +# length_output += 1 +# end +# end + +# # Fill output +# output = zeros(packed_type, length_output) +# j = 1 + +# for i in 1:length(data) +# remainder = data[i] +# if remainder < 0 +# if min_val == 0 +# throw(ArgumentError("Cannot pack negative numbers into unsigned type")) +# end +# while remainder <= min_val +# remainder -= min_val +# output[j] = min_val +# j += 1 +# end +# elseif remainder > 0 +# while remainder >= max_val +# remainder -= max_val +# output[j] = max_val +# j += 1 +# end +# end +# output[j] = remainder +# j += 1 +# end + +# return output +# end function decode(enc::IntegerPackingEncoding, data) packed_type = determine_packed_dtype(enc) @@ -577,53 +520,53 @@ mutable struct StringArrayEncoding <: Encoding function StringArrayEncoding(; stringData=nothing, dataEncoding=nothing, offsetEncoding=nothing, offsets=nothing) if dataEncoding === nothing - dataEncoding = [ByteArrayEncoding(INT32)] + dataEncoding = [ByteArrayEncoding(Int32)] end if offsetEncoding === nothing - offsetEncoding = [ByteArrayEncoding(INT32)] + offsetEncoding = [ByteArrayEncoding(Int32)] end new(stringData, dataEncoding, offsetEncoding, offsets) end end -function encode(enc::StringArrayEncoding, data) - if !(eltype(data) <: AbstractString) - throw(ArgumentError("Data must be of string type")) - end - - if enc.stringData === nothing - # Get unique stringData - enc.stringData = unique(data) - check_present = false - else - check_present = true - end - - # Sort stringData for binary search - sorted_indices = sortperm(enc.stringData) - sorted_strings = enc.stringData[sorted_indices] - - # Find indices of each string in data - indices = zeros(Int32, length(data)) - for i in 1:length(data) - idx = searchsortedfirst(sorted_strings, data[i]) - if idx <= length(sorted_strings) && sorted_strings[idx] == data[i] - indices[i] = sorted_indices[idx] - else - if check_present - throw(ArgumentError("Data contains stringData not present in 'stringData'")) - end - end - end - - # Apply encodings - encoded_data = indices - for encoding in enc.dataEncoding - encoded_data = encode(encoding, encoded_data) - end - - return encoded_data -end +# function encode(enc::StringArrayEncoding, data) +# if !(eltype(data) <: AbstractString) +# throw(ArgumentError("Data must be of string type")) +# end + +# if enc.stringData === nothing +# # Get unique stringData +# enc.stringData = unique(data) +# check_present = false +# else +# check_present = true +# end + +# # Sort stringData for binary search +# sorted_indices = sortperm(enc.stringData) +# sorted_strings = enc.stringData[sorted_indices] + +# # Find indices of each string in data +# indices = zeros(Int32, length(data)) +# for i in 1:length(data) +# idx = searchsortedfirst(sorted_strings, data[i]) +# if idx <= length(sorted_strings) && sorted_strings[idx] == data[i] +# indices[i] = sorted_indices[idx] +# else +# if check_present +# throw(ArgumentError("Data contains stringData not present in 'stringData'")) +# end +# end +# end + +# # Apply encodings +# encoded_data = indices +# for encoding in enc.dataEncoding +# encoded_data = encode(encoding, encoded_data) +# end + +# return encoded_data +# end function decode(enc::StringArrayEncoding, data) # Apply decodings in reverse order From 8270f0835cf902e1fea4e1f41720ae49a60744e6 Mon Sep 17 00:00:00 2001 From: Brady Johnston Date: Thu, 15 May 2025 14:52:23 +0800 Subject: [PATCH 20/23] incorporate most feedback Incorporates most feedback from PR --- src/bcif.jl | 96 +++++++++++++++++++++++------------------------- test/runtests.jl | 6 ++- 2 files changed, 49 insertions(+), 53 deletions(-) diff --git a/src/bcif.jl b/src/bcif.jl index 0f83fe7b..70adc9a7 100644 --- a/src/bcif.jl +++ b/src/bcif.jl @@ -1,26 +1,14 @@ -export decode_column, columns_to_dict, datablock_to_dict, BCIFDict, BCIFFormat, FixedPointEncoding, TypeCode +export BCIFDict, BCIFFormat using LinearAlgebra import MsgPack - -struct BCIFDict <: AbstractDict{String, Any} - dict::Dict{String,Any} - - function BCIFDict(dict::Dict{String, Dict{String}}) - new(convert(Dict{String,Any}, dict)) - end -end - -Base.keys(mmcif_dict::BCIFDict) = keys(mmcif_dict.dict) -Base.values(mmcif_dict::BCIFDict) = values(mmcif_dict.dict) - """ read(input::IO, ::Type{BCIFFormat}, structure_name::AbstractString="", remove_disorder::Bool=false, read_std_atoms::Bool=true, read_het_atoms::Bool=true, run_dssp::Bool=false, run_stride::Bool=false) A function to read a binary CIF file from MolStar and extract the list of attributes and their compressed bytes. """ -function Base.read(input::String, # TODO: does this make sense to be a string? +function Base.read(io::IO, ::Type{BCIFFormat}; structure_name::AbstractString="", remove_disorder::Bool=false, @@ -28,9 +16,8 @@ function Base.read(input::String, # TODO: does this make sense to be a string? read_het_atoms::Bool=true, run_dssp::Bool=false, run_stride::Bool=false) - - file = MsgPack.unpack(read(input)) - bcif_dict = BCIFDict(datablock_to_dict(file["dataBlocks"][1])) + + bcif_dict = MsgPack.unpack(io, BCIFDict) return MolecularStructure( bcif_dict; @@ -42,6 +29,27 @@ function Base.read(input::String, # TODO: does this make sense to be a string? run_stride=run_stride ) end +function Base.read(filename::AbstractString, ::Type{BCIFFormat}; kwargs...) + open(filename) do io + read(io, BCIFFormat; kwargs...) + end +end + +struct BCIFDict <: AbstractDict{String, Any} + dict::Dict{String,Any} + + function BCIFDict(dict::Dict{String, Dict{String}}) + new(convert(Dict{String,Any}, dict)) + end +end + +Base.keys(mmcif_dict::BCIFDict) = keys(mmcif_dict.dict) +Base.values(mmcif_dict::BCIFDict) = values(mmcif_dict.dict) +Base.haskey(mmcif_dict::BCIFDict, key) = haskey(mmcif_dict.dict, key) +Base.get(mmcif_dict::BCIFDict, key, default) = get(mmcif_dict.dict, key, default) +Base.length(mmcif_dict::BCIFDict) = length(mmcif_dict.dict) +Base.iterate(mmcif_dict::BCIFDict) = iterate(mmcif_dict.dict) +Base.iterate(mmcif_dict::BCIFDict, i) = iterate(mmcif_dict.dict, i) function MolecularStructure(bcif_dict::BCIFDict; structure_name::AbstractString="", @@ -75,24 +83,6 @@ function MolecularStructure(bcif_dict::BCIFDict; return struc end -function datablock_to_dict(datablock::Dict) - categories = datablock["categories"] - return reduce(merge, [Dict(category["name"] => columns_to_dict(category["columns"])) for category in categories]) -end - -function columns_to_dict(columns::Vector{Any}) - tasks = map(columns) do column - Threads.@spawn Dict(column["name"] => decode_column(column)) - end - return reduce(merge, fetch.(tasks)) -end -Base.haskey(mmcif_dict::BCIFDict, key) = haskey(mmcif_dict.dict, key) -Base.get(mmcif_dict::BCIFDict, key, default) = get(mmcif_dict.dict, key, default) -Base.length(mmcif_dict::BCIFDict) = length(mmcif_dict.dict) -Base.iterate(mmcif_dict::BCIFDict) = iterate(mmcif_dict.dict) -Base.iterate(mmcif_dict::BCIFDict, i) = iterate(mmcif_dict.dict, i) - - function AtomRecord(d::BCIFDict, i::Int) d = d["_atom_site"] return AtomRecord( @@ -117,6 +107,19 @@ function AtomRecord(d::BCIFDict, i::Int) end # Utility functions for encoding/decoding + +function MsgPack.from_msgpack(::Type{BCIFDict}, data::Dict{String, Any}) + categories = data["dataBlocks"][1]["categories"] + return BCIFDict(reduce(merge, [Dict(category["name"] => columns_to_dict(category["columns"])) for category in categories])) +end + +function columns_to_dict(columns::Vector{Any}) + tasks = map(columns) do column + Threads.@spawn Dict(column["name"] => decode_column(column)) + end + return reduce(merge, fetch.(tasks)) +end + function encode_stepwise(data, encodings) for encoding in encodings data = encode(encoding, data) @@ -203,7 +206,7 @@ const INT_TO_TYPE = Dict( 33 => Float64 ) -const TYPES_TO_INT = Dict(t => i for (i, t) in INT_TO_TYPE) +const TYPES_TO_INT = IdDict{Any, Int}(t => i for (i, t) in INT_TO_TYPE) const EncodingDataTypes = Union{values(INT_TO_TYPE)...} @@ -234,11 +237,7 @@ abstract type Encoding end # ByteArrayEncoding mutable struct ByteArrayEncoding <: Encoding - type - - function ByteArrayEncoding(type) - new(type) - end + type::DataType end # Add a constructor that handles the type lookup from INT_TO_TYPE @@ -264,9 +263,9 @@ end # FixedPointEncoding mutable struct FixedPointEncoding <: Encoding factor::Float64 - srcType + srcType::DataType - function FixedPointEncoding(factor; srcType) + function FixedPointEncoding(factor; srcType::DataType) new(factor, srcType) end end @@ -284,11 +283,7 @@ mutable struct IntervalQuantizationEncoding <: Encoding min::Float64 max::Float64 numSteps::Int - srcType - - function IntervalQuantizationEncoding(min, max, numSteps; srcType) - new(min, max, numSteps, srcType) - end + srcType::DataType end # function encode(enc::IntervalQuantizationEncoding, data) @@ -309,7 +304,7 @@ end # RunLengthEncoding mutable struct RunLengthEncoding <: Encoding srcSize::Int - srcType + srcType::DataType function RunLengthEncoding(; srcSize, srcType) new(srcSize, srcType) @@ -385,7 +380,7 @@ end # DeltaEncoding mutable struct DeltaEncoding <: Encoding - srcType + srcType::DataType origin::Int32 # Constructor for Type parameter @@ -400,7 +395,6 @@ function decode(enc::DeltaEncoding, data) output .+= enc.origin return output end -# end # IntegerPackingEncoding mutable struct IntegerPackingEncoding <: Encoding diff --git a/test/runtests.jl b/test/runtests.jl index 990fbdce..70237330 100644 --- a/test/runtests.jl +++ b/test/runtests.jl @@ -2536,8 +2536,10 @@ end end @testset "BCIF" begin - bcif = retrievepdb("1AKE", dir = temp_dir, format=BCIFFormat) - mmcif = retrievepdb("1AKE", dir = temp_dir, format=MMCIFFormat) + bcif_file = downloadpdb("1AKE",dir = temp_dir, format=BCIFFormat) + mmcif_file = downloadpdb("1AKE",dir = temp_dir, format=MMCIFFormat) + bcif = read(bcif_file, BCIFFormat) + mmcif = read(mmcif_file, MMCIFFormat) @test coordarray(bcif) == coordarray(mmcif) end From 244a6a09b2a9b36681825fdeb20db0719bc9d0eb Mon Sep 17 00:00:00 2001 From: Brady Johnston <36021261+BradyAJohnston@users.noreply.github.com> Date: Thu, 15 May 2025 14:56:03 +0800 Subject: [PATCH 21/23] Update src/bcif.jl Co-authored-by: Tim Holy --- src/bcif.jl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/bcif.jl b/src/bcif.jl index 70adc9a7..ebee24f1 100644 --- a/src/bcif.jl +++ b/src/bcif.jl @@ -193,7 +193,7 @@ end # Below are the encoding and decoding types for BCIF format # Data types defined for the BCIF encoding by are indicated by integer values -# there are not well discussed in the official spec, had to ask about it excplicitly +# there are not well discussed in the official spec, had to ask about it explicitly # https://github.com/molstar/BinaryCIF/issues/4 const INT_TO_TYPE = Dict( 1 => Int8, From 622f4b7a7c48c7924ab29745a3e759a45f9e8820 Mon Sep 17 00:00:00 2001 From: Brady Johnston Date: Thu, 15 May 2025 14:59:00 +0800 Subject: [PATCH 22/23] remove 1BNA This reverts commit b168125bb10ab511bc05348692c3b3175e145771. --- 1BNA.pdb | 944 ------------------------------------------------------- 1 file changed, 944 deletions(-) delete mode 100644 1BNA.pdb diff --git a/1BNA.pdb b/1BNA.pdb deleted file mode 100644 index 630a15d2..00000000 --- a/1BNA.pdb +++ /dev/null @@ -1,944 +0,0 @@ -HEADER DNA 26-JAN-81 1BNA -TITLE STRUCTURE OF A B-DNA DODECAMER. CONFORMATION AND DYNAMICS -COMPND MOL_ID: 1; -COMPND 2 MOLECULE: DNA (5'-D(*CP*GP*CP*GP*AP*AP*TP*TP*CP*GP*CP*G)-3'); -COMPND 3 CHAIN: A, B; -COMPND 4 ENGINEERED: YES -SOURCE MOL_ID: 1; -SOURCE 2 SYNTHETIC: YES -KEYWDS B-DNA, DOUBLE HELIX, DNA -EXPDTA X-RAY DIFFRACTION -AUTHOR H.R.DREW,R.M.WING,T.TAKANO,C.BROKA,S.TANAKA,K.ITAKURA,R.E.DICKERSON -REVDAT 4 07-FEB-24 1BNA 1 REMARK -REVDAT 3 24-FEB-09 1BNA 1 VERSN -REVDAT 2 01-APR-03 1BNA 1 JRNL -REVDAT 1 21-MAY-81 1BNA 0 -JRNL AUTH H.R.DREW,R.M.WING,T.TAKANO,C.BROKA,S.TANAKA,K.ITAKURA, -JRNL AUTH 2 R.E.DICKERSON -JRNL TITL STRUCTURE OF A B-DNA DODECAMER: CONFORMATION AND DYNAMICS. -JRNL REF PROC.NATL.ACAD.SCI.USA V. 78 2179 1981 -JRNL REFN ISSN 0027-8424 -JRNL PMID 6941276 -JRNL DOI 10.1073/PNAS.78.4.2179 -REMARK 1 -REMARK 1 REFERENCE 1 -REMARK 1 AUTH R.E.DICKERSON,H.R.DREW -REMARK 1 TITL KINEMATIC MODEL FOR B-DNA -REMARK 1 REF PROC.NATL.ACAD.SCI.USA V. 78 7318 1981 -REMARK 1 REFN ISSN 0027-8424 -REMARK 1 REFERENCE 2 -REMARK 1 AUTH R.E.DICKERSON,H.R.DREW -REMARK 1 TITL STRUCTURE OF A B-DNA DODECAMER. II. INFLUENCE OF BASE -REMARK 1 TITL 2 SEQUENCE ON HELIX STRUCTURE -REMARK 1 REF J.MOL.BIOL. V. 149 761 1981 -REMARK 1 REFN ISSN 0022-2836 -REMARK 1 REFERENCE 3 -REMARK 1 AUTH H.R.DREW,R.E.DICKERSON -REMARK 1 TITL STRUCTURE OF A B-DNA DODECAMER. III. GEOMETRY OF HYDRATION -REMARK 1 REF J.MOL.BIOL. V. 151 535 1981 -REMARK 1 REFN ISSN 0022-2836 -REMARK 1 REFERENCE 4 -REMARK 1 AUTH R.WING,H.R.DREW,T.TAKANO,C.BROKA,S.TANAKA,K.ITAKURA, -REMARK 1 AUTH 2 R.E.DICKERSON -REMARK 1 TITL CRYSTAL STRUCTURE ANALYSIS OF A COMPLETE TURN OF B-DNA -REMARK 1 REF NATURE V. 287 755 1980 -REMARK 1 REFN ISSN 0028-0836 -REMARK 2 -REMARK 2 RESOLUTION. 1.90 ANGSTROMS. -REMARK 3 -REMARK 3 REFINEMENT. -REMARK 3 PROGRAM : JACK-LEVITT -REMARK 3 AUTHORS : JACK,LEVITT -REMARK 3 -REMARK 3 DATA USED IN REFINEMENT. -REMARK 3 RESOLUTION RANGE HIGH (ANGSTROMS) : 1.90 -REMARK 3 RESOLUTION RANGE LOW (ANGSTROMS) : 8.00 -REMARK 3 DATA CUTOFF (SIGMA(F)) : NULL -REMARK 3 DATA CUTOFF HIGH (ABS(F)) : NULL -REMARK 3 DATA CUTOFF LOW (ABS(F)) : NULL -REMARK 3 COMPLETENESS (WORKING+TEST) (%) : NULL -REMARK 3 NUMBER OF REFLECTIONS : 2725 -REMARK 3 -REMARK 3 FIT TO DATA USED IN REFINEMENT. -REMARK 3 CROSS-VALIDATION METHOD : NULL -REMARK 3 FREE R VALUE TEST SET SELECTION : NULL -REMARK 3 R VALUE (WORKING SET) : 0.178 -REMARK 3 FREE R VALUE : NULL -REMARK 3 FREE R VALUE TEST SET SIZE (%) : NULL -REMARK 3 FREE R VALUE TEST SET COUNT : NULL -REMARK 3 ESTIMATED ERROR OF FREE R VALUE : NULL -REMARK 3 -REMARK 3 FIT IN THE HIGHEST RESOLUTION BIN. -REMARK 3 TOTAL NUMBER OF BINS USED : NULL -REMARK 3 BIN RESOLUTION RANGE HIGH (A) : NULL -REMARK 3 BIN RESOLUTION RANGE LOW (A) : NULL -REMARK 3 BIN COMPLETENESS (WORKING+TEST) (%) : NULL -REMARK 3 REFLECTIONS IN BIN (WORKING SET) : NULL -REMARK 3 BIN R VALUE (WORKING SET) : NULL -REMARK 3 BIN FREE R VALUE : NULL -REMARK 3 BIN FREE R VALUE TEST SET SIZE (%) : NULL -REMARK 3 BIN FREE R VALUE TEST SET COUNT : NULL -REMARK 3 ESTIMATED ERROR OF BIN FREE R VALUE : NULL -REMARK 3 -REMARK 3 NUMBER OF NON-HYDROGEN ATOMS USED IN REFINEMENT. -REMARK 3 PROTEIN ATOMS : 0 -REMARK 3 NUCLEIC ACID ATOMS : 486 -REMARK 3 HETEROGEN ATOMS : 0 -REMARK 3 SOLVENT ATOMS : 80 -REMARK 3 -REMARK 3 B VALUES. -REMARK 3 FROM WILSON PLOT (A**2) : NULL -REMARK 3 MEAN B VALUE (OVERALL, A**2) : NULL -REMARK 3 OVERALL ANISOTROPIC B VALUE. -REMARK 3 B11 (A**2) : NULL -REMARK 3 B22 (A**2) : NULL -REMARK 3 B33 (A**2) : NULL -REMARK 3 B12 (A**2) : NULL -REMARK 3 B13 (A**2) : NULL -REMARK 3 B23 (A**2) : NULL -REMARK 3 -REMARK 3 ESTIMATED COORDINATE ERROR. -REMARK 3 ESD FROM LUZZATI PLOT (A) : NULL -REMARK 3 ESD FROM SIGMAA (A) : NULL -REMARK 3 LOW RESOLUTION CUTOFF (A) : NULL -REMARK 3 -REMARK 3 CROSS-VALIDATED ESTIMATED COORDINATE ERROR. -REMARK 3 ESD FROM C-V LUZZATI PLOT (A) : NULL -REMARK 3 ESD FROM C-V SIGMAA (A) : NULL -REMARK 3 -REMARK 3 RMS DEVIATIONS FROM IDEAL VALUES. -REMARK 3 BOND LENGTHS (A) : NULL -REMARK 3 BOND ANGLES (DEGREES) : NULL -REMARK 3 DIHEDRAL ANGLES (DEGREES) : NULL -REMARK 3 IMPROPER ANGLES (DEGREES) : NULL -REMARK 3 -REMARK 3 ISOTROPIC THERMAL MODEL : NULL -REMARK 3 -REMARK 3 ISOTROPIC THERMAL FACTOR RESTRAINTS. RMS SIGMA -REMARK 3 MAIN-CHAIN BOND (A**2) : NULL ; NULL -REMARK 3 MAIN-CHAIN ANGLE (A**2) : NULL ; NULL -REMARK 3 SIDE-CHAIN BOND (A**2) : NULL ; NULL -REMARK 3 SIDE-CHAIN ANGLE (A**2) : NULL ; NULL -REMARK 3 -REMARK 3 NCS MODEL : NULL -REMARK 3 -REMARK 3 NCS RESTRAINTS. RMS SIGMA/WEIGHT -REMARK 3 GROUP 1 POSITIONAL (A) : NULL ; NULL -REMARK 3 GROUP 1 B-FACTOR (A**2) : NULL ; NULL -REMARK 3 -REMARK 3 PARAMETER FILE 1 : NULL -REMARK 3 TOPOLOGY FILE 1 : NULL -REMARK 3 -REMARK 3 OTHER REFINEMENT REMARKS: NULL -REMARK 4 -REMARK 4 1BNA COMPLIES WITH FORMAT V. 3.30, 13-JUL-11 -REMARK 100 -REMARK 100 THIS ENTRY HAS BEEN PROCESSED BY BNL. -REMARK 100 THE DEPOSITION ID IS D_1000171933. -REMARK 200 -REMARK 200 EXPERIMENTAL DETAILS -REMARK 200 EXPERIMENT TYPE : X-RAY DIFFRACTION -REMARK 200 DATE OF DATA COLLECTION : NULL -REMARK 200 TEMPERATURE (KELVIN) : NULL -REMARK 200 PH : NULL -REMARK 200 NUMBER OF CRYSTALS USED : NULL -REMARK 200 -REMARK 200 SYNCHROTRON (Y/N) : N -REMARK 200 RADIATION SOURCE : NULL -REMARK 200 BEAMLINE : NULL -REMARK 200 X-RAY GENERATOR MODEL : NULL -REMARK 200 MONOCHROMATIC OR LAUE (M/L) : NULL -REMARK 200 WAVELENGTH OR RANGE (A) : NULL -REMARK 200 MONOCHROMATOR : NULL -REMARK 200 OPTICS : NULL -REMARK 200 -REMARK 200 DETECTOR TYPE : DIFFRACTOMETER -REMARK 200 DETECTOR MANUFACTURER : NULL -REMARK 200 INTENSITY-INTEGRATION SOFTWARE : NULL -REMARK 200 DATA SCALING SOFTWARE : NULL -REMARK 200 -REMARK 200 NUMBER OF UNIQUE REFLECTIONS : 5534 -REMARK 200 RESOLUTION RANGE HIGH (A) : 1.900 -REMARK 200 RESOLUTION RANGE LOW (A) : 8.000 -REMARK 200 REJECTION CRITERIA (SIGMA(I)) : NULL -REMARK 200 -REMARK 200 OVERALL. -REMARK 200 COMPLETENESS FOR RANGE (%) : NULL -REMARK 200 DATA REDUNDANCY : NULL -REMARK 200 R MERGE (I) : NULL -REMARK 200 R SYM (I) : NULL -REMARK 200 FOR THE DATA SET : NULL -REMARK 200 -REMARK 200 IN THE HIGHEST RESOLUTION SHELL. -REMARK 200 HIGHEST RESOLUTION SHELL, RANGE HIGH (A) : NULL -REMARK 200 HIGHEST RESOLUTION SHELL, RANGE LOW (A) : NULL -REMARK 200 COMPLETENESS FOR SHELL (%) : NULL -REMARK 200 DATA REDUNDANCY IN SHELL : NULL -REMARK 200 R MERGE FOR SHELL (I) : NULL -REMARK 200 R SYM FOR SHELL (I) : NULL -REMARK 200 FOR SHELL : NULL -REMARK 200 -REMARK 200 DIFFRACTION PROTOCOL: NULL -REMARK 200 METHOD USED TO DETERMINE THE STRUCTURE: NULL -REMARK 200 SOFTWARE USED: NULL -REMARK 200 STARTING MODEL: NULL -REMARK 200 -REMARK 200 REMARK: NULL -REMARK 280 -REMARK 280 CRYSTAL -REMARK 280 SOLVENT CONTENT, VS (%): 45.79 -REMARK 280 MATTHEWS COEFFICIENT, VM (ANGSTROMS**3/DA): 2.27 -REMARK 280 -REMARK 280 CRYSTALLIZATION CONDITIONS: VAPOR DIFFUSION, TEMPERATURE 290.00K -REMARK 290 -REMARK 290 CRYSTALLOGRAPHIC SYMMETRY -REMARK 290 SYMMETRY OPERATORS FOR SPACE GROUP: P 21 21 21 -REMARK 290 -REMARK 290 SYMOP SYMMETRY -REMARK 290 NNNMMM OPERATOR -REMARK 290 1555 X,Y,Z -REMARK 290 2555 -X+1/2,-Y,Z+1/2 -REMARK 290 3555 -X,Y+1/2,-Z+1/2 -REMARK 290 4555 X+1/2,-Y+1/2,-Z -REMARK 290 -REMARK 290 WHERE NNN -> OPERATOR NUMBER -REMARK 290 MMM -> TRANSLATION VECTOR -REMARK 290 -REMARK 290 CRYSTALLOGRAPHIC SYMMETRY TRANSFORMATIONS -REMARK 290 THE FOLLOWING TRANSFORMATIONS OPERATE ON THE ATOM/HETATM -REMARK 290 RECORDS IN THIS ENTRY TO PRODUCE CRYSTALLOGRAPHICALLY -REMARK 290 RELATED MOLECULES. -REMARK 290 SMTRY1 1 1.000000 0.000000 0.000000 0.00000 -REMARK 290 SMTRY2 1 0.000000 1.000000 0.000000 0.00000 -REMARK 290 SMTRY3 1 0.000000 0.000000 1.000000 0.00000 -REMARK 290 SMTRY1 2 -1.000000 0.000000 0.000000 12.43500 -REMARK 290 SMTRY2 2 0.000000 -1.000000 0.000000 0.00000 -REMARK 290 SMTRY3 2 0.000000 0.000000 1.000000 33.10000 -REMARK 290 SMTRY1 3 -1.000000 0.000000 0.000000 0.00000 -REMARK 290 SMTRY2 3 0.000000 1.000000 0.000000 20.19500 -REMARK 290 SMTRY3 3 0.000000 0.000000 -1.000000 33.10000 -REMARK 290 SMTRY1 4 1.000000 0.000000 0.000000 12.43500 -REMARK 290 SMTRY2 4 0.000000 -1.000000 0.000000 20.19500 -REMARK 290 SMTRY3 4 0.000000 0.000000 -1.000000 0.00000 -REMARK 290 -REMARK 290 REMARK: NULL -REMARK 300 -REMARK 300 BIOMOLECULE: 1 -REMARK 300 SEE REMARK 350 FOR THE AUTHOR PROVIDED AND/OR PROGRAM -REMARK 300 GENERATED ASSEMBLY INFORMATION FOR THE STRUCTURE IN -REMARK 300 THIS ENTRY. THE REMARK MAY ALSO PROVIDE INFORMATION ON -REMARK 300 BURIED SURFACE AREA. -REMARK 350 -REMARK 350 COORDINATES FOR A COMPLETE MULTIMER REPRESENTING THE KNOWN -REMARK 350 BIOLOGICALLY SIGNIFICANT OLIGOMERIZATION STATE OF THE -REMARK 350 MOLECULE CAN BE GENERATED BY APPLYING BIOMT TRANSFORMATIONS -REMARK 350 GIVEN BELOW. BOTH NON-CRYSTALLOGRAPHIC AND -REMARK 350 CRYSTALLOGRAPHIC OPERATIONS ARE GIVEN. -REMARK 350 -REMARK 350 BIOMOLECULE: 1 -REMARK 350 AUTHOR DETERMINED BIOLOGICAL UNIT: DIMERIC -REMARK 350 APPLY THE FOLLOWING TO CHAINS: A, B -REMARK 350 BIOMT1 1 1.000000 0.000000 0.000000 0.00000 -REMARK 350 BIOMT2 1 0.000000 1.000000 0.000000 0.00000 -REMARK 350 BIOMT3 1 0.000000 0.000000 1.000000 0.00000 -REMARK 500 -REMARK 500 GEOMETRY AND STEREOCHEMISTRY -REMARK 500 SUBTOPIC: CLOSE CONTACTS IN SAME ASYMMETRIC UNIT -REMARK 500 -REMARK 500 THE FOLLOWING ATOMS ARE IN CLOSE CONTACT. -REMARK 500 -REMARK 500 ATM1 RES C SSEQI ATM2 RES C SSEQI DISTANCE -REMARK 500 O HOH A 62 O HOH A 77 1.61 -REMARK 500 OP2 DA A 6 O HOH A 65 1.89 -REMARK 500 OP2 DG A 10 O HOH A 70 2.02 -REMARK 500 O HOH A 54 O HOH A 86 2.07 -REMARK 500 O HOH A 77 O HOH B 63 2.09 -REMARK 500 O HOH A 31 O HOH A 99 2.12 -REMARK 500 -REMARK 500 REMARK: NULL -REMARK 500 -REMARK 500 GEOMETRY AND STEREOCHEMISTRY -REMARK 500 SUBTOPIC: COVALENT BOND LENGTHS -REMARK 500 -REMARK 500 THE STEREOCHEMICAL PARAMETERS OF THE FOLLOWING RESIDUES -REMARK 500 HAVE VALUES WHICH DEVIATE FROM EXPECTED VALUES BY MORE -REMARK 500 THAN 6*RMSD (M=MODEL NUMBER; RES=RESIDUE NAME; C=CHAIN -REMARK 500 IDENTIFIER; SSEQ=SEQUENCE NUMBER; I=INSERTION CODE). -REMARK 500 -REMARK 500 STANDARD TABLE: -REMARK 500 FORMAT: (10X,I3,1X,2(A3,1X,A1,I4,A1,1X,A4,3X),1X,F6.3) -REMARK 500 -REMARK 500 EXPECTED VALUES PROTEIN: ENGH AND HUBER, 1999 -REMARK 500 EXPECTED VALUES NUCLEIC ACID: CLOWNEY ET AL 1996 -REMARK 500 -REMARK 500 M RES CSSEQI ATM1 RES CSSEQI ATM2 DEVIATION -REMARK 500 DC A 1 C5 DC A 1 C6 0.051 -REMARK 500 DG A 2 C5 DG A 2 N7 -0.040 -REMARK 500 DG A 2 N7 DG A 2 C8 0.036 -REMARK 500 DC A 3 C5 DC A 3 C6 0.052 -REMARK 500 DG A 4 C5 DG A 4 N7 -0.039 -REMARK 500 DG A 4 N7 DG A 4 C8 0.036 -REMARK 500 DA A 5 C5 DA A 5 N7 -0.039 -REMARK 500 DA A 6 C5 DA A 6 N7 -0.038 -REMARK 500 DT A 7 C5 DT A 7 C6 0.056 -REMARK 500 DT A 8 C5 DT A 8 C6 0.056 -REMARK 500 DC A 9 C5 DC A 9 C6 0.053 -REMARK 500 DG A 10 C5 DG A 10 N7 -0.039 -REMARK 500 DG A 10 N7 DG A 10 C8 0.036 -REMARK 500 DC A 11 C5 DC A 11 C6 0.051 -REMARK 500 DG A 12 C5 DG A 12 N7 -0.039 -REMARK 500 DC B 13 C5 DC B 13 C6 0.052 -REMARK 500 DG B 14 C5 DG B 14 N7 -0.042 -REMARK 500 DC B 15 C5 DC B 15 C6 0.049 -REMARK 500 DG B 16 C5 DG B 16 N7 -0.040 -REMARK 500 DG B 16 N7 DG B 16 C8 0.039 -REMARK 500 DA B 17 C5 DA B 17 N7 -0.039 -REMARK 500 DA B 18 C5 DA B 18 N7 -0.039 -REMARK 500 DT B 19 C5 DT B 19 C6 0.056 -REMARK 500 DT B 20 C5 DT B 20 C6 0.051 -REMARK 500 DC B 21 C5 DC B 21 C6 0.052 -REMARK 500 DG B 22 C5 DG B 22 N7 -0.036 -REMARK 500 DG B 22 N7 DG B 22 C8 0.037 -REMARK 500 DC B 23 C5 DC B 23 C6 0.051 -REMARK 500 DG B 24 C5 DG B 24 N7 -0.039 -REMARK 500 -REMARK 500 REMARK: NULL -REMARK 500 -REMARK 500 GEOMETRY AND STEREOCHEMISTRY -REMARK 500 SUBTOPIC: COVALENT BOND ANGLES -REMARK 500 -REMARK 500 THE STEREOCHEMICAL PARAMETERS OF THE FOLLOWING RESIDUES -REMARK 500 HAVE VALUES WHICH DEVIATE FROM EXPECTED VALUES BY MORE -REMARK 500 THAN 6*RMSD (M=MODEL NUMBER; RES=RESIDUE NAME; C=CHAIN -REMARK 500 IDENTIFIER; SSEQ=SEQUENCE NUMBER; I=INSERTION CODE). -REMARK 500 -REMARK 500 STANDARD TABLE: -REMARK 500 FORMAT: (10X,I3,1X,A3,1X,A1,I4,A1,3(1X,A4,2X),12X,F5.1) -REMARK 500 -REMARK 500 EXPECTED VALUES PROTEIN: ENGH AND HUBER, 1999 -REMARK 500 EXPECTED VALUES NUCLEIC ACID: CLOWNEY ET AL 1996 -REMARK 500 -REMARK 500 M RES CSSEQI ATM1 ATM2 ATM3 -REMARK 500 DC A 1 C4' - C3' - C2' ANGL. DEV. = -4.8 DEGREES -REMARK 500 DC A 1 C3' - C2' - C1' ANGL. DEV. = -6.0 DEGREES -REMARK 500 DC A 1 O4' - C1' - C2' ANGL. DEV. = -5.4 DEGREES -REMARK 500 DG A 2 O5' - C5' - C4' ANGL. DEV. = -6.8 DEGREES -REMARK 500 DG A 2 O4' - C1' - N9 ANGL. DEV. = -5.0 DEGREES -REMARK 500 DC A 3 O4' - C1' - N1 ANGL. DEV. = -5.4 DEGREES -REMARK 500 DG A 4 C3' - C2' - C1' ANGL. DEV. = -5.2 DEGREES -REMARK 500 DA A 5 O4' - C1' - C2' ANGL. DEV. = -4.9 DEGREES -REMARK 500 DA A 5 O4' - C1' - N9 ANGL. DEV. = -4.4 DEGREES -REMARK 500 DA A 6 O4' - C1' - C2' ANGL. DEV. = -6.1 DEGREES -REMARK 500 DT A 7 O5' - C5' - C4' ANGL. DEV. = -6.1 DEGREES -REMARK 500 DT A 7 N1 - C2 - N3 ANGL. DEV. = 3.7 DEGREES -REMARK 500 DT A 7 C2 - N3 - C4 ANGL. DEV. = -4.4 DEGREES -REMARK 500 DT A 7 C5 - C6 - N1 ANGL. DEV. = -3.7 DEGREES -REMARK 500 DT A 8 O4' - C1' - C2' ANGL. DEV. = -5.4 DEGREES -REMARK 500 DT A 8 N1 - C2 - N3 ANGL. DEV. = 4.1 DEGREES -REMARK 500 DT A 8 C2 - N3 - C4 ANGL. DEV. = -4.9 DEGREES -REMARK 500 DT A 8 C5 - C6 - N1 ANGL. DEV. = -4.1 DEGREES -REMARK 500 DG A 10 C3' - C2' - C1' ANGL. DEV. = -7.1 DEGREES -REMARK 500 DG A 12 O4' - C1' - C2' ANGL. DEV. = -5.9 DEGREES -REMARK 500 DC B 13 O4' - C1' - N1 ANGL. DEV. = -4.6 DEGREES -REMARK 500 DG B 14 O5' - C5' - C4' ANGL. DEV. = -5.8 DEGREES -REMARK 500 DG B 14 O4' - C1' - C2' ANGL. DEV. = -5.2 DEGREES -REMARK 500 DG B 14 O4' - C1' - N9 ANGL. DEV. = -5.3 DEGREES -REMARK 500 DG B 16 O5' - C5' - C4' ANGL. DEV. = -5.0 DEGREES -REMARK 500 DA B 18 O4' - C1' - N9 ANGL. DEV. = -4.4 DEGREES -REMARK 500 DT B 19 N1 - C2 - N3 ANGL. DEV. = 3.9 DEGREES -REMARK 500 DT B 19 C2 - N3 - C4 ANGL. DEV. = -4.9 DEGREES -REMARK 500 DT B 19 C5 - C6 - N1 ANGL. DEV. = -4.7 DEGREES -REMARK 500 DT B 20 O4' - C1' - C2' ANGL. DEV. = -6.1 DEGREES -REMARK 500 DT B 20 O4' - C1' - N1 ANGL. DEV. = -5.2 DEGREES -REMARK 500 DT B 20 N1 - C2 - N3 ANGL. DEV. = 3.6 DEGREES -REMARK 500 DT B 20 C2 - N3 - C4 ANGL. DEV. = -4.4 DEGREES -REMARK 500 DT B 20 C5 - C6 - N1 ANGL. DEV. = -4.0 DEGREES -REMARK 500 DT B 20 C6 - C5 - C7 ANGL. DEV. = -3.6 DEGREES -REMARK 500 DC B 21 O4' - C1' - N1 ANGL. DEV. = -7.1 DEGREES -REMARK 500 DG B 22 C3' - C2' - C1' ANGL. DEV. = -6.8 DEGREES -REMARK 500 DG B 22 O4' - C1' - N9 ANGL. DEV. = 1.9 DEGREES -REMARK 500 DG B 24 O5' - C5' - C4' ANGL. DEV. = -5.5 DEGREES -REMARK 500 -REMARK 500 REMARK: NULL -DBREF 1BNA A 1 12 PDB 1BNA 1BNA 1 12 -DBREF 1BNA B 13 24 PDB 1BNA 1BNA 13 24 -SEQRES 1 A 12 DC DG DC DG DA DA DT DT DC DG DC DG -SEQRES 1 B 12 DC DG DC DG DA DA DT DT DC DG DC DG -FORMUL 3 HOH *80(H2 O) -CRYST1 24.870 40.390 66.200 90.00 90.00 90.00 P 21 21 21 8 -ORIGX1 1.000000 0.000000 0.000000 0.00000 -ORIGX2 0.000000 1.000000 0.000000 0.00000 -ORIGX3 0.000000 0.000000 1.000000 0.00000 -SCALE1 0.040209 0.000000 0.000000 0.00000 -SCALE2 0.000000 0.024759 0.000000 0.00000 -SCALE3 0.000000 0.000000 0.015106 0.00000 -ATOM 1 O5' DC A 1 18.935 34.195 25.617 1.00 64.35 O -ATOM 2 C5' DC A 1 19.130 33.921 24.219 1.00 44.69 C -ATOM 3 C4' DC A 1 19.961 32.668 24.100 1.00 31.28 C -ATOM 4 O4' DC A 1 19.360 31.583 24.852 1.00 37.45 O -ATOM 5 C3' DC A 1 20.172 32.122 22.694 1.00 46.72 C -ATOM 6 O3' DC A 1 21.350 31.325 22.681 1.00 48.89 O -ATOM 7 C2' DC A 1 18.948 31.223 22.647 1.00 30.88 C -ATOM 8 C1' DC A 1 19.231 30.482 23.944 1.00 36.58 C -ATOM 9 N1 DC A 1 18.070 29.661 24.380 1.00 40.51 N -ATOM 10 C2 DC A 1 18.224 28.454 25.015 1.00 16.62 C -ATOM 11 O2 DC A 1 19.360 28.014 25.214 1.00 27.75 O -ATOM 12 N3 DC A 1 17.143 27.761 25.377 1.00 20.55 N -ATOM 13 C4 DC A 1 15.917 28.226 25.120 1.00 34.72 C -ATOM 14 N4 DC A 1 14.828 27.477 25.444 1.00 40.31 N -ATOM 15 C5 DC A 1 15.719 29.442 24.471 1.00 30.78 C -ATOM 16 C6 DC A 1 16.843 30.171 24.101 1.00 25.90 C -ATOM 17 P DG A 2 22.409 31.286 21.483 1.00 58.85 P -ATOM 18 OP1 DG A 2 23.536 32.157 21.851 1.00 57.82 O -ATOM 19 OP2 DG A 2 21.822 31.459 20.139 1.00 78.33 O -ATOM 20 O5' DG A 2 22.840 29.751 21.498 1.00 40.36 O -ATOM 21 C5' DG A 2 23.543 29.175 22.594 1.00 47.19 C -ATOM 22 C4' DG A 2 23.494 27.709 22.279 1.00 47.81 C -ATOM 23 O4' DG A 2 22.193 27.252 22.674 1.00 38.76 O -ATOM 24 C3' DG A 2 23.693 27.325 20.807 1.00 28.58 C -ATOM 25 O3' DG A 2 24.723 26.320 20.653 1.00 40.44 O -ATOM 26 C2' DG A 2 22.273 26.885 20.416 1.00 21.14 C -ATOM 27 C1' DG A 2 21.721 26.304 21.716 1.00 33.95 C -ATOM 28 N9 DG A 2 20.237 26.470 21.780 1.00 34.00 N -ATOM 29 C8 DG A 2 19.526 27.584 21.429 1.00 36.47 C -ATOM 30 N7 DG A 2 18.207 27.455 21.636 1.00 32.37 N -ATOM 31 C5 DG A 2 18.083 26.212 22.142 1.00 15.06 C -ATOM 32 C6 DG A 2 16.904 25.525 22.545 1.00 11.88 C -ATOM 33 O6 DG A 2 15.739 25.916 22.518 1.00 21.30 O -ATOM 34 N1 DG A 2 17.197 24.279 23.037 1.00 15.44 N -ATOM 35 C2 DG A 2 18.434 23.717 23.155 1.00 9.63 C -ATOM 36 N2 DG A 2 18.508 22.456 23.668 1.00 16.69 N -ATOM 37 N3 DG A 2 19.537 24.360 22.770 1.00 30.98 N -ATOM 38 C4 DG A 2 19.290 25.594 22.274 1.00 18.56 C -ATOM 39 P DC A 3 25.064 25.621 19.252 1.00 44.67 P -ATOM 40 OP1 DC A 3 26.506 25.316 19.220 1.00 53.89 O -ATOM 41 OP2 DC A 3 24.559 26.412 18.115 1.00 57.79 O -ATOM 42 O5' DC A 3 24.260 24.246 19.327 1.00 35.42 O -ATOM 43 C5' DC A 3 24.584 23.285 20.335 1.00 45.75 C -ATOM 44 C4' DC A 3 23.523 22.233 20.245 1.00 43.02 C -ATOM 45 O4' DC A 3 22.256 22.844 20.453 1.00 36.85 O -ATOM 46 C3' DC A 3 23.424 21.557 18.903 1.00 40.14 C -ATOM 47 O3' DC A 3 24.121 20.309 18.928 1.00 49.62 O -ATOM 48 C2' DC A 3 21.930 21.406 18.661 1.00 53.79 C -ATOM 49 C1' DC A 3 21.278 21.966 19.909 1.00 22.18 C -ATOM 50 N1 DC A 3 20.196 22.889 19.521 1.00 25.44 N -ATOM 51 C2 DC A 3 18.909 22.584 19.816 1.00 19.81 C -ATOM 52 O2 DC A 3 18.685 21.512 20.382 1.00 29.92 O -ATOM 53 N3 DC A 3 17.935 23.447 19.502 1.00 21.59 N -ATOM 54 C4 DC A 3 18.217 24.603 18.897 1.00 14.01 C -ATOM 55 N4 DC A 3 17.221 25.499 18.629 1.00 26.88 N -ATOM 56 C5 DC A 3 19.526 24.945 18.571 1.00 27.59 C -ATOM 57 C6 DC A 3 20.537 24.048 18.899 1.00 27.05 C -ATOM 58 P DG A 4 24.249 19.412 17.617 1.00 44.54 P -ATOM 59 OP1 DG A 4 25.420 18.535 17.765 1.00 61.90 O -ATOM 60 OP2 DG A 4 24.208 20.296 16.440 1.00 37.36 O -ATOM 61 O5' DG A 4 22.931 18.537 17.670 1.00 32.01 O -ATOM 62 C5' DG A 4 22.714 17.625 18.753 1.00 37.89 C -ATOM 63 C4' DG A 4 21.393 16.960 18.505 1.00 53.00 C -ATOM 64 O4' DG A 4 20.353 17.952 18.496 1.00 38.79 O -ATOM 65 C3' DG A 4 21.264 16.229 17.176 1.00 56.72 C -ATOM 66 O3' DG A 4 20.284 15.214 17.238 1.00 64.12 O -ATOM 67 C2' DG A 4 20.793 17.368 16.288 1.00 40.81 C -ATOM 68 C1' DG A 4 19.716 17.901 17.218 1.00 30.52 C -ATOM 69 N9 DG A 4 19.305 19.281 16.869 1.00 28.53 N -ATOM 70 C8 DG A 4 20.017 20.263 16.232 1.00 27.82 C -ATOM 71 N7 DG A 4 19.313 21.394 16.077 1.00 28.01 N -ATOM 72 C5 DG A 4 18.121 21.100 16.635 1.00 23.22 C -ATOM 73 C6 DG A 4 16.952 21.904 16.749 1.00 29.21 C -ATOM 74 O6 DG A 4 16.769 23.057 16.368 1.00 38.58 O -ATOM 75 N1 DG A 4 15.933 21.214 17.352 1.00 27.94 N -ATOM 76 C2 DG A 4 15.972 19.930 17.816 1.00 23.44 C -ATOM 77 N2 DG A 4 14.831 19.416 18.353 1.00 42.64 N -ATOM 78 N3 DG A 4 17.068 19.179 17.717 1.00 21.56 N -ATOM 79 C4 DG A 4 18.084 19.825 17.121 1.00 23.44 C -ATOM 80 P DA A 5 20.356 13.969 16.245 1.00 57.01 P -ATOM 81 OP1 DA A 5 21.116 12.891 16.892 1.00 58.59 O -ATOM 82 OP2 DA A 5 20.837 14.423 14.910 1.00 51.96 O -ATOM 83 O5' DA A 5 18.810 13.581 16.161 1.00 47.12 O -ATOM 84 C5' DA A 5 18.015 13.569 17.362 1.00 47.67 C -ATOM 85 C4' DA A 5 16.672 14.088 16.957 1.00 64.79 C -ATOM 86 O4' DA A 5 16.842 15.447 16.561 1.00 47.60 O -ATOM 87 C3' DA A 5 16.019 13.393 15.764 1.00 51.50 C -ATOM 88 O3' DA A 5 14.762 12.796 16.120 1.00 52.18 O -ATOM 89 C2' DA A 5 15.952 14.498 14.696 1.00 45.00 C -ATOM 90 C1' DA A 5 15.851 15.732 15.569 1.00 26.88 C -ATOM 91 N9 DA A 5 16.391 16.916 14.867 1.00 16.69 N -ATOM 92 C8 DA A 5 17.658 17.103 14.382 1.00 28.14 C -ATOM 93 N7 DA A 5 17.863 18.346 13.913 1.00 34.85 N -ATOM 94 C5 DA A 5 16.673 18.953 14.098 1.00 22.49 C -ATOM 95 C6 DA A 5 16.230 20.279 13.819 1.00 18.12 C -ATOM 96 N6 DA A 5 17.045 21.222 13.268 1.00 29.30 N -ATOM 97 N1 DA A 5 14.966 20.578 14.118 1.00 27.61 N -ATOM 98 C2 DA A 5 14.178 19.652 14.669 1.00 18.53 C -ATOM 99 N3 DA A 5 14.463 18.392 14.984 1.00 29.16 N -ATOM 100 C4 DA A 5 15.750 18.110 14.661 1.00 15.08 C -ATOM 101 P DA A 6 13.866 12.006 15.063 1.00 43.68 P -ATOM 102 OP1 DA A 6 13.028 11.039 15.800 1.00 42.55 O -ATOM 103 OP2 DA A 6 14.715 11.499 13.968 1.00 54.20 O -ATOM 104 O5' DA A 6 12.879 13.111 14.480 1.00 28.20 O -ATOM 105 C5' DA A 6 11.802 13.597 15.290 1.00 42.29 C -ATOM 106 C4' DA A 6 11.111 14.603 14.435 1.00 33.23 C -ATOM 107 O4' DA A 6 12.152 15.460 13.962 1.00 41.48 O -ATOM 108 C3' DA A 6 10.417 14.070 13.187 1.00 18.16 C -ATOM 109 O3' DA A 6 9.007 14.369 13.181 1.00 30.42 O -ATOM 110 C2' DA A 6 11.240 14.692 12.061 1.00 52.97 C -ATOM 111 C1' DA A 6 11.699 15.974 12.719 1.00 38.93 C -ATOM 112 N9 DA A 6 12.918 16.526 12.078 1.00 19.06 N -ATOM 113 C8 DA A 6 14.115 15.899 11.868 1.00 17.83 C -ATOM 114 N7 DA A 6 15.049 16.714 11.356 1.00 29.55 N -ATOM 115 C5 DA A 6 14.416 17.901 11.246 1.00 19.88 C -ATOM 116 C6 DA A 6 14.873 19.187 10.815 1.00 17.26 C -ATOM 117 N6 DA A 6 16.161 19.418 10.427 1.00 19.85 N -ATOM 118 N1 DA A 6 13.999 20.191 10.852 1.00 17.93 N -ATOM 119 C2 DA A 6 12.753 19.962 11.272 1.00 23.00 C -ATOM 120 N3 DA A 6 12.210 18.824 11.698 1.00 21.37 N -ATOM 121 C4 DA A 6 13.116 17.823 11.657 1.00 15.93 C -ATOM 122 P DT A 7 8.081 14.050 11.915 1.00 40.72 P -ATOM 123 OP1 DT A 7 6.668 13.960 12.342 1.00 46.75 O -ATOM 124 OP2 DT A 7 8.600 12.894 11.137 1.00 42.53 O -ATOM 125 O5' DT A 7 8.239 15.387 11.076 1.00 35.21 O -ATOM 126 C5' DT A 7 7.907 16.635 11.686 1.00 34.88 C -ATOM 127 C4' DT A 7 8.162 17.628 10.598 1.00 31.45 C -ATOM 128 O4' DT A 7 9.543 17.580 10.279 1.00 46.82 O -ATOM 129 C3' DT A 7 7.461 17.284 9.296 1.00 23.76 C -ATOM 130 O3' DT A 7 6.251 18.034 9.162 1.00 44.27 O -ATOM 131 C2' DT A 7 8.532 17.527 8.223 1.00 26.30 C -ATOM 132 C1' DT A 7 9.644 18.209 9.019 1.00 28.96 C -ATOM 133 N1 DT A 7 11.021 17.903 8.565 1.00 20.47 N -ATOM 134 C2 DT A 7 11.822 18.923 8.176 1.00 28.01 C -ATOM 135 O2 DT A 7 11.383 20.077 8.143 1.00 40.01 O -ATOM 136 N3 DT A 7 13.119 18.641 7.852 1.00 27.94 N -ATOM 137 C4 DT A 7 13.633 17.372 7.882 1.00 15.14 C -ATOM 138 O4 DT A 7 14.830 17.222 7.619 1.00 32.54 O -ATOM 139 C5 DT A 7 12.781 16.325 8.235 1.00 10.83 C -ATOM 140 C7 DT A 7 13.269 14.902 8.236 1.00 36.33 C -ATOM 141 C6 DT A 7 11.465 16.616 8.594 1.00 12.19 C -ATOM 142 P DT A 8 5.384 17.990 7.824 1.00 49.10 P -ATOM 143 OP1 DT A 8 4.025 18.444 8.180 1.00 41.11 O -ATOM 144 OP2 DT A 8 5.458 16.668 7.160 1.00 39.21 O -ATOM 145 O5' DT A 8 6.086 19.118 6.927 1.00 48.80 O -ATOM 146 C5' DT A 8 6.146 20.478 7.418 1.00 34.73 C -ATOM 147 C4' DT A 8 6.995 21.229 6.438 1.00 28.73 C -ATOM 148 O4' DT A 8 8.188 20.458 6.284 1.00 39.07 O -ATOM 149 C3' DT A 8 6.418 21.332 5.029 1.00 37.88 C -ATOM 150 O3' DT A 8 5.967 22.667 4.696 1.00 52.04 O -ATOM 151 C2' DT A 8 7.513 20.718 4.139 1.00 32.80 C -ATOM 152 C1' DT A 8 8.736 20.855 5.034 1.00 36.58 C -ATOM 153 N1 DT A 8 9.823 19.876 4.759 1.00 24.57 N -ATOM 154 C2 DT A 8 11.086 20.316 4.494 1.00 19.41 C -ATOM 155 O2 DT A 8 11.324 21.516 4.389 1.00 32.74 O -ATOM 156 N3 DT A 8 12.094 19.403 4.412 1.00 25.12 N -ATOM 157 C4 DT A 8 11.876 18.060 4.551 1.00 31.35 C -ATOM 158 O4 DT A 8 12.858 17.317 4.503 1.00 28.53 O -ATOM 159 C5 DT A 8 10.569 17.611 4.765 1.00 22.80 C -ATOM 160 C7 DT A 8 10.261 16.140 4.896 1.00 24.98 C -ATOM 161 C6 DT A 8 9.545 18.548 4.904 1.00 20.28 C -ATOM 162 P DC A 9 5.531 23.071 3.209 1.00 48.97 P -ATOM 163 OP1 DC A 9 4.648 24.244 3.269 1.00 62.33 O -ATOM 164 OP2 DC A 9 5.010 21.905 2.470 1.00 51.53 O -ATOM 165 O5' DC A 9 6.926 23.547 2.611 1.00 43.99 O -ATOM 166 C5' DC A 9 7.636 24.627 3.249 1.00 50.86 C -ATOM 167 C4' DC A 9 8.897 24.853 2.457 1.00 46.66 C -ATOM 168 O4' DC A 9 9.638 23.627 2.448 1.00 42.69 O -ATOM 169 C3' DC A 9 8.717 25.240 0.998 1.00 56.96 C -ATOM 170 O3' DC A 9 9.470 26.414 0.667 1.00 63.54 O -ATOM 171 C2' DC A 9 9.126 23.965 0.253 1.00 50.41 C -ATOM 172 C1' DC A 9 10.241 23.483 1.157 1.00 41.08 C -ATOM 173 N1 DC A 9 10.524 22.022 1.015 1.00 37.23 N -ATOM 174 C2 DC A 9 11.814 21.603 0.840 1.00 40.54 C -ATOM 175 O2 DC A 9 12.691 22.447 0.670 1.00 43.89 O -ATOM 176 N3 DC A 9 12.106 20.297 0.873 1.00 32.57 N -ATOM 177 C4 DC A 9 11.141 19.395 1.046 1.00 24.65 C -ATOM 178 N4 DC A 9 11.461 18.075 1.089 1.00 27.84 N -ATOM 179 C5 DC A 9 9.803 19.775 1.177 1.00 17.61 C -ATOM 180 C6 DC A 9 9.499 21.133 1.167 1.00 30.63 C -ATOM 181 P DG A 10 9.055 27.333 -0.581 1.00 65.48 P -ATOM 182 OP1 DG A 10 9.496 28.717 -0.258 1.00 59.09 O -ATOM 183 OP2 DG A 10 7.632 27.106 -0.947 1.00 45.71 O -ATOM 184 O5' DG A 10 9.954 26.765 -1.771 1.00 70.30 O -ATOM 185 C5' DG A 10 11.382 26.940 -1.720 1.00 71.73 C -ATOM 186 C4' DG A 10 11.972 26.090 -2.802 1.00 58.69 C -ATOM 187 O4' DG A 10 11.802 24.724 -2.404 1.00 41.03 O -ATOM 188 C3' DG A 10 11.327 26.178 -4.188 1.00 45.61 C -ATOM 189 O3' DG A 10 12.311 26.096 -5.214 1.00 52.70 O -ATOM 190 C2' DG A 10 10.414 24.962 -4.186 1.00 36.02 C -ATOM 191 C1' DG A 10 11.429 24.028 -3.587 1.00 50.90 C -ATOM 192 N9 DG A 10 10.890 22.713 -3.200 1.00 45.86 N -ATOM 193 C8 DG A 10 9.616 22.315 -2.910 1.00 44.49 C -ATOM 194 N7 DG A 10 9.541 21.009 -2.613 1.00 39.96 N -ATOM 195 C5 DG A 10 10.818 20.588 -2.718 1.00 38.99 C -ATOM 196 C6 DG A 10 11.376 19.292 -2.511 1.00 35.78 C -ATOM 197 O6 DG A 10 10.813 18.252 -2.179 1.00 34.90 O -ATOM 198 N1 DG A 10 12.729 19.299 -2.720 1.00 23.54 N -ATOM 199 C2 DG A 10 13.498 20.365 -3.082 1.00 8.73 C -ATOM 200 N2 DG A 10 14.834 20.169 -3.237 1.00 23.15 N -ATOM 201 N3 DG A 10 12.982 21.573 -3.267 1.00 24.68 N -ATOM 202 C4 DG A 10 11.656 21.601 -3.061 1.00 31.53 C -ATOM 203 P DC A 11 12.763 27.421 -5.980 1.00 60.62 P -ATOM 204 OP1 DC A 11 12.796 28.572 -5.049 1.00 63.74 O -ATOM 205 OP2 DC A 11 11.886 27.542 -7.164 1.00 52.44 O -ATOM 206 O5' DC A 11 14.272 27.086 -6.366 1.00 57.57 O -ATOM 207 C5' DC A 11 15.275 27.108 -5.318 1.00 54.70 C -ATOM 208 C4' DC A 11 16.222 25.946 -5.510 1.00 72.51 C -ATOM 209 O4' DC A 11 15.443 24.754 -5.397 1.00 47.18 O -ATOM 210 C3' DC A 11 16.942 25.827 -6.848 1.00 29.82 C -ATOM 211 O3' DC A 11 18.340 25.511 -6.701 1.00 43.53 O -ATOM 212 C2' DC A 11 16.118 24.767 -7.578 1.00 51.34 C -ATOM 213 C1' DC A 11 15.856 23.836 -6.414 1.00 30.07 C -ATOM 214 N1 DC A 11 14.672 22.975 -6.637 1.00 23.25 N -ATOM 215 C2 DC A 11 14.802 21.628 -6.529 1.00 20.38 C -ATOM 216 O2 DC A 11 15.924 21.178 -6.314 1.00 38.77 O -ATOM 217 N3 DC A 11 13.723 20.842 -6.627 1.00 15.92 N -ATOM 218 C4 DC A 11 12.515 21.373 -6.836 1.00 15.82 C -ATOM 219 N4 DC A 11 11.410 20.574 -6.872 1.00 28.04 N -ATOM 220 C5 DC A 11 12.348 22.744 -6.978 1.00 26.17 C -ATOM 221 C6 DC A 11 13.470 23.558 -6.869 1.00 35.50 C -ATOM 222 P DG A 12 19.331 25.774 -7.925 1.00 55.98 P -ATOM 223 OP1 DG A 12 20.704 25.976 -7.408 1.00 45.83 O -ATOM 224 OP2 DG A 12 18.763 26.851 -8.758 1.00 44.26 O -ATOM 225 O5' DG A 12 19.302 24.412 -8.763 1.00 62.63 O -ATOM 226 C5' DG A 12 20.109 23.284 -8.359 1.00 69.50 C -ATOM 227 C4' DG A 12 19.748 22.167 -9.299 1.00 39.92 C -ATOM 228 O4' DG A 12 18.350 21.969 -9.139 1.00 32.00 O -ATOM 229 C3' DG A 12 19.921 22.404 -10.815 1.00 50.39 C -ATOM 230 O3' DG A 12 20.985 21.635 -11.401 1.00 64.13 O -ATOM 231 C2' DG A 12 18.535 22.062 -11.381 1.00 36.18 C -ATOM 232 C1' DG A 12 17.965 21.200 -10.269 1.00 24.79 C -ATOM 233 N9 DG A 12 16.493 21.220 -10.265 1.00 28.44 N -ATOM 234 C8 DG A 12 15.663 22.289 -10.478 1.00 31.85 C -ATOM 235 N7 DG A 12 14.368 21.958 -10.390 1.00 38.26 N -ATOM 236 C5 DG A 12 14.388 20.640 -10.102 1.00 28.99 C -ATOM 237 C6 DG A 12 13.301 19.742 -9.856 1.00 42.63 C -ATOM 238 O6 DG A 12 12.091 19.967 -9.857 1.00 49.17 O -ATOM 239 N1 DG A 12 13.750 18.466 -9.625 1.00 40.15 N -ATOM 240 C2 DG A 12 15.042 18.043 -9.605 1.00 33.42 C -ATOM 241 N2 DG A 12 15.259 16.717 -9.406 1.00 40.53 N -ATOM 242 N3 DG A 12 16.061 18.885 -9.792 1.00 37.34 N -ATOM 243 C4 DG A 12 15.660 20.156 -10.027 1.00 31.14 C -TER 244 DG A 12 -ATOM 245 O5' DC B 13 7.458 11.884 -9.070 1.00 66.23 O -ATOM 246 C5' DC B 13 8.252 10.968 -9.854 1.00 71.49 C -ATOM 247 C4' DC B 13 9.714 11.141 -9.512 1.00 56.82 C -ATOM 248 O4' DC B 13 10.144 12.455 -9.908 1.00 57.92 O -ATOM 249 C3' DC B 13 10.103 10.989 -8.055 1.00 34.34 C -ATOM 250 O3' DC B 13 11.293 10.221 -7.904 1.00 42.11 O -ATOM 251 C2' DC B 13 10.254 12.437 -7.607 1.00 29.08 C -ATOM 252 C1' DC B 13 10.896 13.044 -8.837 1.00 38.40 C -ATOM 253 N1 DC B 13 10.575 14.487 -8.944 1.00 34.33 N -ATOM 254 C2 DC B 13 11.559 15.430 -9.006 1.00 22.98 C -ATOM 255 O2 DC B 13 12.725 15.066 -8.932 1.00 50.83 O -ATOM 256 N3 DC B 13 11.246 16.714 -9.193 1.00 37.14 N -ATOM 257 C4 DC B 13 9.980 17.088 -9.334 1.00 42.60 C -ATOM 258 N4 DC B 13 9.698 18.395 -9.589 1.00 54.91 N -ATOM 259 C5 DC B 13 8.939 16.162 -9.274 1.00 56.67 C -ATOM 260 C6 DC B 13 9.265 14.824 -9.080 1.00 49.21 C -ATOM 261 P DG B 14 11.602 9.510 -6.502 1.00 60.42 P -ATOM 262 OP1 DG B 14 11.666 8.032 -6.664 1.00 57.44 O -ATOM 263 OP2 DG B 14 10.644 10.010 -5.494 1.00 46.07 O -ATOM 264 O5' DG B 14 13.051 10.094 -6.177 1.00 50.94 O -ATOM 265 C5' DG B 14 14.100 10.021 -7.156 1.00 34.84 C -ATOM 266 C4' DG B 14 15.113 10.992 -6.657 1.00 48.06 C -ATOM 267 O4' DG B 14 14.556 12.300 -6.755 1.00 37.01 O -ATOM 268 C3' DG B 14 15.445 10.806 -5.189 1.00 50.58 C -ATOM 269 O3' DG B 14 16.836 10.560 -5.013 1.00 51.98 O -ATOM 270 C2' DG B 14 14.937 12.100 -4.529 1.00 40.32 C -ATOM 271 C1' DG B 14 15.058 13.086 -5.671 1.00 46.69 C -ATOM 272 N9 DG B 14 14.036 14.140 -5.536 1.00 29.17 N -ATOM 273 C8 DG B 14 12.710 13.957 -5.259 1.00 23.48 C -ATOM 274 N7 DG B 14 12.016 15.103 -5.269 1.00 37.54 N -ATOM 275 C5 DG B 14 12.937 16.041 -5.558 1.00 26.27 C -ATOM 276 C6 DG B 14 12.761 17.451 -5.710 1.00 40.82 C -ATOM 277 O6 DG B 14 11.723 18.111 -5.630 1.00 44.39 O -ATOM 278 N1 DG B 14 13.952 18.079 -5.973 1.00 19.52 N -ATOM 279 C2 DG B 14 15.171 17.485 -6.107 1.00 18.48 C -ATOM 280 N2 DG B 14 16.244 18.292 -6.325 1.00 36.58 N -ATOM 281 N3 DG B 14 15.329 16.161 -5.986 1.00 46.96 N -ATOM 282 C4 DG B 14 14.179 15.499 -5.721 1.00 35.70 C -ATOM 283 P DC B 15 17.478 10.380 -3.569 1.00 46.26 P -ATOM 284 OP1 DC B 15 18.665 9.516 -3.729 1.00 46.07 O -ATOM 285 OP2 DC B 15 16.427 9.940 -2.633 1.00 40.43 O -ATOM 286 O5' DC B 15 17.957 11.865 -3.208 1.00 40.97 O -ATOM 287 C5' DC B 15 18.963 12.531 -3.996 1.00 28.78 C -ATOM 288 C4' DC B 15 18.936 13.958 -3.536 1.00 32.84 C -ATOM 289 O4' DC B 15 17.592 14.409 -3.622 1.00 37.24 O -ATOM 290 C3' DC B 15 19.253 14.139 -2.066 1.00 43.98 C -ATOM 291 O3' DC B 15 20.659 14.219 -1.858 1.00 40.90 O -ATOM 292 C2' DC B 15 18.520 15.417 -1.728 1.00 36.26 C -ATOM 293 C1' DC B 15 17.545 15.602 -2.872 1.00 20.54 C -ATOM 294 N1 DC B 15 16.145 15.696 -2.428 1.00 23.10 N -ATOM 295 C2 DC B 15 15.507 16.886 -2.558 1.00 32.12 C -ATOM 296 O2 DC B 15 16.162 17.846 -2.957 1.00 30.04 O -ATOM 297 N3 DC B 15 14.209 16.983 -2.264 1.00 32.94 N -ATOM 298 C4 DC B 15 13.536 15.919 -1.825 1.00 16.43 C -ATOM 299 N4 DC B 15 12.205 16.017 -1.553 1.00 34.91 N -ATOM 300 C5 DC B 15 14.164 14.689 -1.652 1.00 22.75 C -ATOM 301 C6 DC B 15 15.509 14.584 -1.979 1.00 26.42 C -ATOM 302 P DG B 16 21.304 14.529 -0.436 1.00 42.39 P -ATOM 303 OP1 DG B 16 22.696 14.087 -0.524 1.00 60.41 O -ATOM 304 OP2 DG B 16 20.488 13.954 0.650 1.00 51.09 O -ATOM 305 O5' DG B 16 21.306 16.117 -0.363 1.00 45.08 O -ATOM 306 C5' DG B 16 22.177 16.876 -1.212 1.00 33.20 C -ATOM 307 C4' DG B 16 21.739 18.292 -1.021 1.00 24.95 C -ATOM 308 O4' DG B 16 20.305 18.225 -1.048 1.00 32.83 O -ATOM 309 C3' DG B 16 22.101 18.959 0.293 1.00 41.12 C -ATOM 310 O3' DG B 16 22.592 20.293 0.097 1.00 53.45 O -ATOM 311 C2' DG B 16 20.820 18.829 1.121 1.00 28.93 C -ATOM 312 C1' DG B 16 19.765 18.985 0.046 1.00 37.44 C -ATOM 313 N9 DG B 16 18.513 18.299 0.468 1.00 17.75 N -ATOM 314 C8 DG B 16 18.363 17.062 1.039 1.00 17.96 C -ATOM 315 N7 DG B 16 17.080 16.744 1.281 1.00 24.14 N -ATOM 316 C5 DG B 16 16.400 17.832 0.868 1.00 9.96 C -ATOM 317 C6 DG B 16 14.996 18.090 0.882 1.00 18.10 C -ATOM 318 O6 DG B 16 14.082 17.378 1.280 1.00 31.13 O -ATOM 319 N1 DG B 16 14.712 19.349 0.418 1.00 17.72 N -ATOM 320 C2 DG B 16 15.606 20.268 -0.027 1.00 16.23 C -ATOM 321 N2 DG B 16 15.134 21.493 -0.382 1.00 33.42 N -ATOM 322 N3 DG B 16 16.912 20.017 -0.072 1.00 26.37 N -ATOM 323 C4 DG B 16 17.236 18.794 0.384 1.00 31.72 C -ATOM 324 P DA B 17 22.904 21.238 1.339 1.00 46.87 P -ATOM 325 OP1 DA B 17 23.994 22.183 1.025 1.00 47.75 O -ATOM 326 OP2 DA B 17 23.104 20.390 2.538 1.00 46.81 O -ATOM 327 O5' DA B 17 21.577 22.107 1.390 1.00 39.51 O -ATOM 328 C5' DA B 17 21.216 22.833 0.200 1.00 30.37 C -ATOM 329 C4' DA B 17 20.101 23.788 0.484 1.00 35.43 C -ATOM 330 O4' DA B 17 18.913 23.054 0.816 1.00 43.05 O -ATOM 331 C3' DA B 17 20.347 24.743 1.633 1.00 44.50 C -ATOM 332 O3' DA B 17 19.732 26.010 1.411 1.00 78.59 O -ATOM 333 C2' DA B 17 19.752 23.945 2.791 1.00 44.42 C -ATOM 334 C1' DA B 17 18.497 23.393 2.145 1.00 42.55 C -ATOM 335 N9 DA B 17 18.079 22.095 2.758 1.00 34.56 N -ATOM 336 C8 DA B 17 18.847 21.020 3.133 1.00 20.07 C -ATOM 337 N7 DA B 17 18.114 19.984 3.584 1.00 27.60 N -ATOM 338 C5 DA B 17 16.842 20.424 3.488 1.00 18.80 C -ATOM 339 C6 DA B 17 15.577 19.817 3.786 1.00 32.58 C -ATOM 340 N6 DA B 17 15.448 18.537 4.242 1.00 29.54 N -ATOM 341 N1 DA B 17 14.482 20.557 3.593 1.00 35.01 N -ATOM 342 C2 DA B 17 14.597 21.801 3.118 1.00 36.47 C -ATOM 343 N3 DA B 17 15.700 22.472 2.783 1.00 38.96 N -ATOM 344 C4 DA B 17 16.791 21.706 3.002 1.00 28.24 C -ATOM 345 P DA B 18 19.803 27.141 2.526 1.00 46.11 P -ATOM 346 OP1 DA B 18 19.796 28.478 1.888 1.00 49.20 O -ATOM 347 OP2 DA B 18 20.953 26.858 3.426 1.00 43.48 O -ATOM 348 O5' DA B 18 18.396 26.939 3.241 1.00 40.83 O -ATOM 349 C5' DA B 18 17.203 27.028 2.452 1.00 40.72 C -ATOM 350 C4' DA B 18 16.035 26.958 3.388 1.00 66.52 C -ATOM 351 O4' DA B 18 15.856 25.612 3.850 1.00 44.25 O -ATOM 352 C3' DA B 18 16.101 27.861 4.615 1.00 63.34 C -ATOM 353 O3' DA B 18 14.890 28.608 4.757 1.00 55.65 O -ATOM 354 C2' DA B 18 16.368 26.844 5.724 1.00 34.49 C -ATOM 355 C1' DA B 18 15.561 25.655 5.243 1.00 29.45 C -ATOM 356 N9 DA B 18 16.104 24.373 5.755 1.00 20.03 N -ATOM 357 C8 DA B 18 17.411 23.967 5.830 1.00 16.51 C -ATOM 358 N7 DA B 18 17.539 22.706 6.276 1.00 20.58 N -ATOM 359 C5 DA B 18 16.266 22.309 6.480 1.00 21.66 C -ATOM 360 C6 DA B 18 15.715 21.073 6.933 1.00 17.93 C -ATOM 361 N6 DA B 18 16.483 19.994 7.243 1.00 20.37 N -ATOM 362 N1 DA B 18 14.389 20.994 7.036 1.00 20.81 N -ATOM 363 C2 DA B 18 13.636 22.041 6.708 1.00 26.77 C -ATOM 364 N3 DA B 18 14.019 23.234 6.265 1.00 26.83 N -ATOM 365 C4 DA B 18 15.367 23.291 6.174 1.00 27.48 C -ATOM 366 P DT B 19 14.604 29.545 6.020 1.00 48.40 P -ATOM 367 OP1 DT B 19 13.792 30.696 5.582 1.00 50.18 O -ATOM 368 OP2 DT B 19 15.852 29.836 6.749 1.00 44.42 O -ATOM 369 O5' DT B 19 13.633 28.628 6.885 1.00 53.86 O -ATOM 370 C5' DT B 19 12.398 28.171 6.303 1.00 55.04 C -ATOM 371 C4' DT B 19 11.809 27.217 7.302 1.00 44.86 C -ATOM 372 O4' DT B 19 12.767 26.184 7.534 1.00 48.52 O -ATOM 373 C3' DT B 19 11.515 27.822 8.669 1.00 41.77 C -ATOM 374 O3' DT B 19 10.103 27.952 8.891 1.00 57.02 O -ATOM 375 C2' DT B 19 12.267 26.906 9.630 1.00 39.28 C -ATOM 376 C1' DT B 19 12.426 25.645 8.799 1.00 27.68 C -ATOM 377 N1 DT B 19 13.609 24.850 9.205 1.00 21.67 N -ATOM 378 C2 DT B 19 13.442 23.575 9.656 1.00 31.71 C -ATOM 379 O2 DT B 19 12.311 23.101 9.802 1.00 36.00 O -ATOM 380 N3 DT B 19 14.551 22.825 9.913 1.00 24.66 N -ATOM 381 C4 DT B 19 15.815 23.321 9.777 1.00 40.64 C -ATOM 382 O4 DT B 19 16.755 22.570 10.029 1.00 31.47 O -ATOM 383 C5 DT B 19 15.972 24.647 9.362 1.00 31.79 C -ATOM 384 C7 DT B 19 17.345 25.239 9.234 1.00 30.05 C -ATOM 385 C6 DT B 19 14.844 25.405 9.048 1.00 14.35 C -ATOM 386 P DT B 20 9.513 28.533 10.260 1.00 48.24 P -ATOM 387 OP1 DT B 20 8.145 29.007 9.998 1.00 41.28 O -ATOM 388 OP2 DT B 20 10.455 29.513 10.841 1.00 53.39 O -ATOM 389 O5' DT B 20 9.395 27.223 11.153 1.00 36.57 O -ATOM 390 C5' DT B 20 8.576 26.148 10.664 1.00 50.41 C -ATOM 391 C4' DT B 20 8.655 25.060 11.678 1.00 32.08 C -ATOM 392 O4' DT B 20 10.003 24.615 11.764 1.00 48.38 O -ATOM 393 C3' DT B 20 8.272 25.471 13.087 1.00 29.99 C -ATOM 394 O3' DT B 20 7.199 24.657 13.553 1.00 45.14 O -ATOM 395 C2' DT B 20 9.586 25.307 13.860 1.00 32.42 C -ATOM 396 C1' DT B 20 10.190 24.148 13.089 1.00 39.56 C -ATOM 397 N1 DT B 20 11.660 24.070 13.205 1.00 20.36 N -ATOM 398 C2 DT B 20 12.257 22.880 13.486 1.00 27.55 C -ATOM 399 O2 DT B 20 11.583 21.866 13.691 1.00 38.33 O -ATOM 400 N3 DT B 20 13.620 22.829 13.497 1.00 29.60 N -ATOM 401 C4 DT B 20 14.402 23.914 13.225 1.00 30.11 C -ATOM 402 O4 DT B 20 15.625 23.764 13.252 1.00 32.92 O -ATOM 403 C5 DT B 20 13.774 25.126 12.933 1.00 24.11 C -ATOM 404 C7 DT B 20 14.563 26.358 12.612 1.00 23.96 C -ATOM 405 C6 DT B 20 12.385 25.187 12.926 1.00 19.78 C -ATOM 406 P DC B 21 6.594 24.823 15.016 1.00 54.73 P -ATOM 407 OP1 DC B 21 5.169 24.424 14.987 1.00 53.98 O -ATOM 408 OP2 DC B 21 6.870 26.189 15.511 1.00 65.53 O -ATOM 409 O5' DC B 21 7.409 23.731 15.839 1.00 50.67 O -ATOM 410 C5' DC B 21 7.331 22.352 15.433 1.00 60.86 C -ATOM 411 C4' DC B 21 8.100 21.598 16.461 1.00 40.86 C -ATOM 412 O4' DC B 21 9.478 21.902 16.263 1.00 36.88 O -ATOM 413 C3' DC B 21 7.766 22.045 17.879 1.00 53.80 C -ATOM 414 O3' DC B 21 7.036 21.041 18.611 1.00 79.04 O -ATOM 415 C2' DC B 21 9.123 22.414 18.469 1.00 48.43 C -ATOM 416 C1' DC B 21 10.107 21.743 17.523 1.00 36.51 C -ATOM 417 N1 DC B 21 11.328 22.556 17.331 1.00 24.72 N -ATOM 418 C2 DC B 21 12.534 21.939 17.329 1.00 30.96 C -ATOM 419 O2 DC B 21 12.560 20.731 17.579 1.00 34.53 O -ATOM 420 N3 DC B 21 13.639 22.639 17.035 1.00 31.69 N -ATOM 421 C4 DC B 21 13.560 23.938 16.739 1.00 21.53 C -ATOM 422 N4 DC B 21 14.685 24.628 16.404 1.00 23.72 N -ATOM 423 C5 DC B 21 12.338 24.609 16.736 1.00 30.74 C -ATOM 424 C6 DC B 21 11.193 23.878 17.035 1.00 27.58 C -ATOM 425 P DG B 22 6.509 21.324 20.099 1.00 56.50 P -ATOM 426 OP1 DG B 22 5.387 20.397 20.396 1.00 50.81 O -ATOM 427 OP2 DG B 22 6.235 22.774 20.306 1.00 53.84 O -ATOM 428 O5' DG B 22 7.767 20.924 20.993 1.00 66.30 O -ATOM 429 C5' DG B 22 8.216 19.559 21.073 1.00 73.42 C -ATOM 430 C4' DG B 22 9.422 19.557 21.977 1.00 42.96 C -ATOM 431 O4' DG B 22 10.493 20.260 21.319 1.00 52.87 O -ATOM 432 C3' DG B 22 9.267 20.267 23.325 1.00 38.51 C -ATOM 433 O3' DG B 22 10.088 19.657 24.293 1.00 60.28 O -ATOM 434 C2' DG B 22 9.751 21.670 22.990 1.00 22.00 C -ATOM 435 C1' DG B 22 10.988 21.226 22.256 1.00 24.85 C -ATOM 436 N9 DG B 22 11.599 22.357 21.543 1.00 25.91 N -ATOM 437 C8 DG B 22 11.037 23.545 21.159 1.00 23.91 C -ATOM 438 N7 DG B 22 11.921 24.362 20.566 1.00 39.18 N -ATOM 439 C5 DG B 22 13.072 23.653 20.580 1.00 25.66 C -ATOM 440 C6 DG B 22 14.370 24.003 20.102 1.00 28.34 C -ATOM 441 O6 DG B 22 14.747 25.057 19.585 1.00 31.85 O -ATOM 442 N1 DG B 22 15.268 22.983 20.308 1.00 25.22 N -ATOM 443 C2 DG B 22 15.023 21.776 20.891 1.00 11.07 C -ATOM 444 N2 DG B 22 16.066 20.914 21.038 1.00 25.92 N -ATOM 445 N3 DG B 22 13.815 21.452 21.350 1.00 19.05 N -ATOM 446 C4 DG B 22 12.902 22.429 21.151 1.00 23.69 C -ATOM 447 P DC B 23 9.477 18.627 25.340 1.00 55.93 P -ATOM 448 OP1 DC B 23 8.767 17.534 24.627 1.00 45.14 O -ATOM 449 OP2 DC B 23 8.670 19.409 26.312 1.00 41.61 O -ATOM 450 O5' DC B 23 10.807 18.067 26.034 1.00 59.70 O -ATOM 451 C5' DC B 23 11.688 17.170 25.310 1.00 63.13 C -ATOM 452 C4' DC B 23 13.115 17.573 25.593 1.00 27.86 C -ATOM 453 O4' DC B 23 13.284 18.804 24.893 1.00 50.51 O -ATOM 454 C3' DC B 23 13.441 17.879 27.059 1.00 46.45 C -ATOM 455 O3' DC B 23 14.341 16.938 27.677 1.00 57.21 O -ATOM 456 C2' DC B 23 13.928 19.322 27.025 1.00 68.01 C -ATOM 457 C1' DC B 23 14.312 19.508 25.568 1.00 32.05 C -ATOM 458 N1 DC B 23 14.144 20.932 25.170 1.00 23.28 N -ATOM 459 C2 DC B 23 15.199 21.595 24.630 1.00 20.62 C -ATOM 460 O2 DC B 23 16.257 20.984 24.504 1.00 29.62 O -ATOM 461 N3 DC B 23 15.067 22.877 24.257 1.00 39.00 N -ATOM 462 C4 DC B 23 13.898 23.510 24.404 1.00 30.44 C -ATOM 463 N4 DC B 23 13.771 24.813 24.018 1.00 34.66 N -ATOM 464 C5 DC B 23 12.795 22.866 24.967 1.00 27.74 C -ATOM 465 C6 DC B 23 12.935 21.540 25.359 1.00 24.58 C -ATOM 466 P DG B 24 14.658 17.064 29.247 1.00 53.70 P -ATOM 467 OP1 DG B 24 14.863 15.717 29.825 1.00 61.79 O -ATOM 468 OP2 DG B 24 13.633 17.912 29.920 1.00 36.06 O -ATOM 469 O5' DG B 24 16.033 17.880 29.284 1.00 34.06 O -ATOM 470 C5' DG B 24 17.243 17.320 28.742 1.00 46.57 C -ATOM 471 C4' DG B 24 18.208 18.464 28.758 1.00 50.89 C -ATOM 472 O4' DG B 24 17.716 19.428 27.829 1.00 32.02 O -ATOM 473 C3' DG B 24 18.230 19.236 30.058 1.00 30.38 C -ATOM 474 O3' DG B 24 18.978 18.583 31.084 1.00 61.06 O -ATOM 475 C2' DG B 24 18.885 20.519 29.578 1.00 53.33 C -ATOM 476 C1' DG B 24 18.276 20.693 28.188 1.00 35.03 C -ATOM 477 N9 DG B 24 17.164 21.659 28.139 1.00 30.25 N -ATOM 478 C8 DG B 24 15.874 21.536 28.580 1.00 30.86 C -ATOM 479 N7 DG B 24 15.129 22.614 28.308 1.00 44.08 N -ATOM 480 C5 DG B 24 15.990 23.436 27.673 1.00 16.87 C -ATOM 481 C6 DG B 24 15.765 24.729 27.117 1.00 19.36 C -ATOM 482 O6 DG B 24 14.719 25.373 27.067 1.00 33.30 O -ATOM 483 N1 DG B 24 16.926 25.257 26.604 1.00 15.78 N -ATOM 484 C2 DG B 24 18.157 24.666 26.579 1.00 11.92 C -ATOM 485 N2 DG B 24 19.208 25.386 26.096 1.00 29.76 N -ATOM 486 N3 DG B 24 18.350 23.438 27.053 1.00 21.95 N -ATOM 487 C4 DG B 24 17.231 22.893 27.570 1.00 13.89 C -TER 488 DG B 24 -HETATM 489 O HOH A 25 19.736 30.706 18.656 1.00 51.86 O -HETATM 490 O HOH A 31 10.879 26.039 -8.906 1.00 47.07 O -HETATM 491 O HOH A 32 18.320 24.816 14.948 1.00 47.72 O -HETATM 492 O HOH A 36 9.821 13.442 8.572 1.00 45.76 O -HETATM 493 O HOH A 38 8.915 15.602 -3.388 1.00 50.97 O -HETATM 494 O HOH A 39 17.505 26.340 -10.581 1.00 51.90 O -HETATM 495 O HOH A 40 28.496 23.515 18.349 1.00 45.37 O -HETATM 496 O HOH A 41 11.346 24.175 4.920 1.00 45.03 O -HETATM 497 O HOH A 50 9.098 16.119 1.277 1.00 51.80 O -HETATM 498 O HOH A 54 16.488 29.195 19.861 1.00 54.92 O -HETATM 499 O HOH A 55 22.078 25.894 15.396 1.00 62.20 O -HETATM 500 O HOH A 58 7.133 14.448 4.647 1.00 57.15 O -HETATM 501 O HOH A 62 14.095 28.151 21.614 1.00 53.85 O -HETATM 502 O HOH A 64 27.164 31.710 20.331 1.00 56.84 O -HETATM 503 O HOH A 65 15.295 11.873 12.209 1.00 57.34 O -HETATM 504 O HOH A 66 18.180 16.604 9.966 1.00 61.52 O -HETATM 505 O HOH A 67 6.216 17.035 1.672 1.00 62.91 O -HETATM 506 O HOH A 70 7.055 25.519 -2.053 1.00 55.96 O -HETATM 507 O HOH A 74 12.454 11.354 9.415 1.00 68.40 O -HETATM 508 O HOH A 76 11.492 29.103 20.090 1.00 67.46 O -HETATM 509 O HOH A 77 14.220 29.189 20.392 1.00 48.22 O -HETATM 510 O HOH A 78 6.138 19.149 13.844 1.00 62.26 O -HETATM 511 O HOH A 79 17.315 9.638 13.392 1.00 65.70 O -HETATM 512 O HOH A 80 18.951 25.757 12.989 1.00 66.47 O -HETATM 513 O HOH A 81 20.460 18.861 12.664 1.00 63.00 O -HETATM 514 O HOH A 82 3.529 19.338 12.599 1.00 65.32 O -HETATM 515 O HOH A 84 16.223 12.351 9.406 1.00 63.59 O -HETATM 516 O HOH A 85 12.989 29.901 -9.282 1.00 64.97 O -HETATM 517 O HOH A 86 17.510 30.569 18.702 1.00 61.79 O -HETATM 518 O HOH A 87 25.377 12.891 19.011 1.00 73.80 O -HETATM 519 O HOH A 88 13.610 15.742 18.593 1.00 69.48 O -HETATM 520 O HOH A 89 18.012 32.598 15.262 1.00 67.52 O -HETATM 521 O HOH A 92 8.723 13.216 6.359 1.00 70.66 O -HETATM 522 O HOH A 97 18.779 13.814 11.704 1.00 71.14 O -HETATM 523 O HOH A 99 12.227 25.192 -10.299 1.00 70.46 O -HETATM 524 O HOH A 100 12.292 30.291 27.102 1.00 73.04 O -HETATM 525 O HOH A 102 20.170 23.000 12.999 1.00 73.63 O -HETATM 526 O HOH B 26 14.354 27.683 16.369 1.00 40.92 O -HETATM 527 O HOH B 27 9.864 22.509 9.123 1.00 39.67 O -HETATM 528 O HOH B 28 19.526 19.144 7.481 1.00 51.15 O -HETATM 529 O HOH B 29 25.754 12.744 -1.835 1.00 51.80 O -HETATM 530 O HOH B 30 7.478 20.604 -9.000 1.00 44.82 O -HETATM 531 O HOH B 33 9.012 24.586 7.009 1.00 43.42 O -HETATM 532 O HOH B 34 10.152 19.917 13.381 1.00 48.04 O -HETATM 533 O HOH B 35 7.764 21.397 11.075 1.00 41.41 O -HETATM 534 O HOH B 37 13.239 14.428 2.049 1.00 55.54 O -HETATM 535 O HOH B 42 12.601 23.000 29.167 1.00 51.36 O -HETATM 536 O HOH B 43 10.440 25.542 24.443 1.00 56.79 O -HETATM 537 O HOH B 44 16.979 28.689 16.284 1.00 50.41 O -HETATM 538 O HOH B 45 4.794 22.966 13.368 1.00 45.95 O -HETATM 539 O HOH B 46 4.208 25.591 10.828 1.00 51.06 O -HETATM 540 O HOH B 47 6.362 24.374 9.188 1.00 51.85 O -HETATM 541 O HOH B 48 7.688 28.411 7.883 1.00 49.33 O -HETATM 542 O HOH B 49 18.379 17.074 4.809 1.00 50.72 O -HETATM 543 O HOH B 51 26.464 23.826 1.396 1.00 53.21 O -HETATM 544 O HOH B 52 11.014 11.318 -2.909 1.00 51.36 O -HETATM 545 O HOH B 53 9.476 27.782 26.498 1.00 60.04 O -HETATM 546 O HOH B 56 5.522 27.411 9.017 1.00 62.36 O -HETATM 547 O HOH B 57 18.456 28.409 8.821 1.00 59.63 O -HETATM 548 O HOH B 59 22.610 15.544 3.846 1.00 57.52 O -HETATM 549 O HOH B 60 24.407 13.162 2.229 1.00 52.30 O -HETATM 550 O HOH B 61 7.988 11.556 -2.976 1.00 59.14 O -HETATM 551 O HOH B 63 14.213 27.722 18.905 1.00 57.29 O -HETATM 552 O HOH B 68 19.101 11.433 1.080 1.00 59.79 O -HETATM 553 O HOH B 69 12.607 10.967 0.261 1.00 60.87 O -HETATM 554 O HOH B 71 15.062 26.024 -0.766 1.00 56.35 O -HETATM 555 O HOH B 72 16.380 6.413 -4.784 1.00 59.07 O -HETATM 556 O HOH B 73 14.059 5.751 -6.198 1.00 56.68 O -HETATM 557 O HOH B 75 9.613 17.039 29.793 1.00 63.48 O -HETATM 558 O HOH B 83 25.276 15.890 -1.301 1.00 64.53 O -HETATM 559 O HOH B 90 2.622 23.030 10.332 1.00 68.01 O -HETATM 560 O HOH B 91 19.701 22.518 9.511 1.00 70.25 O -HETATM 561 O HOH B 93 19.727 29.488 6.155 1.00 69.43 O -HETATM 562 O HOH B 94 17.241 11.563 4.511 1.00 72.18 O -HETATM 563 O HOH B 95 26.545 19.404 -1.091 1.00 70.14 O -HETATM 564 O HOH B 96 9.697 18.315 14.885 1.00 69.10 O -HETATM 565 O HOH B 98 14.292 25.159 2.287 1.00 68.44 O -HETATM 566 O HOH B 101 9.396 27.092 16.993 1.00 72.98 O -HETATM 567 O HOH B 103 19.987 21.691 6.802 1.00 72.66 O -HETATM 568 O HOH B 104 18.692 31.584 4.596 1.00 72.98 O -MASTER 340 0 0 0 0 0 0 6 566 2 0 2 -END From 080ad27dad7c039770ff0848acecd01d16b61d40 Mon Sep 17 00:00:00 2001 From: Brady Johnston Date: Thu, 15 May 2025 20:19:24 +0800 Subject: [PATCH 23/23] make structs immutable --- src/bcif.jl | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/src/bcif.jl b/src/bcif.jl index ebee24f1..40500f3d 100644 --- a/src/bcif.jl +++ b/src/bcif.jl @@ -236,7 +236,7 @@ const EncodingDataTypes = Union{values(INT_TO_TYPE)...} abstract type Encoding end # ByteArrayEncoding -mutable struct ByteArrayEncoding <: Encoding +struct ByteArrayEncoding <: Encoding type::DataType end @@ -261,7 +261,7 @@ function decode(enc::ByteArrayEncoding, data) end # FixedPointEncoding -mutable struct FixedPointEncoding <: Encoding +struct FixedPointEncoding <: Encoding factor::Float64 srcType::DataType @@ -279,7 +279,7 @@ function decode(enc::FixedPointEncoding, data) end # IntervalQuantizationEncoding -mutable struct IntervalQuantizationEncoding <: Encoding +struct IntervalQuantizationEncoding <: Encoding min::Float64 max::Float64 numSteps::Int @@ -302,7 +302,7 @@ function decode(enc::IntervalQuantizationEncoding, data) end # RunLengthEncoding -mutable struct RunLengthEncoding <: Encoding +struct RunLengthEncoding <: Encoding srcSize::Int srcType::DataType @@ -379,7 +379,7 @@ function decode(enc::RunLengthEncoding, data) end # DeltaEncoding -mutable struct DeltaEncoding <: Encoding +struct DeltaEncoding <: Encoding srcType::DataType origin::Int32 @@ -397,7 +397,7 @@ function decode(enc::DeltaEncoding, data) end # IntegerPackingEncoding -mutable struct IntegerPackingEncoding <: Encoding +struct IntegerPackingEncoding <: Encoding byteCount::Int srcSize::Int isUnsigned::Bool @@ -506,7 +506,7 @@ function decode(enc::IntegerPackingEncoding, data) end # StringArrayEncoding -mutable struct StringArrayEncoding <: Encoding +struct StringArrayEncoding <: Encoding stringData::String dataEncoding::Vector{Encoding} offsetEncoding::Vector{Encoding}