
Taskthreads #56


Open · wants to merge 7 commits into master
2 changes: 1 addition & 1 deletion Project.toml
@@ -1,7 +1,7 @@
name = "BigArrays"
uuid = "c2a8506f-1b35-5b08-8aa1-bb4a7b47a05e"
authors = ["Jingpeng Wu <[email protected]>"]
version = "1.3.0"
version = "1.4.0"

[deps]
AWSCore = "4f1ea46c-232b-54a6-9b17-cc2d0f3e6598"
3 changes: 2 additions & 1 deletion src/BigArrays.jl
@@ -22,10 +22,11 @@ include("backends/include.jl")

const GZIP_MAGIC_NUMBER = UInt8[0x1f, 0x8b, 0x08]
const CHUNK_CHANNEL_SIZE = 2
# options: sequential, taskthreads, multithreads, multiprocesses
const DEFAULT_MODE = :sequential
const DEFAULT_FILL_MISSING = true

include("type.jl")
# the getindex and setindex modes with multithreads, multiprocesses, sequential, sharedarray
# the getindex and setindex modes with multithreads, multiprocesses, sequential, sharedarray, taskthreads
include("modes/include.jl")
end # module
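
The mode constants above control which implementation serves getindex and setindex. A minimal sketch of a Symbol-based dispatcher, assuming mode functions named after the files in src/modes/ (this dispatcher itself is illustrative, not the package's actual routing code):

    # Illustrative dispatcher: route a read to a mode implementation by Symbol.
    # The getindex_* names mirror the files in src/modes/; this exact function
    # is an assumption, not code from BigArrays.
    function getindex_by_mode(ba, idxes...; mode::Symbol = DEFAULT_MODE)
        if mode == :sequential
            return getindex_sequential(ba, idxes...)
        elseif mode == :taskthreads
            return getindex_taskthreads(ba, idxes...)
        elseif mode == :multithreads
            return getindex_multithreads(ba, idxes...)
        elseif mode == :multiprocesses
            return getindex_multiprocesses(ba, idxes...)
        else
            error("unsupported mode: $mode")
        end
    end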
4 changes: 3 additions & 1 deletion src/ChunkIterators.jl
@@ -80,6 +80,8 @@ function Base.iterate(iter::ChunkIterator{N},
chunkGlobalRange = chunkid2global_range( chunkID, iter.chunkSize; offset=iter.offset )

return (chunkID, chunkGlobalRange, cutoutGlobalRange, rangeInChunk, rangeInBuffer), nextState
end
end



end # end of module
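
Base.iterate above yields a five-element tuple per chunk. A usage sketch matching the destructuring used in the mode implementations, with made-up ranges and chunk size (the constructor signature is inferred from the calls in src/modes/):

    # Iterate over the chunks that cover a 3D index range (values illustrative).
    baIter = ChunkIterator((1:256, 1:256, 1:64), (128, 128, 32);
                           offset = CartesianIndex(0, 0, 0))
    for (chunkID, chunkGlobalRange, globalRange, rangeInChunk, rangeInBuffer) in baIter
        @show chunkID chunkGlobalRange
    end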
12 changes: 9 additions & 3 deletions src/backends/BinDicts.jl
@@ -35,9 +35,15 @@ end
end

@inline function Base.getindex( self::BinDict, key::AbstractString)
open( joinpath( get_path(self), key )) do f
return read(f)
#Libz.inflate(data)
filePath = joinpath(get_path(self), key)

if isfile(filePath)
open(filePath) do f
return read(f)
#Libz.inflate(data)
end
else
return nothing
end
end
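
With this change a missing key yields nothing instead of an error, so callers must branch on the result. A caller sketch, assuming a BinDict named bindict and a placeholder fallback size:

    # Treat `nothing` as a missing chunk and fall back to a zero-filled buffer.
    data = bindict["some-chunk-key"]       # placeholder key
    if data === nothing
        data = zeros(UInt8, chunk_nbytes)  # chunk_nbytes is a placeholder
    end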

19 changes: 15 additions & 4 deletions src/backends/GSDicts.jl
@@ -86,19 +86,26 @@

function Base.getindex( d::GSDict, key::AbstractString)
try

return storage(:Object, :get, d.bucketName, joinpath(d.keyPrefix, key))
catch err
if isa(err, HTTP.ExceptionRequest.StatusError) && err.status==404
throw(KeyError("NoSuchKey in Google Cloud Storage: $(key)"))
# @show d.bucketName, d.keyPrefix
@warn "NoSuchKey in Google Cloud Storage: $(key)"
return nothing
elseif isa(err, UndefVarError)
return nothing
else
println("get an unknown error: ", err)
println("error type is: ", typeof(err))
rethrow
rethrow()
end
end
end
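
The revised getindex converts a 404 StatusError into nothing and rethrows everything else. The same pattern reduced to a standalone helper for clarity (the helper name and generic getter argument are assumptions, not package API):

    # Generic 404-to-nothing wrapper (illustrative only).
    using HTTP

    function get_or_nothing(getter, args...)
        try
            return getter(args...)
        catch err
            if err isa HTTP.ExceptionRequest.StatusError && err.status == 404
                return nothing
            end
            rethrow()
        end
    end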

@inline function Base.getindex(d::GSDict, key::Symbol)
d[string(key)]
end

function Base.keys( d::GSDict )
ds = storage(:Object, :list, d.bucketName; prefix=d.keyPrefix, fields="items(name)")
ret = Vector{String}()
@@ -112,7 +119,11 @@ end
function Base.haskey( d::GSDict, key::String )
@warn("this haskey function will download the object rather than just check whether it exist or not")
response = storage(:Object, :get, d.bucketName, joinpath(d.keyPrefix, key))
!GoogleCloud.api.iserror(response)
return response !== nothing
end

################### utility functions #################
3 changes: 2 additions & 1 deletion src/backends/S3Dicts.jl
@@ -86,7 +86,8 @@ function Base.getindex(h::S3Dict, key::AbstractString)
catch err
@show err
if isa(err, AWSCore.AWSException) && err.code == "NoSuchKey"
throw(KeyError("NoSuchKey in AWS S3: $key"))
# throw(KeyError("NoSuchKey in AWS S3: $key"))
return nothing
elseif isa(err, HTTP.ClosedError)
display(err.e)
rethrow()
4 changes: 1 addition & 3 deletions src/modes/include.jl
@@ -1,8 +1,6 @@
const TASK_NUM = 8
const CHUNK_CHANNEL_SIZE = 2

include("multithreads.jl")
#include("multiprocesses.jl")
include("sequential.jl")
#include("sharedarray.jl")
include("taskthreads.jl")
include("taskthreads.jl")
38 changes: 25 additions & 13 deletions src/modes/multiprocesses.jl
@@ -1,3 +1,8 @@
using Distributed

WORKER_POOL = default_worker_pool()
@show WORKER_POOL

function setindex_multiprocesses_worker(block::Array{T,N}, ba::BigArray{D,T},
chunkGlobalRange::CartesianIndices{N}) where {D,T,N}
C = get_encoding(ba)
@@ -12,19 +17,25 @@ function setindex_multiprocesses!( ba::BigArray{D,T}, buf::Array{T,N},
idxes::Union{UnitRange, Int, Colon} ... ) where {D,T,N}
idxes = colon2unit_range(buf, idxes)
# check alignment
@assert all(map((x,y,z)->mod(first(x) - 1 - y, z), idxes, ba.offset.I, ba.chunkSize).==0) "the start of index should align with BigArray chunk size"
info = ba.info
offset = get_offset(ba)
chunkSize = get_chunk_size(ba)

@assert all(map((x,y,z)->mod(first(x) - 1 - y, z), idxes, offset.I, chunkSize).==0) "the start of index should align with BigArray chunk size"
t1 = time()
baIter = ChunkIterator(idxes, ba.chunkSize; offset=ba.offset)
@sync begin
for (blockID, chunkGlobalRange, globalRange, rangeInChunk, rangeInBuffer) in baIter
chunkGlobalRange, globalRange, rangeInChunk, rangeInBuffer =
adjust_volume_boundary(ba, chunkGlobalRange, globalRange,
rangeInChunk, rangeInBuffer)
block = buf[rangeInBuffer]
@async remotecall_fetch(setindex_multiprocesses_worker, WORKER_POOL,
block, ba, chunkGlobalRange)
end
baIter = ChunkIterator(idxes, chunkSize; offset=offset)
futures = []
for (blockID, chunkGlobalRange, globalRange, rangeInChunk, rangeInBuffer) in baIter
chunkGlobalRange, globalRange, rangeInChunk, rangeInBuffer =
adjust_volume_boundary(ba, chunkGlobalRange, globalRange,
rangeInChunk, rangeInBuffer)
block = buf[rangeInBuffer]
ft = remotecall_wait(setindex_multiprocesses_worker, WORKER_POOL,
block, ba, chunkGlobalRange)
push!(futures, ft)
end
foreach(fetch, futures)  # wait on each future and surface any worker exception

elapsed = time() - t1 # sec
println("saving speed: $(sizeof(buf)/1024/1024/elapsed) MB/s")
end
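
The rewrite replaces @sync/@async remotecall_fetch with explicit futures that are collected and then waited on. The same fan-out shape as a standalone Distributed example (toy workload, not BigArrays code):

    # Fan work out to a worker pool and wait on every future.
    using Distributed
    addprocs(2)
    @everywhere square(x) = x^2

    pool = default_worker_pool()
    futures = [remotecall(square, pool, x) for x in 1:8]
    results = map(fetch, futures)  # fetch blocks until each result is ready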
@@ -62,7 +73,7 @@ function getindex_multiprocesses_worker(ba::BigArray{D,T}, jobs::RemoteChannel,
end
end

function getindex_multiprocesses( ba::BigArray{D, T, N}, idxes::Union{UnitRange, Int}...) where {D,T,N}
function getindex_multiprocesses( ba::BigArray{D,T}, idxes::Union{UnitRange, Int}...) where {D,T}
t1 = time()
sz = map(length, idxes)
ret = OffsetArray(zeros(T, sz), idxes...)
@@ -71,7 +82,8 @@ function getindex_multiprocesses( ba::BigArray{D, T, N}, idxes::Union{UnitRange,
jobs = RemoteChannel(()->Channel{Tuple}( channelSize ));
results = RemoteChannel(()->Channel{OffsetArray}( channelSize ));

baIter = ChunkIterator(idxes, ba.chunkSize; offset=get_offset(ba))
chunkSize = get_chunk_size(ba)
baIter = ChunkIterator(idxes, chunkSize; offset=get_offset(ba))

@sync begin
@async begin
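
getindex_multiprocesses feeds work through a jobs RemoteChannel and collects blocks through a results RemoteChannel. A toy version of that producer/consumer shape, with integers standing in for chunks (standalone sketch, not package code):

    # Producer/consumer over RemoteChannels (illustrative toy).
    using Distributed
    addprocs(1)

    jobs    = RemoteChannel(() -> Channel{Int}(4))
    results = RemoteChannel(() -> Channel{Int}(4))

    @everywhere function consume(jobs, results, n)
        for _ in 1:n
            put!(results, take!(jobs) + 1)
        end
    end

    @async for x in 1:8  # feed jobs without blocking the main task
        put!(jobs, x)
    end
    remote_do(consume, workers()[1], jobs, results, 8)
    answers = [take!(results) for _ in 1:8]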
2 changes: 2 additions & 0 deletions src/modes/multithreads.jl
@@ -1,3 +1,5 @@
const TASK_NUM = 8
const CHUNK_CHANNEL_SIZE = 2

function setindex_multithreads_worker( channel::Channel{Tuple}, buf::Array{T,N}, ba::BigArray{D,T} ) where {D,T,N}
C = get_encoding(ba)
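
The PR's taskthreads.jl itself is not visible in this view. As a hedged sketch, the general task-per-chunk shape that a task-threaded mode typically builds on with Threads.@spawn (generic pattern, not the PR's actual implementation):

    # Spawn one task per chunk and gather the results (generic pattern only).
    using Base.Threads

    function process_chunks(f, chunks)
        tasks = [Threads.@spawn f(chunk) for chunk in chunks]
        return map(fetch, tasks)
    end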