
Taskthreads #56


Open · wants to merge 7 commits into master
2 changes: 1 addition & 1 deletion Project.toml
@@ -1,7 +1,7 @@
name = "BigArrays"
uuid = "c2a8506f-1b35-5b08-8aa1-bb4a7b47a05e"
authors = ["Jingpeng Wu <[email protected]>"]
version = "1.3.0"
version = "1.4.0"

[deps]
AWSCore = "4f1ea46c-232b-54a6-9b17-cc2d0f3e6598"
3 changes: 2 additions & 1 deletion src/BigArrays.jl
@@ -22,10 +22,11 @@ include("backends/include.jl")

const GZIP_MAGIC_NUMBER = UInt8[0x1f, 0x8b, 0x08]
const CHUNK_CHANNEL_SIZE = 2
# options: sequential, taskthreads, multithreads, multiprocesses
const DEFAULT_MODE = :sequential
const DEFAULT_FILL_MISSING = true

include("type.jl")
# the getindex and setindex modes with multithreads, multiprocesses, sequential, sharedarray
# the getindex and setindex modes with multithreads, multiprocesses, sequential, sharedarray, taskthreads
include("modes/include.jl")
end # module
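
The mode constants above control which implementation serves getindex and setindex. A minimal sketch of a Symbol-based dispatcher, assuming mode functions named after the files in src/modes/ (this dispatcher itself is illustrative, not the package's actual routing code):

    # Illustrative dispatcher: route a read to a mode implementation by Symbol.
    # The getindex_* names mirror the files in src/modes/; this exact function
    # is an assumption, not code from BigArrays.
    function getindex_by_mode(ba, idxes...; mode::Symbol = DEFAULT_MODE)
        if mode == :sequential
            return getindex_sequential(ba, idxes...)
        elseif mode == :taskthreads
            return getindex_taskthreads(ba, idxes...)
        elseif mode == :multithreads
            return getindex_multithreads(ba, idxes...)
        elseif mode == :multiprocesses
            return getindex_multiprocesses(ba, idxes...)
        else
            error("unsupported mode: $mode")
        end
    end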
4 changes: 3 additions & 1 deletion src/ChunkIterators.jl
@@ -80,6 +80,8 @@ function Base.iterate(iter::ChunkIterator{N},
chunkGlobalRange = chunkid2global_range( chunkID, iter.chunkSize; offset=iter.offset )

return (chunkID, chunkGlobalRange, cutoutGlobalRange, rangeInChunk, rangeInBuffer), nextState
end
end



end # end of module
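
Base.iterate above yields a five-element tuple per chunk. A usage sketch matching the destructuring used in the mode implementations, with made-up ranges and chunk size (the constructor signature is inferred from the calls in src/modes/):

    # Iterate over the chunks that cover a 3D index range (values illustrative).
    baIter = ChunkIterator((1:256, 1:256, 1:64), (128, 128, 32);
                           offset = CartesianIndex(0, 0, 0))
    for (chunkID, chunkGlobalRange, globalRange, rangeInChunk, rangeInBuffer) in baIter
        @show chunkID chunkGlobalRange
    end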
12 changes: 9 additions & 3 deletions src/backends/BinDicts.jl
@@ -35,9 +35,15 @@ end
end

@inline function Base.getindex( self::BinDict, key::AbstractString)
open( joinpath( get_path(self), key )) do f
return read(f)
#Libz.inflate(data)
filePath = joinpath(get_path(self), key)

if isfile(filePath)
open(filePath) do f
return read(f)
#Libz.inflate(data)
end
else
return nothing
end
end
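
With this change a missing key yields nothing instead of an error, so callers must branch on the result. A caller sketch, assuming a BinDict named bindict and a placeholder fallback size:

    # Treat `nothing` as a missing chunk and fall back to a zero-filled buffer.
    data = bindict["some-chunk-key"]       # placeholder key
    if data === nothing
        data = zeros(UInt8, chunk_nbytes)  # chunk_nbytes is a placeholder
    end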

19 changes: 15 additions & 4 deletions src/backends/GSDicts.jl
@@ -86,19 +86,26 @@

function Base.getindex( d::GSDict, key::AbstractString)
try

return storage(:Object, :get, d.bucketName, joinpath(d.keyPrefix, key))
catch err
if isa(err, HTTP.ExceptionRequest.StatusError) && err.status==404
throw(KeyError("NoSuchKey in Google Cloud Storage: $(key)"))
# @show d.bucketName, d.keyPrefix
@warn "NoSuchKey in Google Cloud Storage: $(key)"
return nothing
elseif isa(err, UndefVarError)
return nothing
else
println("get an unknown error: ", err)
println("error type is: ", typeof(err))
rethrow
rethrow()
end
end
end
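
The revised getindex converts a 404 StatusError into nothing and rethrows everything else. The same pattern reduced to a standalone helper for clarity (the helper name and generic getter argument are assumptions, not package API):

    # Generic 404-to-nothing wrapper (illustrative only).
    using HTTP

    function get_or_nothing(getter, args...)
        try
            return getter(args...)
        catch err
            if err isa HTTP.ExceptionRequest.StatusError && err.status == 404
                return nothing
            end
            rethrow()
        end
    end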

@inline function Base.getindex(d::GSDict, key::Symbol)
d[string(key)]
end

function Base.keys( d::GSDict )
ds = storage(:Object, :list, d.bucketName; prefix=d.keyPrefix, fields="items(name)")
ret = Vector{String}()
@@ -112,7 +119,11 @@ end
function Base.haskey( d::GSDict, key::String )
@warn("this haskey function will download the object rather than just check whether it exist or not")
response = storage(:Object, :get, d.bucketName, joinpath(d.keyPrefix, key))
!GoogleCloud.api.iserror(response)
return response !== nothing
end

################### utility functions #################
3 changes: 2 additions & 1 deletion src/backends/S3Dicts.jl
@@ -86,7 +86,8 @@ function Base.getindex(h::S3Dict, key::AbstractString)
catch err
@show err
if isa(err, AWSCore.AWSException) && err.code == "NoSuchKey"
throw(KeyError("NoSuchKey in AWS S3: $key"))
# throw(KeyError("NoSuchKey in AWS S3: $key"))
return nothing
elseif isa(err, HTTP.ClosedError)
display(err.e)
rethrow()
4 changes: 1 addition & 3 deletions src/modes/include.jl
@@ -1,8 +1,6 @@
const TASK_NUM = 8
const CHUNK_CHANNEL_SIZE = 2

include("multithreads.jl")
#include("multiprocesses.jl")
include("sequential.jl")
#include("sharedarray.jl")
include("taskthreads.jl")
include("taskthreads.jl")
38 changes: 25 additions & 13 deletions src/modes/multiprocesses.jl
@@ -1,3 +1,8 @@
using Distributed

WORKER_POOL = default_worker_pool()
@show WORKER_POOL

function setindex_multiprocesses_worker(block::Array{T,N}, ba::BigArray{D,T},
chunkGlobalRange::CartesianIndices{N}) where {D,T,N}
C = get_encoding(ba)
@@ -12,19 +17,25 @@ function setindex_multiprocesses!( ba::BigArray{D,T}, buf::Array{T,N},
idxes::Union{UnitRange, Int, Colon} ... ) where {D,T,N}
idxes = colon2unit_range(buf, idxes)
# check alignment
@assert all(map((x,y,z)->mod(first(x) - 1 - y, z), idxes, ba.offset.I, ba.chunkSize).==0) "the start of index should align with BigArray chunk size"
info = ba.info
offset = get_offset(ba)
chunkSize = get_chunk_size(ba)

@assert all(map((x,y,z)->mod(first(x) - 1 - y, z), idxes, offset.I, chunkSize).==0) "the start of index should align with BigArray chunk size"
t1 = time()
baIter = ChunkIterator(idxes, ba.chunkSize; offset=ba.offset)
@sync begin
for (blockID, chunkGlobalRange, globalRange, rangeInChunk, rangeInBuffer) in baIter
chunkGlobalRange, globalRange, rangeInChunk, rangeInBuffer =
adjust_volume_boundary(ba, chunkGlobalRange, globalRange,
rangeInChunk, rangeInBuffer)
block = buf[rangeInBuffer]
@async remotecall_fetch(setindex_multiprocesses_worker, WORKER_POOL,
block, ba, chunkGlobalRange)
end
baIter = ChunkIterator(idxes, chunkSize; offset=offset)
futures = []
for (blockID, chunkGlobalRange, globalRange, rangeInChunk, rangeInBuffer) in baIter
chunkGlobalRange, globalRange, rangeInChunk, rangeInBuffer =
adjust_volume_boundary(ba, chunkGlobalRange, globalRange,
rangeInChunk, rangeInBuffer)
block = buf[rangeInBuffer]
ft = remotecall_wait(setindex_multiprocesses_worker, WORKER_POOL,
block, ba, chunkGlobalRange)
push!(futures, ft)
end
foreach(fetch, futures)  # wait on each future and surface any worker exception

elapsed = time() - t1 # sec
println("saving speed: $(sizeof(buf)/1024/1024/elapsed) MB/s")
end
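
The rewrite replaces @sync/@async remotecall_fetch with explicit futures that are collected and then waited on. The same fan-out shape as a standalone Distributed example (toy workload, not BigArrays code):

    # Fan work out to a worker pool and wait on every future.
    using Distributed
    addprocs(2)
    @everywhere square(x) = x^2

    pool = default_worker_pool()
    futures = [remotecall(square, pool, x) for x in 1:8]
    results = map(fetch, futures)  # fetch blocks until each result is ready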
@@ -62,7 +73,7 @@ function getindex_multiprocesses_worker(ba::BigArray{D,T}, jobs::RemoteChannel,
end
end

function getindex_multiprocesses( ba::BigArray{D, T, N}, idxes::Union{UnitRange, Int}...) where {D,T,N}
function getindex_multiprocesses( ba::BigArray{D,T}, idxes::Union{UnitRange, Int}...) where {D,T}
t1 = time()
sz = map(length, idxes)
ret = OffsetArray(zeros(T, sz), idxes...)
@@ -71,7 +82,8 @@ function getindex_multiprocesses( ba::BigArray{D, T, N}, idxes::Union{UnitRange,
jobs = RemoteChannel(()->Channel{Tuple}( channelSize ));
results = RemoteChannel(()->Channel{OffsetArray}( channelSize ));

baIter = ChunkIterator(idxes, ba.chunkSize; offset=get_offset(ba))
chunkSize = get_chunk_size(ba)
baIter = ChunkIterator(idxes, chunkSize; offset=get_offset(ba))

@sync begin
@async begin
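
getindex_multiprocesses feeds work through a jobs RemoteChannel and collects blocks through a results RemoteChannel. A toy version of that producer/consumer shape, with integers standing in for chunks (standalone sketch, not package code):

    # Producer/consumer over RemoteChannels (illustrative toy).
    using Distributed
    addprocs(1)

    jobs    = RemoteChannel(() -> Channel{Int}(4))
    results = RemoteChannel(() -> Channel{Int}(4))

    @everywhere function consume(jobs, results, n)
        for _ in 1:n
            put!(results, take!(jobs) + 1)
        end
    end

    @async for x in 1:8  # feed jobs without blocking the main task
        put!(jobs, x)
    end
    remote_do(consume, workers()[1], jobs, results, 8)
    answers = [take!(results) for _ in 1:8]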
2 changes: 2 additions & 0 deletions src/modes/multithreads.jl
@@ -1,3 +1,5 @@
const TASK_NUM = 8
const CHUNK_CHANNEL_SIZE = 2

function setindex_multithreads_worker( channel::Channel{Tuple}, buf::Array{T,N}, ba::BigArray{D,T} ) where {D,T,N}
C = get_encoding(ba)
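
The PR's taskthreads.jl itself is not visible in this view. As a hedged sketch, the general task-per-chunk shape that a task-threaded mode typically builds on with Threads.@spawn (generic pattern, not the PR's actual implementation):

    # Spawn one task per chunk and gather the results (generic pattern only).
    using Base.Threads

    function process_chunks(f, chunks)
        tasks = [Threads.@spawn f(chunk) for chunk in chunks]
        return map(fetch, tasks)
    end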