Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Take fewer locks #705

Merged
merged 3 commits into from
Dec 3, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
10 changes: 10 additions & 0 deletions src/blas/highlevel.jl
Original file line number Diff line number Diff line change
Expand Up @@ -90,6 +90,16 @@
# BLAS 2
#

const ROCUpperOrUnitUpperTriangular = LinearAlgebra.UpperOrUnitUpperTriangular{
<:Any,<:Union{<:ROCArray, Adjoint{<:Any, <:ROCArray}, Transpose{<:Any, <:ROCArray}}}
const ROCLowerOrUnitLowerTriangular = LinearAlgebra.LowerOrUnitLowerTriangular{
<:Any,<:Union{<:ROCArray, Adjoint{<:Any, <:ROCArray}, Transpose{<:Any, <:ROCArray}}}

LinearAlgebra.istriu(::ROCUpperOrUnitUpperTriangular) = true
LinearAlgebra.istril(::ROCUpperOrUnitUpperTriangular) = false
LinearAlgebra.istriu(::ROCLowerOrUnitLowerTriangular) = false
LinearAlgebra.istril(::ROCLowerOrUnitLowerTriangular) = true

Check warning on line 101 in src/blas/highlevel.jl

View check run for this annotation

Codecov / codecov/patch

src/blas/highlevel.jl#L98-L101

Added lines #L98 - L101 were not covered by tests

# multiplication
LinearAlgebra.generic_trimatmul!(
c::StridedROCVector{T}, uploc, isunitc, tfun::Function,
Expand Down
8 changes: 4 additions & 4 deletions src/dnn/convolution.jl
Original file line number Diff line number Diff line change
Expand Up @@ -38,10 +38,10 @@ get_conv_cache_type(::Type{miopenConvBwdDataAlgorithm_t}) = CONV_BWD_DATA_BENCHM
get_conv_cache_type(::Type{miopenConvBwdWeightsAlgorithm_t}) = CONV_BWD_WEIGHT_BENCHMARK_CACHE

function get_benchmark_cache(conv_type::C, conv_args) where C <: CONV_ALGOS
perf_results = lock(get_conv_cache_type(conv_type)) do cache
get(cache, conv_args, nothing)
end
isnothing(perf_results) && return nothing
cache = get_conv_cache_type(conv_type).payload
perf_results = get(cache, conv_args, nothing)
perf_results ≡ nothing && return nothing

workspace = ROCArray{UInt8}(undef, perf_results.memory)
perf_results, workspace
end
Expand Down
15 changes: 10 additions & 5 deletions src/hip/HIP.jl
Original file line number Diff line number Diff line change
Expand Up @@ -35,13 +35,18 @@ end
const CONTEXTS = AMDGPU.LockedObject(Dict{HIPDevice,HIPContext}())

function HIPContext(device::HIPDevice)
lock(CONTEXTS) do contexts
contexts = CONTEXTS.payload

ctx = get(contexts, device, nothing)
ctx ≡ nothing || return ctx

Base.@lock CONTEXTS.lock begin
get!(contexts, device) do
context_ref = Ref{hipContext_t}()
hipCtxCreate(context_ref, Cuint(0), device.device)
context = HIPContext(context_ref[], true)
ctx_ref = Ref{hipContext_t}()
hipCtxCreate(ctx_ref, Cuint(0), device.device)
ctx = HIPContext(ctx_ref[], true)
device!(device)
return context
return ctx
end
end
end
Expand Down
23 changes: 14 additions & 9 deletions src/memory.jl
Original file line number Diff line number Diff line change
Expand Up @@ -119,8 +119,14 @@
const MEMORY_STATS = AMDGPU.LockedObject(Dict{Int, MemoryStats}())

function memory_stats(dev::HIPDevice = AMDGPU.device())
Base.lock(MEMORY_STATS) do ms
get!(() -> MemoryStats(), ms, HIP.device_id(dev))
ms = MEMORY_STATS.payload
did = HIP.device_id(dev)

stats = get(ms, did, nothing)
stats ≡ nothing || return stats

Base.@lock MEMORY_STATS.lock begin
get!(() -> MemoryStats(), ms, did)
end
end

Expand Down Expand Up @@ -158,7 +164,7 @@

# Check if we are under memory pressure.
pressure = stats.live / stats.size
min_pressure = 0.5
min_pressure = blocking ? 0.5 : 0.75
pressure < min_pressure && return

# TODO take allocations into account
Expand All @@ -175,22 +181,21 @@
blocking && (max_gc_rate *= 2;)

# And even more if the pressure is high.
pressure > 0.6 && (max_gc_rate *= 2;)
pressure > 0.8 && (max_gc_rate *= 2;)
# Always try to collect if pressure ≥ 0.9.
gc_rate > max_gc_rate && pressure < 0.9 && return
pressure > 0.9 && (max_gc_rate *= 2;)
pressure > 0.95 && (max_gc_rate *= 2;)
gc_rate > max_gc_rate && return

Check warning on line 186 in src/memory.jl

View check run for this annotation

Codecov / codecov/patch

src/memory.jl#L184-L186

Added lines #L184 - L186 were not covered by tests

Base.@atomic stats.last_time = current_time

# Call the GC.
pre_gc_live = stats.live
gc_time = Base.@elapsed GC.gc(pressure > 0.8 ? true : false)
gc_time = Base.@elapsed GC.gc(false)

Check warning on line 192 in src/memory.jl

View check run for this annotation

Codecov / codecov/patch

src/memory.jl#L192

Added line #L192 was not covered by tests
post_gc_live = stats.live

# Update stats.
freed = pre_gc_live - post_gc_live
Base.@atomic stats.last_freed = freed
Base.@atomic stats.last_gc_time = gc_time
Base.@atomic stats.last_gc_time = 0.75 * stats.last_gc_time + 0.25 * gc_time

Check warning on line 198 in src/memory.jl

View check run for this annotation

Codecov / codecov/patch

src/memory.jl#L198

Added line #L198 was not covered by tests
return
end

Expand Down
13 changes: 9 additions & 4 deletions src/runtime/memory/hip.jl
Original file line number Diff line number Diff line change
@@ -1,10 +1,15 @@
# Device ID => HIPMemoryPool
const MEMORY_POOLS = AMDGPU.LockedObject(
Dict{Int64, HIP.HIPMemoryPool}())
const MEMORY_POOLS = AMDGPU.LockedObject(Dict{Int64, HIP.HIPMemoryPool}())

function pool_create(dev::HIPDevice)
Base.lock(MEMORY_POOLS) do pools
get!(pools, HIP.device_id(dev)) do
mp = MEMORY_POOLS.payload
did = HIP.device_id(dev)

pool = get(mp, did, nothing)
pool ≡ nothing || return pool

Base.@lock MEMORY_POOLS.lock begin
get!(mp, HIP.device_id(dev)) do
max_size::UInt64 = AMDGPU.hard_memory_limit()
max_size = max_size != typemax(UInt64) ? max_size : UInt64(0)

Expand Down