Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
10 changes: 10 additions & 0 deletions src/blas/highlevel.jl
Original file line number Diff line number Diff line change
Expand Up @@ -90,6 +90,16 @@ end
# BLAS 2
#

const ROCUpperOrUnitUpperTriangular = LinearAlgebra.UpperOrUnitUpperTriangular{
<:Any,<:Union{<:ROCArray, Adjoint{<:Any, <:ROCArray}, Transpose{<:Any, <:ROCArray}}}
const ROCLowerOrUnitLowerTriangular = LinearAlgebra.LowerOrUnitLowerTriangular{
<:Any,<:Union{<:ROCArray, Adjoint{<:Any, <:ROCArray}, Transpose{<:Any, <:ROCArray}}}

LinearAlgebra.istriu(::ROCUpperOrUnitUpperTriangular) = true
LinearAlgebra.istril(::ROCUpperOrUnitUpperTriangular) = false
LinearAlgebra.istriu(::ROCLowerOrUnitLowerTriangular) = false
LinearAlgebra.istril(::ROCLowerOrUnitLowerTriangular) = true

# multiplication
LinearAlgebra.generic_trimatmul!(
c::StridedROCVector{T}, uploc, isunitc, tfun::Function,
Expand Down
8 changes: 4 additions & 4 deletions src/dnn/convolution.jl
Original file line number Diff line number Diff line change
Expand Up @@ -38,10 +38,10 @@ get_conv_cache_type(::Type{miopenConvBwdDataAlgorithm_t}) = CONV_BWD_DATA_BENCHM
get_conv_cache_type(::Type{miopenConvBwdWeightsAlgorithm_t}) = CONV_BWD_WEIGHT_BENCHMARK_CACHE

function get_benchmark_cache(conv_type::C, conv_args) where C <: CONV_ALGOS
perf_results = lock(get_conv_cache_type(conv_type)) do cache
get(cache, conv_args, nothing)
end
isnothing(perf_results) && return nothing
cache = get_conv_cache_type(conv_type).payload
perf_results = get(cache, conv_args, nothing)
perf_results ≡ nothing && return nothing

workspace = ROCArray{UInt8}(undef, perf_results.memory)
perf_results, workspace
end
Expand Down
15 changes: 10 additions & 5 deletions src/hip/HIP.jl
Original file line number Diff line number Diff line change
Expand Up @@ -35,13 +35,18 @@ end
const CONTEXTS = AMDGPU.LockedObject(Dict{HIPDevice,HIPContext}())

function HIPContext(device::HIPDevice)
lock(CONTEXTS) do contexts
contexts = CONTEXTS.payload

ctx = get(contexts, device, nothing)
ctx ≡ nothing || return ctx

Base.@lock CONTEXTS.lock begin
get!(contexts, device) do
context_ref = Ref{hipContext_t}()
hipCtxCreate(context_ref, Cuint(0), device.device)
context = HIPContext(context_ref[], true)
ctx_ref = Ref{hipContext_t}()
hipCtxCreate(ctx_ref, Cuint(0), device.device)
ctx = HIPContext(ctx_ref[], true)
device!(device)
return context
return ctx
end
end
end
Expand Down
23 changes: 14 additions & 9 deletions src/memory.jl
Original file line number Diff line number Diff line change
Expand Up @@ -119,8 +119,14 @@ MemoryStats() = MemoryStats(0, 0.0, 0, 0.0, 0.0, 0)
const MEMORY_STATS = AMDGPU.LockedObject(Dict{Int, MemoryStats}())

function memory_stats(dev::HIPDevice = AMDGPU.device())
Base.lock(MEMORY_STATS) do ms
get!(() -> MemoryStats(), ms, HIP.device_id(dev))
ms = MEMORY_STATS.payload
did = HIP.device_id(dev)

stats = get(ms, did, nothing)
stats ≡ nothing || return stats

Base.@lock MEMORY_STATS.lock begin
get!(() -> MemoryStats(), ms, did)
end
end

Expand Down Expand Up @@ -158,7 +164,7 @@ function maybe_collect(; blocking::Bool = false)

# Check if we are under memory pressure.
pressure = stats.live / stats.size
min_pressure = 0.5
min_pressure = blocking ? 0.5 : 0.75
pressure < min_pressure && return

# TODO take allocations into account
Expand All @@ -175,22 +181,21 @@ function maybe_collect(; blocking::Bool = false)
blocking && (max_gc_rate *= 2;)

# And even more if the pressure is high.
pressure > 0.6 && (max_gc_rate *= 2;)
pressure > 0.8 && (max_gc_rate *= 2;)
# Always try to collect if pressure ≥ 0.9.
gc_rate > max_gc_rate && pressure < 0.9 && return
pressure > 0.9 && (max_gc_rate *= 2;)
pressure > 0.95 && (max_gc_rate *= 2;)
gc_rate > max_gc_rate && return

Base.@atomic stats.last_time = current_time

# Call the GC.
pre_gc_live = stats.live
gc_time = Base.@elapsed GC.gc(pressure > 0.8 ? true : false)
gc_time = Base.@elapsed GC.gc(false)
post_gc_live = stats.live

# Update stats.
freed = pre_gc_live - post_gc_live
Base.@atomic stats.last_freed = freed
Base.@atomic stats.last_gc_time = gc_time
Base.@atomic stats.last_gc_time = 0.75 * stats.last_gc_time + 0.25 * gc_time
return
end

Expand Down
13 changes: 9 additions & 4 deletions src/runtime/memory/hip.jl
Original file line number Diff line number Diff line change
@@ -1,10 +1,15 @@
# Device ID => HIPMemoryPool
const MEMORY_POOLS = AMDGPU.LockedObject(
Dict{Int64, HIP.HIPMemoryPool}())
const MEMORY_POOLS = AMDGPU.LockedObject(Dict{Int64, HIP.HIPMemoryPool}())

function pool_create(dev::HIPDevice)
Base.lock(MEMORY_POOLS) do pools
get!(pools, HIP.device_id(dev)) do
mp = MEMORY_POOLS.payload
did = HIP.device_id(dev)

pool = get(mp, did, nothing)
pool ≡ nothing || return pool

Base.@lock MEMORY_POOLS.lock begin
get!(mp, HIP.device_id(dev)) do
max_size::UInt64 = AMDGPU.hard_memory_limit()
max_size = max_size != typemax(UInt64) ? max_size : UInt64(0)

Expand Down