Skip to content

Commit 50f4541

Browse files
committed
add byrow(join)
1 parent b983962 commit 50f4541

File tree

2 files changed

+39
-0
lines changed

2 files changed

+39
-0
lines changed

src/byrow/byrow.jl

+2
Original file line numberDiff line numberDiff line change
@@ -143,6 +143,8 @@ byrow(ds::AbstractDataset, ::typeof(stdze!), cols::MultiColumnIndex = names(ds,
143143
# Row-wise `hash` over several columns: forwards to `row_hash`, passing `by`
# as its function argument. Threading defaults to on when the data set has
# more than `__NCORES*10` rows.
function byrow(
    ds::AbstractDataset,
    ::typeof(hash),
    cols::MultiColumnIndex = :;
    by = identity,
    threads = nrow(ds) > __NCORES*10
)
    return row_hash(ds, by, cols, threads = threads)
end
144144
# Row-wise `hash` over a single column: wraps `col` in a one-element vector
# and re-dispatches to the multi-column `byrow(ds, hash, ...)` method.
function byrow(
    ds::AbstractDataset,
    ::typeof(hash),
    col::ColumnIndex;
    by = identity,
    threads = nrow(ds) > __NCORES*10
)
    return byrow(ds, hash, [col]; by = by, threads = threads)
end
145145

146+
# Row-wise `join`: forwards the selected columns and the `delim` / `last`
# keywords to `row_join2`. Note that `delim` defaults to the empty string
# here. Threading defaults to on when the data set has more than
# `__NCORES*10` rows.
function byrow(
    ds::AbstractDataset,
    ::typeof(join),
    col::MultiColumnIndex;
    threads = nrow(ds) > __NCORES*10,
    delim = "",
    last = ""
)
    return row_join2(ds, col, threads = threads, delim = delim, last = last)
end
147+
146148
# Row-wise `mapreduce`: reduces the selected columns of `ds` with `op`
# after applying `f` to each column.
#
# Defaults:
#   cols  the columns returned by `names(ds, Union{Missing, Number})`
#   op    broadcasted addition (`.+`)
#   f     `identity`
#   init  a `missings` vector of length `nrow(ds)` whose element type is the
#         `promote_type` of the selected columns' element types
# Any extra keyword arguments are passed through to `mapreduce`.
function byrow(
    ds::AbstractDataset,
    ::typeof(mapreduce),
    cols::MultiColumnIndex = names(ds, Union{Missing, Number});
    op = .+,
    f = identity,
    init = missings(mapreduce(eltype, promote_type, view(_columns(ds),index(ds)[cols])), nrow(ds)),
    kwargs...
)
    selected = eachcol(ds[!, cols])
    return mapreduce(f, op, selected, init = init; kwargs...)
end
147149

148150
function byrow(ds::AbstractDataset, f::Function, cols::MultiColumnIndex; threads = nrow(ds)>1000)

src/byrow/row_functions.jl

+37
Original file line numberDiff line numberDiff line change
@@ -1112,6 +1112,43 @@ function row_hash(ds::AbstractDataset, f::Function, cols = :; threads = true)
11121112
end
11131113
# Convenience method: hash rows without a transformation by delegating to
# the `row_hash(ds, f, cols; threads)` method with `f = identity`.
function row_hash(ds::AbstractDataset, cols = :; threads = true)
    return row_hash(ds, identity, cols; threads = threads)
end
11141114

1115+
# Append one column's contribution to the per-row join buffers.
#
# Every call is treated as "the next column": the counter `idx` is advanced
# first, and its new value decides what happens to rows `lo:hi`:
#   * column 1 overwrites `x[i]` (the buffer may still be uninitialised),
#     while later columns append to the existing `x[i]`;
#   * a column before the `p`-th is followed by `delim`; the `p`-th (or any
#     later) column is followed by `last`.
# When `p == 1` the only column is also the final one, so it is followed by
# `last` rather than by `delim`.
#
# Arguments
#   x       per-row output buffer; mutated in place and returned
#   y       the column being appended; each element goes through `STRING`
#   delim   text written after every non-final column
#   last    text written after the final column
#   p       total number of columns being joined
#   idx     `Ref`-like counter of columns processed so far; incremented here
#   lo, hi  inclusive range of rows to process
Base.@propagate_inbounds function _op_for_join!(x, y, delim, last, p, idx, lo, hi)
    idx[] += 1
    k = idx[]
    # The column position is the same for every row, so decide the trailing
    # text once instead of re-testing it inside the loop.
    sep = k < p ? delim : last
    if k == 1
        for i in lo:hi
            x[i] = STRING(y[i])
            x[i] *= sep
        end
    else
        for i in lo:hi
            x[i] *= STRING(y[i])
            x[i] *= sep
        end
    end
    return x
end
1131+
1132+
# Join the values of the selected columns row by row.
#
# Returns a `Vector{Union{Missing, String}}` with one entry per row of `ds`.
# The columns are visited in the order given by
# `multiple_getindex(index(ds), cols)` and each one is appended to the row
# buffers by `_op_for_join!`, which receives `delim`, `last` and the total
# number of selected columns.
#
# Keywords
#   threads  when `true`, the rows are split into `__NCORES` contiguous
#            chunks that are processed by `Threads.@threads`; each chunk
#            writes only its own `lo:hi` slice of the result
#   delim    forwarded to `_op_for_join!` (default ",")
#   last     forwarded to `_op_for_join!` (default "")
#
# NOTE(review): if the selection resolves to zero columns nothing is ever
# written and the returned vector is left uninitialised - confirm whether
# callers can hit that case.
function row_join2(ds::AbstractDataset, cols = :; threads = true, delim = ",", last = "")
    colsidx = multiple_getindex(index(ds), cols)
    ncols = length(colsidx)
    columns = view(_columns(ds), colsidx)
    init0 = Vector{Union{Missing, String}}(undef, nrow(ds))

    if threads
        # Chunk size; when there are fewer rows than `__NCORES` this is 0, the
        # leading chunks are empty and the last chunk covers every row.
        cz = div(length(init0), __NCORES)
        Threads.@threads for i in 1:__NCORES
            lo = (i - 1) * cz + 1
            hi = i == __NCORES ? length(init0) : i * cz
            # Each chunk keeps its own column counter.
            idx = Ref{Int}(0)
            # The columns must be applied strictly left to right, so iterate
            # explicitly rather than relying on the evaluation order of a
            # reduction.
            for col in columns
                _op_for_join!(init0, col, delim, last, ncols, idx, lo, hi)
            end
        end
    else
        idx = Ref{Int}(0)
        for col in columns
            _op_for_join!(init0, col, delim, last, ncols, idx, 1, length(init0))
        end
    end
    return init0
end
1150+
1151+
11151152
function _fill_col!(inmat, column, rows, j)
11161153
for i in 1:length(rows)
11171154
inmat[j, i] = column[rows[i]]

0 commit comments

Comments
 (0)