@@ -1112,40 +1112,52 @@ function row_hash(ds::AbstractDataset, f::Function, cols = :; threads = true)
1112
1112
end
1113
1113
# Convenience method: forwards to the `f`-accepting method of `row_hash`,
# passing `identity` as `f` and handing `cols` and `threads` through unchanged.
row_hash(ds::AbstractDataset, cols = :; threads = true) =
    row_hash(ds, identity, cols; threads = threads)
1114
1114
1115
# Turn the per-row byte buffer `init0` into one `String` per row, stored in `res`.
#
# For every `i` in `1:nrow(ds)` the bytes `init0[1:curr_pos[i]-1, i]` are
# converted to a `String` and written to `res[i]`.
#
# Arguments
# - `res`:      output container, assigned at indices `1:nrow(ds)`.
# - `init0`:    two-dimensional byte buffer; column `i` holds the bytes of row `i`.
# - `curr_pos`: per-row position; `curr_pos[i] - 1` bytes are taken from column `i`.
# - `ds`:       only consulted for `nrow(ds)`.
# - `threads`:  when `true`, the loop runs under `Threads.@threads`.
#
# Returns `nothing`; the result is delivered through `res`.
function _convert_uint8_to_string!(res, init0, curr_pos, ds, threads)
    # Single definition of the per-row work, shared by both loop variants.
    row_string(i) = String(view(init0, 1:(curr_pos[i] - 1), i))
    if threads
        Threads.@threads for i in 1:nrow(ds)
            res[i] = row_string(i)
        end
    else
        for i in 1:nrow(ds)
            res[i] = row_string(i)
        end
    end
    return nothing
end
1126
# Write the bytes `last_uint` into every row of the byte buffer `init0` and
# advance the per-row position `curr_pos` accordingly.
#
# For each `i` in `1:nrow(ds)` the `last_len` bytes of `last_uint` are stored at
# `init0[curr_pos[i]-1 : curr_pos[i]+last_len-2, i]`, i.e. the write starts one
# byte *before* `curr_pos[i]` and therefore overwrites that byte. Afterwards
# `curr_pos[i]` is increased by `last_len - 1`, so `curr_pos[i] - 1` is again
# the index of the last byte written.
# NOTE(review): the overwritten byte is presumably the final byte of a trailing
# delimiter left by the caller; this only removes the whole delimiter when it is
# a single byte -- confirm against the code that fills `init0`.
#
# Arguments
# - `init0`:     two-dimensional byte buffer; column `i` belongs to row `i`.
# - `curr_pos`:  per-row position, updated in place.
# - `ds`:        only consulted for `nrow(ds)`.
# - `last_uint`: the bytes to write.
# - `last_len`:  number of bytes in `last_uint`.
# - `threads`:   when `true`, the loop runs under `Threads.@threads`.
#
# Returns `nothing`; `init0` and `curr_pos` are mutated.
function _add_last_for_join!(init0, curr_pos, ds, last_uint, last_len, threads)
    # Single definition of the per-row work, shared by both loop variants.
    function append_last!(i)
        start = curr_pos[i] - 1
        init0[start:(start + last_len - 1), i] = last_uint
        curr_pos[i] += last_len - 1
        return nothing
    end
    if threads
        Threads.@threads for i in 1:nrow(ds)
            append_last!(i)
        end
    else
        for i in 1:nrow(ds)
            append_last!(i)
        end
    end
    return nothing
end
1131
1139
1132
# Build one string per row of `ds` from the columns selected by `cols`.
#
# The row text is assembled as raw bytes in a `Matrix{UInt8}` (one column per
# row of `ds`) by `row_join!`, optionally finished with the bytes of `last`, and
# finally converted to `String`s.
#
# Arguments
# - `ds`:      the dataset.
# - `cols`:    column selector, resolved through `multiple_getindex(index(ds), cols)`.
#
# Keywords
# - `threads`: forwarded to `byrow`, `row_join!` and the helper loops.
# - `delim`:   delimiter text, passed to `row_join!` as code units.
# - `last`:    text written at the end of every row; when empty, one trailing
#              byte per row is dropped instead.
#
# Returns a `Vector{Union{String, Missing}}` with `nrow(ds)` elements.
function row_join(ds::AbstractDataset, cols = :; threads = true, delim::AbstractString = ",", last::AbstractString = "")
    colsidx = multiple_getindex(index(ds), cols)
    res = Vector{Union{String, Missing}}(undef, nrow(ds))
    # `maximum` throws on an empty collection, so a dataset without rows is
    # answered directly with the (empty) result.
    nrow(ds) == 0 && return res

    # Upper bound on the number of bytes needed for any single row.
    # NOTE(review): `length` counts characters, not bytes; if `__STRING` can
    # produce non-ASCII text this under-estimates the space needed -- confirm.
    max_line_size = maximum(byrow(ds, sum, colsidx, by = y -> length(__STRING(y)), threads = threads))
    # Delimiter and `last` are written as bytes, so size them with `ncodeunits`
    # (never smaller than `length`, hence safe for non-ASCII text).
    max_line_size += ncodeunits(delim) * length(colsidx) + ncodeunits(last) + 1
    init0 = Matrix{UInt8}(undef, max_line_size, nrow(ds))
    curr_pos = ones(Int, nrow(ds))

    delimiter = codeunits(delim)
    row_join!(init0, curr_pos, ds, repeat([identity], length(colsidx)), colsidx; delim = delimiter, quotechar = nothing, threads = threads)
    if isempty(last)
        # Drop the final byte of every row.
        # NOTE(review): presumably this removes the trailing delimiter written by
        # `row_join!`; that is only complete for a single-byte `delim` -- confirm.
        curr_pos .-= 1
    else
        last_uint = codeunits(last)
        _add_last_for_join!(init0, curr_pos, ds, last_uint, length(last_uint), threads)
    end
    _convert_uint8_to_string!(res, init0, curr_pos, ds, threads)
    return res
end
1150
1162
1151
1163
0 commit comments