@@ -30,68 +30,73 @@ static auto ReleaseArrowSchema(void *ptr) noexcept -> void {
30
30
delete schema;
31
31
}
32
32
33
- static auto CumSum (const struct ArrowArrayView *array_view,
33
+ template <size_t OffsetSize>
34
+ static auto CumSum (struct ArrowArrayStream *array_stream,
34
35
struct ArrowArray *out, bool skipna) {
35
36
bool seen_na = false ;
36
37
std::stringstream ss{};
37
38
38
- for (int64_t i = 0 ; i < array_view->length ; i++) {
39
- const bool isna = ArrowArrayViewIsNull (array_view, i);
40
- if (!skipna && (seen_na || isna)) {
41
- seen_na = true ;
42
- ArrowArrayAppendNull (out, 1 );
43
- } else {
44
- if (!isna) {
45
- const auto std_sv = ArrowArrayViewGetStringUnsafe (array_view, i);
46
- ss << std::string_view{std_sv.data ,
47
- static_cast <size_t >(std_sv.size_bytes )};
39
+ nanoarrow::ViewArrayStream array_stream_view (array_stream);
40
+ for (const auto &array : array_stream_view) {
41
+ for (const auto &sv : nanoarrow::ViewArrayAsBytes<OffsetSize>(&array)) {
42
+ if ((!sv || seen_na) && !skipna) {
43
+ seen_na = true ;
44
+ ArrowArrayAppendNull (out, 1 );
45
+ } else {
46
+ if (sv) {
47
+ ss << std::string_view{(*sv).data ,
48
+ static_cast <size_t >((*sv).size_bytes )};
49
+ }
50
+ const auto str = ss.str ();
51
+ const ArrowStringView asv{str.c_str (),
52
+ static_cast <int64_t >(str.size ())};
53
+ NANOARROW_THROW_NOT_OK (ArrowArrayAppendString (out, asv));
48
54
}
49
- const auto str = ss.str ();
50
- const ArrowStringView asv{str.c_str (), static_cast <int64_t >(str.size ())};
51
- NANOARROW_THROW_NOT_OK (ArrowArrayAppendString (out, asv));
52
55
}
53
56
}
54
57
}
55
58
59
+ // TODO: doesn't seem like all compilers in CI support this?
56
60
// template <typename T>
57
61
// concept MinOrMaxOp =
58
62
// std::same_as<T, std::less<>> || std::same_as<T, std::greater<>>;
59
63
60
- template <auto Op>
64
+ template <size_t OffsetSize, auto Op>
61
65
// requires MinOrMaxOp<decltype(Op)>
62
- static auto CumMinOrMax (const struct ArrowArrayView *array_view ,
66
+ static auto CumMinOrMax (struct ArrowArrayStream *array_stream ,
63
67
struct ArrowArray *out, bool skipna) {
64
68
bool seen_na = false ;
65
69
std::optional<std::string> current_str{};
66
70
67
- for (int64_t i = 0 ; i < array_view->length ; i++) {
68
- const bool isna = ArrowArrayViewIsNull (array_view, i);
69
- if (!skipna && (seen_na || isna)) {
70
- seen_na = true ;
71
- ArrowArrayAppendNull (out, 1 );
72
- } else {
73
- if (!isna || current_str) {
74
- if (!isna) {
75
- const auto asv = ArrowArrayViewGetStringUnsafe (array_view, i);
76
- const nb::str pyval{asv.data , static_cast <size_t >(asv.size_bytes )};
77
-
78
- if (current_str) {
79
- const nb::str pycurrent{current_str->data (), current_str->size ()};
80
- if (Op (pyval, pycurrent)) {
81
- current_str =
82
- std::string{asv.data , static_cast <size_t >(asv.size_bytes )};
71
+ nanoarrow::ViewArrayStream array_stream_view (array_stream);
72
+ for (const auto &array : array_stream_view) {
73
+ for (const auto &sv : nanoarrow::ViewArrayAsBytes<OffsetSize>(&array)) {
74
+ if ((!sv || seen_na) && !skipna) {
75
+ seen_na = true ;
76
+ ArrowArrayAppendNull (out, 1 );
77
+ } else {
78
+ if (sv || current_str) {
79
+ if (sv) {
80
+ const nb::str pyval{(*sv).data ,
81
+ static_cast <size_t >((*sv).size_bytes )};
82
+ if (current_str) {
83
+ const nb::str pycurrent{current_str->data (), current_str->size ()};
84
+ if (Op (pyval, pycurrent)) {
85
+ current_str = std::string{
86
+ (*sv).data , static_cast <size_t >((*sv).size_bytes )};
87
+ }
88
+ } else {
89
+ current_str = std::string{(*sv).data ,
90
+ static_cast <size_t >((*sv).size_bytes )};
83
91
}
84
- } else {
85
- current_str =
86
- std::string{asv.data , static_cast <size_t >(asv.size_bytes )};
87
92
}
88
- }
89
93
90
- struct ArrowStringView out_sv{
91
- current_str->data (), static_cast<int64_t>(current_str->size ())};
92
- NANOARROW_THROW_NOT_OK (ArrowArrayAppendString(out, out_sv));
93
- } else {
94
- ArrowArrayAppendEmpty (out, 1 );
94
+ struct ArrowStringView out_sv{
95
+ current_str->data (), static_cast<int64_t>(current_str->size ())};
96
+ NANOARROW_THROW_NOT_OK (ArrowArrayAppendString(out, out_sv));
97
+ } else {
98
+ ArrowArrayAppendEmpty (out, 1 );
99
+ }
95
100
}
96
101
}
97
102
}
@@ -131,7 +136,6 @@ class ArrowStringAccumulation {
131
136
switch (schema_view.type ) {
132
137
case NANOARROW_TYPE_STRING:
133
138
case NANOARROW_TYPE_LARGE_STRING:
134
- case NANOARROW_TYPE_STRING_VIEW:
135
139
break ;
136
140
default :
137
141
const auto error_message =
@@ -159,30 +163,29 @@ class ArrowStringAccumulation {
159
163
160
164
NANOARROW_THROW_NOT_OK (ArrowArrayStartAppending (uarray_out.get ()));
161
165
162
- nanoarrow::UniqueArray chunk{};
163
- int errcode{};
164
-
165
- while ((errcode = ArrowArrayStreamGetNext (stream_.get (), chunk.get (),
166
- nullptr ) == 0 ) &&
167
- chunk->release != nullptr ) {
168
- struct ArrowArrayView array_view{};
169
- NANOARROW_THROW_NOT_OK (
170
- ArrowArrayViewInitFromSchema (&array_view, schema_.get (), nullptr ));
171
-
172
- NANOARROW_THROW_NOT_OK (
173
- ArrowArrayViewSetArray (&array_view, chunk.get (), nullptr ));
174
-
175
- if (accumulation_ == " cumsum" ) {
176
- CumSum (&array_view, uarray_out.get (), skipna_);
177
- } else if (accumulation_ == " cummin" ) {
178
- CumMinOrMax<std::less{}>(&array_view, uarray_out.get (), skipna_);
179
- } else if (accumulation_ == " cummax" ) {
180
- CumMinOrMax<std::greater{}>(&array_view, uarray_out.get (), skipna_);
166
+ if (accumulation_ == " cumsum" ) {
167
+ if (schema_view.type == NANOARROW_TYPE_STRING) {
168
+ CumSum<32 >(stream_.get (), uarray_out.get (), skipna_);
181
169
} else {
182
- throw std::runtime_error ( " Unexpected branch " );
170
+ CumSum< 64 >(stream_. get (), uarray_out. get (), skipna_ );
183
171
}
184
172
185
- chunk.reset ();
173
+ } else if (accumulation_ == " cummin" ) {
174
+ if (schema_view.type == NANOARROW_TYPE_STRING) {
175
+ CumMinOrMax<32 , std::less{}>(stream_.get (), uarray_out.get (), skipna_);
176
+ } else {
177
+ CumMinOrMax<64 , std::less{}>(stream_.get (), uarray_out.get (), skipna_);
178
+ }
179
+ } else if (accumulation_ == " cummax" ) {
180
+ if (schema_view.type == NANOARROW_TYPE_STRING) {
181
+ CumMinOrMax<32 , std::greater{}>(stream_.get (), uarray_out.get (),
182
+ skipna_);
183
+ } else {
184
+ CumMinOrMax<64 , std::greater{}>(stream_.get (), uarray_out.get (),
185
+ skipna_);
186
+ }
187
+ } else {
188
+ throw std::runtime_error (" Unexpected branch" );
186
189
}
187
190
188
191
NANOARROW_THROW_NOT_OK (
0 commit comments