Skip to content

Commit

Permalink
apacheGH-43787: [C++] Register the new Opaque extension type by defau…
Browse files Browse the repository at this point in the history
…lt (apache#43788)

This is to resolve apache#43787

> The Opaque extension type implementation for C++ (plus python bindings) was added in apache#43458, but it was not registered by default, which we should do for canonical extension types (see apache#43458 (comment))

Additionally, this adds `bool8` extension type builds with `ARROW_JSON=false` as discussed [here](apache@5258819#r145613657)

### Rationale for this change

Canonical types should be registered by default if possible (except e.g. if they can't be compiled due to `ARROW_JSON=false`).

### What changes are included in this PR?

This adds default registration for `opaque`, changes when `bool8` is built and moves all canonical tests under the same test target.

### Are these changes tested?

Changes are tested by previously existing tests.

### Are there any user-facing changes?

`opaue` will now be registered by default and `bool8` will be present in case `ARROW_JSON=false` at build time.
* GitHub Issue: apache#43787

Authored-by: Rok Mihevc <[email protected]>
Signed-off-by: Rok Mihevc <[email protected]>
  • Loading branch information
rok authored Aug 22, 2024
1 parent fc54ead commit b4f7efe
Show file tree
Hide file tree
Showing 9 changed files with 28 additions and 27 deletions.
2 changes: 1 addition & 1 deletion cpp/src/arrow/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -374,6 +374,7 @@ set(ARROW_SRCS
datum.cc
device.cc
extension_type.cc
extension/bool8.cc
pretty_print.cc
record_batch.cc
result.cc
Expand Down Expand Up @@ -906,7 +907,6 @@ endif()

if(ARROW_JSON)
arrow_add_object_library(ARROW_JSON
extension/bool8.cc
extension/fixed_shape_tensor.cc
extension/opaque.cc
json/options.cc
Expand Down
18 changes: 6 additions & 12 deletions cpp/src/arrow/extension/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -15,22 +15,16 @@
# specific language governing permissions and limitations
# under the License.

add_arrow_test(test
SOURCES
bool8_test.cc
PREFIX
"arrow-extension-bool8")
set(CANONICAL_EXTENSION_TESTS bool8_test.cc)

add_arrow_test(test
SOURCES
fixed_shape_tensor_test.cc
PREFIX
"arrow-fixed-shape-tensor")
if(ARROW_JSON)
list(APPEND CANONICAL_EXTENSION_TESTS fixed_shape_tensor_test.cc opaque_test.cc)
endif()

add_arrow_test(test
SOURCES
opaque_test.cc
${CANONICAL_EXTENSION_TESTS}
PREFIX
"arrow-extension-opaque")
"arrow-canonical-extensions")

arrow_install_all_headers("arrow/extension")
2 changes: 2 additions & 0 deletions cpp/src/arrow/extension/bool8.h
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,8 @@
// specific language governing permissions and limitations
// under the License.

#pragma once

#include "arrow/extension_type.h"

namespace arrow::extension {
Expand Down
1 change: 0 additions & 1 deletion cpp/src/arrow/extension/bool8_test.cc
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,6 @@
#include "arrow/io/memory.h"
#include "arrow/ipc/reader.h"
#include "arrow/ipc/writer.h"
#include "arrow/testing/extension_type.h"
#include "arrow/testing/gtest_util.h"

namespace arrow {
Expand Down
2 changes: 2 additions & 0 deletions cpp/src/arrow/extension/fixed_shape_tensor.h
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,8 @@
// specific language governing permissions and limitations
// under the License.

#pragma once

#include "arrow/extension_type.h"

namespace arrow {
Expand Down
2 changes: 2 additions & 0 deletions cpp/src/arrow/extension/opaque.h
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,8 @@
// specific language governing permissions and limitations
// under the License.

#pragma once

#include "arrow/extension_type.h"
#include "arrow/type.h"

Expand Down
2 changes: 0 additions & 2 deletions cpp/src/arrow/extension/opaque_test.cc
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,6 @@
#include "arrow/ipc/reader.h"
#include "arrow/ipc/writer.h"
#include "arrow/record_batch.h"
#include "arrow/testing/extension_type.h"
#include "arrow/testing/gtest_util.h"
#include "arrow/type_fwd.h"
#include "arrow/util/checked_cast.h"
Expand Down Expand Up @@ -169,7 +168,6 @@ TEST(OpaqueType, MetadataRoundTrip) {
TEST(OpaqueType, BatchRoundTrip) {
auto type = internal::checked_pointer_cast<extension::OpaqueType>(
extension::opaque(binary(), "geometry", "adbc.postgresql"));
ExtensionTypeGuard guard(type);

auto storage = ArrayFromJSON(binary(), R"(["foobar", null])");
auto array = ExtensionType::WrapArray(type, storage);
Expand Down
21 changes: 13 additions & 8 deletions cpp/src/arrow/extension_type.cc
Original file line number Diff line number Diff line change
Expand Up @@ -27,9 +27,10 @@
#include "arrow/array/util.h"
#include "arrow/chunked_array.h"
#include "arrow/config.h"
#ifdef ARROW_JSON
#include "arrow/extension/bool8.h"
#ifdef ARROW_JSON
#include "arrow/extension/fixed_shape_tensor.h"
#include "arrow/extension/opaque.h"
#endif
#include "arrow/status.h"
#include "arrow/type.h"
Expand Down Expand Up @@ -143,17 +144,21 @@ static std::once_flag registry_initialized;
namespace internal {

static void CreateGlobalRegistry() {
// Register canonical extension types

g_registry = std::make_shared<ExtensionTypeRegistryImpl>();
std::vector<std::shared_ptr<DataType>> ext_types{extension::bool8()};

#ifdef ARROW_JSON
// Register canonical extension types
auto fst_ext_type =
checked_pointer_cast<ExtensionType>(extension::fixed_shape_tensor(int64(), {}));
ARROW_CHECK_OK(g_registry->RegisterType(fst_ext_type));

auto bool8_ext_type = checked_pointer_cast<ExtensionType>(extension::bool8());
ARROW_CHECK_OK(g_registry->RegisterType(bool8_ext_type));
ext_types.push_back(extension::fixed_shape_tensor(int64(), {}));
ext_types.push_back(extension::opaque(null(), "", ""));
#endif

// Register canonical extension types
for (const auto& ext_type : ext_types) {
ARROW_CHECK_OK(
g_registry->RegisterType(checked_pointer_cast<ExtensionType>(ext_type)));
}
}

} // namespace internal
Expand Down
5 changes: 2 additions & 3 deletions python/pyarrow/tests/test_extension_type.py
Original file line number Diff line number Diff line change
Expand Up @@ -1693,9 +1693,8 @@ def test_opaque_type(pickle_module, storage_type, storage):
arr = pa.ExtensionArray.from_storage(opaque_type, storage)
assert isinstance(arr, opaque_arr_class)

with registered_extension_type(opaque_type):
buf = ipc_write_batch(pa.RecordBatch.from_arrays([arr], ["ext"]))
batch = ipc_read_batch(buf)
buf = ipc_write_batch(pa.RecordBatch.from_arrays([arr], ["ext"]))
batch = ipc_read_batch(buf)

assert batch.column(0).type.extension_name == "arrow.opaque"
assert isinstance(batch.column(0), opaque_arr_class)
Expand Down

0 comments on commit b4f7efe

Please sign in to comment.