Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
74 changes: 74 additions & 0 deletions .github/workflows/gluten_daily.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,74 @@
# Copyright (c) Facebook, Inc. and its affiliates.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

name: Gluten Daily Build

on:
push:
branches:
- 'main'

concurrency:
group: ${{ github.repository }}-${{ github.head_ref || github.sha }}-${{ github.workflow }}
cancel-in-progress: true

jobs:

gluten-cpp-build:
name: gluten cpp build
# prevent errors when forks ff their main branch
if: ${{ github.repository == 'IBM/velox' }}
runs-on: ubuntu-22.04
env:
CCACHE_DIR: "${{ github.workspace }}/.ccache"
steps:
- uses: actions/checkout@v4
- name: Get Ccache
uses: actions/cache/restore@v4
with:
path: '${{ env.CCACHE_DIR }}'
key: ccache-centos7-release-default-${{github.sha}}
restore-keys: |
ccache-centos7-release-default
- name: Setup Gluten
run: |
git clone --depth 1 https://github.com/apache/incubator-gluten gluten && cd gluten
BRANCH=$(echo ${GITHUB_REF#refs/heads/})
sed -i 's/oap-project/IBM/g' ep/build-velox/src/get_velox.sh
#sed -i 's/VELOX_BRANCH=2025.*/VELOX_BRANCH=main/g' ep/build-velox/src/get_velox.sh
- name: Build Gluten native libraries
run: |
docker pull apache/gluten:vcpkg-centos-7
docker run -v $GITHUB_WORKSPACE:/work -w /work apache/gluten:vcpkg-centos-7 bash -c "
git config --global --add safe.directory /work
set -e
df -a
cd /work/gluten
export CCACHE_DIR=/work/.ccache
mkdir -p /work/.ccache
source /opt/rh/devtoolset-11/enable
export NUM_THREADS=4
ccache -sz
./dev/builddeps-veloxbe.sh --enable_vcpkg=ON --build_arrow=OFF --build_tests=OFF --build_benchmarks=OFF \
--build_examples=OFF --enable_s3=ON --enable_gcs=ON --enable_hdfs=ON --enable_abfs=ON --velox_home=/work
ccache -s
mkdir -p /work/.m2/repository/org/apache/arrow/
cp -r /root/.m2/repository/org/apache/arrow/* /work/.m2/repository/org/apache/arrow/
"
- name: Save ccache
uses: actions/cache/save@v4
id: ccache
with:
path: '${{ env.CCACHE_DIR }}'
key: ccache-centos7-release-default-${{github.sha}}
15 changes: 0 additions & 15 deletions .github/workflows/linux-build.yml
Original file line number Diff line number Diff line change
Expand Up @@ -15,21 +15,6 @@
name: Linux Build using GCC

on:
push:
branches:
- main
paths:
- velox/**
- '!velox/docs/**'
- CMakeLists.txt
- CMake/**
- scripts/setup-ubuntu.sh
- scripts/setup-common.sh
- scripts/setup-versions.sh
- scripts/setup-helper-functions.sh
- .github/workflows/linux-build.yml
- .github/workflows/linux-build-base.yml

pull_request:
paths:
- velox/**
Expand Down
File renamed without changes.
7 changes: 7 additions & 0 deletions velox/connectors/hive/HiveConfig.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -230,6 +230,13 @@ bool HiveConfig::readStatsBasedFilterReorderDisabled(
config_->get<bool>(kReadStatsBasedFilterReorderDisabled, false));
}

bool HiveConfig::isRequestedTypeCheckEnabled(
const config::ConfigBase* session) const {
return session->get<bool>(
kEnableRequestedTypeCheckSession,
config_->get<bool>(kEnableRequestedTypeCheck, true));
}

std::string HiveConfig::hiveLocalDataPath() const {
return config_->get<std::string>(kLocalDataPath, "");
}
Expand Down
9 changes: 9 additions & 0 deletions velox/connectors/hive/HiveConfig.h
Original file line number Diff line number Diff line change
Expand Up @@ -194,6 +194,11 @@ class HiveConfig {
static constexpr const char* kPreserveFlatMapsInMemorySession =
"hive.preserve_flat_maps_in_memory";

static constexpr const char* kEnableRequestedTypeCheck =
"enable-requested-type-check";
static constexpr const char* kEnableRequestedTypeCheckSession =
"enable_requested_type_check";

InsertExistingPartitionsBehavior insertExistingPartitionsBehavior(
const config::ConfigBase* session) const;

Expand Down Expand Up @@ -265,6 +270,10 @@ class HiveConfig {
bool readStatsBasedFilterReorderDisabled(
const config::ConfigBase* session) const;

/// Whether to enable requested type check in the ReaderBase::convertType.
/// Returns true by default.
bool isRequestedTypeCheckEnabled(const config::ConfigBase* session) const;

/// Returns the file system path containing local data. If non-empty,
/// initializes LocalHiveConnectorMetadata to provide metadata for the tables
/// in the directory.
Expand Down
35 changes: 24 additions & 11 deletions velox/connectors/hive/HiveConnector.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@
#include "velox/connectors/hive/HiveDataSink.h"
#include "velox/connectors/hive/HiveDataSource.h"
#include "velox/connectors/hive/HivePartitionFunction.h"
#include "velox/connectors/hive/iceberg/IcebergDataSink.h"

#include <boost/lexical_cast.hpp>
#include <memory>
Expand Down Expand Up @@ -87,17 +88,29 @@ std::unique_ptr<DataSink> HiveConnector::createDataSink(
ConnectorInsertTableHandlePtr connectorInsertTableHandle,
ConnectorQueryCtx* connectorQueryCtx,
CommitStrategy commitStrategy) {
auto hiveInsertHandle =
std::dynamic_pointer_cast<const HiveInsertTableHandle>(
connectorInsertTableHandle);
VELOX_CHECK_NOT_NULL(
hiveInsertHandle, "Hive connector expecting hive write handle!");
return std::make_unique<HiveDataSink>(
inputType,
hiveInsertHandle,
connectorQueryCtx,
commitStrategy,
hiveConfig_);
if (auto icebergInsertHandle =
std::dynamic_pointer_cast<const iceberg::IcebergInsertTableHandle>(
connectorInsertTableHandle)) {
return std::make_unique<iceberg::IcebergDataSink>(
inputType,
icebergInsertHandle,
connectorQueryCtx,
commitStrategy,
hiveConfig_);
} else {
auto hiveInsertHandle =
std::dynamic_pointer_cast<const HiveInsertTableHandle>(
connectorInsertTableHandle);

VELOX_CHECK_NOT_NULL(
hiveInsertHandle, "Hive connector expecting hive write handle!");
return std::make_unique<HiveDataSink>(
inputType,
hiveInsertHandle,
connectorQueryCtx,
commitStrategy,
hiveConfig_);
}
}

std::unique_ptr<core::PartitionFunction> HivePartitionFunctionSpec::create(
Expand Down
11 changes: 11 additions & 0 deletions velox/connectors/hive/HiveConnectorUtil.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,10 @@
#include "velox/expression/Expr.h"
#include "velox/expression/ExprToSubfieldFilter.h"

#include <boost/lexical_cast.hpp>
#include <boost/uuid/uuid_generators.hpp>
#include <boost/uuid/uuid_io.hpp>

namespace facebook::velox::connector::hive {
namespace {

Expand Down Expand Up @@ -604,6 +608,8 @@ void configureReaderOptions(
}

readerOptions.setFileFormat(hiveSplit->fileFormat);
readerOptions.setEnableRequestedTypeCheck(
hiveConfig->isRequestedTypeCheckEnabled(sessionProperties));
}
}

Expand Down Expand Up @@ -925,4 +931,9 @@ core::TypedExprPtr extractFiltersFromRemainingFilter(
}
return expr;
}

std::string makeUuid() {
return boost::lexical_cast<std::string>(boost::uuids::random_generator()());
}

} // namespace facebook::velox::connector::hive
2 changes: 2 additions & 0 deletions velox/connectors/hive/HiveConnectorUtil.h
Original file line number Diff line number Diff line change
Expand Up @@ -114,4 +114,6 @@ core::TypedExprPtr extractFiltersFromRemainingFilter(
common::SubfieldFilters& filters,
double& sampleRate);

std::string makeUuid();

} // namespace facebook::velox::connector::hive
Loading