Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
The table of contents is too big for display.
Diff view
Diff view
  •  
  •  
  •  
165 changes: 165 additions & 0 deletions .github/workflows/gluten_daily.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,165 @@
# Copyright (c) Facebook, Inc. and its affiliates.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

name: Gluten Daily Build

on:
push:
branches:
- 'main'

concurrency:
group: ${{ github.repository }}-${{ github.head_ref || github.sha }}-${{ github.workflow }}
cancel-in-progress: true

jobs:

# Job: builds Gluten's native libraries against this repository's checkout
# (mounted at /work and passed as --velox_home) inside the
# apache/gluten:vcpkg-centos-7 image, restoring a ccache directory before the
# build and saving it afterwards.
gluten-cpp-build:
name: gluten cpp build
# prevent errors when forks ff their main branch
if: ${{ github.repository == 'IBM/velox' }}
runs-on: ubuntu-22.04
env:
CCACHE_DIR: "${{ github.workspace }}/.ccache"
steps:
- uses: actions/checkout@v4
# Restore a previously saved ccache directory. The exact key is per-commit;
# restore-keys supplies the fallback key prefix.
- name: Get Ccache
uses: actions/cache/restore@v4
with:
path: '${{ env.CCACHE_DIR }}'
key: ccache-centos7-release-default-${{github.sha}}
restore-keys: |
ccache-centos7-release-default
# Shallow-clone Gluten into ./gluten and rewrite 'oap-project' to 'IBM' in
# ep/build-velox/src/get_velox.sh.
# NOTE(review): BRANCH is assigned in this step but not referenced afterwards
# within it - confirm whether it is still needed.
- name: Setup Gluten
run: |
git clone --depth 1 https://github.com/apache/incubator-gluten gluten && cd gluten
BRANCH=$(echo ${GITHUB_REF#refs/heads/})
sed -i 's/oap-project/IBM/g' ep/build-velox/src/get_velox.sh
#sed -i 's/VELOX_BRANCH=2025.*/VELOX_BRANCH=main/g' ep/build-velox/src/get_velox.sh
# Run the build inside the container with the workspace mounted at /work.
# CCACHE_DIR is set to /work/.ccache, i.e. the same directory the cache steps
# use on the host, and the Arrow artifacts from /root/.m2 are copied into
# /work/.m2.
- name: Build Gluten native libraries
run: |
docker pull apache/gluten:vcpkg-centos-7
docker run -v $GITHUB_WORKSPACE:/work -w /work apache/gluten:vcpkg-centos-7 bash -c "
git config --global --add safe.directory /work
set -e
df -a
cd /work/gluten
export CCACHE_DIR=/work/.ccache
mkdir -p /work/.ccache
source /opt/rh/devtoolset-11/enable
./dev/builddeps-veloxbe.sh --enable_vcpkg=ON --build_arrow=OFF --build_tests=OFF --build_benchmarks=OFF \
--build_examples=OFF --enable_s3=ON --enable_gcs=ON --enable_hdfs=ON --enable_abfs=ON --velox_home=/work
ccache -s
mkdir -p /work/.m2/repository/org/apache/arrow/
cp -r /root/.m2/repository/org/apache/arrow/* /work/.m2/repository/org/apache/arrow/
"
# Save the ccache directory under the same per-commit key used by the restore
# step above.
- name: Save ccache
uses: actions/cache/save@v4
id: ccache
with:
path: '${{ env.CCACHE_DIR }}'
key: ccache-centos7-release-default-${{github.sha}}

# linux-gcc:
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Why comment out this code instead of dropping it?

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It's just for testing for now; I will remove it.

# name: Build with GCC
# if: ${{ github.repository == 'IBM/velox' }}
# runs-on: ubuntu-22.04
# container: ghcr.io/facebookincubator/velox-dev:adapters
# defaults:
# run:
# shell: bash
# env:
# CCACHE_DIR: ${{ github.workspace }}/ccache
# VELOX_DEPENDENCY_SOURCE: SYSTEM
# GTest_SOURCE: BUNDLED
# cudf_SOURCE: BUNDLED
# CUDA_VERSION: '12.8'
# faiss_SOURCE: BUNDLED
# steps:
# - uses: actions/checkout@v4
# with:
# fetch-depth: 2
# persist-credentials: false

# - name: Fix git permissions
# # Usually actions/checkout does this but as we run in a container
# # it doesn't work
# run: git config --global --add safe.directory ${GITHUB_WORKSPACE}

# - name: Install Dependencies
# run: |
# if git diff --name-only HEAD^1 HEAD | grep -q "scripts/setup-"; then
# # Overwrite old setup scripts with changed versions
# cp scripts/setup-* /

# mkdir /tmp/build
# cd /tmp/build
# source /opt/rh/gcc-toolset-12/enable
# # install basic deps
# bash /setup-centos9.sh

# source /setup-centos9.sh
# install_adapters
# install_cuda $CUDA_VERSION

# cd /
# rm -rf /tmp/build # cleanup to avoid issues with disk space
# fi

# - uses: actions/cache/restore@v4
# with:
# path: '${{ env.CCACHE_DIR }}'
# key: ccache-linux-adapters-${{ inputs.use-clang && 'clang' || 'gcc' }}-${{github.sha}}
# restore-keys: |
# ccache-linux-adapters-${{ inputs.use-clang && 'clang' || 'gcc' }}

# - name: Zero Ccache Statistics
# run: |
# ccache -sz

# - name: Make Release Build
# env:
# #MAKEFLAGS: 'NUM_THREADS=8 MAX_HIGH_MEM_JOBS=4 MAX_LINK_JOBS=4'
# CUDA_ARCHITECTURES: 70
# CUDA_COMPILER: /usr/local/cuda-${CUDA_VERSION}/bin/nvcc
# # Set compiler to GCC 12
# CUDA_FLAGS: -ccbin /opt/rh/gcc-toolset-12/root/usr/bin
# run: |
# EXTRA_CMAKE_FLAGS=(
# "-DVELOX_ENABLE_BENCHMARKS=ON"
# "-DVELOX_ENABLE_EXAMPLES=ON"
# "-DVELOX_ENABLE_ARROW=ON"
# "-DVELOX_ENABLE_GEO=ON"
# "-DVELOX_ENABLE_FAISS=ON"
# "-DVELOX_ENABLE_PARQUET=ON"
# "-DVELOX_ENABLE_HDFS=ON"
# "-DVELOX_ENABLE_S3=ON"
# "-DVELOX_ENABLE_GCS=ON"
# "-DVELOX_ENABLE_ABFS=ON"
# "-DVELOX_ENABLE_REMOTE_FUNCTIONS=ON"
# "-DVELOX_ENABLE_CUDF=ON"
# "-DVELOX_ENABLE_WAVE=ON"
# "-DVELOX_MONO_LIBRARY=ON"
# "-DVELOX_BUILD_SHARED=ON"
# )
# if [[ "${USE_CLANG}" = "true" ]]; then scripts/setup-centos9.sh install_clang15; export CC=/usr/bin/clang-15; export CXX=/usr/bin/clang++-15; CUDA_FLAGS="-ccbin /usr/lib64/llvm15/bin/clang++-15"; fi
# make release EXTRA_CMAKE_FLAGS="${EXTRA_CMAKE_FLAGS[*]}"

# - name: Ccache after
# run: ccache -s

# - uses: actions/cache/save@v4
# with:
# path: '${{ env.CCACHE_DIR }}'
# key: ccache-linux-adapters-gcc-${{github.sha}}
15 changes: 0 additions & 15 deletions .github/workflows/linux-build.yml
Original file line number Diff line number Diff line change
Expand Up @@ -15,21 +15,6 @@
name: Linux Build using GCC

on:
push:
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Why remove these files?

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This seems to have been introduced by changes in the main branch; it's not related to the Iceberg patch.

branches:
- main
paths:
- velox/**
- '!velox/docs/**'
- CMakeLists.txt
- CMake/**
- scripts/setup-ubuntu.sh
- scripts/setup-common.sh
- scripts/setup-versions.sh
- scripts/setup-helper-functions.sh
- .github/workflows/linux-build.yml
- .github/workflows/linux-build-base.yml

pull_request:
paths:
- velox/**
Expand Down
7 changes: 7 additions & 0 deletions velox/connectors/hive/HiveConfig.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -230,6 +230,13 @@ bool HiveConfig::readStatsBasedFilterReorderDisabled(
config_->get<bool>(kReadStatsBasedFilterReorderDisabled, false));
}

/// Looks up kEnableRequestedTypeCheckSession in `session`, passing the
/// connector-level setting (kEnableRequestedTypeCheck, itself requested with a
/// fallback of true) as the fallback value.
bool HiveConfig::isRequestedTypeCheckEnabled(
    const config::ConfigBase* session) const {
  // Resolve the connector-level value first; it is the fallback argument for
  // the session lookup below.
  const bool connectorLevelValue =
      config_->get<bool>(kEnableRequestedTypeCheck, true);
  return session->get<bool>(
      kEnableRequestedTypeCheckSession, connectorLevelValue);
}

/// Reads kLocalDataPath from the connector config, using an empty string as
/// the fallback value.
std::string HiveConfig::hiveLocalDataPath() const {
  const std::string fallbackPath = "";
  return config_->get<std::string>(kLocalDataPath, fallbackPath);
}
Expand Down
9 changes: 9 additions & 0 deletions velox/connectors/hive/HiveConfig.h
Original file line number Diff line number Diff line change
Expand Up @@ -194,6 +194,11 @@ class HiveConfig {
static constexpr const char* kPreserveFlatMapsInMemorySession =
"preserve_flat_maps_in_memory";

static constexpr const char* kEnableRequestedTypeCheck =
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

requested-type-check-enabled

"enable-requested-type-check";
/// Key that isRequestedTypeCheckEnabled() looks up in the session config.
static constexpr const char* kEnableRequestedTypeCheckSession =
"enable_requested_type_check";

InsertExistingPartitionsBehavior insertExistingPartitionsBehavior(
const config::ConfigBase* session) const;

Expand Down Expand Up @@ -265,6 +270,10 @@ class HiveConfig {
bool readStatsBasedFilterReorderDisabled(
const config::ConfigBase* session) const;

/// Whether to enable requested type check in the ReaderBase::convertType.
/// Returns true by default.
bool isRequestedTypeCheckEnabled(const config::ConfigBase* session) const;

/// Returns the file system path containing local data. If non-empty,
/// initializes LocalHiveConnectorMetadata to provide metadata for the tables
/// in the directory.
Expand Down
35 changes: 24 additions & 11 deletions velox/connectors/hive/HiveConnector.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@
#include "velox/connectors/hive/HiveDataSink.h"
#include "velox/connectors/hive/HiveDataSource.h"
#include "velox/connectors/hive/HivePartitionFunction.h"
#include "velox/connectors/hive/iceberg/IcebergDataSink.h"

#include <boost/lexical_cast.hpp>
#include <memory>
Expand Down Expand Up @@ -87,17 +88,29 @@ std::unique_ptr<DataSink> HiveConnector::createDataSink(
ConnectorInsertTableHandlePtr connectorInsertTableHandle,
ConnectorQueryCtx* connectorQueryCtx,
CommitStrategy commitStrategy) {
auto hiveInsertHandle =
std::dynamic_pointer_cast<const HiveInsertTableHandle>(
connectorInsertTableHandle);
VELOX_CHECK_NOT_NULL(
hiveInsertHandle, "Hive connector expecting hive write handle!");
return std::make_unique<HiveDataSink>(
inputType,
hiveInsertHandle,
connectorQueryCtx,
commitStrategy,
hiveConfig_);
if (auto icebergInsertHandle =
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Why does the base not contain IcebergInsertTableHandle?

std::dynamic_pointer_cast<const iceberg::IcebergInsertTableHandle>(
connectorInsertTableHandle)) {
return std::make_unique<iceberg::IcebergDataSink>(
inputType,
icebergInsertHandle,
connectorQueryCtx,
commitStrategy,
hiveConfig_);
} else {
auto hiveInsertHandle =
std::dynamic_pointer_cast<const HiveInsertTableHandle>(
connectorInsertTableHandle);

VELOX_CHECK_NOT_NULL(
hiveInsertHandle, "Hive connector expecting hive write handle!");
return std::make_unique<HiveDataSink>(
inputType,
hiveInsertHandle,
connectorQueryCtx,
commitStrategy,
hiveConfig_);
}
}

std::unique_ptr<core::PartitionFunction> HivePartitionFunctionSpec::create(
Expand Down
11 changes: 11 additions & 0 deletions velox/connectors/hive/HiveConnectorUtil.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,10 @@
#include "velox/expression/Expr.h"
#include "velox/expression/ExprToSubfieldFilter.h"

#include <boost/lexical_cast.hpp>
#include <boost/uuid/uuid_generators.hpp>
#include <boost/uuid/uuid_io.hpp>

namespace facebook::velox::connector::hive {
namespace {

Expand Down Expand Up @@ -604,6 +608,8 @@ void configureReaderOptions(
}

readerOptions.setFileFormat(hiveSplit->fileFormat);
readerOptions.setEnableRequestedTypeCheck(
hiveConfig->isRequestedTypeCheckEnabled(sessionProperties));
}
}

Expand Down Expand Up @@ -925,4 +931,9 @@ core::TypedExprPtr extractFiltersFromRemainingFilter(
}
return expr;
}

std::string makeUuid() {
return boost::lexical_cast<std::string>(boost::uuids::random_generator()());
}

} // namespace facebook::velox::connector::hive
2 changes: 2 additions & 0 deletions velox/connectors/hive/HiveConnectorUtil.h
Original file line number Diff line number Diff line change
Expand Up @@ -114,4 +114,6 @@ core::TypedExprPtr extractFiltersFromRemainingFilter(
common::SubfieldFilters& filters,
double& sampleRate);

/// Returns the string form of a UUID obtained from
/// boost::uuids::random_generator.
std::string makeUuid();

} // namespace facebook::velox::connector::hive
Loading