-
Notifications
You must be signed in to change notification settings - Fork 2
Add linter analysis workflow #9
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,69 @@ | ||
name: cpp-linter-analysis | ||
|
||
on: [push, pull_request] | ||
|
||
defaults: | ||
run: | ||
shell: bash -e -l {0} | ||
|
||
jobs: | ||
build: | ||
runs-on: ubuntu-24.04 | ||
|
||
steps: | ||
# Set up Clang and LLVM | ||
- name: Install LLVM and Clang | ||
run: | | ||
wget https://apt.llvm.org/llvm.sh | ||
chmod +x llvm.sh | ||
sudo ./llvm.sh 20 | ||
sudo apt-get install -y clang-tools-20 | ||
sudo update-alternatives --install /usr/bin/clang++ clang++ /usr/bin/clang++-20 200 | ||
sudo update-alternatives --install /usr/bin/clang clang /usr/bin/clang-20 200 | ||
sudo update-alternatives --install /usr/bin/clang-scan-deps clang-scan-deps /usr/bin/clang-scan-deps-20 200 | ||
sudo update-alternatives --set clang /usr/bin/clang-20 | ||
sudo update-alternatives --set clang++ /usr/bin/clang++-20 | ||
sudo update-alternatives --set clang-scan-deps /usr/bin/clang-scan-deps-20 | ||
|
||
- name: Checkout repository | ||
uses: actions/checkout@v4 | ||
|
||
# Set conda environment using setup-micromamba | ||
- name: Set conda environment | ||
uses: mamba-org/setup-micromamba@main | ||
with: | ||
environment-name: myenv | ||
environment-file: environment-dev.yml | ||
init-shell: bash | ||
cache-downloads: true | ||
|
||
# Run CMake configuration | ||
- name: Configure using CMake | ||
run: | | ||
export CC=clang; export CXX=clang++ | ||
cmake -G Ninja \ | ||
-Bbuild \ | ||
-DCMAKE_BUILD_TYPE=Release \ | ||
-DCMAKE_INSTALL_PREFIX=$CONDA_PREFIX \ | ||
-DBUILD_TESTS=ON \ | ||
-DCMAKE_EXPORT_COMPILE_COMMANDS=ON | ||
#-DFETCH_DEPENDENCIES_WITH_CMAKE=MISSING | ||
|
||
# Run Clang-Tidy and Clang-Format Analysis | ||
- name: Run C++ analysis | ||
uses: cpp-linter/cpp-linter-action@v2 | ||
id: linter | ||
env: | ||
GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} | ||
with: | ||
version: 20 | ||
files-changed-only: false # check all files | ||
database: 'build' | ||
style: 'file' # Use .clang-format config file | ||
tidy-checks: '' # Use .clang-tidy config file | ||
step-summary: true | ||
ignore: 'build' | ||
extra-args: '-std=c++20' | ||
- name: Fail fast | ||
if: steps.linter.outputs.checks-failed > 0 | ||
run: exit 1 | ||
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,3 +1,3 @@ | ||
#include <cstdint> | ||
#include <cstring> | ||
#include <optional> | ||
|
@@ -5,17 +5,19 @@ | |
#include <string> | ||
#include <string_view> | ||
|
||
#include "Message_generated.h" | ||
#include "Schema_generated.h" | ||
|
||
#include "serialize.hpp" | ||
|
||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Regarding this error:
We would probably need to link |
||
namespace | ||
{ | ||
constexpr int64_t arrow_alignment = 8; | ||
|
||
// Aligns a value to the next multiple of 8, as required by the Arrow IPC format for message bodies. | ||
int64_t align_to_8(int64_t n) | ||
{ | ||
return (n + 7) & -8; | ||
return (n + arrow_alignment - 1) & -arrow_alignment; | ||
} | ||
|
||
// TODO Complete this with all possible formats? | ||
|
@@ -137,7 +139,7 @@ | |
// Copy the metadata into the buffer, after the 4-byte length prefix | ||
memcpy(final_buffer.data() + sizeof(uint32_t), schema_builder.GetBufferPointer(), schema_len); | ||
// Write the 4-byte metadata length at the beginning of the message | ||
*(reinterpret_cast<uint32_t*>(final_buffer.data())) = schema_len; | ||
memcpy(final_buffer.data(), &schema_len, sizeof(schema_len)); | ||
} | ||
|
||
// II - Serialize the RecordBatch message | ||
|
@@ -148,11 +150,11 @@ | |
|
||
// arrow_arr.buffers[0] is the validity bitmap | ||
// arrow_arr.buffers[1] is the data buffer | ||
const uint8_t* validity_bitmap = reinterpret_cast<const uint8_t*>(arrow_arr.buffers[0]); | ||
const uint8_t* data_buffer = reinterpret_cast<const uint8_t*>(arrow_arr.buffers[1]); | ||
const auto validity_bitmap = static_cast<const uint8_t*>(arrow_arr.buffers[0]); | ||
const auto data_buffer = static_cast<const uint8_t*>(arrow_arr.buffers[1]); | ||
|
||
// Calculate the size of the validity and data buffers | ||
int64_t validity_size = (arrow_arr.length + 7) / 8; | ||
int64_t validity_size = (arrow_arr.length + arrow_alignment - 1) / arrow_alignment; | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. False positive?
|
||
int64_t data_size = arrow_arr.length * sizeof(T); | ||
int64_t body_len = validity_size + data_size; // The total size of the message body | ||
|
||
|
@@ -181,22 +183,30 @@ | |
batch_builder.Finish(batch_message_offset); | ||
|
||
// III - Append the RecordBatch message to the final buffer | ||
uint32_t batch_meta_len = batch_builder.GetSize(); // Get the size of the batch metadata | ||
int64_t aligned_batch_meta_len = align_to_8(batch_meta_len); // Calculate the padded length | ||
const uint32_t batch_meta_len = batch_builder.GetSize(); // Get the size of the batch metadata | ||
const int64_t aligned_batch_meta_len = align_to_8(batch_meta_len); // Calculate the padded length | ||
|
||
size_t current_size = final_buffer.size(); // Get the current size (which is the end of the Schema message) | ||
const size_t current_size = final_buffer.size(); // Get the current size (which is the end of the Schema message) | ||
// Resize the buffer to append the new message | ||
final_buffer.resize(current_size + sizeof(uint32_t) + aligned_batch_meta_len + body_len); | ||
uint8_t* dst = final_buffer.data() + current_size; // Get a pointer to where the new message will start | ||
|
||
// Write the 4-byte metadata length for the RecordBatch message | ||
*(reinterpret_cast<uint32_t*>(dst)) = batch_meta_len; | ||
memcpy(dst, &batch_meta_len, sizeof(batch_meta_len)); | ||
dst += sizeof(uint32_t); | ||
// Copy the RecordBatch metadata into the buffer | ||
memcpy(dst, batch_builder.GetBufferPointer(), batch_meta_len); | ||
// Add padding to align the body to an 8-byte boundary | ||
memset(dst + batch_meta_len, 0, aligned_batch_meta_len - batch_meta_len); | ||
if (static_cast<size_t>(aligned_batch_meta_len) >= batch_meta_len) | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more.
Still complaining even after the cast... |
||
{ | ||
memset(dst + batch_meta_len, 0, aligned_batch_meta_len - batch_meta_len); | ||
} | ||
else | ||
{ | ||
throw std::runtime_error("aligned_batch_meta_len should be greater than batch_meta_len"); | ||
} | ||
|
||
dst += aligned_batch_meta_len; | ||
// Copy the actual data buffers (the message body) into the buffer | ||
if (validity_bitmap) | ||
{ | ||
|
@@ -205,9 +215,10 @@ | |
else | ||
{ | ||
// If validity_bitmap is null, it means there are no nulls | ||
memset(dst, 0xFF, validity_size); | ||
constexpr uint8_t no_nulls_bitmap = 0xFF; | ||
memset(dst, no_nulls_bitmap, validity_size); | ||
} | ||
dst += validity_size; | ||
if (data_buffer) | ||
{ | ||
memcpy(dst, data_buffer, data_size); | ||
|
@@ -228,7 +239,8 @@ | |
size_t current_offset = 0; | ||
|
||
// I - Deserialize the Schema message | ||
uint32_t schema_meta_len = *(reinterpret_cast<const uint32_t*>(buf_ptr + current_offset)); | ||
uint32_t schema_meta_len = 0; | ||
memcpy(&schema_meta_len, buf_ptr + current_offset, sizeof(schema_meta_len)); | ||
current_offset += sizeof(uint32_t); | ||
auto schema_message = org::apache::arrow::flatbuf::GetMessage(buf_ptr + current_offset); | ||
if (schema_message->header_type() != org::apache::arrow::flatbuf::MessageHeader::Schema) | ||
|
@@ -245,7 +257,8 @@ | |
current_offset += schema_meta_len; | ||
|
||
// II - Deserialize the RecordBatch message | ||
uint32_t batch_meta_len = *(reinterpret_cast<const uint32_t*>(buf_ptr + current_offset)); | ||
uint32_t batch_meta_len = 0; | ||
memcpy(&batch_meta_len, buf_ptr + current_offset, sizeof(batch_meta_len)); | ||
current_offset += sizeof(uint32_t); | ||
auto batch_message = org::apache::arrow::flatbuf::GetMessage(buf_ptr + current_offset); | ||
if (batch_message->header_type() != org::apache::arrow::flatbuf::MessageHeader::RecordBatch) | ||
|
@@ -254,7 +267,7 @@ | |
} | ||
auto record_batch = static_cast<const org::apache::arrow::flatbuf::RecordBatch*>(batch_message->header()); | ||
current_offset += align_to_8(batch_meta_len); | ||
const uint8_t* body_ptr = buf_ptr + current_offset; | ||
|
||
// Extract metadata from the RecordBatch | ||
auto buffers_meta = record_batch->buffers(); | ||
|
@@ -266,10 +279,10 @@ | |
int64_t validity_len = buffers_meta->Get(0)->length(); | ||
int64_t data_len = buffers_meta->Get(1)->length(); | ||
|
||
uint8_t* validity_buffer_copy = new uint8_t[validity_len]; | ||
auto validity_buffer_copy = new uint8_t[validity_len]; | ||
memcpy(validity_buffer_copy, body_ptr + buffers_meta->Get(0)->offset(), validity_len); | ||
|
||
uint8_t* data_buffer_copy = new uint8_t[data_len]; | ||
auto data_buffer_copy = new uint8_t[data_len]; | ||
memcpy(data_buffer_copy, body_ptr + buffers_meta->Get(1)->offset(), data_len); | ||
|
||
// Get name | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Should we keep this (and force fixing everything, or skip lines if we consider the errors/warnings as false positives) or remove it to only have annotated files as a reference?