QuantStack · Hind-M · Jul 30, 2025 · Jul 30, 2025 · Jul 31, 2025 · Jul 31, 2025
diff --git a/.github/workflows/analysis.yml b/.github/workflows/analysis.yml
@@ -0,0 +1,69 @@
+name: cpp-linter-analysis
+
+on: [push, pull_request]
+
+defaults:
+  run:
+    shell: bash -e -l {0}
+
+jobs:
+  build:
+    runs-on: ubuntu-24.04
+
+    steps:
+    # Set up Clang and LLVM
+    - name: Install LLVM and Clang
+      run: |
+        wget https://apt.llvm.org/llvm.sh
+        chmod +x llvm.sh
+        sudo ./llvm.sh 20
+        sudo apt-get install -y clang-tools-20
+        sudo update-alternatives --install /usr/bin/clang++         clang++         /usr/bin/clang++-20 200
+        sudo update-alternatives --install /usr/bin/clang           clang           /usr/bin/clang-20 200
+        sudo update-alternatives --install /usr/bin/clang-scan-deps clang-scan-deps /usr/bin/clang-scan-deps-20 200
+        sudo update-alternatives --set clang /usr/bin/clang-20
+        sudo update-alternatives --set clang++ /usr/bin/clang++-20
+        sudo update-alternatives --set clang-scan-deps /usr/bin/clang-scan-deps-20
+
+    - name: Checkout repository
+      uses: actions/checkout@v4
+
+    # Set conda environment using setup-micromamba
+    - name: Set conda environment
+      uses: mamba-org/setup-micromamba@main
+      with:
+        environment-name: myenv
+        environment-file: environment-dev.yml
+        init-shell: bash
+        cache-downloads: true
+
+    # Run CMake configuration
+    - name: Configure using CMake
+      run: |
+        export CC=clang; export CXX=clang++
+        cmake -G Ninja \
+        -Bbuild \
+        -DCMAKE_BUILD_TYPE=Release \
+        -DCMAKE_INSTALL_PREFIX=$CONDA_PREFIX \
+        -DBUILD_TESTS=ON \
+        -DCMAKE_EXPORT_COMPILE_COMMANDS=ON
+        #-DFETCH_DEPENDENCIES_WITH_CMAKE=MISSING
+
+    # Run Clang-Tidy and Clang-Format Analysis
+    - name: Run C++ analysis
+      uses: cpp-linter/cpp-linter-action@v2
+      id: linter
+      env:
+        GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
+      with:
+        version: 20
+        files-changed-only: false  # check all files
+        database: 'build'
+        style: 'file'  # Use .clang-format config file
+        tidy-checks: '' # Use .clang-tidy config file
+        step-summary: true
+        ignore: 'build'
+        extra-args: '-std=c++20'
+    - name: Fail fast
+      if: steps.linter.outputs.checks-failed > 0
+      run: exit 1
diff --git a/src/serialize.cpp b/src/serialize.cpp
@@ -1,3 +1,3 @@
 #include <cstdint>
 #include <cstring>
 #include <optional>
@@ -5,17 +5,19 @@
 #include <string>
 #include <string_view>

 #include "Message_generated.h"
 #include "Schema_generated.h"

 #include "serialize.hpp"
 
 namespace
 {
+    constexpr int64_t arrow_alignment = 8;
+
     // Aligns a value to the next multiple of 8, as required by the Arrow IPC format for message bodies.
     int64_t align_to_8(int64_t n)
     {
-        return (n + 7) & -8;
+        return (n + arrow_alignment - 1) & -arrow_alignment;
     }
 
     // TODO Complete this with all possible formats?
@@ -137,7 +139,7 @@
         // Copy the metadata into the buffer, after the 4-byte length prefix
         memcpy(final_buffer.data() + sizeof(uint32_t), schema_builder.GetBufferPointer(), schema_len);
         // Write the 4-byte metadata length at the beginning of the message
-        *(reinterpret_cast<uint32_t*>(final_buffer.data())) = schema_len;
+        memcpy(final_buffer.data(), &schema_len, sizeof(schema_len));
     }
 
     // II - Serialize the RecordBatch message
@@ -148,11 +150,11 @@
 
         // arrow_arr.buffers[0] is the validity bitmap
         // arrow_arr.buffers[1] is the data buffer
-        const uint8_t* validity_bitmap = reinterpret_cast<const uint8_t*>(arrow_arr.buffers[0]);
-        const uint8_t* data_buffer = reinterpret_cast<const uint8_t*>(arrow_arr.buffers[1]);
+        const auto validity_bitmap = static_cast<const uint8_t*>(arrow_arr.buffers[0]);
+        const auto data_buffer = static_cast<const uint8_t*>(arrow_arr.buffers[1]);
 
         // Calculate the size of the validity and data buffers
-        int64_t validity_size = (arrow_arr.length + 7) / 8;
+        int64_t validity_size = (arrow_arr.length + arrow_alignment - 1) / arrow_alignment;
         int64_t data_size = arrow_arr.length * sizeof(T);
         int64_t body_len = validity_size + data_size; // The total size of the message body
 
@@ -181,22 +183,30 @@
         batch_builder.Finish(batch_message_offset);
 
         // III - Append the RecordBatch message to the final buffer
-        uint32_t batch_meta_len = batch_builder.GetSize(); // Get the size of the batch metadata
-        int64_t aligned_batch_meta_len = align_to_8(batch_meta_len); // Calculate the padded length
+        const uint32_t batch_meta_len = batch_builder.GetSize(); // Get the size of the batch metadata
+        const int64_t aligned_batch_meta_len = align_to_8(batch_meta_len); // Calculate the padded length
 
-        size_t current_size = final_buffer.size(); // Get the current size (which is the end of the Schema message)
+        const size_t current_size = final_buffer.size(); // Get the current size (which is the end of the Schema message)
         // Resize the buffer to append the new message
         final_buffer.resize(current_size + sizeof(uint32_t) + aligned_batch_meta_len + body_len);
         uint8_t* dst = final_buffer.data() + current_size; // Get a pointer to where the new message will start
 
         // Write the 4-byte metadata length for the RecordBatch message
-        *(reinterpret_cast<uint32_t*>(dst)) = batch_meta_len;
+        memcpy(dst, &batch_meta_len, sizeof(batch_meta_len));
         dst += sizeof(uint32_t);
         // Copy the RecordBatch metadata into the buffer
         memcpy(dst, batch_builder.GetBufferPointer(), batch_meta_len);
         // Add padding to align the body to an 8-byte boundary
-        memset(dst + batch_meta_len, 0, aligned_batch_meta_len - batch_meta_len);
+        if (static_cast<size_t>(aligned_batch_meta_len) >= batch_meta_len)
+        {
+            memset(dst + batch_meta_len, 0, aligned_batch_meta_len - batch_meta_len);
+        }
+        else
+        {
+            throw std::runtime_error("aligned_batch_meta_len should be greater than batch_meta_len");
+        }
+
         dst += aligned_batch_meta_len;
         // Copy the actual data buffers (the message body) into the buffer
         if (validity_bitmap)
        {
@@ -205,9 +215,10 @@
         else
         {
             // If validity_bitmap is null, it means there are no nulls
-            memset(dst, 0xFF, validity_size);
+            constexpr uint8_t no_nulls_bitmap = 0xFF;
+            memset(dst, no_nulls_bitmap, validity_size);
         }
         dst += validity_size;
         if (data_buffer)
        {
            memcpy(dst, data_buffer, data_size);
@@ -228,7 +239,8 @@
     size_t current_offset = 0;
 
     // I - Deserialize the Schema message
-    uint32_t schema_meta_len = *(reinterpret_cast<const uint32_t*>(buf_ptr + current_offset));
+    uint32_t schema_meta_len = 0;
+    memcpy(&schema_meta_len, buf_ptr + current_offset, sizeof(schema_meta_len));
     current_offset += sizeof(uint32_t);
     auto schema_message = org::apache::arrow::flatbuf::GetMessage(buf_ptr + current_offset);
     if (schema_message->header_type() != org::apache::arrow::flatbuf::MessageHeader::Schema)
@@ -245,7 +257,8 @@
     current_offset += schema_meta_len;
 
     // II - Deserialize the RecordBatch message
-    uint32_t batch_meta_len = *(reinterpret_cast<const uint32_t*>(buf_ptr + current_offset));
+    uint32_t batch_meta_len = 0;
+    memcpy(&batch_meta_len, buf_ptr + current_offset, sizeof(batch_meta_len));
     current_offset += sizeof(uint32_t);
     auto batch_message = org::apache::arrow::flatbuf::GetMessage(buf_ptr + current_offset);
     if (batch_message->header_type() != org::apache::arrow::flatbuf::MessageHeader::RecordBatch)
@@ -254,7 +267,7 @@
    }
    auto record_batch = static_cast<const org::apache::arrow::flatbuf::RecordBatch*>(batch_message->header());
    current_offset += align_to_8(batch_meta_len);
    const uint8_t* body_ptr = buf_ptr + current_offset;

    // Extract metadata from the RecordBatch
    auto buffers_meta = record_batch->buffers();
@@ -266,10 +279,10 @@
     int64_t validity_len = buffers_meta->Get(0)->length();
     int64_t data_len = buffers_meta->Get(1)->length();
 
-    uint8_t* validity_buffer_copy = new uint8_t[validity_len];
+    auto validity_buffer_copy = new uint8_t[validity_len];
     memcpy(validity_buffer_copy, body_ptr + buffers_meta->Get(0)->offset(), validity_len);
 
-    uint8_t* data_buffer_copy = new uint8_t[data_len];
+    auto data_buffer_copy = new uint8_t[data_len];
     memcpy(data_buffer_copy, body_ptr + buffers_meta->Get(1)->offset(), data_len);
 
     // Get name