-
Notifications
You must be signed in to change notification settings - Fork 3
Add output streams and serializers #29
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from 2 commits
2e32ebb
c4d42eb
4785a7b
b3beb9a
08e0183
36fe236
b5b3d97
458a9cf
14952d0
75c9827
7d0e68b
37ec2fe
c69100f
6c33397
5ad7cc1
9c507bc
5b38569
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,81 @@ | ||
#pragma once | ||
|
||
#include <cstdint> | ||
#include <ranges> | ||
|
||
#include "sparrow_ipc/output_stream.hpp" | ||
|
||
namespace sparrow_ipc | ||
{ | ||
template <typename R> | ||
requires std::ranges::random_access_range<R> | ||
&& std::ranges::random_access_range<std::ranges::range_value_t<R>> | ||
&& std::same_as<typename std::ranges::range_value_t<R>::value_type, uint8_t> | ||
class chuncked_memory_output_stream final : public output_stream | ||
{ | ||
public: | ||
|
||
explicit chuncked_memory_output_stream(R& chunks) | ||
: m_chunks(&chunks) {}; | ||
|
||
std::size_t write(std::span<const std::uint8_t> span) override | ||
{ | ||
m_chunks->emplace_back(span.begin(), span.end()); | ||
return span.size(); | ||
} | ||
|
||
std::size_t write(std::vector<uint8_t>&& buffer) | ||
{ | ||
m_chunks->emplace_back(std::move(buffer)); | ||
return m_chunks->back().size(); | ||
} | ||
|
||
std::size_t write(uint8_t value, std::size_t count) override | ||
{ | ||
m_chunks->emplace_back(count, value); | ||
return count; | ||
} | ||
|
||
void reserve(std::size_t size) override | ||
{ | ||
m_chunks->reserve(size); | ||
} | ||
|
||
void reserve(const std::function<std::size_t()>& calculate_reserve_size) override | ||
{ | ||
m_chunks->reserve(calculate_reserve_size()); | ||
} | ||
|
||
size_t size() const override | ||
{ | ||
return std::accumulate( | ||
Alex-PLACET marked this conversation as resolved.
Outdated
Show resolved
Hide resolved
|
||
m_chunks->begin(), | ||
m_chunks->end(), | ||
0, | ||
[](size_t acc, const auto& chunk) | ||
{ | ||
return acc + chunk.size(); | ||
} | ||
); | ||
} | ||
|
||
void flush() override | ||
{ | ||
// Implementation for flushing memory | ||
} | ||
|
||
void close() override | ||
{ | ||
// Implementation for closing the stream | ||
} | ||
|
||
bool is_open() const override | ||
{ | ||
return true; | ||
} | ||
|
||
private: | ||
|
||
R* m_chunks; | ||
}; | ||
} |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,73 @@ | ||
#pragma once | ||
|
||
#include <sparrow/record_batch.hpp> | ||
|
||
#include "sparrow_ipc/chunk_memory_output_stream.hpp" | ||
#include "sparrow_ipc/memory_output_stream.hpp" | ||
#include "sparrow_ipc/serialize.hpp" | ||
#include "sparrow_ipc/serialize_utils.hpp" | ||
|
||
namespace sparrow_ipc | ||
{ | ||
class chunk_serializer | ||
{ | ||
public: | ||
|
||
chunk_serializer( | ||
const sparrow::record_batch& rb, | ||
chuncked_memory_output_stream<std::vector<std::vector<uint8_t>>>& stream | ||
); | ||
|
||
template <std::ranges::input_range R> | ||
requires std::same_as<std::ranges::range_value_t<R>, sparrow::record_batch> | ||
chunk_serializer( | ||
|
||
const R& record_batches, | ||
chuncked_memory_output_stream<std::vector<std::vector<uint8_t>>>& stream | ||
) | ||
: m_pstream(&stream) | ||
{ | ||
if (record_batches.empty()) | ||
{ | ||
throw std::invalid_argument("Record batches collection is empty"); | ||
} | ||
m_dtypes = get_column_dtypes(record_batches[0]); | ||
|
||
|
||
m_pstream->reserve(record_batches.size() + 1); | ||
std::vector<uint8_t> buffer; | ||
memory_output_stream schema_stream(buffer); | ||
serialize_schema_message(record_batches[0], schema_stream); | ||
m_pstream->write(std::move(buffer)); | ||
append(record_batches); | ||
} | ||
Alex-PLACET marked this conversation as resolved.
Outdated
Show resolved
Hide resolved
|
||
|
||
void append(const sparrow::record_batch& rb); | ||
|
||
template <std::ranges::input_range R> | ||
requires std::same_as<std::ranges::range_value_t<R>, sparrow::record_batch> | ||
void append(const R& record_batches) | ||
{ | ||
if (m_ended) | ||
{ | ||
throw std::runtime_error("Cannot append to a serializer that has been ended"); | ||
} | ||
Alex-PLACET marked this conversation as resolved.
Outdated
Show resolved
Hide resolved
|
||
|
||
m_pstream->reserve(m_pstream->size() + record_batches.size()); | ||
Alex-PLACET marked this conversation as resolved.
Outdated
Show resolved
Hide resolved
|
||
|
||
for (const auto& rb : record_batches) | ||
{ | ||
std::vector<uint8_t> buffer; | ||
memory_output_stream stream(buffer); | ||
serialize_record_batch(rb, stream); | ||
m_pstream->write(std::move(buffer)); | ||
} | ||
Alex-PLACET marked this conversation as resolved.
Outdated
Show resolved
Hide resolved
|
||
} | ||
|
||
void end(); | ||
|
||
private: | ||
|
||
std::vector<sparrow::data_type> m_dtypes; | ||
chuncked_memory_output_stream<std::vector<std::vector<uint8_t>>>* m_pstream; | ||
bool m_ended{false}; | ||
}; | ||
} |
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -2,6 +2,7 @@ | |
|
||
#include <cstdint> | ||
#include <variant> | ||
#include <span> | ||
|
||
#include "Message_generated.h" | ||
|
||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,36 @@ | ||
#include <fstream> | ||
#include <functional> | ||
|
||
#include "sparrow_ipc/output_stream.hpp" | ||
|
||
|
||
namespace sparrow_ipc | ||
{ | ||
class SPARROW_IPC_API file_output_stream final : public output_stream | ||
{ | ||
public: | ||
|
||
explicit file_output_stream(std::ofstream& file); | ||
|
||
std::size_t write(std::span<const std::uint8_t> span) override; | ||
|
||
std::size_t write(uint8_t value, std::size_t count = 1) override; | ||
|
||
size_t size() const override; | ||
|
||
void reserve(std::size_t size) override; | ||
|
||
void reserve(const std::function<std::size_t()>& calculate_reserve_size) override; | ||
|
||
void flush() override; | ||
|
||
void close() override; | ||
|
||
bool is_open() const override; | ||
|
||
private: | ||
|
||
std::ofstream& m_file; | ||
size_t m_written_bytes = 0; | ||
}; | ||
} |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Typo in class name 'chuncked_memory_output_stream' – should be 'chunked_memory_output_stream' to match file name and conventional spelling.
Copilot uses AI. Check for mistakes.