Skip to content

Commit 14a02ff

Browse files
Expose Reader streamcut to read from given Stream Cut (#22)
If a client application crashes and the user wants to continue reading from the stream where it left off, there is currently no method to obtain StreamCuts from the ReaderGroup. A StreamCut is a set of segment/offset pairs for a single stream that represents a consistent position in the stream. Signed-off-by: Shwetha N <[email protected]>
1 parent 3a825a5 commit 14a02ff

File tree

5 files changed

+80
-16
lines changed

5 files changed

+80
-16
lines changed

src/lib.rs

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -12,7 +12,7 @@
1212
#[macro_use]
1313
extern crate cfg_if;
1414

15-
use crate::stream_reader_group::StreamReaderGroupConfig;
15+
use crate::stream_reader_group::{StreamCuts, StreamReaderGroupConfig};
1616

1717
mod byte_stream;
1818
mod stream_manager;
@@ -58,6 +58,7 @@ fn pravega_client(py: Python, m: &PyModule) -> PyResult<()> {
5858
m.add_class::<StreamReaderGroup>()?;
5959
m.add_class::<StreamScalingPolicy>()?;
6060
m.add_class::<StreamRetentionPolicy>()?;
61+
m.add_class::<StreamCuts>()?;
6162
m.add_class::<ByteStream>()?;
6263
let txn_exception = py.get_type::<TxnFailedException>();
6364
txn_exception.setattr("__doc__", TXNFAILED_EXCEPTION_DOCSTRING)?;

src/stream_manager.rs

Lines changed: 29 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,9 @@
88
// http://www.apache.org/licenses/LICENSE-2.0
99
//
1010

11+
use std::collections::HashMap;
12+
use pravega_client::event::reader_group::{StreamCutV1, StreamCutVersioned};
13+
use crate::stream_reader_group::StreamCuts;
1114
cfg_if! {
1215
if #[cfg(feature = "python_binding")] {
1316
use crate::stream_writer_transactional::StreamTxnWriter;
@@ -558,32 +561,45 @@ impl StreamManager {
558561
/// event.reader_group=manager.create_reader_group("rg1", "scope", "stream", true)
559562
/// ```
560563
///
561-
#[pyo3(text_signature = "($self, reader_group_name, scope_name, stream_name, read_from_tail)")]
564+
#[pyo3(text_signature = "($self, reader_group_name, scope_name, stream_name, read_from_tail, stream_cut)")]
562565
#[args(read_from_tail = "false")]
563566
pub fn create_reader_group(
564567
&self,
565568
reader_group_name: &str,
566569
scope_name: &str,
567570
stream_name: &str,
568571
read_from_tail: bool,
572+
stream_cut: Option<StreamCuts>,
569573
) -> PyResult<StreamReaderGroup> {
570574
let scope = Scope::from(scope_name.to_string());
575+
let stream = Stream::from(stream_name.to_string());
571576
let scoped_stream = ScopedStream {
572577
scope: scope.clone(),
573-
stream: Stream::from(stream_name.to_string()),
578+
stream: stream.clone(),
574579
};
575580
let handle = self.cf.runtime_handle();
576-
let rg_config = if read_from_tail {
577-
// Create a reader group to read from the current TAIL/end of the Stream.
578-
ReaderGroupConfigBuilder::default()
579-
.read_from_tail_of_stream(scoped_stream)
580-
.build()
581-
} else {
582-
// Create a reader group to read from current HEAD/start of the Stream.
583-
ReaderGroupConfigBuilder::default()
584-
.read_from_head_of_stream(scoped_stream)
585-
.build()
586-
};
581+
let rg_config = if let Some(ref stream_cut) = stream_cut {
582+
let mut positions = HashMap::new();
583+
// Convert each (segment id, offset) entry of the segment_offset_map into a ScopedSegment position.
584+
for (segment_val, position) in stream_cut.stream_cuts.segment_offset_map.iter() {
585+
let scoped_segment = ScopedSegment::new(scope.clone(), stream.clone(), Segment::from(*segment_val));
586+
positions.insert(scoped_segment, *position);
587+
}
588+
let stream_cut_v1 = StreamCutV1::new(scoped_stream.clone(), positions);
589+
// Create a reader group to read from the given StreamCut.
590+
ReaderGroupConfigBuilder::default().read_from_stream(scoped_stream.clone(), StreamCutVersioned::V1(stream_cut_v1)).build()
591+
}else if read_from_tail {
592+
// Create a reader group to read from the current TAIL/end of the Stream.
593+
ReaderGroupConfigBuilder::default()
594+
.read_from_tail_of_stream(scoped_stream)
595+
.build()
596+
} else {
597+
// Create a reader group to read from current HEAD/start of the Stream.
598+
ReaderGroupConfigBuilder::default()
599+
.read_from_head_of_stream(scoped_stream)
600+
.build()
601+
};
602+
587603
let rg = handle.block_on(self.cf.create_reader_group_with_config(
588604
reader_group_name.to_string(),
589605
rg_config,

src/stream_reader.rs

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -148,6 +148,9 @@ impl EventData {
148148
fn data(&self) -> &[u8] {
149149
self.value.as_slice()
150150
}
151+
152+
/// Returns the offset of this event within its segment.
153+
fn offset(&self) -> i64 { self.offset_in_segment }
151154
/// Returns the string representation.
152155
fn to_str(&self) -> String {
153156
format!("offset {:?} data :{:?}", self.offset_in_segment, self.value)

src/stream_reader_group.rs

Lines changed: 45 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -10,8 +10,8 @@
1010

1111
cfg_if! {
1212
if #[cfg(feature = "python_binding")] {
13-
use pravega_client_shared::ScopedStream;
1413
use pravega_client::event::reader_group::ReaderGroup;
14+
use std::collections::HashMap;
1515
use pyo3::prelude::*;
1616
use pyo3::PyResult;
1717
use pyo3::PyObjectProtocol;
@@ -22,7 +22,8 @@ cfg_if! {
2222
use crate::stream_reader::StreamReader;
2323
use pravega_client::event::reader_group::{ReaderGroupConfig, ReaderGroupConfigBuilder};
2424
use pravega_client::event::reader_group_state::ReaderGroupStateError;
25-
use pravega_client_shared::{Scope, Stream};
25+
use pravega_client_shared::{Scope, Stream, StreamCut};
26+
use pravega_client_shared::ScopedStream;
2627
use pyo3::types::PyTuple;
2728
use pyo3::exceptions;
2829
}
@@ -99,6 +100,33 @@ impl PyObjectProtocol for StreamReaderGroupConfig {
99100
}
100101
}
101102

103+
#[cfg(feature = "python_binding")]
104+
#[pyclass]
105+
#[derive(Clone)]
106+
pub(crate) struct StreamCuts {
107+
pub(crate) stream_cuts: StreamCut,
108+
}
109+
#[cfg(feature = "python_binding")]
110+
#[pymethods]
111+
impl StreamCuts {
112+
113+
fn get_segment_offset_map(&self) -> HashMap<i64, i64> {
114+
self.stream_cuts.segment_offset_map.clone()
115+
}
116+
117+
fn to_str(&self) -> String {
118+
format!("StreamCuts: {:?}", self.stream_cuts)
119+
}
120+
}
121+
122+
#[cfg(feature = "python_binding")]
123+
#[pyproto]
124+
impl PyObjectProtocol for StreamCuts {
125+
fn __repr__(&self) -> PyResult<String> {
126+
Ok(format!("StreamCuts({:?})", self.to_str()))
127+
}
128+
}
129+
102130
///
103131
/// This represents a Stream reader for a given Stream.
104132
/// Note: A python object of StreamReader cannot be created directly without using the StreamManager.
@@ -179,6 +207,21 @@ impl StreamReaderGroup {
179207
}
180208
}
181209

210+
/// Returns the latest StreamCut from the ReaderGroup.
211+
/// Pass this StreamCut as the `stream_cut` argument when creating a reader group to start reading from this position.
212+
pub fn get_streamcut(&self) -> PyResult<StreamCuts> {
213+
214+
let streamcut = self
215+
.runtime_handle
216+
.block_on(self.reader_group.get_streamcut());
217+
info!(
218+
"Got streamcut {:?} ", streamcut
219+
);
220+
Ok(StreamCuts {
221+
stream_cuts: streamcut
222+
})
223+
}
224+
182225
/// Returns the string representation.
183226
fn to_str(&self) -> String {
184227
format!(

tests/pravega_reader_test.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,7 @@
77
#
88
# http://www.apache.org/licenses/LICENSE-2.0
99
#
10+
import time
1011

1112
import pravega_client
1213
import random

0 commit comments

Comments
 (0)