
Added arranged methods for Threshold, Distinct and Count #324

Open · wants to merge 9 commits into `master`
5 changes: 5 additions & 0 deletions .vscode/settings.json
@@ -0,0 +1,5 @@
{
"cSpell.words": [
"Hashable"
]
}
5 changes: 2 additions & 3 deletions src/algorithms/graphs/propagate.rs
@@ -53,8 +53,7 @@ use operators::arrange::arrangement::Arranged;
/// Propagates labels forward, retaining the minimum label.
///
/// This variant takes a pre-arranged edge collection, to facilitate re-use, and allows
-/// a method `logic` to specify the rounds in which we introduce various labels. The output
-/// of `logic should be a number in the interval [0,64],
+/// a method `logic` to specify the rounds in which we introduce various labels.
pub fn propagate_core<G, N, L, Tr, F, R>(edges: &Arranged<G,Tr>, nodes: &Collection<G,(N,L),R>, logic: F) -> Collection<G,(N,L),R>
where
G: Scope,
@@ -100,7 +99,7 @@ where
let labels =
proposals
.concat(&nodes)
-.reduce_abelian::<_,DefaultValTrace<_,_,_,_>>("Propagate", |_, s, t| t.push((s[0].0.clone(), R::from(1 as i8))));
+.reduce_abelian::<_,DefaultValTrace<_,_,_,_>>("Propagate", |_, s, t| t.push((s[0].0.clone(), R::from(1))));

let propagate: Collection<_, (N, L), R> =
labels
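The doc fix above removes a stale constraint (the `[0,64]` interval no longer applies to `logic`), and `R::from(1 as i8)` loses its redundant cast. For orientation, here is a hedged sketch of driving `propagate_core` with a pre-arranged edge set; the graph data and the label closure are invented for illustration, with bounds (`R: From<i8>`, `logic: Fn(&L) -> u64`) as the surrounding code suggests.

```rust
// Hypothetical usage sketch for `propagate_core` (not part of this PR).
use differential_dataflow::algorithms::graphs::propagate::propagate_core;
use differential_dataflow::input::Input;
use differential_dataflow::operators::arrange::ArrangeByKey;

fn main() {
    timely::example(|scope| {
        // A small edge set and a single seed (node, label) pair.
        let (_, edges) = scope.new_collection_from(vec![(1u32, 2u32), (2, 3)]);
        let (_, nodes) = scope.new_collection_from(vec![(1u32, 1u32)]);

        // Pre-arrange the edges so the arrangement can be shared across operators.
        let edges = edges.arrange_by_key();

        // `logic` names the round in which each label is introduced.
        propagate_core(&edges, &nodes, |label| *label as u64)
            .inspect(|x| println!("{:?}", x));
    });
}
```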
13 changes: 7 additions & 6 deletions src/consolidation.rs
@@ -55,8 +55,8 @@ pub fn consolidate_slice<T: Ord, R: Semigroup>(slice: &mut [(T, R)]) -> usize {
assert!(offset < index);

// LOOP INVARIANT: offset < index
-let ptr1 = slice.as_mut_ptr().offset(offset as isize);
-let ptr2 = slice.as_mut_ptr().offset(index as isize);
+let ptr1 = slice.as_mut_ptr().add(offset);
+let ptr2 = slice.as_mut_ptr().add(index);

if (*ptr1).0 == (*ptr2).0 {
(*ptr1).1.plus_equals(&(*ptr2).1);
@@ -65,7 +65,7 @@ pub fn consolidate_slice<T: Ord, R: Semigroup>(slice: &mut [(T, R)]) -> usize {
if !(*ptr1).1.is_zero() {
offset += 1;
}
-let ptr1 = slice.as_mut_ptr().offset(offset as isize);
+let ptr1 = slice.as_mut_ptr().add(offset);
std::mem::swap(&mut *ptr1, &mut *ptr2);
}
}
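An editorial note on the `offset` to `add` changes in this file: they are behavior-preserving, since for a `usize` index `n`, `ptr.add(n)` is defined as `ptr.offset(n as isize)` with identical safety requirements. A standalone sketch of the equivalence:

```rust
// `add` and `offset` agree for usize indices, which is all the
// consolidation code relies on; `add` just avoids the lossy-looking cast.
fn main() {
    let mut pairs = [("a", 1i64), ("a", 2i64), ("b", 3i64)];
    let base = pairs.as_mut_ptr();
    let index: usize = 2;
    unsafe {
        assert_eq!(base.add(index), base.offset(index as isize));
        assert_eq!((*base.add(index)).0, "b"); // both point at the third element
    }
}
```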
@@ -118,8 +118,9 @@ pub fn consolidate_updates_slice<D: Ord, T: Ord, R: Semigroup>(slice: &mut [(D,
unsafe {

// LOOP INVARIANT: offset < index
-let ptr1 = slice.as_mut_ptr().offset(offset as isize);
-let ptr2 = slice.as_mut_ptr().offset(index as isize);
+debug_assert!(offset < index);
+let ptr1 = slice.as_mut_ptr().add(offset);
+let ptr2 = slice.as_mut_ptr().add(index);

if (*ptr1).0 == (*ptr2).0 && (*ptr1).1 == (*ptr2).1 {
(*ptr1).2.plus_equals(&(*ptr2).2);
@@ -128,7 +129,7 @@ pub fn consolidate_updates_slice<D: Ord, T: Ord, R: Semigroup>(slice: &mut [(D,
if !(*ptr1).2.is_zero() {
offset += 1;
}
-let ptr1 = slice.as_mut_ptr().offset(offset as isize);
+let ptr1 = slice.as_mut_ptr().add(offset);
std::mem::swap(&mut *ptr1, &mut *ptr2);
}

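For context, a hedged usage sketch of `consolidate_slice` as the surrounding code implies (it sorts by the data, accumulates weights, and returns the length of the consolidated prefix); the string keys and `isize` weights here are illustrative:

```rust
use differential_dataflow::consolidation::consolidate_slice;

fn main() {
    let mut updates = vec![("apple", 1isize), ("banana", 2), ("apple", -1), ("banana", 1)];
    // Sort and accumulate; entries whose weights sum to zero are dropped.
    let len = consolidate_slice(&mut updates[..]);
    updates.truncate(len);
    assert_eq!(updates, vec![("banana", 3)]);
}
```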
15 changes: 13 additions & 2 deletions src/input.rs
@@ -247,7 +247,7 @@ impl<T: Timestamp+Clone, D: Data, R: Semigroup> InputSession<T, D, R> {
/// Adds to the weight of an element in the collection.
pub fn update(&mut self, element: D, change: R) {
if self.buffer.len() == self.buffer.capacity() {
-if self.buffer.len() > 0 {
+if !self.buffer.is_empty() {
self.handle.send_batch(&mut self.buffer);
}
// TODO : This is a fairly arbitrary choice; should probably use `Context::default_size()` or such.
@@ -260,7 +260,7 @@
pub fn update_at(&mut self, element: D, time: T, change: R) {
assert!(self.time.less_equal(&time));
if self.buffer.len() == self.buffer.capacity() {
-if self.buffer.len() > 0 {
+if !self.buffer.is_empty() {
self.handle.send_batch(&mut self.buffer);
}
// TODO : This is a fairly arbitrary choice; should probably use `Context::default_size()` or such.
@@ -301,6 +301,17 @@ impl<T: Timestamp+Clone, D: Data, R: Semigroup> InputSession<T, D, R> {
pub fn close(self) { }
}

impl<T, D, R> Default for InputSession<T, D, R>
where
T: Timestamp + Clone,
D: Data,
R: Semigroup,
{
fn default() -> Self {
Self::new()
}
}

impl<T: Timestamp+Clone, D: Data, R: Semigroup> Drop for InputSession<T, D, R> {
fn drop(&mut self) {
self.flush();
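The new `Default` impl simply defers to `InputSession::new`. A hedged sketch of the pattern it enables, building a detached session and attaching it with `to_collection` (the timestamp, data, and weight types are chosen for illustration):

```rust
use differential_dataflow::input::InputSession;

fn main() {
    timely::execute_directly(|worker| {
        // `Default` now works where a session must exist before the dataflow.
        let mut input: InputSession<u64, String, isize> = Default::default();

        worker.dataflow(|scope| {
            input.to_collection(scope)
                 .inspect(|x| println!("observed: {:?}", x));
        });

        input.insert("hello".to_owned());
        input.advance_to(1);
        // Dropping the session also flushes, per the `Drop` impl above.
        input.flush();
    });
}
```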
109 changes: 77 additions & 32 deletions src/operators/consolidate.rs
@@ -6,18 +6,27 @@
//! underlying system can more clearly see that no work must be done in the later case, and we can
//! drop out of, e.g. iterative computations.

-use timely::dataflow::Scope;
-
-use ::{Collection, ExchangeData, Hashable};
-use ::difference::Semigroup;
-use operators::arrange::arrangement::Arrange;
+use crate::{
+    difference::Semigroup,
+    lattice::Lattice,
+    operators::arrange::{Arrange, Arranged, TraceAgent},
+    trace::{implementations::ord::OrdKeySpine, Batch, Cursor, Trace, TraceReader},
+    AsCollection, Collection, ExchangeData, Hashable,
+};
+use timely::dataflow::{channels::pact::Pipeline, operators::Operator, Scope};

/// An extension method for consolidating weighted streams.
-pub trait Consolidate<D: ExchangeData+Hashable> : Sized {
+pub trait Consolidate<S, D, R>: Sized
+where
+    S: Scope,
+    S::Timestamp: Lattice,
+    D: ExchangeData + Hashable,
+    R: Semigroup,
+{
/// Aggregates the weights of equal records into at most one record.
///
-    /// This method uses the type `D`'s `hashed()` method to partition the data. The data are
-    /// accumulated in place, each held back until their timestamp has completed.
+    /// This method uses the type `D`'s [`hashed()`](Hashable) method to partition the data.
+    /// The data is accumulated in place and held back until its timestamp has completed.
///
/// # Examples
///
@@ -40,30 +49,72 @@ pub trait Consolidate<D: ExchangeData+Hashable> : Sized {
/// });
/// }
/// ```
-    fn consolidate(&self) -> Self {
+    fn consolidate(&self) -> Collection<S, D, R> {
self.consolidate_named("Consolidate")
}

-    /// As `consolidate` but with the ability to name the operator.
-    fn consolidate_named(&self, name: &str) -> Self;
+    /// As `consolidate`, but with the ability to name the operator.
+    fn consolidate_named(&self, name: &str) -> Collection<S, D, R> {
+        self.consolidate_core::<OrdKeySpine<_, _, _>>(name)
+            .as_collection(|data, &()| data.clone())
+    }

/// A `consolidate` that returns the intermediate [arrangement](Arranged).
///
/// # Example
///
/// ```rust
/// use differential_dataflow::{
/// input::Input,
/// operators::{Consolidate, JoinCore},
/// };
///
/// timely::example(|scope| {
/// let (_, collection) = scope.new_collection_from(0..10u32);
///
/// let keys = collection
/// .flat_map(|x| (0..x))
/// .concat(&collection.negate())
/// .consolidate_arranged();
///
/// collection
/// .map(|x| (x, x * 2))
/// .join_core(&keys, |&key, &value, &()| Some((key, value)))
/// .inspect(|x| println!("{:?}", x));
/// });
/// ```
fn consolidate_arranged(&self) -> Arranged<S, TraceAgent<OrdKeySpine<D, S::Timestamp, R>>> {
self.consolidate_core::<OrdKeySpine<_, _, _>>("Consolidate")
}

/// Aggregates the weights of equal records into at most one record,
/// returning the intermediate [arrangement](Arranged).
fn consolidate_core<Tr>(&self, name: &str) -> Arranged<S, TraceAgent<Tr>>
where
Tr: Trace + TraceReader<Key = D, Val = (), Time = S::Timestamp, R = R> + 'static,
Tr::Batch: Batch<Tr::Key, Tr::Val, Tr::Time, Tr::R>,
Tr::Cursor: Cursor<Tr::Key, Tr::Val, Tr::Time, Tr::R>;
}

-impl<G: Scope, D, R> Consolidate<D> for Collection<G, D, R>
+impl<S, D, R> Consolidate<S, D, R> for Collection<S, D, R>
where
-    D: ExchangeData+Hashable,
-    R: ExchangeData+Semigroup,
-    G::Timestamp: ::lattice::Lattice+Ord,
-{
-    fn consolidate_named(&self, name: &str) -> Self {
-        use trace::implementations::ord::OrdKeySpine as DefaultKeyTrace;
-        self.map(|k| (k, ()))
-            .arrange_named::<DefaultKeyTrace<_,_,_>>(name)
-            .as_collection(|d: &D, _| d.clone())
+    S: Scope,
+    S::Timestamp: Lattice + Ord,
+    D: ExchangeData + Hashable,
+    R: ExchangeData + Semigroup,
+{
+    fn consolidate_core<Tr>(&self, name: &str) -> Arranged<S, TraceAgent<Tr>>
+    where
+        Tr: Trace + TraceReader<Key = D, Val = (), Time = S::Timestamp, R = R> + 'static,
+        Tr::Batch: Batch<Tr::Key, Tr::Val, Tr::Time, Tr::R>,
+        Tr::Cursor: Cursor<Tr::Key, Tr::Val, Tr::Time, Tr::R>,
+    {
+        self.map(|key| (key, ())).arrange_named(name)
}
}

/// An extension method for consolidating weighted streams.
-pub trait ConsolidateStream<D: ExchangeData+Hashable> {
+pub trait ConsolidateStream<D: ExchangeData + Hashable> {
/// Aggregates the weights of equal records.
///
/// Unlike `consolidate`, this method does not exchange data and does not
@@ -98,19 +149,13 @@ pub trait ConsolidateStream<D: ExchangeData+Hashable> {

impl<G: Scope, D, R> ConsolidateStream<D> for Collection<G, D, R>
where
-    D: ExchangeData+Hashable,
-    R: ExchangeData+Semigroup,
-    G::Timestamp: ::lattice::Lattice+Ord,
-{
+    D: ExchangeData + Hashable,
+    R: ExchangeData + Semigroup,
+    G::Timestamp: Lattice + Ord,
+{
fn consolidate_stream(&self) -> Self {

-use timely::dataflow::channels::pact::Pipeline;
-use timely::dataflow::operators::Operator;
-use collection::AsCollection;

self.inner
.unary(Pipeline, "ConsolidateStream", |_cap, _info| {

let mut vector = Vec::new();
move |input, output| {
input.for_each(|time, data| {
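Finally, a hedged sketch of the new `consolidate_core` entry point with an explicit spine, mirroring what `consolidate_named` now does internally; the operator name and data are arbitrary:

```rust
use differential_dataflow::input::Input;
use differential_dataflow::operators::Consolidate;
use differential_dataflow::trace::implementations::ord::OrdKeySpine;

fn main() {
    timely::example(|scope| {
        let (_, collection) = scope.new_collection_from(0..10u32);

        // Consolidate into an arrangement once; downstream consumers can share it.
        let arranged = collection
            .concat(&collection)
            .consolidate_core::<OrdKeySpine<_, _, _>>("ConsolidateDemo");

        // Read the arrangement back out as a collection of keys.
        arranged
            .as_collection(|key, &()| *key)
            .inspect(|x| println!("{:?}", x));
    });
}
```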